From 4a4472fdc098fb78f52a0848788faf46674a8423 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Feb 2020 10:43:17 +0100 Subject: of: clk: Make of_clk_get_parent_{count,name}() parameter const of_clk_get_parent_count() and of_clk_get_parent_name() never modify the device nodes passed, so they can be const. Signed-off-by: Geert Uytterhoeven Link: https://lkml.kernel.org/r/20200212094317.1150-1-geert+renesas@glider.be Signed-off-by: Stephen Boyd --- include/linux/of_clk.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_clk.h b/include/linux/of_clk.h index c86fcad23fc2..31b73a0da9db 100644 --- a/include/linux/of_clk.h +++ b/include/linux/of_clk.h @@ -11,17 +11,17 @@ struct of_device_id; #if defined(CONFIG_COMMON_CLK) && defined(CONFIG_OF) -unsigned int of_clk_get_parent_count(struct device_node *np); -const char *of_clk_get_parent_name(struct device_node *np, int index); +unsigned int of_clk_get_parent_count(const struct device_node *np); +const char *of_clk_get_parent_name(const struct device_node *np, int index); void of_clk_init(const struct of_device_id *matches); #else /* !CONFIG_COMMON_CLK || !CONFIG_OF */ -static inline unsigned int of_clk_get_parent_count(struct device_node *np) +static inline unsigned int of_clk_get_parent_count(const struct device_node *np) { return 0; } -static inline const char *of_clk_get_parent_name(struct device_node *np, +static inline const char *of_clk_get_parent_name(const struct device_node *np, int index) { return NULL; -- cgit v1.2.3 From dbb92f88648d6206bf22fcb764fb9fe2939d401a Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Wed, 22 Jan 2020 19:39:52 +0100 Subject: workqueue: Document (some) memory-ordering properties of {queue,schedule}_work() It's desirable to be able to rely on the following property: All stores preceding (in program order) a call to a successful queue_work() will be visible from the CPU which will execute the queued work by the time such work executes, e.g., { x is initially 0 } CPU0 CPU1 WRITE_ONCE(x, 1); [ "work" is being executed ] r0 = queue_work(wq, work); r1 = READ_ONCE(x); Forbids: r0 == true && r1 == 0 The current implementation of queue_work() provides such memory-ordering property: - In __queue_work(), the ->lock spinlock is acquired. - On the other side, in worker_thread(), this same ->lock is held when dequeueing work. So the locking ordering makes things work out. Add this property to the DocBook headers of {queue,schedule}_work(). Suggested-by: Paul E. McKenney Signed-off-by: Andrea Parri Acked-by: Paul E. McKenney Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4261d1c6e87b..e48554e6526c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -487,6 +487,19 @@ extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task); * * We queue the work to the CPU on which it was submitted, but if the CPU dies * it can be processed by another CPU. + * + * Memory-ordering properties: If it returns %true, guarantees that all stores + * preceding the call to queue_work() in the program order will be visible from + * the CPU which will execute @work by the time such work executes, e.g., + * + * { x is initially 0 } + * + * CPU0 CPU1 + * + * WRITE_ONCE(x, 1); [ @work is being executed ] + * r0 = queue_work(wq, work); r1 = READ_ONCE(x); + * + * Forbids: r0 == true && r1 == 0 */ static inline bool queue_work(struct workqueue_struct *wq, struct work_struct *work) @@ -546,6 +559,9 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work) * This puts a job in the kernel-global workqueue if it was not already * queued and leaves it in the same position on the kernel-global * workqueue otherwise. + * + * Shares the same memory-ordering properties of queue_work(), cf. the + * DocBook header of queue_work(). */ static inline bool schedule_work(struct work_struct *work) { -- cgit v1.2.3 From 9c974c77246460fa6a92c18554c3311c8c83c160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 24 Jan 2020 12:40:15 +0100 Subject: cgroup: Iterate tasks that did not finish do_exit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PF_EXITING is set earlier than actual removal from css_set when a task is exitting. This can confuse cgroup.procs readers who see no PF_EXITING tasks, however, rmdir is checking against css_set membership so it can transitionally fail with EBUSY. Fix this by listing tasks that weren't unlinked from css_set active lists. It may happen that other users of the task iterator (without CSS_TASK_ITER_PROCS) spot a PF_EXITING task before cgroup_exit(). This is equal to the state before commit c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") but it may be reviewed later. Reported-by: Suren Baghdasaryan Fixes: c03cd7738a83 ("cgroup: Include dying leaders with live threads in PROCS iterations") Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d7ddebd0cdec..e75d2191226b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -62,6 +62,7 @@ struct css_task_iter { struct list_head *mg_tasks_head; struct list_head *dying_tasks_head; + struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; -- cgit v1.2.3 From 249bc9744e165abe74ae326f43e9d70bad54c3b7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 1 Mar 2020 21:36:09 +0100 Subject: net: phy: avoid clearing PHY interrupts twice in irq handler On all PHY drivers that implement did_interrupt() reading the interrupt status bits clears them. This means we may loose an interrupt that is triggered between calling did_interrupt() and phy_clear_interrupt(). As part of the fix make it a requirement that did_interrupt() clears the interrupt. The Fixes tag refers to the first commit where the patch applies cleanly. Fixes: 49644e68f472 ("net: phy: add callback for custom interrupt handler to struct phy_driver") Reported-by: Michael Walle Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index c570e162e05e..22f5e763e894 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -557,6 +557,7 @@ struct phy_driver { /* * Checks if the PHY generated an interrupt. * For multi-PHY devices with shared PHY interrupt pin + * Set interrupt bits have to be cleared. */ int (*did_interrupt)(struct phy_device *phydev); -- cgit v1.2.3 From 02d715b4a8182f4887d82df82a7b83aced647760 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Sun, 23 Feb 2020 22:25:39 +0530 Subject: iommu/vt-d: Fix RCU list debugging warnings dmar_drhd_units is traversed using list_for_each_entry_rcu() outside of an RCU read side critical section but under the protection of dmar_global_lock. Hence add corresponding lockdep expression to silence the following false-positive warnings: [ 1.603975] ============================= [ 1.603976] WARNING: suspicious RCU usage [ 1.603977] 5.5.4-stable #17 Not tainted [ 1.603978] ----------------------------- [ 1.603980] drivers/iommu/intel-iommu.c:4769 RCU-list traversed in non-reader section!! [ 1.603869] ============================= [ 1.603870] WARNING: suspicious RCU usage [ 1.603872] 5.5.4-stable #17 Not tainted [ 1.603874] ----------------------------- [ 1.603875] drivers/iommu/dmar.c:293 RCU-list traversed in non-reader section!! Tested-by: Madhuparna Bhowmik Signed-off-by: Amol Grover Cc: stable@vger.kernel.org Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index f64ca27dc210..712be8bc6a7c 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -69,8 +69,9 @@ struct dmar_pci_notify_info { extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) +#define for_each_drhd_unit(drhd) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) #define for_each_active_drhd_unit(drhd) \ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ @@ -81,7 +82,8 @@ extern struct list_head dmar_drhd_units; if (i=drhd->iommu, drhd->ignored) {} else #define for_each_iommu(i, drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) \ if (i=drhd->iommu, 0) {} else static inline bool dmar_rcu_check(void) -- cgit v1.2.3 From 8019ad13ef7f64be44d4f892af9c840179009254 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Mar 2020 11:28:31 +0100 Subject: futex: Fix inode life-time issue As reported by Jann, ihold() does not in fact guarantee inode persistence. And instead of making it so, replace the usage of inode pointers with a per boot, machine wide, unique inode identifier. This sequence number is global, but shared (file backed) futexes are rare enough that this should not become a performance issue. Reported-by: Jann Horn Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) --- include/linux/fs.h | 1 + include/linux/futex.h | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3cd4fe6b845e..abedbffe2c9e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -698,6 +698,7 @@ struct inode { struct rcu_head i_rcu; }; atomic64_t i_version; + atomic64_t i_sequence; /* see futex */ atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; diff --git a/include/linux/futex.h b/include/linux/futex.h index 5cc3fed27d4c..b70df27d7e85 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -31,23 +31,26 @@ struct task_struct; union futex_key { struct { + u64 i_seq; unsigned long pgoff; - struct inode *inode; - int offset; + unsigned int offset; } shared; struct { + union { + struct mm_struct *mm; + u64 __tmp; + }; unsigned long address; - struct mm_struct *mm; - int offset; + unsigned int offset; } private; struct { + u64 ptr; unsigned long word; - void *ptr; - int offset; + unsigned int offset; } both; }; -#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } #ifdef CONFIG_FUTEX enum { -- cgit v1.2.3 From aeaa925bff844d225f259215a250d2811e436499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Thu, 5 Mar 2020 17:05:16 +0100 Subject: rhashtable: Document the right function parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rhashtable_lookup_get_insert_key doesn't have a parameter `data`. It does have a parameter `key`, however. Signed-off-by: Jonathan Neuschäfer Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index beb9a9da1699..70ebef866cc8 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -972,9 +972,9 @@ static inline int rhashtable_lookup_insert_key( /** * rhashtable_lookup_get_insert_key - lookup and insert object into hash table * @ht: hash table + * @key: key * @obj: pointer to hash head inside object * @params: hash table parameters - * @data: pointer to element data already in hashes * * Just like rhashtable_lookup_insert_key(), but this function returns the * object if it exists, NULL if it does not and the insertion was successful, -- cgit v1.2.3 From 83f73c5bb7b9a9135173f0ba2b1aa00c06664ff9 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Thu, 5 Mar 2020 15:33:12 +0300 Subject: inet_diag: return classid for all socket types In commit 1ec17dbd90f8 ("inet_diag: fix reporting cgroup classid and fallback to priority") croup classid reporting was fixed. But this works only for TCP sockets because for other socket types icsk parameter can be NULL and classid code path is skipped. This change moves classid handling to inet_diag_msg_attrs_fill() function. Also inet_diag_msg_attrs_size() helper was added and addends in nlmsg_new() were reordered to save order from inet_sk_diag_fill(). Fixes: 1ec17dbd90f8 ("inet_diag: fix reporting cgroup classid and fallback to priority") Signed-off-by: Dmitry Yakunin Reviewed-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 39faaaf843e1..c91cf2dee12a 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -2,15 +2,10 @@ #ifndef _INET_DIAG_H_ #define _INET_DIAG_H_ 1 +#include #include -struct net; -struct sock; struct inet_hashinfo; -struct nlattr; -struct nlmsghdr; -struct sk_buff; -struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, @@ -62,6 +57,17 @@ int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); +static inline size_t inet_diag_msg_attrs_size(void) +{ + return nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ +#if IS_ENABLED(CONFIG_IPV6) + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(1) /* INET_DIAG_SKV6ONLY */ +#endif + + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4); /* INET_DIAG_CLASS_ID */ +} int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, bool net_admin); -- cgit v1.2.3 From f5152416528c2295f35dd9c9bd4fb27c4032413d Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 5 Mar 2020 15:15:02 -0500 Subject: iommu/vt-d: Silence RCU-list debugging warnings Similar to the commit 02d715b4a818 ("iommu/vt-d: Fix RCU list debugging warnings"), there are several other places that call list_for_each_entry_rcu() outside of an RCU read side critical section but with dmar_global_lock held. Silence those false positives as well. drivers/iommu/intel-iommu.c:4288 RCU-list traversed in non-reader section!! 1 lock held by swapper/0/1: #0: ffffffff935892c8 (dmar_global_lock){+.+.}, at: intel_iommu_init+0x1ad/0xb97 drivers/iommu/dmar.c:366 RCU-list traversed in non-reader section!! 1 lock held by swapper/0/1: #0: ffffffff935892c8 (dmar_global_lock){+.+.}, at: intel_iommu_init+0x125/0xb97 drivers/iommu/intel-iommu.c:5057 RCU-list traversed in non-reader section!! 1 lock held by swapper/0/1: #0: ffffffffa71892c8 (dmar_global_lock){++++}, at: intel_iommu_init+0x61a/0xb13 Signed-off-by: Qian Cai Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 712be8bc6a7c..d7bf029df737 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -74,11 +74,13 @@ extern struct list_head dmar_drhd_units; dmar_rcu_check()) #define for_each_active_drhd_unit(drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) \ if (drhd->ignored) {} else #define for_each_active_iommu(i, drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) \ if (i=drhd->iommu, drhd->ignored) {} else #define for_each_iommu(i, drhd) \ -- cgit v1.2.3 From 1292e3efb149ee21d8d33d725eeed4e6b1ade963 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 10 Mar 2020 12:49:43 +0100 Subject: mmc: core: Allow host controllers to require R1B for CMD6 It has turned out that some host controllers can't use R1B for CMD6 and other commands that have R1B associated with them. Therefore invent a new host cap, MMC_CAP_NEED_RSP_BUSY to let them specify this. In __mmc_switch(), let's check the flag and use it to prevent R1B responses from being converted into R1. Note that, this also means that the host are on its own, when it comes to manage the busy timeout. Suggested-by: Sowjanya Komatineni Cc: Tested-by: Anders Roxell Tested-by: Sowjanya Komatineni Tested-by: Faiz Abbas Tested-By: Peter Geis Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ba703384bea0..4c5eb3aa8e72 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -333,6 +333,7 @@ struct mmc_host { MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | \ MMC_CAP_UHS_DDR50) #define MMC_CAP_SYNC_RUNTIME_PM (1 << 21) /* Synced runtime PM suspends. */ +#define MMC_CAP_NEED_RSP_BUSY (1 << 22) /* Commands with R1B can't use R1. */ #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ -- cgit v1.2.3 From e3a36eb6dfaeea8175c05d5915dcf0b939be6dab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Mar 2020 17:07:10 +0100 Subject: driver code: clarify and fix platform device DMA mask allocation This does three inter-related things to clarify the usage of the platform device dma_mask field. In the process, fix the bug introduced by cdfee5623290 ("driver core: initialize a default DMA mask for platform device") that caused Artem Tashkinov's laptop to not boot with newer Fedora kernels. This does: - First off, rename the field to "platform_dma_mask" to make it greppable. We have way too many different random fields called "dma_mask" in various data structures, where some of them are actual masks, and some of them are just pointers to the mask. And the structures all have pointers to each other, or embed each other inside themselves, and "pdev" sometimes means "platform device" and sometimes it means "PCI device". So to make it clear in the code when you actually use this new field, give it a unique name (it really should be something even more unique like "platform_device_dma_mask", since it's per platform device, not per platform, but that gets old really fast, and this is unique enough in context). To further clarify when the field gets used, initialize it when we actually start using it with the default value. - Then, use this field instead of the random one-off allocation in platform_device_register_full() that is now unnecessary since we now already have a perfectly fine allocation for it in the platform device structure. - The above then allows us to fix the actual bug, where the error path of platform_device_register_full() would unconditionally free the platform device DMA allocation with 'kfree()'. That kfree() was dont regardless of whether the allocation had been done earlier with the (now removed) kmalloc, or whether setup_pdev_dma_masks() had already been used and the dma_mask pointer pointed to the mask that was part of the platform device. It seems most people never triggered the error path, or only triggered it from a call chain that set an explicit pdevinfo->dma_mask value (and thus caused the unnecessary allocation that was "cleaned up" in the error path) before calling platform_device_register_full(). Robin Murphy points out that in Artem's case the wdat_wdt driver failed in platform_device_add(), and that was the one that had called platform_device_register_full() with pdevinfo.dma_mask = 0, and would have caused that kfree() of pdev.dma_mask corrupting the heap. A later unrelated kmalloc() then oopsed due to the heap corruption. Fixes: cdfee5623290 ("driver core: initialize a default DMA mask for platform device") Reported-bisected-and-tested-by: Artem S. Tashkinov Reviewed-by: Robin Murphy Cc: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/linux/platform_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 276a03c24691..041bfa412aa0 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -24,7 +24,7 @@ struct platform_device { int id; bool id_auto; struct device dev; - u64 dma_mask; + u64 platform_dma_mask; u32 num_resources; struct resource *resource; -- cgit v1.2.3 From b53df2e7442c73a932fb74228147fb946e531585 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 21 Feb 2020 10:37:08 +0900 Subject: block: Fix partition support for host aware zoned block devices Commit b72053072c0b ("block: allow partitions on host aware zone devices") introduced the helper function disk_has_partitions() to check if a given disk has valid partitions. However, since this function result directly depends on the disk partition table length rather than the actual existence of valid partitions in the table, it returns true even after all partitions are removed from the disk. For host aware zoned block devices, this results in zone management support to be kept disabled even after removing all partitions. Fix this by changing disk_has_partitions() to walk through the partition table entries and return true if and only if a valid non-zero size partition is found. Fixes: b72053072c0b ("block: allow partitions on host aware zone devices") Cc: stable@vger.kernel.org # 5.5 Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fbe58538ad6..07dc91835b98 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -245,18 +245,6 @@ static inline bool disk_part_scan_enabled(struct gendisk *disk) !(disk->flags & GENHD_FL_NO_PART_SCAN); } -static inline bool disk_has_partitions(struct gendisk *disk) -{ - bool ret = false; - - rcu_read_lock(); - if (rcu_dereference(disk->part_tbl)->len > 1) - ret = true; - rcu_read_unlock(); - - return ret; -} - static inline dev_t disk_devt(struct gendisk *disk) { return MKDEV(disk->major, disk->first_minor); @@ -298,6 +286,7 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter); extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); +bool disk_has_partitions(struct gendisk *disk); /* * Macros to operate on percpu disk statistics: -- cgit v1.2.3 From 611d779af7cad2b87487ff58e4931a90c20b113c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 12 Mar 2020 22:25:20 +0100 Subject: net: phy: fix MDIO bus PM PHY resuming So far we have the unfortunate situation that mdio_bus_phy_may_suspend() is called in suspend AND resume path, assuming that function result is the same. After the original change this is no longer the case, resulting in broken resume as reported by Geert. To fix this call mdio_bus_phy_may_suspend() in the suspend path only, and let the phy_device store the info whether it was suspended by MDIO bus PM. Fixes: 503ba7c69610 ("net: phy: Avoid multiple suspends") Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 22f5e763e894..452e8ba8665f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -357,6 +357,7 @@ struct macsec_ops; * is_gigabit_capable: Set to true if PHY supports 1000Mbps * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. + * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * loopback_enabled: Set true if this phy has been loopbacked successfully. * state: state of the PHY for management purposes @@ -396,6 +397,7 @@ struct phy_device { unsigned is_gigabit_capable:1; unsigned has_fixups:1; unsigned suspended:1; + unsigned suspended_by_mdio_bus:1; unsigned sysfs_links:1; unsigned loopback_enabled:1; -- cgit v1.2.3 From ba3b01d7a6f4ab9f8a0557044c9a7678f64ae070 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Mon, 9 Mar 2020 13:09:46 -0700 Subject: iommu/vt-d: Fix debugfs register reads Commit 6825d3ea6cde ("iommu/vt-d: Add debugfs support to show register contents") dumps the register contents for all IOMMU devices. Currently, a 64 bit read(dmar_readq) is done for all the IOMMU registers, even though some of the registers are 32 bits, which is incorrect. Use the correct read function variant (dmar_readl/dmar_readq) while reading the contents of 32/64 bit registers respectively. Signed-off-by: Megha Dey Link: https://lore.kernel.org/r/1583784587-26126-2-git-send-email-megha.dey@linux.intel.com Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4a16b39ae353..980234ae0312 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -123,6 +123,8 @@ #define dmar_readq(a) readq(a) #define dmar_writeq(a,v) writeq(v,a) +#define dmar_readl(a) readl(a) +#define dmar_writel(a, v) writel(v, a) #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) #define DMAR_VER_MINOR(v) ((v) & 0x0f) -- cgit v1.2.3 From f1388ec4a144f40348321a0915c5535d623e165c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sun, 15 Mar 2020 18:17:48 +0100 Subject: netlink: add nl_set_extack_cookie_u32() Similar to existing nl_set_extack_cookie_u64(), add new helper nl_set_extack_cookie_u32() which sets extack cookie to a u32 value. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/netlink.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 205fa7b1f07a..4090524c3462 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -119,6 +119,15 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, extack->cookie_len = sizeof(__cookie); } +static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack, + u32 cookie) +{ + u32 __cookie = cookie; + + memcpy(extack->cookie, &__cookie, sizeof(__cookie)); + extack->cookie_len = sizeof(__cookie); +} + void netlink_kernel_release(struct sock *sk); int __netlink_change_ngroups(struct sock *sk, unsigned int groups); int netlink_change_ngroups(struct sock *sk, unsigned int groups); -- cgit v1.2.3 From 8e7ae2518f5265f0ef09d561748098fde5a87ccd Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 13 Mar 2020 18:02:09 -0700 Subject: bpf: Sanitize the bpf_struct_ops tcp-cc name The bpf_struct_ops tcp-cc name should be sanitized in order to avoid problematic chars (e.g. whitespaces). This patch reuses the bpf_obj_name_cpy() for accepting the same set of characters in order to keep a consistent bpf programming experience. A "size" param is added. Also, the strlen is returned on success so that the caller (like the bpf_tcp_ca here) can error out on empty name. The existing callers of the bpf_obj_name_cpy() only need to change the testing statement to "if (err < 0)". For all these existing callers, the err will be overwritten later, so no extra change is needed for the new strlen return value. v3: - reverse xmas tree style v2: - Save the orig_src to avoid "end - size" (Andrii) Fixes: 0baf26b0fcd7 ("bpf: tcp: Support tcp_congestion_ops in bpf") Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200314010209.1131542-1-kafai@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 49b1a70e12c8..212991f6f2a5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -160,6 +160,7 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); +int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; struct bpf_offloaded_map; -- cgit v1.2.3 From 4022e7af86be2dd62975dedb6b7ea551d108695e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Mar 2020 19:23:18 -0600 Subject: io_uring: make sure openat/openat2 honor rlimit nofile Dmitry reports that a test case shows that io_uring isn't honoring a modified rlimit nofile setting. get_unused_fd_flags() checks the task signal->rlimi[] for the limits. As this isn't easily inheritable, provide a __get_unused_fd_flags() that takes the value instead. Then we can grab it when the request is prepared (from the original task), and pass that in when we do the async part part of the open. Reported-by: Dmitry Kadashev Tested-by: Dmitry Kadashev Acked-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index c6c7b24ea9f7..142d102f285e 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -85,6 +85,7 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); +extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); -- cgit v1.2.3 From 09952e3e7826119ddd4357c453d54bcc7ef25156 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Mar 2020 20:16:56 -0600 Subject: io_uring: make sure accept honor rlimit nofile Just like commit 4022e7af86be, this fixes the fact that IORING_OP_ACCEPT ends up using get_unused_fd_flags(), which checks current->signal->rlim[] for limits. Add an extra argument to __sys_accept4_file() that allows us to pass in the proper nofile limit, and grab it at request prep time. Acked-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/socket.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 2d2313403101..15f3412d481e 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -401,7 +401,8 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len, int addr_len); extern int __sys_accept4_file(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags); + int __user *upeer_addrlen, int flags, + unsigned long nofile); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); -- cgit v1.2.3 From d72520ad004a8ce18a6ba6cde317f0081b27365a Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 21 Mar 2020 18:22:17 -0700 Subject: page-flags: fix a crash at SetPageError(THP_SWAP) Commit bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out") supported writing THP to a swap device but forgot to upgrade an older commit df8c94d13c7e ("page-flags: define behavior of FS/IO-related flags on compound pages") which could trigger a crash during THP swapping out with DEBUG_VM_PGFLAGS=y, kernel BUG at include/linux/page-flags.h:317! page dumped because: VM_BUG_ON_PAGE(1 && PageCompound(page)) page:fffff3b2ec3a8000 refcount:512 mapcount:0 mapping:000000009eb0338c index:0x7f6e58200 head:fffff3b2ec3a8000 order:9 compound_mapcount:0 compound_pincount:0 anon flags: 0x45fffe0000d8454(uptodate|lru|workingset|owner_priv_1|writeback|head|reclaim|swapbacked) end_swap_bio_write() SetPageError(page) VM_BUG_ON_PAGE(1 && PageCompound(page)) bio_endio+0x297/0x560 dec_pending+0x218/0x430 [dm_mod] clone_endio+0xe4/0x2c0 [dm_mod] bio_endio+0x297/0x560 blk_update_request+0x201/0x920 scsi_end_request+0x6b/0x4b0 scsi_io_completion+0x509/0x7e0 scsi_finish_command+0x1ed/0x2a0 scsi_softirq_done+0x1c9/0x1d0 __blk_mqnterrupt+0xf/0x20 Fix by checking PF_NO_TAIL in those places instead. Fixes: bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out") Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: "Huang, Ying" Acked-by: Rafael Aquini Cc: Link: http://lkml.kernel.org/r/20200310235846.1319-1-cai@lca.pw Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1bf83c8fcaa7..77de28bfefb0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -311,7 +311,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } __PAGEFLAG(Locked, locked, PF_NO_TAIL) PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) -PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND) +PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) __SETPAGEFLAG(Referenced, referenced, PF_HEAD) -- cgit v1.2.3 From 763802b53a427ed3cbd419dbba255c414fdd9e7c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sat, 21 Mar 2020 18:22:41 -0700 Subject: x86/mm: split vmalloc_sync_all() Commit 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in the vunmap() code-path. While this change was necessary to maintain correctness on x86-32-pae kernels, it also adds additional cycles for architectures that don't need it. Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported severe performance regressions in micro-benchmarks because it now also calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But the vmalloc_sync_all() implementation on x86-64 is only needed for newly created mappings. To avoid the unnecessary work on x86-64 and to gain the performance back, split up vmalloc_sync_all() into two functions: * vmalloc_sync_mappings(), and * vmalloc_sync_unmappings() Most call-sites to vmalloc_sync_all() only care about new mappings being synchronized. The only exception is the new call-site added in the above mentioned commit. Shile Zhang directed us to a report of an 80% regression in reaim throughput. Fixes: 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") Reported-by: kernel test robot Reported-by: Shile Zhang Signed-off-by: Joerg Roedel Signed-off-by: Andrew Morton Tested-by: Borislav Petkov Acked-by: Rafael J. Wysocki [GHES] Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Link: http://lkml.kernel.org/r/20191009124418.8286-1-joro@8bytes.org Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/4D3JPPHBNOSPFK2KEPC6KGKS6J25AIDB/ Link: http://lkml.kernel.org/r/20191113095530.228959-1-shile.zhang@linux.alibaba.com Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ec3813236699..0507a162ccd0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -141,8 +141,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); -void vmalloc_sync_all(void); - +void vmalloc_sync_mappings(void); +void vmalloc_sync_unmappings(void); + /* * Lowlevel-APIs (not for driver use!) */ -- cgit v1.2.3 From 692b65c84f0bb0ca70c11e62c865cbaf7e8e3afd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2020 15:54:22 +0100 Subject: i2c: fix a doc warning Don't let non-letters inside a literal block without escaping it, as the toolchain would mis-interpret it: ./include/linux/i2c.h:518: WARNING: Inline strong start-string without end-string. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f834687989f7..f6b942150631 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -506,7 +506,7 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info, * @smbus_xfer_atomic: same as @smbus_xfer. Yet, only using atomic context * so e.g. PMICs can be accessed very late before shutdown. Optional. * @functionality: Return the flags that this algorithm/adapter pair supports - * from the I2C_FUNC_* flags. + * from the ``I2C_FUNC_*`` flags. * @reg_slave: Register given client to I2C slave mode of this adapter * @unreg_slave: Unregister given client from I2C slave mode of this adapter * @@ -515,7 +515,7 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info, * be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584 * to name two of the most common. * - * The return codes from the @master_xfer{_atomic} fields should indicate the + * The return codes from the ``master_xfer{_atomic}`` fields should indicate the * type of error code that occurred during the transfer, as documented in the * Kernel Documentation file Documentation/i2c/fault-codes.rst. */ -- cgit v1.2.3 From 7614209736fbc4927584d4387faade4f31444fce Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 9 Mar 2020 12:03:14 +0100 Subject: ceph: check POOL_FLAG_FULL/NEARFULL in addition to OSDMAP_FULL/NEARFULL CEPH_OSDMAP_FULL/NEARFULL aren't set since mimic, so we need to consult per-pool flags as well. Unfortunately the backwards compatibility here is lacking: - the change that deprecated OSDMAP_FULL/NEARFULL went into mimic, but was guarded by require_osd_release >= RELEASE_LUMINOUS - it was subsequently backported to luminous in v12.2.2, but that makes no difference to clients that only check OSDMAP_FULL/NEARFULL because require_osd_release is not client-facing -- it is for OSDs Since all kernels are affected, the best we can do here is just start checking both map flags and pool flags and send that to stable. These checks are best effort, so take osdc->lock and look up pool flags just once. Remove the FIXME, since filesystem quotas are checked above and RADOS quotas are reflected in POOL_FLAG_FULL: when the pool reaches its quota, both POOL_FLAG_FULL and POOL_FLAG_FULL_QUOTA are set. Cc: stable@vger.kernel.org Reported-by: Yanhu Cao Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Acked-by: Sage Weil --- include/linux/ceph/osdmap.h | 4 ++++ include/linux/ceph/rados.h | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e081b56f1c1d..5e601975745f 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -37,6 +37,9 @@ int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs); #define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id together */ #define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */ +#define CEPH_POOL_FLAG_FULL_QUOTA (1ULL << 10) /* pool ran out of quota, + will set FULL too */ +#define CEPH_POOL_FLAG_NEARFULL (1ULL << 11) /* pool is nearfull */ struct ceph_pg_pool_info { struct rb_node node; @@ -304,5 +307,6 @@ extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); +u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id); #endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 59bdfd470100..88ed3c5c04c5 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -143,8 +143,10 @@ extern const char *ceph_osd_state_name(int s); /* * osd map flag bits */ -#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ -#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ +#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC), + not set since ~luminous */ +#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC), + not set since ~luminous */ #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ -- cgit v1.2.3 From e886274031200bb60965c1b9c49b7acda56a93bd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 10 Mar 2020 16:19:01 +0100 Subject: libceph: fix alloc_msg_with_page_vector() memory leaks Make it so that CEPH_MSG_DATA_PAGES data item can own pages, fixing a bunch of memory leaks for a page vector allocated in alloc_msg_with_page_vector(). Currently, only watch-notify messages trigger this allocation, and normally the page vector is freed either in handle_watch_notify() or by the caller of ceph_osdc_notify(). But if the message is freed before that (e.g. if the session faults while reading in the message or if the notify is stale), we leak the page vector. This was supposed to be fixed by switching to a message-owned pagelist, but that never happened. Fixes: 1907920324f1 ("libceph: support for sending notifies") Reported-by: Roman Penyaev Signed-off-by: Ilya Dryomov Reviewed-by: Roman Penyaev --- include/linux/ceph/messenger.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index c4458dc6a757..76371aaae2d1 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -175,9 +175,10 @@ struct ceph_msg_data { #endif /* CONFIG_BLOCK */ struct ceph_bvec_iter bvec_pos; struct { - struct page **pages; /* NOT OWNER. */ + struct page **pages; size_t length; /* total # bytes */ unsigned int alignment; /* first page */ + bool own_pages; }; struct ceph_pagelist *pagelist; }; @@ -356,8 +357,8 @@ extern void ceph_con_keepalive(struct ceph_connection *con); extern bool ceph_con_keepalive_expired(struct ceph_connection *con, unsigned long interval); -extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, - size_t length, size_t alignment); +void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, + size_t length, size_t alignment, bool own_pages); extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg, struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK -- cgit v1.2.3 From 55b474c41e586a5c21c7ab81ff474eb6bacb4322 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sat, 21 Mar 2020 00:46:50 +0100 Subject: netlink: check for null extack in cookie helpers Unlike NL_SET_ERR_* macros, nl_set_extack_cookie_u64() and nl_set_extack_cookie_u32() helpers do not check extack argument for null and neither do their callers, as syzbot recently discovered for ethnl_parse_header(). Instead of fixing the callers and leaving the trap in place, add check of null extack to both helpers to make them consistent with NL_SET_ERR_* macros. v2: drop incorrect second Fixes tag Fixes: 2363d73a2f3e ("ethtool: reject unrecognized request flags") Reported-by: syzbot+258a9089477493cea67b@syzkaller.appspotmail.com Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/netlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 4090524c3462..60739d0cbf93 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -115,6 +115,8 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, { u64 __cookie = cookie; + if (!extack) + return; memcpy(extack->cookie, &__cookie, sizeof(__cookie)); extack->cookie_len = sizeof(__cookie); } @@ -124,6 +126,8 @@ static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack, { u32 __cookie = cookie; + if (!extack) + return; memcpy(extack->cookie, &__cookie, sizeof(__cookie)); extack->cookie_len = sizeof(__cookie); } -- cgit v1.2.3 From e80f40cbe4dd51371818e967d40da8fe305db5e4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 24 Mar 2020 11:45:34 +0200 Subject: net: dsa: tag_8021q: replace dsa_8021q_remove_header with __skb_vlan_pop Not only did this wheel did not need reinventing, but there is also an issue with it: It doesn't remove the VLAN header in a way that preserves the L2 payload checksum when that is being provided by the DSA master hw. It should recalculate checksum both for the push, before removing the header, and for the pull afterwards. But the current implementation is quite dizzying, with pulls followed immediately afterwards by pushes, the memmove is done before the push, etc. This makes a DSA master with RX checksumming offload to print stack traces with the infamous 'hw csum failure' message. So remove the dsa_8021q_remove_header function and replace it with something that actually works with inet checksumming. Fixes: d461933638ae ("net: dsa: tag_8021q: Create helper function for removing VLAN header") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 0aa803c451a3..c620d9139c28 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -28,8 +28,6 @@ int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); -struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb); - #else int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, @@ -64,11 +62,6 @@ int dsa_8021q_rx_source_port(u16 vid) return 0; } -struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb) -{ - return NULL; -} - #endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */ #endif /* _NET_DSA_8021Q_H */ -- cgit v1.2.3 From 575a97acc3b7446094b0dcaf6285c7c6934c2477 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 25 Mar 2020 09:09:19 +0100 Subject: ieee80211: fix HE SPR size calculation The he_sr_control field is just a u8, so le32_to_cpu() shouldn't be applied to it; this was evidently copied from ieee80211_he_oper_size(). Fix it, and also adjust the type of the local variable. Fixes: ef11a931bd1c ("mac80211: HE: add Spatial Reuse element parsing support") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200325090918.dfe483b49e06.Ia53622f23b2610a2ae6ea39a199866196fe946c1@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7d3f2ced92d1..73c66a3a33ae 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2102,14 +2102,14 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) { struct ieee80211_he_spr *he_spr = (void *)he_spr_ie; u8 spr_len = sizeof(struct ieee80211_he_spr); - u32 he_spr_params; + u8 he_spr_params; /* Make sure the input is not NULL */ if (!he_spr_ie) return 0; /* Calc required length */ - he_spr_params = le32_to_cpu(he_spr->he_sr_control); + he_spr_params = he_spr->he_sr_control; if (he_spr_params & IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT) spr_len++; if (he_spr_params & IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) -- cgit v1.2.3 From 2c64605b590edadb3fb46d1ec6badb49e940b479 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 Mar 2020 13:47:18 +0100 Subject: net: Fix CONFIG_NET_CLS_ACT=n and CONFIG_NFT_FWD_NETDEV={y, m} build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/netfilter/nft_fwd_netdev.c: In function ‘nft_fwd_netdev_eval’: net/netfilter/nft_fwd_netdev.c:32:10: error: ‘struct sk_buff’ has no member named ‘tc_redirected’ pkt->skb->tc_redirected = 1; ^~ net/netfilter/nft_fwd_netdev.c:33:10: error: ‘struct sk_buff’ has no member named ‘tc_from_ingress’ pkt->skb->tc_from_ingress = 1; ^~ To avoid a direct dependency with tc actions from netfilter, wrap the redirect bits around CONFIG_NET_REDIRECT and move helpers to include/linux/skbuff.h. Turn on this toggle from the ifb driver, the only existing client of these bits in the tree. This patch adds skb_set_redirected() that sets on the redirected bit on the skbuff, it specifies if the packet was redirect from ingress and resets the timestamp (timestamp reset was originally missing in the netfilter bugfix). Fixes: bcfabee1afd99484 ("netfilter: nft_fwd_netdev: allow to redirect to ifb via ingress") Reported-by: noreply@ellerman.id.au Reported-by: Geert Uytterhoeven Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/skbuff.h | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5b50278c4bc8..e59620234415 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -645,8 +645,8 @@ typedef unsigned char *sk_buff_data_t; * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress - * @tc_redirected: packet was redirected by a tc action - * @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect + * @redirected: packet was redirected by packet classifier + * @from_ingress: packet was redirected from the ingress path * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -848,8 +848,10 @@ struct sk_buff { #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; __u8 tc_at_ingress:1; - __u8 tc_redirected:1; - __u8 tc_from_ingress:1; +#endif +#ifdef CONFIG_NET_REDIRECT + __u8 redirected:1; + __u8 from_ingress:1; #endif #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; @@ -4579,5 +4581,31 @@ static inline __wsum lco_csum(struct sk_buff *skb) return csum_partial(l4_hdr, csum_start - l4_hdr, partial); } +static inline bool skb_is_redirected(const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_REDIRECT + return skb->redirected; +#else + return false; +#endif +} + +static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) +{ +#ifdef CONFIG_NET_REDIRECT + skb->redirected = 1; + skb->from_ingress = from_ingress; + if (skb->from_ingress) + skb->tstamp = 0; +#endif +} + +static inline void skb_reset_redirect(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_REDIRECT + skb->redirected = 0; +#endif +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 4e934301203648b1705360c1c52d4ce2e2acec5e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 24 Mar 2020 19:22:57 -0700 Subject: clk: Pass correct arguments to __clk_hw_register_gate() I copy/pasted these macros and forgot to update the argument names and where they're passed to. Fix it so that these macros make sense. Reported-by: Maxime Ripard Fixes: 194efb6e2667 ("clk: gate: Add support for specifying parents via DT/pointers") Signed-off-by: Stephen Boyd Link: https://lkml.kernel.org/r/20200325022257.148244-1-sboyd@kernel.org Tested-by: Maxime Ripard --- include/linux/clk-provider.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 952ac035bab9..bd1ee9039558 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -522,9 +522,9 @@ struct clk *clk_register_gate(struct device *dev, const char *name, * @clk_gate_flags: gate-specific flags for this clock * @lock: shared register lock for this clock */ -#define clk_hw_register_gate_parent_hw(dev, name, parent_name, flags, reg, \ +#define clk_hw_register_gate_parent_hw(dev, name, parent_hw, flags, reg, \ bit_idx, clk_gate_flags, lock) \ - __clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \ + __clk_hw_register_gate((dev), NULL, (name), NULL, (parent_hw), \ NULL, (flags), (reg), (bit_idx), \ (clk_gate_flags), (lock)) /** @@ -539,10 +539,10 @@ struct clk *clk_register_gate(struct device *dev, const char *name, * @clk_gate_flags: gate-specific flags for this clock * @lock: shared register lock for this clock */ -#define clk_hw_register_gate_parent_data(dev, name, parent_name, flags, reg, \ +#define clk_hw_register_gate_parent_data(dev, name, parent_data, flags, reg, \ bit_idx, clk_gate_flags, lock) \ - __clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \ - NULL, (flags), (reg), (bit_idx), \ + __clk_hw_register_gate((dev), NULL, (name), NULL, NULL, (parent_data), \ + (flags), (reg), (bit_idx), \ (clk_gate_flags), (lock)) void clk_unregister_gate(struct clk *clk); void clk_hw_unregister_gate(struct clk_hw *hw); -- cgit v1.2.3 From 8380ce479010f2f779587b462a9b4681934297c3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 28 Mar 2020 19:17:25 -0700 Subject: mm: fork: fix kernel_stack memcg stats for various stack implementations Depending on CONFIG_VMAP_STACK and the THREAD_SIZE / PAGE_SIZE ratio the space for task stacks can be allocated using __vmalloc_node_range(), alloc_pages_node() and kmem_cache_alloc_node(). In the first and the second cases page->mem_cgroup pointer is set, but in the third it's not: memcg membership of a slab page should be determined using the memcg_from_slab_page() function, which looks at page->slab_cache->memcg_params.memcg . In this case, using mod_memcg_page_state() (as in account_kernel_stack()) is incorrect: page->mem_cgroup pointer is NULL even for pages charged to a non-root memory cgroup. It can lead to kernel_stack per-memcg counters permanently showing 0 on some architectures (depending on the configuration). In order to fix it, let's introduce a mod_memcg_obj_state() helper, which takes a pointer to a kernel object as a first argument, uses mem_cgroup_from_obj() to get a RCU-protected memcg pointer and calls mod_memcg_state(). It allows to handle all possible configurations (CONFIG_VMAP_STACK and various THREAD_SIZE/PAGE_SIZE values) without spilling any memcg/kmem specifics into fork.c . Note: This is a special version of the patch created for stable backports. It contains code from the following two patches: - mm: memcg/slab: introduce mem_cgroup_from_obj() - mm: fork: fix kernel_stack memcg stats for various stack implementations [guro@fb.com: introduce mem_cgroup_from_obj()] Link: http://lkml.kernel.org/r/20200324004221.GA36662@carbon.dhcp.thefacebook.com Fixes: 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages") Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Bharata B Rao Cc: Shakeel Butt Cc: Link: http://lkml.kernel.org/r/20200303233550.251375-1-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a7a0a1a5c8d5..e9ba01336d4e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -695,6 +695,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); +void mod_memcg_obj_state(void *p, int idx, int val); static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) @@ -1123,6 +1124,10 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, __mod_node_page_state(page_pgdat(page), idx, val); } +static inline void mod_memcg_obj_state(void *p, int idx, int val) +{ +} + static inline unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, @@ -1427,6 +1432,8 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } +struct mem_cgroup *mem_cgroup_from_obj(void *p); + #else static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) @@ -1468,6 +1475,11 @@ static inline void memcg_put_cache_ids(void) { } +static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) +{ + return NULL; +} + #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3