From 92ee76d4a926843d0e135aa0c2d9f57504c6876c Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 6 Dec 2019 15:31:03 +0100 Subject: trivial: mac80211: fix indentation Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20191206143103.3645-1-john@phrozen.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7d3f2ced92d1..e172d0c7bf74 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2047,7 +2047,7 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) #define IEEE80211_HE_OPERATION_ER_SU_DISABLE 0x00010000 #define IEEE80211_HE_OPERATION_6GHZ_OP_INFO 0x00020000 #define IEEE80211_HE_OPERATION_BSS_COLOR_MASK 0x3f000000 -#define IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET 24 +#define IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET 24 #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x80000000 -- cgit v1.2.3 From 30b2f0be23fb40e58d0ad2caf8702c2a44cda2e1 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 13 Jan 2020 21:59:40 -0800 Subject: mac80211: add ieee80211_is_any_nullfunc() commit 08a5bdde3812 ("mac80211: consider QoS Null frames for STA_NULLFUNC_ACKED") Fixed a bug where we failed to take into account a nullfunc frame can be either non-QoS or QoS. It turns out there is at least one more bug in ieee80211_sta_tx_notify(), introduced in commit 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing"), where we forgot to check for the QoS variant and so assumed the QoS nullfunc frame never went out Fix this by adding a helper ieee80211_is_any_nullfunc() which consolidates the check for non-QoS and QoS nullfunc frames. Replace existing compound conditionals and add a couple more missing checks for QoS variant. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200114055940.18502-3-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e172d0c7bf74..1c4409b4c012 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -619,6 +619,15 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } +/** + * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_is_any_nullfunc(__le16 fc) +{ + return (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)); +} + /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @fc: frame control field in little-endian byteorder -- cgit v1.2.3 From 24a448b165253b6f2ab1e0bcdba9a733007681d6 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 6 Dec 2019 18:29:03 +0800 Subject: XArray: Fix incorrect comment in header file 256 means Retry entry and 257 means Zero entry, so fix it. Signed-off-by: Chengguang Xu Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index f73e1775ded0..a491653d8882 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -32,8 +32,8 @@ * The following internal entries have a special meaning: * * 0-62: Sibling entries - * 256: Zero entry - * 257: Retry entry + * 256: Retry entry + * 257: Zero entry * * Errors are also represented as internal entries, but use the negative * space (-4094 to -2). They're never stored in the slots array; only -- cgit v1.2.3 From f93d6b21a93ceb02140eafd84e4fd77f5d00180a Mon Sep 17 00:00:00 2001 From: Zvika Yehudai Date: Mon, 3 Feb 2020 10:08:23 +0200 Subject: ieee80211: fix 'the' doubling in comments Remove redundant 'the' where 'the the' was written. Signed-off-by: Zvika Yehudai Link: https://lore.kernel.org/r/20200203080823.24949-1-zvikayeh@gmail.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1c4409b4c012..095a7108c394 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2062,7 +2062,7 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) /* * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size - * @he_oper_ie: byte data of the He Operations IE, stating from the the byte + * @he_oper_ie: byte data of the He Operations IE, stating from the byte * after the ext ID byte. It is assumed that he_oper_ie has at least * sizeof(struct ieee80211_he_operation) bytes, the caller must have * validated this. @@ -2100,7 +2100,7 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) /* * ieee80211_he_spr_size - calculate 802.11ax HE Spatial Reuse IE size - * @he_spr_ie: byte data of the He Spatial Reuse IE, stating from the the byte + * @he_spr_ie: byte data of the He Spatial Reuse IE, stating from the byte * after the ext ID byte. It is assumed that he_spr_ie has at least * sizeof(struct ieee80211_he_spr) bytes, the caller must have validated * this @@ -2743,7 +2743,7 @@ enum ieee80211_tdls_actioncode { */ #define WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT BIT(6) -/* TDLS capabilities in the the 4th byte of @WLAN_EID_EXT_CAPABILITY */ +/* TDLS capabilities in the 4th byte of @WLAN_EID_EXT_CAPABILITY */ #define WLAN_EXT_CAPA4_TDLS_BUFFER_STA BIT(4) #define WLAN_EXT_CAPA4_TDLS_PEER_PSM BIT(5) #define WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH BIT(6) -- cgit v1.2.3 From c0058df73309906ef4d5383fbaa10c43ebddc48a Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Fri, 31 Jan 2020 13:12:57 +0200 Subject: mac80211: parse also the RSNXE IE Parse also the RSN Extension IE when parsing the rest of the IEs. It will be used in a later patch. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200131111300.891737-21-luca@coelho.fi Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 095a7108c394..6f3e7c5c600a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2532,6 +2532,7 @@ enum ieee80211_eid { WLAN_EID_FILS_INDICATION = 240, WLAN_EID_DILS = 241, WLAN_EID_FRAGMENT = 242, + WLAN_EID_RSNX = 244, WLAN_EID_EXTENSION = 255 }; @@ -3421,4 +3422,11 @@ static inline bool for_each_element_completed(const struct element *element, return (const u8 *)element == (const u8 *)data + datalen; } +/** + * RSNX Capabilities: + * bits 0-3: Field length (n-1) + */ +#define WLAN_RSNX_CAPA_PROTECTED_TWT BIT(4) +#define WLAN_RSNX_CAPA_SAE_H2E BIT(5) + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 046e14afb3561523efd0047c35c20793ae5f8848 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Feb 2020 15:43:36 +0200 Subject: gpio: Avoid kernel.h inclusion where it's possible Inclusion of kernel.h increases the mess with the header dependencies. Avoid kernel.h inclusion where it's possible. Besides that, clean up a bit other inclusions inside GPIO subsystem headers. It includes: - removal pin control bits (forward declaration and header) from linux/gpio.h - removal of.h from asm-generic/gpio.h - use of explicit headers in gpio/consumer.h - add FIXME note with regard to gpio.h inclusion in of_gpio,h Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200205134336.20197-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- include/linux/gpio.h | 2 -- include/linux/gpio/consumer.h | 5 ++++- include/linux/of_gpio.h | 9 ++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 2157717c2136..008ad3ee56b7 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -102,11 +102,9 @@ void devm_gpio_free(struct device *dev, unsigned int gpio); #include #include #include -#include struct device; struct gpio_chip; -struct pinctrl_dev; static inline bool gpio_is_valid(int number) { diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index bf2d017dd7b7..0a72fccf60ff 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -2,9 +2,10 @@ #ifndef __LINUX_GPIO_CONSUMER_H #define __LINUX_GPIO_CONSUMER_H +#include #include +#include #include -#include struct device; @@ -189,6 +190,8 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, #else /* CONFIG_GPIOLIB */ +#include + static inline int gpiod_count(struct device *dev, const char *con_id) { return 0; diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 16967390a3fe..f821095218b0 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -11,9 +11,8 @@ #define __LINUX_OF_GPIO_H #include -#include -#include -#include +#include +#include /* FIXME: Shouldn't be here */ #include struct device_node; @@ -34,6 +33,8 @@ enum of_gpio_flags { #ifdef CONFIG_OF_GPIO +#include + /* * OF GPIO chip for memory mapped banks */ @@ -63,6 +64,8 @@ extern void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc); #else /* CONFIG_OF_GPIO */ +#include + /* Drivers may not strictly depend on the GPIO support, so let them link. */ static inline int of_get_named_gpio_flags(struct device_node *np, const char *list_name, int index, enum of_gpio_flags *flags) -- cgit v1.2.3 From ec2daf6e33f9f9113ba085b6ff88592907b6f1ce Mon Sep 17 00:00:00 2001 From: Jon Flatley Date: Fri, 24 Jan 2020 15:18:32 -0800 Subject: platform: chrome: Add cros-usbpd-notify driver ChromiumOS uses ACPI device with HID "GOOG0003" for power delivery related events. The existing cros-usbpd-charger driver relies on these events without ever actually receiving them on ACPI platforms. This is because in the ChromeOS kernel trees, the GOOG0003 device is owned by an ACPI driver that offers firmware updates to USB-C chargers. Introduce a new platform driver under cros-ec, the ChromeOS embedded controller, that handles these PD events and dispatches them appropriately over a notifier chain to all drivers that use them. On platforms that don't have the ACPI device defined, the driver gets instantiated for ECs which support the EC_FEATURE_USB_PD feature bit, and the notification events will get delivered using the MKBP event handling mechanism. Co-Developed-by: Prashant Malani Reviewed-by: Gwendal Grignou Reviewed-by: Benson Leung Signed-off-by: Jon Flatley Signed-off-by: Prashant Malani Acked-By: Enric Balletbo i Serra Signed-off-by: Benson Leung --- include/linux/platform_data/cros_usbpd_notify.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/platform_data/cros_usbpd_notify.h (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_usbpd_notify.h b/include/linux/platform_data/cros_usbpd_notify.h new file mode 100644 index 000000000000..4f2791722b6d --- /dev/null +++ b/include/linux/platform_data/cros_usbpd_notify.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ChromeOS EC Power Delivery Notifier Driver + * + * Copyright 2020 Google LLC + */ + +#ifndef __LINUX_PLATFORM_DATA_CROS_USBPD_NOTIFY_H +#define __LINUX_PLATFORM_DATA_CROS_USBPD_NOTIFY_H + +#include + +int cros_usbpd_register_notify(struct notifier_block *nb); + +void cros_usbpd_unregister_notify(struct notifier_block *nb); + +#endif /* __LINUX_PLATFORM_DATA_CROS_USBPD_NOTIFY_H */ -- cgit v1.2.3 From cc7eac1e4afdd151085be4d0341a155760388653 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 27 Jan 2020 17:37:42 +0900 Subject: usb: host: ehci-platform: add a quirk to avoid stuck Since EHCI/OHCI controllers on R-Car Gen3 SoCs are possible to be getting stuck very rarely after a full/low usb device was disconnected. To detect/recover from such a situation, the controllers require a special way which poll the EHCI PORTSC register and changes the OHCI functional state. So, this patch adds a polling timer into the ehci-platform driver, and if the ehci driver detects the issue by the EHCI PORTSC register, the ehci driver removes a companion device (= the OHCI controller) to change the OHCI functional state to USB Reset once. And then, the ehci driver adds the companion device again. Signed-off-by: Yoshihiro Shimoda Acked-by: Alan Stern Link: https://lore.kernel.org/r/1580114262-25029-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ehci_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index a15ce99dfc2d..78e006355557 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -151,7 +151,7 @@ struct ehci_regs { #define PORT_OWNER (1<<13) /* true: companion hc owns this port */ #define PORT_POWER (1<<12) /* true: has power (see PPC) */ #define PORT_USB11(x) (((x)&(3<<10)) == (1<<10)) /* USB 1.1 device */ -/* 11:10 for detecting lowspeed devices (reset vs release ownership) */ +#define PORT_LS_MASK (3<<10) /* Link status (SE0, K or J */ /* 9 reserved */ #define PORT_LPM (1<<9) /* LPM transaction */ #define PORT_RESET (1<<8) /* reset port */ -- cgit v1.2.3 From 469e1906a1b121ecb0c2ef43f28a98fd9d453831 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sat, 8 Feb 2020 20:44:07 +0200 Subject: platform: constify properties in platform_device Constify 'struct property_entry *properties' in platform_device. It is always passed around as a pointer const struct. Reviewed-by: Andy Shevchenko Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20200208184407.1294-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 276a03c24691..8e83c6ff140d 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -89,7 +89,7 @@ struct platform_device_info { size_t size_data; u64 dma_mask; - struct property_entry *properties; + const struct property_entry *properties; }; extern struct platform_device *platform_device_register_full( const struct platform_device_info *pdevinfo); -- cgit v1.2.3 From 901cff7cb96140a658a848a568b606ba764239bc Mon Sep 17 00:00:00 2001 From: Topi Miettinen Date: Thu, 23 Jan 2020 14:58:38 +0200 Subject: firmware_loader: load files from the mount namespace of init I have an experimental setup where almost every possible system service (even early startup ones) runs in separate namespace, using a dedicated, minimal file system. In process of minimizing the contents of the file systems with regards to modules and firmware files, I noticed that in my system, the firmware files are loaded from three different mount namespaces, those of systemd-udevd, init and systemd-networkd. The logic of the source namespace is not very clear, it seems to depend on the driver, but the namespace of the current process is used. So, this patch tries to make things a bit clearer and changes the loading of firmware files only from the mount namespace of init. This may also improve security, though I think that using firmware files as attack vector could be too impractical anyway. Later, it might make sense to make the mount namespace configurable, for example with a new file in /proc/sys/kernel/firmware_config/. That would allow a dedicated file system only for firmware files and those need not be present anywhere else. This configurability would make more sense if made also for kernel modules and /sbin/modprobe. Modules are already loaded from init namespace (usermodehelper uses kthreadd namespace) except when directly loaded by systemd-udevd. Instead of using the mount namespace of the current process to load firmware files, use the mount namespace of init process. Link: https://lore.kernel.org/lkml/bb46ebae-4746-90d9-ec5b-fce4c9328c86@gmail.com/ Link: https://lore.kernel.org/lkml/0e3f7653-c59d-9341-9db2-c88f5b988c68@gmail.com/ Signed-off-by: Topi Miettinen Link: https://lore.kernel.org/r/20200123125839.37168-1-toiwoton@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3cd4fe6b845e..f814ccd8d929 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3012,6 +3012,8 @@ extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); extern int kernel_read_file_from_path(const char *, void **, loff_t *, loff_t, enum kernel_read_file_id); +extern int kernel_read_file_from_path_initns(const char *, void **, loff_t *, loff_t, + enum kernel_read_file_id); extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); -- cgit v1.2.3 From 8673e944b50ec6e5afd4f599cf12b2798b629f3d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Feb 2020 11:48:28 +0200 Subject: platform/chrome: wilco_ec: Platform data shouldn't include kernel.h Replace with appropriate types.h. Also there is no need to include device.h, but mutex.h. For the pointers to unknown structures use forward declarations. In the *.c files we need to include all headers that provide APIs being used in the module. Signed-off-by: Andy Shevchenko Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/wilco-ec.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h index afede15a95bf..25f46a939637 100644 --- a/include/linux/platform_data/wilco-ec.h +++ b/include/linux/platform_data/wilco-ec.h @@ -8,8 +8,8 @@ #ifndef WILCO_EC_H #define WILCO_EC_H -#include -#include +#include +#include /* Message flags for using the mailbox() interface */ #define WILCO_EC_FLAG_NO_RESPONSE BIT(0) /* EC does not respond */ @@ -17,6 +17,10 @@ /* Normal commands have a maximum 32 bytes of data */ #define EC_MAILBOX_DATA_SIZE 32 +struct device; +struct resource; +struct platform_device; + /** * struct wilco_ec_device - Wilco Embedded Controller handle. * @dev: Device handle. -- cgit v1.2.3 From 42cd0ab476e2daffc23982c37822a78f9a53cdd5 Mon Sep 17 00:00:00 2001 From: Yicheng Li Date: Mon, 3 Feb 2020 14:53:56 -0800 Subject: platform/chrome: cros_ec: Query EC protocol version if EC transitions between RO/RW RO and RW of EC may have different EC protocol version. If EC transitions between RO and RW, but AP does not reboot (this is true for fingerprint microcontroller / cros_fp, but not true for main ec / cros_ec), the AP still uses the protocol version queried before transition, which can cause problems. In the case of fingerprint microcontroller, this causes AP to send the wrong version of EC_CMD_GET_NEXT_EVENT to RO in the interrupt handler, which in turn prevents RO to clear the interrupt line to AP, in an infinite loop. Once an EC_HOST_EVENT_INTERFACE_READY is received, we know that there might have been a transition between RO and RW, so re-query the protocol. Signed-off-by: Yicheng Li Tested-by: Marek Szyprowski Reviewed-by: Gwendal Grignou Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_proto.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index ba5914770191..383243326676 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -125,6 +125,9 @@ struct cros_ec_command { * @host_event_wake_mask: Mask of host events that cause wake from suspend. * @last_event_time: exact time from the hard irq when we got notified of * a new event. + * @notifier_ready: The notifier_block to let the kernel re-query EC + * communication protocol when the EC sends + * EC_HOST_EVENT_INTERFACE_READY. * @ec: The platform_device used by the mfd driver to interface with the * main EC. * @pd: The platform_device used by the mfd driver to interface with the @@ -166,6 +169,7 @@ struct cros_ec_device { u32 host_event_wake_mask; u32 last_resume_result; ktime_t last_event_time; + struct notifier_block notifier_ready; /* The platform devices used by the mfd driver */ struct platform_device *ec; -- cgit v1.2.3 From ed31685c96e18f773ca11dd1a637974d62130673 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Feb 2020 15:31:30 +0200 Subject: console: Introduce ->exit() callback Some consoles might require special operations on unregistering. For instance, serial console, when registered in the kernel, keeps power on for entire time, until it gets unregistered. Example of use: ->setup(console): pm_runtime_get(...); ->exit(console): pm_runtime_put(...); For such cases to have a balance we would provide ->exit() callback. Link: http://lkml.kernel.org/r/20200203133130.11591-7-andriy.shevchenko@linux.intel.com To: linux-kernel@vger.kernel.org To: Steven Rostedt Signed-off-by: Andy Shevchenko Reviewed-by: Sergey Senozhatsky Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek --- include/linux/console.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index d09951d5a94e..0cbd2a36aa00 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -149,6 +149,7 @@ struct console { struct tty_driver *(*device)(struct console *, int *); void (*unblank)(void); int (*setup)(struct console *, char *); + int (*exit)(struct console *); int (*match)(struct console *, char *name, int idx, char *options); short flags; short index; -- cgit v1.2.3 From 1751060e2527462714359573a39dca10451ffbf8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Oct 2019 20:01:26 +0100 Subject: locking/percpu-rwsem, lockdep: Make percpu-rwsem use its own lockdep_map As preparation for replacing the embedded rwsem, give percpu-rwsem its own lockdep_map. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Davidlohr Bueso Acked-by: Will Deacon Acked-by: Waiman Long Tested-by: Juri Lelli Link: https://lkml.kernel.org/r/20200131151539.927625541@infradead.org --- include/linux/percpu-rwsem.h | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index ad2ca2a89d5b..f2c36fb5e661 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -15,8 +15,17 @@ struct percpu_rw_semaphore { struct rw_semaphore rw_sem; /* slowpath */ struct rcuwait writer; /* blocked writer */ int readers_block; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif }; +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }, +#else +#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) +#endif + #define __DEFINE_PERCPU_RWSEM(name, is_static) \ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ is_static struct percpu_rw_semaphore name = { \ @@ -24,7 +33,9 @@ is_static struct percpu_rw_semaphore name = { \ .read_count = &__percpu_rwsem_rc_##name, \ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ .writer = __RCUWAIT_INITIALIZER(name.writer), \ + __PERCPU_RWSEM_DEP_MAP_INIT(name) \ } + #define DEFINE_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, /* not static */) #define DEFINE_STATIC_PERCPU_RWSEM(name) \ @@ -37,7 +48,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) { might_sleep(); - rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_); + rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); preempt_disable(); /* @@ -76,13 +87,15 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) */ if (ret) - rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_); + rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); return ret; } static inline void percpu_up_read(struct percpu_rw_semaphore *sem) { + rwsem_release(&sem->dep_map, _RET_IP_); + preempt_disable(); /* * Same as in percpu_down_read(). @@ -92,8 +105,6 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) else __percpu_up_read(sem); /* Unconditional memory barrier */ preempt_enable(); - - rwsem_release(&sem->rw_sem.dep_map, _RET_IP_); } extern void percpu_down_write(struct percpu_rw_semaphore *); @@ -110,15 +121,13 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); __percpu_init_rwsem(sem, #sem, &rwsem_key); \ }) -#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem) - -#define percpu_rwsem_assert_held(sem) \ - lockdep_assert_held(&(sem)->rw_sem) +#define percpu_rwsem_is_held(sem) lockdep_is_held(sem) +#define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { - lock_release(&sem->rw_sem.dep_map, ip); + lock_release(&sem->dep_map, ip); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER if (!read) atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN); @@ -128,7 +137,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { - lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip); + lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER if (!read) atomic_long_set(&sem->rw_sem.owner, (long)current); -- cgit v1.2.3 From 206c98ffbeda588dbbd9d272505c42acbc364a30 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Oct 2019 20:12:37 +0100 Subject: locking/percpu-rwsem: Convert to bool Use bool where possible. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Davidlohr Bueso Acked-by: Will Deacon Acked-by: Waiman Long Tested-by: Juri Lelli Link: https://lkml.kernel.org/r/20200131151539.984626569@infradead.org --- include/linux/percpu-rwsem.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index f2c36fb5e661..4ceaa1921951 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -41,7 +41,7 @@ is_static struct percpu_rw_semaphore name = { \ #define DEFINE_STATIC_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, static) -extern int __percpu_down_read(struct percpu_rw_semaphore *, int); +extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); extern void __percpu_up_read(struct percpu_rw_semaphore *); static inline void percpu_down_read(struct percpu_rw_semaphore *sem) @@ -69,9 +69,9 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) preempt_enable(); } -static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) +static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { - int ret = 1; + bool ret = true; preempt_disable(); /* -- cgit v1.2.3 From 71365d40232110f7b029befc9033ea311d680611 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Oct 2019 20:17:51 +0100 Subject: locking/percpu-rwsem: Move __this_cpu_inc() into the slowpath As preparation to rework __percpu_down_read() move the __this_cpu_inc() into it. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Davidlohr Bueso Acked-by: Will Deacon Acked-by: Waiman Long Tested-by: Juri Lelli Link: https://lkml.kernel.org/r/20200131151540.041600199@infradead.org --- include/linux/percpu-rwsem.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 4ceaa1921951..bb5b71c1e446 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -59,8 +59,9 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) * and that once the synchronize_rcu() is done, the writer will see * anything we did within this RCU-sched read-size critical section. */ - __this_cpu_inc(*sem->read_count); - if (unlikely(!rcu_sync_is_idle(&sem->rss))) + if (likely(rcu_sync_is_idle(&sem->rss))) + __this_cpu_inc(*sem->read_count); + else __percpu_down_read(sem, false); /* Unconditional memory barrier */ /* * The preempt_enable() prevents the compiler from @@ -77,8 +78,9 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) /* * Same as in percpu_down_read(). */ - __this_cpu_inc(*sem->read_count); - if (unlikely(!rcu_sync_is_idle(&sem->rss))) + if (likely(rcu_sync_is_idle(&sem->rss))) + __this_cpu_inc(*sem->read_count); + else ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ preempt_enable(); /* -- cgit v1.2.3 From 7f26482a872c36b2ee87ea95b9dcd96e3d5805df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Oct 2019 20:30:41 +0100 Subject: locking/percpu-rwsem: Remove the embedded rwsem The filesystem freezer uses percpu-rwsem in a way that is effectively write_non_owner() and achieves this with a few horrible hacks that rely on the rwsem (!percpu) implementation. When PREEMPT_RT replaces the rwsem implementation with a PI aware variant this comes apart. Remove the embedded rwsem and implement it using a waitqueue and an atomic_t. - make readers_block an atomic, and use it, with the waitqueue for a blocking test-and-set write-side. - have the read-side wait for the 'lock' state to clear. Have the waiters use FIFO queueing and mark them (reader/writer) with a new WQ_FLAG. Use a custom wake_function to wake either a single writer or all readers until a writer. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Davidlohr Bueso Acked-by: Will Deacon Acked-by: Waiman Long Tested-by: Juri Lelli Link: https://lkml.kernel.org/r/20200204092403.GB14879@hirez.programming.kicks-ass.net --- include/linux/percpu-rwsem.h | 19 ++++++------------- include/linux/wait.h | 1 + 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index bb5b71c1e446..f5ecf6a8a1dd 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -3,18 +3,18 @@ #define _LINUX_PERCPU_RWSEM_H #include -#include #include #include +#include #include #include struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; - struct rw_semaphore rw_sem; /* slowpath */ - struct rcuwait writer; /* blocked writer */ - int readers_block; + struct rcuwait writer; + wait_queue_head_t waiters; + atomic_t block; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif @@ -31,8 +31,9 @@ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ is_static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss), \ .read_count = &__percpu_rwsem_rc_##name, \ - .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ .writer = __RCUWAIT_INITIALIZER(name.writer), \ + .waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters), \ + .block = ATOMIC_INIT(0), \ __PERCPU_RWSEM_DEP_MAP_INIT(name) \ } @@ -130,20 +131,12 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { lock_release(&sem->dep_map, ip); -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - if (!read) - atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN); -#endif } static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip); -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER - if (!read) - atomic_long_set(&sem->rw_sem.owner, (long)current); -#endif } #endif diff --git a/include/linux/wait.h b/include/linux/wait.h index 3283c8d02137..feeb6be5cad6 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -20,6 +20,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int #define WQ_FLAG_EXCLUSIVE 0x01 #define WQ_FLAG_WOKEN 0x02 #define WQ_FLAG_BOOKMARK 0x04 +#define WQ_FLAG_CUSTOM 0x08 /* * A single wait-queue entry structure: -- cgit v1.2.3 From bcba67cd806800fa8e973ac49dbc7d2d8fb3e55e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Feb 2020 09:34:37 +0100 Subject: locking/rwsem: Remove RWSEM_OWNER_UNKNOWN Remove the now unused RWSEM_OWNER_UNKNOWN hack. This hack breaks PREEMPT_RT and getting rid of it was the entire motivation for re-writing the percpu rwsem. The biggest problem is that it is fundamentally incompatible with any form of Priority Inheritance, any exclusively held lock must have a distinct owner. Requested-by: Christoph Hellwig Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Davidlohr Bueso Acked-by: Will Deacon Acked-by: Waiman Long Tested-by: Juri Lelli Link: https://lkml.kernel.org/r/20200204092228.GP14946@hirez.programming.kicks-ass.net --- include/linux/rwsem.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 00d6054687dd..8a418d9eeb7a 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -53,12 +53,6 @@ struct rw_semaphore { #endif }; -/* - * Setting all bits of the owner field except bit 0 will indicate - * that the rwsem is writer-owned with an unknown owner. - */ -#define RWSEM_OWNER_UNKNOWN (-2L) - /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) { -- cgit v1.2.3 From ac8dec420970f5cbaf2f6eda39153a60ec5b257b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 18 Nov 2019 15:19:35 -0800 Subject: locking/percpu-rwsem: Fold __percpu_up_read() Now that __percpu_up_read() is only ever used from percpu_up_read() merge them, it's a small function. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Will Deacon Acked-by: Waiman Long Link: https://lkml.kernel.org/r/20200131151540.212415454@infradead.org --- include/linux/percpu-rwsem.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index f5ecf6a8a1dd..5e033fe1ff4e 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -43,7 +43,6 @@ is_static struct percpu_rw_semaphore name = { \ __DEFINE_PERCPU_RWSEM(name, static) extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); -extern void __percpu_up_read(struct percpu_rw_semaphore *); static inline void percpu_down_read(struct percpu_rw_semaphore *sem) { @@ -103,10 +102,22 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) /* * Same as in percpu_down_read(). */ - if (likely(rcu_sync_is_idle(&sem->rss))) + if (likely(rcu_sync_is_idle(&sem->rss))) { __this_cpu_dec(*sem->read_count); - else - __percpu_up_read(sem); /* Unconditional memory barrier */ + } else { + /* + * slowpath; reader will only ever wake a single blocked + * writer. + */ + smp_mb(); /* B matches C */ + /* + * In other words, if they see our decrement (presumably to + * aggregate zero, as that is the only time it matters) they + * will also see our critical section. + */ + __this_cpu_dec(*sem->read_count); + rcuwait_wake_up(&sem->writer); + } preempt_enable(); } -- cgit v1.2.3 From bbfd5e4fab63703375eafaf241a0c696024a59e1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 27 Jan 2020 08:53:54 -0800 Subject: perf/core: Add new branch sample type for HW index of raw branch records The low level index is the index in the underlying hardware buffer of the most recently captured taken branch which is always saved in branch_entries[0]. It is very useful for reconstructing the call stack. For example, in Intel LBR call stack mode, the depth of reconstructed LBR call stack limits to the number of LBR registers. With the low level index information, perf tool may stitch the stacks of two samples. The reconstructed LBR call stack can break the HW limitation. Add a new branch sample type to retrieve low level index of raw branch records. The low level index is between -1 (unknown) and max depth which can be retrieved in /sys/devices/cpu/caps/branches. Only when the new branch sample type is set, the low level index information is dumped into the PERF_SAMPLE_BRANCH_STACK output. Perf tool should check the attr.branch_sample_type, and apply the corresponding format for PERF_SAMPLE_BRANCH_STACK samples. Otherwise, some user case may be broken. For example, users may parse a perf.data, which include the new branch sample type, with an old version perf tool (without the check). Users probably get incorrect information without any warning. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200127165355.27495-2-kan.liang@linux.intel.com --- include/linux/perf_event.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 547773f5894e..68e21e828893 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -93,14 +93,26 @@ struct perf_raw_record { /* * branch stack layout: * nr: number of taken branches stored in entries[] + * hw_idx: The low level index of raw branch records + * for the most recent branch. + * -1ULL means invalid/unknown. * * Note that nr can vary from sample to sample * branches (to, from) are stored from most recent * to least recent, i.e., entries[0] contains the most * recent branch. + * The entries[] is an abstraction of raw branch records, + * which may not be stored in age order in HW, e.g. Intel LBR. + * The hw_idx is to expose the low level index of raw + * branch record for the most recent branch aka entries[0]. + * The hw_idx index is between -1 (unknown) and max depth, + * which can be retrieved in /sys/devices/cpu/caps/branches. + * For the architectures whose raw branch records are + * already stored in age order, the hw_idx should be 0. */ struct perf_branch_stack { __u64 nr; + __u64 hw_idx; struct perf_branch_entry entries[0]; }; -- cgit v1.2.3 From 3f2e4c11e136e2cffd60dbc840b59ff65f017328 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 17 Dec 2019 18:48:55 +0100 Subject: kfifo: provide noirqsave variants of spinlocked in and out helpers Provide variants of spinlocked kfifo_in() and kfifo_out() routines which don't disable interrupts. Signed-off-by: Bartosz Golaszewski Acked-by: Stefani Seibold --- include/linux/kfifo.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index fc4b0b10210f..123c200ed7cb 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -517,6 +517,26 @@ __kfifo_uint_must_check_helper( \ __ret; \ }) +/** + * kfifo_in_spinlocked_noirqsave - put data into fifo using a spinlock for + * locking, don't disable interrupts + * @fifo: address of the fifo to be used + * @buf: the data to be added + * @n: number of elements to be added + * @lock: pointer to the spinlock to use for locking + * + * This is a variant of kfifo_in_spinlocked() but uses spin_lock/unlock() + * for locking and doesn't disable interrupts. + */ +#define kfifo_in_spinlocked_noirqsave(fifo, buf, n, lock) \ +({ \ + unsigned int __ret; \ + spin_lock(lock); \ + __ret = kfifo_in(fifo, buf, n); \ + spin_unlock(lock); \ + __ret; \ +}) + /* alias for kfifo_in_spinlocked, will be removed in a future release */ #define kfifo_in_locked(fifo, buf, n, lock) \ kfifo_in_spinlocked(fifo, buf, n, lock) @@ -569,6 +589,28 @@ __kfifo_uint_must_check_helper( \ }) \ ) +/** + * kfifo_out_spinlocked_noirqsave - get data from the fifo using a spinlock + * for locking, don't disable interrupts + * @fifo: address of the fifo to be used + * @buf: pointer to the storage buffer + * @n: max. number of elements to get + * @lock: pointer to the spinlock to use for locking + * + * This is a variant of kfifo_out_spinlocked() which uses spin_lock/unlock() + * for locking and doesn't disable interrupts. + */ +#define kfifo_out_spinlocked_noirqsave(fifo, buf, n, lock) \ +__kfifo_uint_must_check_helper( \ +({ \ + unsigned int __ret; \ + spin_lock(lock); \ + __ret = kfifo_out(fifo, buf, n); \ + spin_unlock(lock); \ + __ret; \ +}) \ +) + /* alias for kfifo_out_spinlocked, will be removed in a future release */ #define kfifo_out_locked(fifo, buf, n, lock) \ kfifo_out_spinlocked(fifo, buf, n, lock) -- cgit v1.2.3 From 5195a89e8583bba43ec13871a7226763e401b44e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 17 Dec 2019 11:30:59 +0100 Subject: kfifo: provide kfifo_is_empty_spinlocked() Provide two spinlocked versions of kfifo_is_empty() to be used with spinlocked variants of kfifo_in() and kfifo_out(). Signed-off-by: Bartosz Golaszewski Acked-by: Stefani Seibold --- include/linux/kfifo.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 123c200ed7cb..86249476b57f 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -246,6 +246,37 @@ __kfifo_int_must_check_helper(int val) __tmpq->kfifo.in == __tmpq->kfifo.out; \ }) +/** + * kfifo_is_empty_spinlocked - returns true if the fifo is empty using + * a spinlock for locking + * @fifo: address of the fifo to be used + * @lock: spinlock to be used for locking + */ +#define kfifo_is_empty_spinlocked(fifo, lock) \ +({ \ + unsigned long __flags; \ + bool __ret; \ + spin_lock_irqsave(lock, __flags); \ + __ret = kfifo_is_empty(fifo); \ + spin_unlock_irqrestore(lock, __flags); \ + __ret; \ +}) + +/** + * kfifo_is_empty_spinlocked_noirqsave - returns true if the fifo is empty + * using a spinlock for locking, doesn't disable interrupts + * @fifo: address of the fifo to be used + * @lock: spinlock to be used for locking + */ +#define kfifo_is_empty_spinlocked_noirqsave(fifo, lock) \ +({ \ + bool __ret; \ + spin_lock(lock); \ + __ret = kfifo_is_empty(fifo); \ + spin_unlock(lock); \ + __ret; \ +}) + /** * kfifo_is_full - returns true if the fifo is full * @fifo: address of the fifo to be used -- cgit v1.2.3 From 684cb4b79e594f786b942a3ac5097ecf0e9e3e08 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 11 Feb 2020 14:25:29 +0300 Subject: usb: typec: Fix the description of struct typec_capability Removing descriptions of the mux and sw members. They are no longer part of the structure. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200211112531.86510-5-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index c358b3fd05c9..44d28387ced4 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -198,8 +198,6 @@ struct typec_operations { * @pd_revision: USB Power Delivery Specification revision if supported * @prefer_role: Initial role preference (DRP ports). * @accessory: Supported Accessory Modes - * @sw: Cable plug orientation switch - * @mux: Multiplexer switch for Alternate/Accessory Modes * @fwnode: Optional fwnode of the port * @driver_data: Private pointer for driver specific info * @ops: Port operations vector -- cgit v1.2.3 From bbe80c9a89b868e98ef0710cb03ee68dd78a4d8d Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 11 Feb 2020 14:25:30 +0300 Subject: usb: typec: altmode: Remove the notification chain Using the generic notification chain is not reasonable with the alternate modes because it would require dependencies between the drivers of the components that need the notifications, and the typec drivers. There are no users for the alternate mode notifications, so removing the chain and the API for it completely. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200211112531.86510-6-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_altmode.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h index 923ff3af0628..d834e236c6df 100644 --- a/include/linux/usb/typec_altmode.h +++ b/include/linux/usb/typec_altmode.h @@ -126,13 +126,6 @@ void typec_altmode_put_plug(struct typec_altmode *plug); struct typec_altmode *typec_match_altmode(struct typec_altmode **altmodes, size_t n, u16 svid, u8 mode); -struct typec_altmode * -typec_altmode_register_notifier(struct device *dev, u16 svid, u8 mode, - struct notifier_block *nb); - -void typec_altmode_unregister_notifier(struct typec_altmode *adev, - struct notifier_block *nb); - /** * typec_altmode_get_orientation - Get cable plug orientation * altmode: Handle to the alternate mode -- cgit v1.2.3 From c9d503370f240934f3c1c5da4c6c2452a7d05db2 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Wed, 16 Oct 2019 11:39:29 +0200 Subject: USB: Make it possible to "subclass" usb_device_driver The kernel currenly has only 2 usb_device_drivers, one generic one, one that completely replaces the generic one to make USB devices usable over a network. Use the newly exported generic driver functions when a driver declares to want them run, in addition to its own code. This makes it possible to write drivers that extend the generic USB driver. Note that this patch is not enough for another driver to automatically get selected. Signed-off-by: Bastien Nocera Acked-by: Alan Stern Link: https://lore.kernel.org/r/20191016093933.693-3-hadess@hadess.net Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e656e7b4b1e4..94bd3b48a485 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1228,6 +1228,9 @@ struct usb_driver { * @drvwrap: Driver-model core structure wrapper. * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend * for devices bound to this driver. + * @generic_subclass: if set to 1, the generic USB driver's probe, disconnect, + * resume and suspend functions will be called in addition to the driver's + * own, so this part of the setup does not need to be replicated. * * USB drivers must provide all the fields listed above except drvwrap. */ @@ -1242,6 +1245,7 @@ struct usb_device_driver { const struct attribute_group **dev_groups; struct usbdrv_wrap drvwrap; unsigned int supports_autosuspend:1; + unsigned int generic_subclass:1; }; #define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \ drvwrap.driver) -- cgit v1.2.3 From 88b7381a939de0fa1f1b1629c56b03dca7077309 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Wed, 16 Oct 2019 11:39:31 +0200 Subject: USB: Select better matching USB drivers when available Now that USB device drivers can reuse code from the generic USB device driver, we need to make sure that they get selected rather than the generic driver. Add an id_table and match vfunc to the usb_device_driver struct, which will get used to select a better matching driver at ->probe time. This is a similar mechanism to that used in the HID drivers, with the generic driver being selected unless there's a better matching one found in the registered drivers (see hid_generic_match() in drivers/hid/hid-generic.c). Signed-off-by: Bastien Nocera Acked-by: Alan Stern Link: https://lore.kernel.org/r/20191016093933.693-5-hadess@hadess.net Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 94bd3b48a485..3663bd7b1fa4 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1237,6 +1237,7 @@ struct usb_driver { struct usb_device_driver { const char *name; + bool (*match) (struct usb_device *udev); int (*probe) (struct usb_device *udev); void (*disconnect) (struct usb_device *udev); @@ -1244,6 +1245,7 @@ struct usb_device_driver { int (*resume) (struct usb_device *udev, pm_message_t message); const struct attribute_group **dev_groups; struct usbdrv_wrap drvwrap; + const struct usb_device_id *id_table; unsigned int supports_autosuspend:1; unsigned int generic_subclass:1; }; -- cgit v1.2.3 From 77419aa403ca1395f66e1e3de87743f54ba144b6 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Wed, 16 Oct 2019 11:39:32 +0200 Subject: USB: Fallback to generic driver when specific driver fails If ->probe fails for a device specific driver, ask the driver core to reprobe us, after having flagged the device for the generic driver to be forced. Signed-off-by: Bastien Nocera Acked-by: Alan Stern Link: https://lore.kernel.org/r/20191016093933.693-6-hadess@hadess.net Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 3663bd7b1fa4..ca1a5f1e1c5e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -708,6 +708,7 @@ struct usb_device { unsigned lpm_disable_count; u16 hub_delay; + unsigned use_generic_driver:1; }; #define to_usb_device(d) container_of(d, struct usb_device, dev) -- cgit v1.2.3 From f43caa2adc96fc9c95fd77eef63cdff86ebf33cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Fri, 24 Jan 2020 12:40:16 +0100 Subject: cgroup: Clean up css_set task traversal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit css_task_iter stores pointer to head of each iterable list, this dates back to commit 0f0a2b4fa621 ("cgroup: reorganize css_task_iter") when we did not store cur_cset. Let us utilize list heads directly in cur_cset and streamline css_task_iter_advance_css_set a bit. This is no intentional function change. Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e75d2191226b..f1219b927817 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -58,9 +58,6 @@ struct css_task_iter { struct list_head *tcset_head; struct list_head *task_pos; - struct list_head *tasks_head; - struct list_head *mg_tasks_head; - struct list_head *dying_tasks_head; struct list_head *cur_tasks_head; struct css_set *cur_cset; -- cgit v1.2.3 From a49e4629b5edf1db856de05fbf1aae05502ef1af Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Fri, 24 Jan 2020 20:37:29 +0530 Subject: cpuset: Make cpuset hotplug synchronous Convert cpuset_hotplug_workfn() into synchronous call for cpu hotplug path. For memory hotplug path it still gets queued as a work item. Since cpuset_hotplug_workfn() can be made synchronous for cpu hotplug path, it is not required to wait for cpuset hotplug while thawing processes. Signed-off-by: Prateek Sood Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 04c20de66afc..cede4cb98b78 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -54,7 +54,6 @@ extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); -extern void cpuset_wait_for_hotplug(void); extern void cpuset_read_lock(void); extern void cpuset_read_unlock(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); @@ -176,8 +175,6 @@ static inline void cpuset_update_active_cpus(void) partition_sched_domains(1, NULL, NULL); } -static inline void cpuset_wait_for_hotplug(void) { } - static inline void cpuset_read_lock(void) { } static inline void cpuset_read_unlock(void) { } -- cgit v1.2.3 From ef2c41cf38a7559bbf91af42d5b6a4429db8fc68 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 5 Feb 2020 14:26:22 +0100 Subject: clone3: allow spawning processes into cgroups This adds support for creating a process in a different cgroup than its parent. Callers can limit and account processes and threads right from the moment they are spawned: - A service manager can directly spawn new services into dedicated cgroups. - A process can be directly created in a frozen cgroup and will be frozen as well. - The initial accounting jitter experienced by process supervisors and daemons is eliminated with this. - Threaded applications or even thread implementations can choose to create a specific cgroup layout where each thread is spawned directly into a dedicated cgroup. This feature is limited to the unified hierarchy. Callers need to pass a directory file descriptor for the target cgroup. The caller can choose to pass an O_PATH file descriptor. All usual migration restrictions apply, i.e. there can be no processes in inner nodes. In general, creating a process directly in a target cgroup adheres to all migration restrictions. One of the biggest advantages of this feature is that CLONE_INTO_GROUP does not need to grab the write side of the cgroup cgroup_threadgroup_rwsem. This global lock makes moving tasks/threads around super expensive. With clone3() this lock is avoided. Cc: Tejun Heo Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Johannes Weiner Cc: Li Zefan Cc: Peter Zijlstra Cc: cgroups@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 5 +++-- include/linux/cgroup.h | 20 ++++++++++++++------ include/linux/sched/task.h | 4 ++++ 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 63097cb243cb..68c391f451d1 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -628,8 +628,9 @@ struct cgroup_subsys { void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); void (*post_attach)(void); - int (*can_fork)(struct task_struct *task); - void (*cancel_fork)(struct task_struct *task); + int (*can_fork)(struct task_struct *task, + struct css_set *cset); + void (*cancel_fork)(struct task_struct *task, struct css_set *cset); void (*fork)(struct task_struct *task); void (*exit)(struct task_struct *task); void (*release)(struct task_struct *task); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f1219b927817..4598e4da6b1b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -27,6 +27,8 @@ #include +struct kernel_clone_args; + #ifdef CONFIG_CGROUPS /* @@ -119,9 +121,12 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); void cgroup_fork(struct task_struct *p); -extern int cgroup_can_fork(struct task_struct *p); -extern void cgroup_cancel_fork(struct task_struct *p); -extern void cgroup_post_fork(struct task_struct *p); +extern int cgroup_can_fork(struct task_struct *p, + struct kernel_clone_args *kargs); +extern void cgroup_cancel_fork(struct task_struct *p, + struct kernel_clone_args *kargs); +extern void cgroup_post_fork(struct task_struct *p, + struct kernel_clone_args *kargs); void cgroup_exit(struct task_struct *p); void cgroup_release(struct task_struct *p); void cgroup_free(struct task_struct *p); @@ -705,9 +710,12 @@ static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { return -EINVAL; } static inline void cgroup_fork(struct task_struct *p) {} -static inline int cgroup_can_fork(struct task_struct *p) { return 0; } -static inline void cgroup_cancel_fork(struct task_struct *p) {} -static inline void cgroup_post_fork(struct task_struct *p) {} +static inline int cgroup_can_fork(struct task_struct *p, + struct kernel_clone_args *kargs) { return 0; } +static inline void cgroup_cancel_fork(struct task_struct *p, + struct kernel_clone_args *kargs) {} +static inline void cgroup_post_fork(struct task_struct *p, + struct kernel_clone_args *kargs) {} static inline void cgroup_exit(struct task_struct *p) {} static inline void cgroup_release(struct task_struct *p) {} static inline void cgroup_free(struct task_struct *p) {} diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index f1879884238e..38359071236a 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -13,6 +13,7 @@ struct task_struct; struct rusage; union thread_union; +struct css_set; /* All the bits taken by the old clone syscall. */ #define CLONE_LEGACY_FLAGS 0xffffffffULL @@ -29,6 +30,9 @@ struct kernel_clone_args { pid_t *set_tid; /* Number of elements in *set_tid */ size_t set_tid_size; + int cgroup; + struct cgroup *cgrp; + struct css_set *cset; }; /* -- cgit v1.2.3 From 5a7ea52b6faec6f8877e49645a80349b2d970f0a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Feb 2020 23:58:17 +0100 Subject: PM: QoS: Drop pm_qos_update_request_timeout() The pm_qos_update_request_timeout() function is not called from anywhere, so drop it along with the work member in struct pm_qos_request needed by it. Also drop the useless pm_qos_update_request_timeout trace event that is only triggered by that function (so it never triggers at all) and update the trace events documentation accordingly. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 19eafca5680e..4747bdb6bed2 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -8,7 +8,6 @@ #include #include #include -#include enum { PM_QOS_RESERVED = 0, @@ -43,7 +42,6 @@ enum pm_qos_flags_status { struct pm_qos_request { struct plist_node node; int pm_qos_class; - struct delayed_work work; /* for pm_qos_update_request_timeout */ }; struct pm_qos_flags_request { @@ -149,8 +147,6 @@ void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value); void pm_qos_update_request(struct pm_qos_request *req, s32 new_value); -void pm_qos_update_request_timeout(struct pm_qos_request *req, - s32 new_value, unsigned long timeout_us); void pm_qos_remove_request(struct pm_qos_request *req); int pm_qos_request(int pm_qos_class); -- cgit v1.2.3 From 87ad7356799622b69478076cd99df7a5024992cb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Feb 2020 23:58:26 +0100 Subject: PM: QoS: Drop the PM_QOS_SUM QoS type The PM_QOS_SUM QoS type is not used, so drop it along with the code referring to it in pm_qos_get_value() and the related local variables in there. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 4747bdb6bed2..48bfb96a9360 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -53,7 +53,6 @@ enum pm_qos_type { PM_QOS_UNITIALIZED, PM_QOS_MAX, /* return the largest value */ PM_QOS_MIN, /* return the smallest value */ - PM_QOS_SUM /* return the sum */ }; /* -- cgit v1.2.3 From 02c92a378940ba1a46b6b7ad277f1b07f8093dbc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:01:18 +0100 Subject: PM: QoS: Redefine struct pm_qos_request and drop struct pm_qos_object First, change the definition of struct pm_qos_request so that it contains a struct pm_qos_constraints pointer (called "qos") instead of a PM QoS class number (in preparation for dropping the PM QoS classes concept altogether going forward) and move its definition (along with the definition of struct pm_qos_flags_request that does not change) after the definition of struct pm_qos_constraints. Next, drop the definition of struct pm_qos_object and the null_pm_qos and cpu_dma_pm_qos variables of that type along with pm_qos_array[] holding pointers to them and change the code to refer to the pm_qos_constraints structure directly or to use the new qos pointer in struct pm_qos_request for that instead of going through pm_qos_array[] to access it. Also update kerneldoc comments that mention pm_qos_class to refer to PM_QOS_CPU_DMA_LATENCY directly instead. Finally, drop register_pm_qos_misc(), introduce cpu_latency_qos_miscdev (with the name field set to "cpu_dma_latency") to implement the CPU latency QoS interface in /dev/ and register it directly from pm_qos_power_init(). After these changes the notion of PM QoS classes remains only in the API (in the form of redundant function parameters that are ignored) and in the definitions of PM QoS trace events. While at it, some redundant local variables are dropped etc. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 48bfb96a9360..bef110aa80cc 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -39,16 +39,6 @@ enum pm_qos_flags_status { #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) -struct pm_qos_request { - struct plist_node node; - int pm_qos_class; -}; - -struct pm_qos_flags_request { - struct list_head node; - s32 flags; /* Do not change to 64 bit */ -}; - enum pm_qos_type { PM_QOS_UNITIALIZED, PM_QOS_MAX, /* return the largest value */ @@ -69,6 +59,16 @@ struct pm_qos_constraints { struct blocking_notifier_head *notifiers; }; +struct pm_qos_request { + struct plist_node node; + struct pm_qos_constraints *qos; +}; + +struct pm_qos_flags_request { + struct list_head node; + s32 flags; /* Do not change to 64 bit */ +}; + struct pm_qos_flags { struct list_head list; s32 effective_flags; /* Do not change to 64 bit */ -- cgit v1.2.3 From 3a4a0042228a854d9b1073050620820b4a977e6e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:02:30 +0100 Subject: PM: QoS: Drop PM_QOS_CPU_DMA_LATENCY notifier chain Notice that pm_qos_remove_notifier() is not used at all and the only caller of pm_qos_add_notifier() is the cpuidle core, which only needs the PM_QOS_CPU_DMA_LATENCY notifier to invoke wake_up_all_idle_cpus() upon changes of the PM_QOS_CPU_DMA_LATENCY target value. First, to ensure that wake_up_all_idle_cpus() will be called whenever the PM_QOS_CPU_DMA_LATENCY target value changes, modify the pm_qos_add/update/remove_request() family of functions to check if the effective constraint for the PM_QOS_CPU_DMA_LATENCY has changed and call wake_up_all_idle_cpus() directly in that case. Next, drop the PM_QOS_CPU_DMA_LATENCY notifier from cpuidle as it is not necessary any more. Finally, drop both pm_qos_add_notifier() and pm_qos_remove_notifier(), as they have no callers now, along with cpu_dma_lat_notifier which is only used by them. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index bef110aa80cc..cb57e5918a25 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -149,8 +149,6 @@ void pm_qos_update_request(struct pm_qos_request *req, void pm_qos_remove_request(struct pm_qos_request *req); int pm_qos_request(int pm_qos_class); -int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier); -int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_request_active(struct pm_qos_request *req); s32 pm_qos_read_value(struct pm_qos_constraints *c); -- cgit v1.2.3 From 2552d3520132a22834e0be85c51168a7a798608c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:04:31 +0100 Subject: PM: QoS: Rename things related to the CPU latency QoS First, rename PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE to PM_QOS_CPU_LATENCY_DEFAULT_VALUE and update all of the code referring to it accordingly. Next, rename cpu_dma_constraints to cpu_latency_constraints, move the definition of it closer to the functions referring to it and update all of them accordingly. [While at it, add a comment to mark the start of the code related to the CPU latency QoS.] Finally, rename the pm_qos_power_*() family of functions and pm_qos_power_fops to cpu_latency_qos_*() and cpu_latency_qos_fops, respectively, and update the definition of cpu_latency_qos_miscdev. [While at it, update the miscdev interface code start comment.] No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Acked-by: Greg Kroah-Hartman Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index cb57e5918a25..a3e0bfc6c470 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -28,7 +28,7 @@ enum pm_qos_flags_status { #define PM_QOS_LATENCY_ANY S32_MAX #define PM_QOS_LATENCY_ANY_NS ((s64)PM_QOS_LATENCY_ANY * NSEC_PER_USEC) -#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_CPU_LATENCY_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS -- cgit v1.2.3 From e033b6c175a32870a2f7cd3741c0d48c858c2d04 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:07:01 +0100 Subject: PM: QoS: Adjust pm_qos_request() signature and reorder pm_qos.h Change the return type of pm_qos_request() to be the same as the one of pm_qos_read_value() called by it internally and stop exporting it to modules (because its only caller, cpuidle, is not modular). Also move the pm_qos_read_value() header away from the CPU latency QoS API function headers in pm_qos.h (because it technically does not belong to that API). No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index a3e0bfc6c470..3c4bee29ecda 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -137,20 +137,20 @@ static inline int dev_pm_qos_request_active(struct dev_pm_qos_request *req) return req->dev != NULL; } +s32 pm_qos_read_value(struct pm_qos_constraints *c); int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value); bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, enum pm_qos_req_action action, s32 val); + void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value); void pm_qos_update_request(struct pm_qos_request *req, s32 new_value); void pm_qos_remove_request(struct pm_qos_request *req); - -int pm_qos_request(int pm_qos_class); +s32 pm_qos_request(int pm_qos_class); int pm_qos_request_active(struct pm_qos_request *req); -s32 pm_qos_read_value(struct pm_qos_constraints *c); #ifdef CONFIG_PM enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); -- cgit v1.2.3 From fa048c59bf1ba13d8fe61890495577073a0ea919 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:07:42 +0100 Subject: PM: QoS: Add CPU latency QoS API wrappers Introduce (temporary) wrappers around pm_qos_request(), pm_qos_request_active() and pm_qos_add/update/remove_request() to provide replacements for them with function signatures that will be used in the final CPU latency QoS API, so that the users of it can be switched over to the new arrangement one by one before the API is finally set. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 3c4bee29ecda..63d39e66f95d 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -152,6 +152,33 @@ void pm_qos_remove_request(struct pm_qos_request *req); s32 pm_qos_request(int pm_qos_class); int pm_qos_request_active(struct pm_qos_request *req); +static inline void cpu_latency_qos_add_request(struct pm_qos_request *req, + s32 value) +{ + pm_qos_add_request(req, PM_QOS_CPU_DMA_LATENCY, value); +} + +static inline void cpu_latency_qos_update_request(struct pm_qos_request *req, + s32 new_value) +{ + pm_qos_update_request(req, new_value); +} + +static inline void cpu_latency_qos_remove_request(struct pm_qos_request *req) +{ + pm_qos_remove_request(req); +} + +static inline bool cpu_latency_qos_request_active(struct pm_qos_request *req) +{ + return pm_qos_request_active(req); +} + +static inline s32 cpu_latency_qos_limit(void) +{ + return pm_qos_request(PM_QOS_CPU_DMA_LATENCY); +} + #ifdef CONFIG_PM enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask); -- cgit v1.2.3 From 256db7423c31c873abe6fb780513dd7b5705a510 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Thu, 13 Feb 2020 13:16:17 +0000 Subject: ieee80211: add WPA3 OWE AKM suite selector Add the definition for Opportunistic Wireless Encryption AKM selector. Signed-off-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20200213131608.10541-3-sergey.matyukevich.os@quantenna.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 6f3e7c5c600a..33d907eec0b6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3044,6 +3044,7 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) #define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) #define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) +#define WLAN_AKM_SUITE_OWE SUITE(0x000FAC, 18) #define WLAN_MAX_KEY_LEN 32 -- cgit v1.2.3 From 67b06ba01857ed077e1a66bfa139156e7c68bab2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:35:04 +0100 Subject: PM: QoS: Drop PM_QOS_CPU_DMA_LATENCY and rename related functions Drop the PM QoS classes enum including PM_QOS_CPU_DMA_LATENCY, drop the wrappers around pm_qos_request(), pm_qos_request_active(), and pm_qos_add/update/remove_request() introduced previously, rename these functions, respectively, to cpu_latency_qos_limit(), cpu_latency_qos_request_active(), and cpu_latency_qos_add/update/remove_request(), and update their kerneldoc comments. [While at it, drop some useless comments from these functions.] No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 47 +++++------------------------------------------ 1 file changed, 5 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 63d39e66f95d..e0ca4d780457 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -9,14 +9,6 @@ #include #include -enum { - PM_QOS_RESERVED = 0, - PM_QOS_CPU_DMA_LATENCY, - - /* insert new class ID */ - PM_QOS_NUM_CLASSES, -}; - enum pm_qos_flags_status { PM_QOS_FLAGS_UNDEFINED = -1, PM_QOS_FLAGS_NONE, @@ -144,40 +136,11 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, enum pm_qos_req_action action, s32 val); -void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, - s32 value); -void pm_qos_update_request(struct pm_qos_request *req, - s32 new_value); -void pm_qos_remove_request(struct pm_qos_request *req); -s32 pm_qos_request(int pm_qos_class); -int pm_qos_request_active(struct pm_qos_request *req); - -static inline void cpu_latency_qos_add_request(struct pm_qos_request *req, - s32 value) -{ - pm_qos_add_request(req, PM_QOS_CPU_DMA_LATENCY, value); -} - -static inline void cpu_latency_qos_update_request(struct pm_qos_request *req, - s32 new_value) -{ - pm_qos_update_request(req, new_value); -} - -static inline void cpu_latency_qos_remove_request(struct pm_qos_request *req) -{ - pm_qos_remove_request(req); -} - -static inline bool cpu_latency_qos_request_active(struct pm_qos_request *req) -{ - return pm_qos_request_active(req); -} - -static inline s32 cpu_latency_qos_limit(void) -{ - return pm_qos_request(PM_QOS_CPU_DMA_LATENCY); -} +s32 cpu_latency_qos_limit(void); +bool cpu_latency_qos_request_active(struct pm_qos_request *req); +void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value); +void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value); +void cpu_latency_qos_remove_request(struct pm_qos_request *req); #ifdef CONFIG_PM enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); -- cgit v1.2.3 From fe52de36dc5d60960230cabec88c357113d9c32a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:35:39 +0100 Subject: PM: QoS: Update file information comments Update the file information comments in include/linux/pm_qos.h and kernel/power/qos.c by adding titles along with copyright and authors information to them and changing the qos.c description to better reflect its contents (outdated information is dropped from it in particular). Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index e0ca4d780457..df065db3f57a 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -1,10 +1,17 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_PM_QOS_H -#define _LINUX_PM_QOS_H -/* interface for the pm_qos_power infrastructure of the linux kernel. +/* + * Definitions related to Power Management Quality of Service (PM QoS). + * + * Copyright (C) 2020 Intel Corporation * - * Mark Gross + * Authors: + * Mark Gross + * Rafael J. Wysocki */ + +#ifndef _LINUX_PM_QOS_H +#define _LINUX_PM_QOS_H + #include #include #include -- cgit v1.2.3 From 814d51f8889bc4afa75f647eeffd5ff0a5620e8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:37:11 +0100 Subject: PM: QoS: Make CPU latency QoS depend on CONFIG_CPU_IDLE Because cpuidle is the only user of the effective constraint coming from the CPU latency QoS, add #ifdef CONFIG_CPU_IDLE around that code to avoid building it unnecessarily. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- include/linux/pm_qos.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index df065db3f57a..4a69d4af3ff8 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -143,11 +143,24 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, enum pm_qos_req_action action, s32 val); +#ifdef CONFIG_CPU_IDLE s32 cpu_latency_qos_limit(void); bool cpu_latency_qos_request_active(struct pm_qos_request *req); void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value); void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value); void cpu_latency_qos_remove_request(struct pm_qos_request *req); +#else +static inline s32 cpu_latency_qos_limit(void) { return INT_MAX; } +static inline bool cpu_latency_qos_request_active(struct pm_qos_request *req) +{ + return false; +} +static inline void cpu_latency_qos_add_request(struct pm_qos_request *req, + s32 value) {} +static inline void cpu_latency_qos_update_request(struct pm_qos_request *req, + s32 new_value) {} +static inline void cpu_latency_qos_remove_request(struct pm_qos_request *req) {} +#endif #ifdef CONFIG_PM enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); -- cgit v1.2.3 From e92a4eb490cb445fd644cc9a344db3f01b866d29 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Tue, 11 Feb 2020 19:15:14 +0100 Subject: drivers base/arch_topology: Remove 'struct sched_domain' forward declaration The sched domain pointer argument from topology_get_freq_scale() and topology_get_cpu_scale() got removed by commit 7673c8a4c75d ("sched/cpufreq: Remove arch_scale_freq_capacity()'s 'sd' parameter") and commit 8ec59c0f5f49 ("sched/topology: Remove unused 'sd' parameter from arch_scale_cpu_capacity()"). So the 'struct sched_domain' forward declaration is no longer needed. Remove it. Signed-off-by: Dietmar Eggemann Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20200211181515.32570-2-dietmar.eggemann@arm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/arch_topology.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 3015ecbb90b1..f4b1d4fd7efb 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -16,7 +16,6 @@ bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); DECLARE_PER_CPU(unsigned long, cpu_scale); -struct sched_domain; static inline unsigned long topology_get_cpu_scale(int cpu) { -- cgit v1.2.3 From 99c73ce158a44a1a11cb146062bc32df0c1d95db Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Tue, 11 Feb 2020 19:15:15 +0100 Subject: drivers base/arch_topology: Reformat topology_get_[cpu/freq]_scale() function name The storage class and inline definition as well as the return type, function name and parameter list fit all into one line. Signed-off-by: Dietmar Eggemann Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20200211181515.32570-3-dietmar.eggemann@arm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/arch_topology.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index f4b1d4fd7efb..c507e9ddd909 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -16,8 +16,7 @@ bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); DECLARE_PER_CPU(unsigned long, cpu_scale); -static inline -unsigned long topology_get_cpu_scale(int cpu) +static inline unsigned long topology_get_cpu_scale(int cpu) { return per_cpu(cpu_scale, cpu); } @@ -26,8 +25,7 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); DECLARE_PER_CPU(unsigned long, freq_scale); -static inline -unsigned long topology_get_freq_scale(int cpu) +static inline unsigned long topology_get_freq_scale(int cpu) { return per_cpu(freq_scale, cpu); } -- cgit v1.2.3 From fe98d0ff5d5c43ee179e801275bb37641d398c6e Mon Sep 17 00:00:00 2001 From: Jianxin Pan Date: Wed, 15 Jan 2020 19:30:28 +0800 Subject: firmware: meson_sm: Add secure power domain support The Amlogic Meson A1/C1 Secure Monitor implements calls to control power domain. Signed-off-by: Jianxin Pan Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/1579087831-94965-2-git-send-email-jianxin.pan@amlogic.com --- include/linux/firmware/meson/meson_sm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 6669e2a1d5fd..95b0da2326a9 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -12,6 +12,8 @@ enum { SM_EFUSE_WRITE, SM_EFUSE_USER_MAX, SM_GET_CHIP_ID, + SM_A1_PWRC_SET, + SM_A1_PWRC_GET, }; struct meson_sm_firmware; -- cgit v1.2.3 From c67f3df88ffca45531a12214e8faffbdab1fa422 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 13 Feb 2020 17:20:20 -0600 Subject: of: Drop struct of_pci_range.pci_space field There's no more users of struct of_pci_range.pci_space field, so remove it. Signed-off-by: Rob Herring --- include/linux/of_address.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index eac7ab109df4..8d12bf18e80b 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -16,7 +16,6 @@ struct of_pci_range_parser { }; struct of_pci_range { - u32 pci_space; u64 pci_addr; u64 cpu_addr; u64 size; -- cgit v1.2.3 From bc5e522ec47174770a75df0a76d90f9ebb20132e Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 6 Feb 2020 14:01:05 +0000 Subject: of/address: Rework of_pci_range parsing for non-PCI buses The only PCI specific part of of_pci_range_parser_one() is the handling of the 3rd address cell. Rework it to work on regular 1 and 2 cell addresses. Use defines and a union to avoid a treewide renaming of the parsing helpers and struct. Signed-off-by: Rob Herring --- include/linux/of_address.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 8d12bf18e80b..763022ed3456 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -10,20 +10,27 @@ struct of_pci_range_parser { struct device_node *node; const __be32 *range; const __be32 *end; - int np; + int na; + int ns; int pna; bool dma; }; +#define of_range_parser of_pci_range_parser struct of_pci_range { - u64 pci_addr; + union { + u64 pci_addr; + u64 bus_addr; + }; u64 cpu_addr; u64 size; u32 flags; }; +#define of_range of_pci_range #define for_each_of_pci_range(parser, range) \ for (; of_pci_range_parser_one(parser, range);) +#define for_each_of_range for_each_of_pci_range /* Translate a DMA address from device space to CPU space */ extern u64 of_translate_dma_address(struct device_node *dev, @@ -142,4 +149,3 @@ static inline int of_pci_range_to_resource(struct of_pci_range *range, #endif /* CONFIG_OF_ADDRESS && CONFIG_PCI */ #endif /* __OF_ADDRESS_H */ - -- cgit v1.2.3 From 8c79fa6c44deac8042bd747527fea06a32738158 Mon Sep 17 00:00:00 2001 From: Jungseung Lee Date: Mon, 13 Jan 2020 14:59:05 +0900 Subject: mtd: spi-nor: introduce SR_BP_SHIFT define The shift variable of SR_BP is conclusive because the first bit of SR_BP is fixed on all known flashes. Replace ffs operation with SR_BP_SHIFT. Signed-off-by: Jungseung Lee Signed-off-by: Tudor Ambarus --- include/linux/mtd/spi-nor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 5abd91cc6dfa..61be6ed33097 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -137,6 +137,8 @@ #define SR1_QUAD_EN_BIT6 BIT(6) +#define SR_BP_SHIFT 2 + /* Enhanced Volatile Configuration Register bits */ #define EVCR_QUAD_EN_MICRON BIT(7) /* Micron Quad I/O */ -- cgit v1.2.3 From d71151a39c97d551378a441c089508b0bca48210 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 11 Feb 2020 12:52:49 +0800 Subject: ptp_qoriq: drop the code of alarm The alarm function hadn't been supported by PTP clock driver. The recommended solution PHC + phc2sys + nanosleep provides best performance. So drop the code of alarm in ptp_qoriq driver. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- include/linux/fsl/ptp_qoriq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index b0b743563f43..75884563059f 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -149,8 +149,6 @@ struct ptp_qoriq { bool extts_fifo_support; int irq; int phc_index; - u64 alarm_interval; /* for periodic alarm */ - u64 alarm_value; u32 tclk_period; /* nanoseconds */ u32 tmr_prsc; u32 tmr_add; -- cgit v1.2.3 From 8062e2333f8f7dcd5627e22b99e18d1cbb53eedb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 23:57:36 +0000 Subject: net: linkmode: make linkmode_test_bit() take const pointer linkmode_test_bit() does not modify the address; test_bit() is also declared const volatile for the same reason. There's no need for linkmode_test_bit() to be any different, and allows implementation of helpers that take a const linkmode pointer. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/linkmode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index fe740031339d..8e5b352e44f2 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -71,7 +71,7 @@ static inline void linkmode_change_bit(int nr, volatile unsigned long *addr) __change_bit(nr, addr); } -static inline int linkmode_test_bit(int nr, volatile unsigned long *addr) +static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) { return test_bit(nr, addr); } -- cgit v1.2.3 From a87ae8a963bde755b0962bcc18db83d611f63e7a Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 15:49:27 +0000 Subject: net: add helpers to resolve negotiated flow control Add a couple of helpers to resolve negotiated flow control. Two helpers are provided: - linkmode_resolve_pause() which takes the link partner and local advertisements, and decodes whether we should enable TX or RX pause at the MAC. This is useful outside of phylib, e.g. in phylink. - phy_get_pause(), which returns the TX/RX enablement status for the current negotiation results of the PHY. This allows us to centralise the flow control resolution, rather than spreading it around. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/linkmode.h | 4 ++++ include/linux/phy.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 8e5b352e44f2..9ec210f31d06 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -88,4 +88,8 @@ static inline int linkmode_subset(const unsigned long *src1, return bitmap_subset(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } +void linkmode_resolve_pause(const unsigned long *local_adv, + const unsigned long *partner_adv, + bool *tx_pause, bool *rx_pause); + #endif /* __LINKMODE_H */ diff --git a/include/linux/phy.h b/include/linux/phy.h index c570e162e05e..80f8b2158271 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1257,6 +1257,9 @@ void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); bool phy_validate_pause(struct phy_device *phydev, struct ethtool_pauseparam *pp); +void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause); +void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, + bool *tx_pause, bool *rx_pause); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From 45c767faef151899ac1a5e14a59c0e0a5bdba27b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 15:49:32 +0000 Subject: net: add linkmode helper for setting flow control advertisement Add a linkmode helper to set the flow control advertisement in an ethtool linkmode mask according to the tx/rx capabilities. This implementation is moved from phylib, and documented with an analysis of its shortcomings. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/linkmode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 9ec210f31d06..c664c27a29a0 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -92,4 +92,6 @@ void linkmode_resolve_pause(const unsigned long *local_adv, const unsigned long *partner_adv, bool *tx_pause, bool *rx_pause); +void linkmode_set_pause(unsigned long *advertisement, bool tx, bool rx); + #endif /* __LINKMODE_H */ -- cgit v1.2.3 From 4e5aeb4157c879a021e6d92373dc7e4684ebd8c0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 15:49:53 +0000 Subject: net: phylink: resolve fixed link flow control Resolve the fixed link flow control using the recently introduced linkmode_resolve_pause() helper, which we use in phylink_get_fixed_state() only when operating in full duplex mode. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phylink.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 523209e70947..0d6073c2b2b7 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -12,12 +12,10 @@ struct net_device; enum { MLO_PAUSE_NONE, - MLO_PAUSE_ASYM = BIT(0), - MLO_PAUSE_SYM = BIT(1), - MLO_PAUSE_RX = BIT(2), - MLO_PAUSE_TX = BIT(3), + MLO_PAUSE_RX = BIT(0), + MLO_PAUSE_TX = BIT(1), MLO_PAUSE_TXRX_MASK = MLO_PAUSE_TX | MLO_PAUSE_RX, - MLO_PAUSE_AN = BIT(4), + MLO_PAUSE_AN = BIT(2), MLO_AN_PHY = 0, /* Conventional PHY */ MLO_AN_FIXED, /* Fixed-link mode */ -- cgit v1.2.3 From b70486f94bb4820e84491089da5e30d29e774b0d Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 15 Feb 2020 15:50:09 +0000 Subject: net: phylink: clarify flow control settings in documentation Clarify the expected flow control settings operation in the phylink documentation for each negotiation mode. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phylink.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 0d6073c2b2b7..812357c03df4 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -152,13 +152,20 @@ void mac_pcs_get_state(struct phylink_config *config, * guaranteed to be correct, and so any mac_config() implementation must * never reference these fields. * + * In all negotiation modes, as defined by @mode, @state->pause indicates the + * pause settings which should be applied as follows. If %MLO_PAUSE_AN is not + * set, %MLO_PAUSE_TX and %MLO_PAUSE_RX indicate whether the MAC should send + * pause frames and/or act on received pause frames respectively. Otherwise, + * the results of in-band negotiation/status from the MAC PCS should be used + * to control the MAC pause mode settings. + * * The action performed depends on the currently selected mode: * * %MLO_AN_FIXED, %MLO_AN_PHY: - * Configure the specified @state->speed, @state->duplex and - * @state->pause (%MLO_PAUSE_TX / %MLO_PAUSE_RX) modes over a link - * specified by @state->interface. @state->advertising may be used, - * but is not required. Other members of @state must be ignored. + * Configure the specified @state->speed and @state->duplex over a link + * specified by @state->interface. @state->advertising may be used, but + * is not required. Pause modes as above. Other members of @state must + * be ignored. * * Valid state members: interface, speed, duplex, pause, advertising. * @@ -170,11 +177,14 @@ void mac_pcs_get_state(struct phylink_config *config, * mac_pcs_get_state() callback. Changes in link state must be made * by calling phylink_mac_change(). * + * Interface mode specific details are mentioned below. + * * If in 802.3z mode, the link speed is fixed, dependent on the - * @state->interface. Duplex is negotiated, and pause is advertised - * according to @state->an_enabled, @state->pause and - * @state->advertising flags. Beware of MACs which only support full - * duplex at gigabit and higher speeds. + * @state->interface. Duplex and pause modes are negotiated via + * the in-band configuration word. Advertised pause modes are set + * according to the @state->an_enabled and @state->advertising + * flags. Beware of MACs which only support full duplex at gigabit + * and higher speeds. * * If in Cisco SGMII mode, the link speed and duplex mode are passed * in the serial bitstream 16-bit configuration word, and the MAC -- cgit v1.2.3 From 672ef0e5684048a00aeb923b10131275ea688543 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 23 Jan 2020 09:02:54 +0000 Subject: EDAC: Store error type in struct edac_raw_error_desc Store the error type in struct edac_raw_error_desc. This makes the type parameter of edac_raw_mc_handle_error() obsolete. [ kernel-doc typo ] Reported-by: kbuild test robot Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Acked-by: Aristeu Rozanski Link: https://lkml.kernel.org/r/20200123090210.26933-4-rrichter@marvell.com --- include/linux/edac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index cc31b9742684..6703eb492cd2 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -442,6 +442,7 @@ struct errcount_attribute_data { * struct edac_raw_error_desc - Raw error report structure * @grain: minimum granularity for an error report, in bytes * @error_count: number of errors of the same type + * @type: severity of the error (CE/UE/Fatal) * @top_layer: top layer of the error (layer[0]) * @mid_layer: middle layer of the error (layer[1]) * @low_layer: low layer of the error (layer[2]) @@ -462,6 +463,7 @@ struct edac_raw_error_desc { long grain; u16 error_count; + enum hw_event_mc_err_type type; int top_layer; int mid_layer; int low_layer; -- cgit v1.2.3 From 67792cf9583c7816667c6b90007b5840f1b471f4 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 23 Jan 2020 09:03:02 +0000 Subject: EDAC/mc: Remove enable_per_layer_report function argument Many functions carry the enable_per_layer_report argument. This is a bool value indicating the error information contains some location data where the error occurred. This can easily being determined by checking the pos[] array for values. Negative values indicate there is no location available. So if the top layer is negative, the error location is unknown. Just check if the top layer is negative and remove enable_per_layer_report as function argument and also from struct edac_raw_error_desc. [ bp: Reflow comments to 80 columns, while at it. ] Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Acked-by: Aristeu Rozanski Link: https://lkml.kernel.org/r/20200123090210.26933-8-rrichter@marvell.com --- include/linux/edac.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 6703eb492cd2..815f246e0abd 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -454,8 +454,6 @@ struct errcount_attribute_data { * @location: location of the error * @label: label of the affected DIMM(s) * @other_detail: other driver-specific detail about the error - * @enable_per_layer_report: if false, the error affects all layers - * (typically, a memory controller error) */ struct edac_raw_error_desc { char location[LOCATION_SIZE]; @@ -472,7 +470,6 @@ struct edac_raw_error_desc { unsigned long syndrome; const char *msg; const char *other_detail; - bool enable_per_layer_report; }; /* MEMORY controller information structure -- cgit v1.2.3 From 4aa92c86463273b673e4170c60cb78e2625781eb Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 17 Feb 2020 12:30:23 +0100 Subject: EDAC/mc: Remove per layer counters Looking at how mci->{ue,ce}_per_layer[EDAC_MAX_LAYERS] is used, it turns out that only the leaves in the memory hierarchy are consumed (in sysfs), but not the intermediate layers, e.g.: count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx]; These unused counters only add complexity, remove them. The error counter values are directly stored in struct dimm_info now. Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Acked-by: Aristeu Rozanski Link: https://lkml.kernel.org/r/20200123090210.26933-11-rrichter@marvell.com --- include/linux/edac.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 815f246e0abd..0f20b986b0ab 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -383,6 +383,9 @@ struct dimm_info { unsigned int csrow, cschannel; /* Points to the old API data */ u16 smbios_handle; /* Handle for SMBIOS type 17 */ + + u32 ce_count; + u32 ue_count; }; /** @@ -559,7 +562,6 @@ struct mem_ctl_info { */ u32 ce_noinfo_count, ue_noinfo_count; u32 ue_mc, ce_mc; - u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; struct completion complete; -- cgit v1.2.3 From 3bd142a46b561a12408e8db78cc6d62eb1c6b84e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2020 13:38:53 +0100 Subject: clocksource: Cleanup struct clocksource and documentation Reformat the struct definition, add missing member documentation. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200207124402.825471920@linutronix.de --- include/linux/clocksource.h | 87 ++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index b21db536fd52..2c4574b517d2 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -32,9 +32,19 @@ struct module; * Provides mostly state-free accessors to the underlying hardware. * This is the structure used for system time. * - * @name: ptr to clocksource name - * @list: list head for registration - * @rating: rating value for selection (higher is better) + * @read: Returns a cycle value, passes clocksource as argument + * @mask: Bitmask for two's complement + * subtraction of non 64 bit counters + * @mult: Cycle to nanosecond multiplier + * @shift: Cycle to nanosecond divisor (power of two) + * @max_idle_ns: Maximum idle time permitted by the clocksource (nsecs) + * @maxadj: Maximum adjustment value to mult (~11%) + * @archdata: Optional arch-specific data + * @max_cycles: Maximum safe cycle value which won't overflow on + * multiplication + * @name: Pointer to clocksource name + * @list: List head for registration (internal) + * @rating: Rating value for selection (higher is better) * To avoid rating inflation the following * list should give you a guide as to how * to assign your clocksource a rating @@ -49,27 +59,23 @@ struct module; * 400-499: Perfect * The ideal clocksource. A must-use where * available. - * @read: returns a cycle value, passes clocksource as argument - * @enable: optional function to enable the clocksource - * @disable: optional function to disable the clocksource - * @mask: bitmask for two's complement - * subtraction of non 64 bit counters - * @mult: cycle to nanosecond multiplier - * @shift: cycle to nanosecond divisor (power of two) - * @max_idle_ns: max idle time permitted by the clocksource (nsecs) - * @maxadj: maximum adjustment value to mult (~11%) - * @max_cycles: maximum safe cycle value which won't overflow on multiplication - * @flags: flags describing special properties - * @archdata: arch-specific data - * @suspend: suspend function for the clocksource, if necessary - * @resume: resume function for the clocksource, if necessary + * @flags: Flags describing special properties + * @enable: Optional function to enable the clocksource + * @disable: Optional function to disable the clocksource + * @suspend: Optional suspend function for the clocksource + * @resume: Optional resume function for the clocksource * @mark_unstable: Optional function to inform the clocksource driver that * the watchdog marked the clocksource unstable - * @owner: module reference, must be set by clocksource in modules + * @tick_stable: Optional function called periodically from the watchdog + * code to provide stable syncrhonization points + * @wd_list: List head to enqueue into the watchdog list (internal) + * @cs_last: Last clocksource value for clocksource watchdog + * @wd_last: Last watchdog value corresponding to @cs_last + * @owner: Module reference, must be set by clocksource in modules * * Note: This struct is not used in hotpathes of the timekeeping code * because the timekeeper caches the hot path fields in its own data - * structure, so no line cache alignment is required, + * structure, so no cache line alignment is required, * * The pointer to the clocksource itself is handed to the read * callback. If you need extra information there you can wrap struct @@ -78,35 +84,36 @@ struct module; * structure. */ struct clocksource { - u64 (*read)(struct clocksource *cs); - u64 mask; - u32 mult; - u32 shift; - u64 max_idle_ns; - u32 maxadj; + u64 (*read)(struct clocksource *cs); + u64 mask; + u32 mult; + u32 shift; + u64 max_idle_ns; + u32 maxadj; #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA struct arch_clocksource_data archdata; #endif - u64 max_cycles; - const char *name; - struct list_head list; - int rating; - int (*enable)(struct clocksource *cs); - void (*disable)(struct clocksource *cs); - unsigned long flags; - void (*suspend)(struct clocksource *cs); - void (*resume)(struct clocksource *cs); - void (*mark_unstable)(struct clocksource *cs); - void (*tick_stable)(struct clocksource *cs); + u64 max_cycles; + const char *name; + struct list_head list; + int rating; + unsigned long flags; + + int (*enable)(struct clocksource *cs); + void (*disable)(struct clocksource *cs); + void (*suspend)(struct clocksource *cs); + void (*resume)(struct clocksource *cs); + void (*mark_unstable)(struct clocksource *cs); + void (*tick_stable)(struct clocksource *cs); /* private: */ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ - struct list_head wd_list; - u64 cs_last; - u64 wd_last; + struct list_head wd_list; + u64 cs_last; + u64 wd_last; #endif - struct module *owner; + struct module *owner; }; /* -- cgit v1.2.3 From 5d51bee725cc1497352d6b0b604e42a90c680540 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2020 13:38:55 +0100 Subject: clocksource: Add common vdso clock mode storage All architectures which use the generic VDSO code have their own storage for the VDSO clock mode. That's pointless and just requires duplicate code. Provide generic storage for it. The new Kconfig symbol is intermediate and will be removed once all architectures are converted over. Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200207124403.028046322@linutronix.de --- include/linux/clocksource.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 2c4574b517d2..6d5ed1b4d24d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -23,10 +23,19 @@ struct clocksource; struct module; -#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA +#if defined(CONFIG_ARCH_CLOCKSOURCE_DATA) || \ + defined(CONFIG_GENERIC_VDSO_CLOCK_MODE) #include #endif +enum vdso_clock_mode { + VDSO_CLOCKMODE_NONE, +#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE + VDSO_ARCH_CLOCKMODES, +#endif + VDSO_CLOCKMODE_MAX, +}; + /** * struct clocksource - hardware abstraction for a free running counter * Provides mostly state-free accessors to the underlying hardware. @@ -97,6 +106,7 @@ struct clocksource { const char *name; struct list_head list; int rating; + enum vdso_clock_mode vdso_clock_mode; unsigned long flags; int (*enable)(struct clocksource *cs); -- cgit v1.2.3 From 10fa9512769fa3b15ea29f4f331f4604c17b4b2c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 13 Feb 2020 12:20:58 +0100 Subject: usb: audio-v2: Add uac2_effect_unit_descriptor definition The UAC2 Effect Unit Descriptor has a slightly different definition from other similar ones like Processing Unit or Extension Unit. Define it here so that it can be used in USB-audio driver in a later patch. Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200213112059.18745-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/linux/usb/audio-v2.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index ba4b3e3327ff..cb9900b34b67 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -156,6 +156,18 @@ struct uac2_feature_unit_descriptor { __u8 bmaControls[0]; /* variable length */ } __attribute__((packed)); +/* 4.7.2.10 Effect Unit Descriptor */ + +struct uac2_effect_unit_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __u8 bUnitID; + __le16 wEffectType; + __u8 bSourceID; + __u8 bmaControls[]; /* variable length */ +} __attribute__((packed)); + /* 4.9.2 Class-Specific AS Interface Descriptor */ struct uac2_as_header_descriptor { -- cgit v1.2.3 From b2ca916ce392a9d4cea3489a3efb2b627b839eaf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 16 Feb 2020 12:00:48 -0800 Subject: ACPI: NUMA: Up-level "map to online node" functionality The acpi_map_pxm_to_online_node() helper is used to find the closest online node to a given proximity domain. This is used to map devices in a proximity domain with no online memory or cpus to the closest online node and populate a device's 'numa_node' property. The numa_node property allows applications to be migrated "close" to a resource. In preparation for providing a generic facility to optionally map an address range to its closest online node, or the node the range would represent were it to be onlined (target_node), up-level the core of acpi_map_pxm_to_online_node() to a generic mm/numa helper. Cc: Michal Hocko Acked-by: Rafael J. Wysocki Reviewed-by: Ingo Molnar Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/158188324802.894464.13128795207831894206.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/acpi.h | 23 ++++++++++++++++++++++- include/linux/numa.h | 9 +++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0f24d701fbdc..3839363081f3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -416,9 +416,30 @@ extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_NUMA -int acpi_map_pxm_to_online_node(int pxm); int acpi_map_pxm_to_node(int pxm); int acpi_get_node(acpi_handle handle); + +/** + * acpi_map_pxm_to_online_node - Map proximity ID to online node + * @pxm: ACPI proximity ID + * + * This is similar to acpi_map_pxm_to_node(), but always returns an online + * node. When the mapped node from a given proximity ID is offline, it + * looks up the node distance table and returns the nearest online node. + * + * ACPI device drivers, which are called after the NUMA initialization has + * completed in the kernel, can call this interface to obtain their device + * NUMA topology from ACPI tables. Such drivers do not have to deal with + * offline nodes. A node may be offline when a device proximity ID is + * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. + * "numa=off" on x86. + */ +static inline int acpi_map_pxm_to_online_node(int pxm) +{ + int node = acpi_map_pxm_to_node(pxm); + + return numa_map_to_online_node(node); +} #else static inline int acpi_map_pxm_to_online_node(int pxm) { diff --git a/include/linux/numa.h b/include/linux/numa.h index 110b0e5d0fb0..20f4e44b186c 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -13,4 +13,13 @@ #define NUMA_NO_NODE (-1) +#ifdef CONFIG_NUMA +int numa_map_to_online_node(int node); +#else +static inline int numa_map_to_online_node(int node) +{ + return NUMA_NO_NODE; +} +#endif + #endif /* _LINUX_NUMA_H */ -- cgit v1.2.3 From 1e5d8e1e47afde23e3249aed25d7d124feff5c1c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 16 Feb 2020 12:01:04 -0800 Subject: x86/mm: Introduce CONFIG_NUMA_KEEP_MEMINFO Currently x86 numa_meminfo is marked __initdata in the CONFIG_MEMORY_HOTPLUG=n case. In support of a new facility to allow drivers to map reserved memory to a 'target_node' (phys_to_target_node()), add support for removing the __initdata designation for those users. Both memory hotplug and phys_to_target_node() users select CONFIG_NUMA_KEEP_MEMINFO to tell the arch to maintain its physical address to NUMA mapping infrastructure post init. Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Cc: Andrew Morton Cc: David Hildenbrand Cc: Michal Hocko Reviewed-by: Ingo Molnar Signed-off-by: Dan Williams Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/158188326422.894464.15742054998046628934.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/numa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/numa.h b/include/linux/numa.h index 20f4e44b186c..5773cd2613fc 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -13,6 +13,13 @@ #define NUMA_NO_NODE (-1) +/* optionally keep NUMA memory info available post init */ +#ifdef CONFIG_NUMA_KEEP_MEMINFO +#define __initdata_or_meminfo +#else +#define __initdata_or_meminfo __initdata +#endif + #ifdef CONFIG_NUMA int numa_map_to_online_node(int node); #else -- cgit v1.2.3 From f86fd32db706613fe8d0104057efa6e83e0d7e8f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2020 13:38:59 +0100 Subject: lib/vdso: Cleanup clock mode storage leftovers Now that all architectures are converted to use the generic storage the helpers and conditionals can be removed. Signed-off-by: Thomas Gleixner Tested-by: Vincenzo Frascino Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200207124403.470699892@linutronix.de --- include/linux/clocksource.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 6d5ed1b4d24d..7fefe0b21a14 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -24,13 +24,13 @@ struct clocksource; struct module; #if defined(CONFIG_ARCH_CLOCKSOURCE_DATA) || \ - defined(CONFIG_GENERIC_VDSO_CLOCK_MODE) + defined(CONFIG_GENERIC_GETTIMEOFDAY) #include #endif enum vdso_clock_mode { VDSO_CLOCKMODE_NONE, -#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE +#ifdef CONFIG_GENERIC_GETTIMEOFDAY VDSO_ARCH_CLOCKMODES, #endif VDSO_CLOCKMODE_MAX, -- cgit v1.2.3 From 2d6b01bd88ccabba06d342ef80eaab6b39d12497 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2020 13:39:01 +0100 Subject: lib/vdso: Move VCLOCK_TIMENS to vdso_clock_modes Move the time namespace indicator clock mode to the other ones for consistency sake. Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/20200207124403.656097274@linutronix.de --- include/linux/clocksource.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 7fefe0b21a14..02e3282719bd 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -34,6 +34,9 @@ enum vdso_clock_mode { VDSO_ARCH_CLOCKMODES, #endif VDSO_CLOCKMODE_MAX, + + /* Indicator for time namespace VDSO */ + VDSO_CLOCKMODE_TIMENS = INT_MAX }; /** -- cgit v1.2.3 From d8fab4815a371e8013e1a769c31da1bcaf618b01 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 14 Feb 2020 15:30:01 +0100 Subject: net/mlx5: fix spelling mistake "reserverd" -> "reserved" The reserved member should be named reserved. Signed-off-by: Alexandre Belloni Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc_fpga.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 37e065a80a43..07d77323f78a 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -608,7 +608,7 @@ struct mlx5_ifc_tls_cmd_bits { struct mlx5_ifc_tls_resp_bits { u8 syndrome[0x20]; u8 stream_id[0x20]; - u8 reserverd[0x40]; + u8 reserved[0x40]; }; #define MLX5_TLS_COMMAND_SIZE (0x100) -- cgit v1.2.3 From 5d30f92e7631286b8617777c5400c8eadcae50a1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 16 Feb 2020 12:01:09 -0800 Subject: x86/NUMA: Provide a range-to-target_node lookup facility The DEV_DAX_KMEM facility is a generic mechanism to allow device-dax instances, fronting performance-differentiated-memory like pmem, to be added to the System RAM pool. The NUMA node for that hot-added memory is derived from the device-dax instance's 'target_node' attribute. Recall that the 'target_node' is the ACPI-PXM-to-node translation for memory when it comes online whereas the 'numa_node' attribute of the device represents the closest online cpu node. Presently useful target_node information from the ACPI SRAT is discarded with the expectation that "Reserved" memory will never be onlined. Now, DEV_DAX_KMEM violates that assumption, there is a need to retain the translation. Move, rather than discard, numa_memblk data to a secondary array that memory_add_physaddr_to_target_node() may consider at a later point in time. Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Cc: Andrew Morton Cc: David Hildenbrand Cc: Michal Hocko Reported-by: kbuild test robot Reviewed-by: Ingo Molnar Signed-off-by: Dan Williams Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/158188326978.894464.217282995221175417.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/numa.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/numa.h b/include/linux/numa.h index 5773cd2613fc..a42df804679e 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NUMA_H #define _LINUX_NUMA_H - +#include #ifdef CONFIG_NODES_SHIFT #define NODES_SHIFT CONFIG_NODES_SHIFT @@ -21,12 +21,24 @@ #endif #ifdef CONFIG_NUMA +/* Generic implementation available */ int numa_map_to_online_node(int node); + +/* + * Optional architecture specific implementation, users need a "depends + * on $ARCH" + */ +int phys_to_target_node(phys_addr_t addr); #else static inline int numa_map_to_online_node(int node) { return NUMA_NO_NODE; } + +static inline int phys_to_target_node(phys_addr_t addr) +{ + return NUMA_NO_NODE; +} #endif #endif /* _LINUX_NUMA_H */ -- cgit v1.2.3 From 0be298a939b748256035f66716fca409dd26d0dc Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 20 Jan 2020 14:10:04 +0200 Subject: ARM: at91: pm: add pmc_version member to at91_pm_data This will be used to differentiate b/w different PLLs settings to be applied in the final/first steps of the suspend/resume process by doing PLL specific configurations. Signed-off-by: Claudiu Beznea Acked-by: Stephen Boyd Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/1579522208-19523-5-git-send-email-claudiu.beznea@microchip.com --- include/linux/clk/at91_pmc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 390437887b46..f3d691fc5f29 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -12,6 +12,9 @@ #ifndef AT91_PMC_H #define AT91_PMC_H +#define AT91_PMC_V1 (1) /* PMC version 1 */ +#define AT91_PMC_V2 (2) /* PMC version 2 [SAM9X60] */ + #define AT91_PMC_SCER 0x00 /* System Clock Enable Register */ #define AT91_PMC_SCDR 0x04 /* System Clock Disable Register */ -- cgit v1.2.3 From e13208ab5d938e51e46ba44a1dec8073142c3d8c Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 20 Jan 2020 14:10:06 +0200 Subject: clk: at91: move sam9x60's PLL register offsets to PMC header Move SAM9X60's PLL register offsets to PMC header so that the definitions would also be available from arch/arm/mach-at91/pm_suspend.S. This is necessary to disable/enable PLLA for SAM9X60 on suspend/resume. Signed-off-by: Claudiu Beznea Acked-by: Stephen Boyd Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/1579522208-19523-7-git-send-email-claudiu.beznea@microchip.com --- include/linux/clk/at91_pmc.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index f3d691fc5f29..49a53a137610 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -33,16 +33,34 @@ #define AT91_PMC_HCK0 (1 << 16) /* AHB Clock (USB host) [AT91SAM9261 only] */ #define AT91_PMC_HCK1 (1 << 17) /* AHB Clock (LCD) [AT91SAM9261 only] */ +#define AT91_PMC_PLL_CTRL0 0x0C /* PLL Control Register 0 [for SAM9X60] */ +#define AT91_PMC_PLL_CTRL0_ENPLL (1 << 28) /* Enable PLL */ +#define AT91_PMC_PLL_CTRL0_ENPLLCK (1 << 29) /* Enable PLL clock for PMC */ +#define AT91_PMC_PLL_CTRL0_ENLOCK (1 << 31) /* Enable PLL lock */ + +#define AT91_PMC_PLL_CTRL1 0x10 /* PLL Control Register 1 [for SAM9X60] */ + #define AT91_PMC_PCER 0x10 /* Peripheral Clock Enable Register */ #define AT91_PMC_PCDR 0x14 /* Peripheral Clock Disable Register */ #define AT91_PMC_PCSR 0x18 /* Peripheral Clock Status Register */ +#define AT91_PMC_PLL_ACR 0x18 /* PLL Analog Control Register [for SAM9X60] */ +#define AT91_PMC_PLL_ACR_DEFAULT_UPLL 0x12020010UL /* Default PLL ACR value for UPLL */ +#define AT91_PMC_PLL_ACR_DEFAULT_PLLA 0x00020010UL /* Default PLL ACR value for PLLA */ +#define AT91_PMC_PLL_ACR_UTMIVR (1 << 12) /* UPLL Voltage regulator Control */ +#define AT91_PMC_PLL_ACR_UTMIBG (1 << 13) /* UPLL Bandgap Control */ + #define AT91_CKGR_UCKR 0x1C /* UTMI Clock Register [some SAM9] */ #define AT91_PMC_UPLLEN (1 << 16) /* UTMI PLL Enable */ #define AT91_PMC_UPLLCOUNT (0xf << 20) /* UTMI PLL Start-up Time */ #define AT91_PMC_BIASEN (1 << 24) /* UTMI BIAS Enable */ #define AT91_PMC_BIASCOUNT (0xf << 28) /* UTMI BIAS Start-up Time */ +#define AT91_PMC_PLL_UPDT 0x1C /* PMC PLL update register [for SAM9X60] */ +#define AT91_PMC_PLL_UPDT_UPDATE (1 << 8) /* Update PLL settings */ +#define AT91_PMC_PLL_UPDT_ID (1 << 0) /* PLL ID */ +#define AT91_PMC_PLL_UPDT_STUPTIM (0xff << 16) /* Startup time */ + #define AT91_CKGR_MOR 0x20 /* Main Oscillator Register [not on SAM9RL] */ #define AT91_PMC_MOSCEN (1 << 0) /* Main Oscillator Enable */ #define AT91_PMC_OSCBYPASS (1 << 1) /* Oscillator Bypass */ @@ -183,6 +201,8 @@ #define AT91_PMC_WPVS (0x1 << 0) /* Write Protect Violation Status */ #define AT91_PMC_WPVSRC (0xffff << 8) /* Write Protect Violation Source */ +#define AT91_PMC_PLL_ISR0 0xEC /* PLL Interrupt Status Register 0 [SAM9X60 only] */ + #define AT91_PMC_PCER1 0x100 /* Peripheral Clock Enable Register 1 [SAMA5 only]*/ #define AT91_PMC_PCDR1 0x104 /* Peripheral Clock Enable Register 1 */ #define AT91_PMC_PCSR1 0x108 /* Peripheral Clock Enable Register 1 */ -- cgit v1.2.3 From 12206b17235aed1ca6390b3e516825ae276f8345 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 11 Feb 2020 14:32:43 -0800 Subject: net/mlx5: Add support for resource dump On driver load: - Initialize resource dump data structure and memory access tools (mkey & pd). - Read the resource dump's menu which contains the FW segment identifier. Each record is identified by the segment name (ASCII). During the driver's course of life, users (like reporters) may request dumps per segment. The user should create a command providing the segment identifier (SW enumeration) and command keys. In return, the user receives a command context. In order to receive the dump, the user should supply the command context and a memory (aligned to a PAGE) on which the dump content will be written. Since the dump may be larger than the given memory, the user may resubmit the command until received an indication of end-of-dump. It is the user's responsibility to destroy the command. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 277a51d3ec40..f99cbe249425 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -722,6 +722,7 @@ struct mlx5_core_dev { struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; + struct mlx5_rsc_dump *rsc_dump; u32 vsc_addr; struct mlx5_hv_vhca *hv_vhca; }; -- cgit v1.2.3 From df5c21002cf4bb9c755c6330d101487c5d530c10 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 19 Feb 2020 00:24:10 +0300 Subject: mtd: spi-nor: use spi-mem dirmap API Make use of the spi-mem direct mapping API to let advanced controllers optimize read/write operations when they support direct mapping. Based on the original patch by Boris Brezillon . Signed-off-by: Sergei Shtylyov Reviewed-by: Boris Brezillon Signed-off-by: Tudor Ambarus --- include/linux/mtd/spi-nor.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 61be6ed33097..de90724f62f1 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -580,6 +580,7 @@ struct flash_info; * The structure includes legacy flash parameters and * settings that can be overwritten by the spi_nor_fixups * hooks, or dynamically when parsing the SFDP tables. + * @dirmap: pointers to struct spi_mem_dirmap_desc for reads/writes. * @priv: the private data */ struct spi_nor { @@ -606,6 +607,11 @@ struct spi_nor { struct spi_nor_flash_parameter params; + struct { + struct spi_mem_dirmap_desc *rdesc; + struct spi_mem_dirmap_desc *wdesc; + } dirmap; + void *priv; }; -- cgit v1.2.3 From 9255782f70614c89b1a15ec6997c4b72ce9e630a Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:06 +0530 Subject: sysfs: Wrap __compat_only_sysfs_link_entry_to_kobj function to change the symlink name The __compat_only_sysfs_link_entry_to_kobj function creates a symlink to a kobject but doesn't provide an option to change the symlink file name. This patch adds a wrapper function compat_only_sysfs_link_entry_to_kobj that extends the __compat_only_sysfs_link_entry_to_kobj functionality which allows function caller to customize the symlink name. Signed-off-by: Sourabh Jain [mpe: Fix compile error when CONFIG_SYSFS=n] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-3-sourabhjain@linux.ibm.com --- include/linux/sysfs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fa7ee503fb76..7462315a643b 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -300,6 +300,10 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name); +int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name, + const char *symlink_name); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); @@ -508,6 +512,14 @@ static inline int __compat_only_sysfs_link_entry_to_kobj( return 0; } +static inline int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name, + const char *symlink_name) +{ + return 0; +} + static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { -- cgit v1.2.3 From 10892847de816edf4f8f2151174dc557c4a3f63f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 17 Feb 2020 16:59:54 -0800 Subject: usb-storage: Use const to reduce object data size Make structs const to reduce data size ~20KB. Change function arguments and prototypes as necessary to compile. $ size (x86-64 defconfig pre) text data bss dec hex filename 12281 10948 480 23709 5c9d ./drivers/usb/storage/usb.o 111 10528 8 10647 2997 ./drivers/usb/storage/usual-tables.o $ size (x86-64 defconfig post) text data bss dec hex filename 22809 420 480 23709 5c9d drivers/usb/storage/usb.o 10551 0 0 10551 2937 drivers/usb/storage/usual-tables.o Signed-off-by: Joe Perches Acked-by: Alan Stern Link: https://lore.kernel.org/r/cf13bd2d790ae3afbf5da55ea7bed12e00c5119d.camel@perches.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 000a5954b2e8..4a19ac3f24d0 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -92,6 +92,6 @@ enum { US_DO_ALL_FLAGS }; #include extern int usb_usual_ignore_device(struct usb_interface *intf); -extern struct usb_device_id usb_storage_usb_ids[]; +extern const struct usb_device_id usb_storage_usb_ids[]; #endif /* __LINUX_USB_USUAL_H */ -- cgit v1.2.3 From a4393861a351f66fef1102e775743c86a276afce Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 17 Feb 2020 12:15:28 +0000 Subject: bpf, sk_msg: Let ULP restore sk_proto and write_space callback We don't need a fallback for when the socket is not using ULP. tcp_update_ulp handles this case exactly the same as we do in sk_psock_restore_proto. Get rid of the duplicated code. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200217121530.754315-2-jakub@cloudflare.com --- include/linux/skmsg.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 14d61bba0b79..8605947d6c08 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -361,16 +361,7 @@ static inline void sk_psock_restore_proto(struct sock *sk, sk->sk_prot->unhash = psock->saved_unhash; if (psock->sk_proto) { - struct inet_connection_sock *icsk = inet_csk(sk); - bool has_ulp = !!icsk->icsk_ulp_data; - - if (has_ulp) { - tcp_update_ulp(sk, psock->sk_proto, - psock->saved_write_space); - } else { - sk->sk_prot = psock->sk_proto; - sk->sk_write_space = psock->saved_write_space; - } + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); psock->sk_proto = NULL; } else { sk->sk_write_space = psock->saved_write_space; -- cgit v1.2.3 From a178b4585865a4c756c41bc5376f63416b7d9271 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 17 Feb 2020 12:15:29 +0000 Subject: bpf, sk_msg: Don't clear saved sock proto on restore There is no need to clear psock->sk_proto when restoring socket protocol callbacks in sk->sk_prot. The psock is about to get detached from the sock and eventually destroyed. At worst we will restore the protocol callbacks and the write callback twice. This makes reasoning about psock state easier. Once psock is initialized, we can count on psock->sk_proto always being set. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200217121530.754315-3-jakub@cloudflare.com --- include/linux/skmsg.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 8605947d6c08..d90ef61712a1 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -359,13 +359,7 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - - if (psock->sk_proto) { - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); - psock->sk_proto = NULL; - } else { - sk->sk_write_space = psock->saved_write_space; - } + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); } static inline void sk_psock_set_state(struct sk_psock *psock, -- cgit v1.2.3 From ad5906bd6e9aa3e156dcac8fc070b153648e8b68 Mon Sep 17 00:00:00 2001 From: Phong LE Date: Wed, 19 Feb 2020 15:09:06 +0100 Subject: regmap: wrong descriptions in regmap_range_cfg Swap selector_mask and selector_shift descriptions Signed-off-by: Phong LE Link: https://lore.kernel.org/r/20200219140906.29180-1-ple@baylibre.com Signed-off-by: Mark Brown --- include/linux/regmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f0a092a1a96d..40b07168fd8e 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -461,8 +461,8 @@ struct regmap_config { * @range_max: Address of the highest register in virtual range. * * @selector_reg: Register with selector field. - * @selector_mask: Bit shift for selector value. - * @selector_shift: Bit mask for selector value. + * @selector_mask: Bit mask for selector value. + * @selector_shift: Bit shift for selector value. * * @window_start: Address of first (lowest) register in data window. * @window_len: Number of registers in data window. -- cgit v1.2.3 From 1d0827b75ee7df497f611a2ac412a88135fb0ef5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 Jan 2020 12:06:01 -0800 Subject: mm/memremap_pages: Kill unused __devm_memremap_pages() Kill this definition that was introduced in commit 41e94a851304 ("add devm_memremap_pages") add never used. Cc: Christoph Hellwig Reviewed-by: Aneesh Kumar K.V Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/158041476158.3889308.4221100673554151124.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/io.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index b1c44bb4b2d7..8394c56babc2 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -77,8 +77,6 @@ void *devm_memremap(struct device *dev, resource_size_t offset, size_t size, unsigned long flags); void devm_memunmap(struct device *dev, void *addr); -void *__devm_memremap_pages(struct device *dev, struct resource *res); - #ifdef CONFIG_PCI /* * The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and -- cgit v1.2.3 From bd1a5a53d7c1231f89c51705f6a894516b40aa88 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 15 Feb 2020 23:08:38 -0800 Subject: security: : fix all kernel-doc warnings Fix all kernel-doc warnings in . Fixes the following warnings: ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'quotactl' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'quota_on' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'sb_free_mnt_opts' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'sb_eat_lsm_opts' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'sb_kern_mount' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'sb_show_options' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'sb_add_mnt_opt' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'd_instantiate' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'getprocattr' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'setprocattr' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'locked_down' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'perf_event_open' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'perf_event_alloc' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'perf_event_free' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'perf_event_read' not described in 'security_list_options' ../include/linux/lsm_hooks.h:1830: warning: Function parameter or member 'perf_event_write' not described in 'security_list_options' Signed-off-by: Randy Dunlap Cc: John Johansen Cc: Kees Cook Cc: Micah Morton Cc: James Morris Cc: "Serge E. Hallyn" Cc: linux-security-module@vger.kernel.org Cc: Paul Moore Cc: Stephen Smalley Cc: Eric Paris Cc: Casey Schaufler Cc: Kentaro Takeda Cc: Tetsuo Handa Acked-by: Casey Schaufler Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index a3763247547c..59f5ee945c1c 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -103,6 +103,10 @@ * @sb_free_security: * Deallocate and clear the sb->s_security field. * @sb contains the super_block structure to be modified. + * @sb_free_mnt_opts: + * Free memory associated with @mnt_ops. + * @sb_eat_lsm_opts: + * Eat (scan @orig options) and save them in @mnt_opts. * @sb_statfs: * Check permission before obtaining filesystem statistics for the @mnt * mountpoint. @@ -136,6 +140,10 @@ * @sb superblock being remounted * @data contains the filesystem-specific data. * Return 0 if permission is granted. + * @sb_kern_mount: + * Mount this @sb if allowed by permissions. + * @sb_show_options: + * Show (print on @m) mount options for this @sb. * @sb_umount: * Check permission before the @mnt file system is unmounted. * @mnt contains the mounted file system. @@ -155,6 +163,8 @@ * Copy all security options from a given superblock to another * @oldsb old superblock which contain information to clone * @newsb new superblock which needs filled in + * @sb_add_mnt_opt: + * Add one mount @option to @mnt_opts. * @sb_parse_opts_str: * Parse a string of security data filling in the opts structure * @options string containing all mount options known by the LSM @@ -451,6 +461,12 @@ * security module does not know about attribute or a negative error code * to abort the copy up. Note that the caller is responsible for reading * and writing the xattrs as this hook is merely a filter. + * @d_instantiate: + * Fill in @inode security information for a @dentry if allowed. + * @getprocattr: + * Read attribute @name for process @p and store it into @value if allowed. + * @setprocattr: + * Write (set) attribute @name to @value, size @size if allowed. * * Security hooks for kernfs node operations * @@ -1113,6 +1129,7 @@ * In case of failure, @secid will be set to zero. * * Security hooks for individual messages held in System V IPC message queues + * * @msg_msg_alloc_security: * Allocate and attach a security structure to the msg->security field. * The security field is initialized to NULL when the structure is first @@ -1302,6 +1319,10 @@ * @cap contains the capability . * @opts contains options for the capable check * Return 0 if the capability is granted for @tsk. + * @quotactl: + * Check whether the quotactl syscall is allowed for this @sb. + * @quota_on: + * Check whether QUOTAON is allowed for this @dentry. * @syslog: * Check permission before accessing the kernel message ring or changing * logging to the console. @@ -1449,11 +1470,24 @@ * @bpf_prog_free_security: * Clean up the security information stored inside bpf prog. * - * @locked_down + * @locked_down: * Determine whether a kernel feature that potentially enables arbitrary * code execution in kernel space should be permitted. * * @what: kernel feature being accessed + * + * Security hooks for perf events + * + * @perf_event_open: + * Check whether the @type of perf_event_open syscall is allowed. + * @perf_event_alloc: + * Allocate and save perf_event security info. + * @perf_event_free: + * Release (free) perf_event security info. + * @perf_event_read: + * Read perf_event security info if allowed. + * @perf_event_write: + * Write perf_event security info if allowed. */ union security_list_options { int (*binder_set_context_mgr)(struct task_struct *mgr); -- cgit v1.2.3 From 0f0d3827c0b4d6c3d219a73ea103077dc5bc17aa Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:26 +0200 Subject: net/mlx5: E-Switch, Move source port on reg_c0 to the upper 16 bits Multi chain support requires the miss path to continue the processing from the last chain id, and for that we need to save the chain miss tag (a mapping for 32bit chain id) on reg_c0 which will come in a next patch. Currently reg_c0 is exclusively used to store the source port metadata, giving it 32bit, it is created from 16bits of vcha_id, and 16bits of vport number. We will move this source port metadata to upper 16bits, and leave the lower bits for the chain miss tag. We compress the reg_c0 source port metadata to 16bits by taking 8 bits from vhca_id, and 8bits from the vport number. Since we compress the vport number to 8bits statically, and leave two top ids for special PF/ECPF numbers, we will only support a max of 254 vports with this strategy. Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 98e667b176ef..dd1333f29f6e 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -71,7 +71,26 @@ enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); -u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + +/* Reg C0 usage: + * Reg C0 = < ESW_VHCA_ID_BITS(8) | ESW_VPORT BITS(8) | ESW_CHAIN_TAG(16) > + * + * Highest 8 bits of the reg c0 is the vhca_id, next 8 bits is vport_num, + * the rest (lowest 16 bits) is left for tc chain tag restoration. + * VHCA_ID + VPORT comprise the SOURCE_PORT matching. + */ +#define ESW_VHCA_ID_BITS 8 +#define ESW_VPORT_BITS 8 +#define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) +#define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) + +static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) +{ + return GENMASK(31, 32 - ESW_SOURCE_PORT_METADATA_BITS); +} + +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ @@ -94,11 +113,17 @@ mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) }; static inline u32 -mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, +mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, int vport_num) { return 0; }; + +static inline u32 +mlx5_eswitch_get_vport_metadata_mask(void) +{ + return 0; +} #endif /* CONFIG_MLX5_ESWITCH */ #endif -- cgit v1.2.3 From 11b717d6152699623fb1133759f9b8f235935a51 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 16 Feb 2020 12:01:27 +0200 Subject: net/mlx5: E-Switch, Get reg_c0 value on CQE On RX side create a restore table in OFFLOADS namespace. This table will match on all values for reg_c0 we will use, and set it to the flow_tag. This flow tag can then be read on the CQE. As there is no copy action from reg c0 to flow tag, instead we have to set the flow tag explictily. We add an API so callers can add all the used reg_c0 values (tags) and for each of those we add a restore rule. This will be used in a following patch to save the miss chain mapping tag on reg_c0 and from it restore the tc chain on the skb. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index dd1333f29f6e..61705e74a5bb 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -84,6 +84,8 @@ bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); #define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS) #define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS) #define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS) +#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\ + 0) static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) { -- cgit v1.2.3 From 66630058e56b26b3a9cf2625e250a8c592dd0207 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 8 Feb 2020 20:48:29 +0100 Subject: sched/rt: Provide migrate_disable/enable() inlines Code which solely needs to prevent migration of a task uses preempt_disable()/enable() pairs. This is the only reliable way to do so as setting the task affinity to a single CPU can be undone by a setaffinity operation from a different task/process. RT provides a seperate migrate_disable/enable() mechanism which does not disable preemption to achieve the semantic requirements of a (almost) fully preemptible kernel. As it is unclear from looking at a given code path whether the intention is to disable preemption or migration, introduce migrate_disable/enable() inline functions which can be used to annotate code which merely needs to disable migration. Map them to preempt_disable/enable() for now. The RT substitution will be provided later. Code which is annotated that way documents that it has no requirement to protect against reentrancy of a preempting task. Either this is not required at all or the call sites are already serialized by other means. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/878slclv1u.fsf@nanos.tec.linutronix.de --- include/linux/preempt.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index bbb68dba37cc..bc3f1aecaa19 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -322,4 +322,34 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, #endif +/** + * migrate_disable - Prevent migration of the current task + * + * Maps to preempt_disable() which also disables preemption. Use + * migrate_disable() to annotate that the intent is to prevent migration, + * but not necessarily preemption. + * + * Can be invoked nested like preempt_disable() and needs the corresponding + * number of migrate_enable() invocations. + */ +static __always_inline void migrate_disable(void) +{ + preempt_disable(); +} + +/** + * migrate_enable - Allow migration of the current task + * + * Counterpart to migrate_disable(). + * + * As migrate_disable() can be invoked nested, only the outermost invocation + * reenables migration. + * + * Currently mapped to preempt_enable(). + */ +static __always_inline void migrate_enable(void) +{ + preempt_enable(); +} + #endif /* __LINUX_PREEMPT_H */ -- cgit v1.2.3 From 4e139c7711633365ebb52fbb63905395522a8413 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 14 Feb 2020 14:39:19 +0100 Subject: sched: Provide cant_migrate() Some code pathes rely on preempt_disable() to prevent migration on a non RT enabled kernel. These preempt_disable/enable() pairs are substituted by migrate_disable/enable() pairs or other forms of RT specific protection. On RT these protections prevent migration but not preemption. Obviously a cant_sleep() check in such a section will trigger on RT because preemption is not disabled. Provide a cant_migrate() macro which maps to cant_sleep() on a non RT kernel and an empty placeholder for RT for now. The placeholder will be changed to a proper debug check along with the RT specific migration protection mechanism. Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200214161503.070487511@linutronix.de --- include/linux/kernel.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 0d9db2a14f44..9b7a8d74a9d6 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -257,6 +257,13 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) +#ifndef CONFIG_PREEMPT_RT +# define cant_migrate() cant_sleep() +#else + /* Placeholder for now */ +# define cant_migrate() do { } while (0) +#endif + /** * abs - return absolute value of an argument * @x: the value. If it is unsigned type, it is converted to signed type first. -- cgit v1.2.3 From 90c018942c2babab73814648b37808fc3bf2ed1a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 11:37:38 -0800 Subject: timer: Use hlist_unhashed_lockless() in timer_pending() The timer_pending() function is mostly used in lockless contexts, so Without proper annotations, KCSAN might detect a data-race [1]. Using hlist_unhashed_lockless() instead of hand-coding it seems appropriate (as suggested by Paul E. McKenney). [1] BUG: KCSAN: data-race in del_timer / detach_if_pending write to 0xffff88808697d870 of 8 bytes by task 10 on cpu 0: __hlist_del include/linux/list.h:764 [inline] detach_timer kernel/time/timer.c:815 [inline] detach_if_pending+0xcd/0x2d0 kernel/time/timer.c:832 try_to_del_timer_sync+0x60/0xb0 kernel/time/timer.c:1226 del_timer_sync+0x6b/0xa0 kernel/time/timer.c:1365 schedule_timeout+0x2d2/0x6e0 kernel/time/timer.c:1896 rcu_gp_fqs_loop+0x37c/0x580 kernel/rcu/tree.c:1639 rcu_gp_kthread+0x143/0x230 kernel/rcu/tree.c:1799 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 read to 0xffff88808697d870 of 8 bytes by task 12060 on cpu 1: del_timer+0x3b/0xb0 kernel/time/timer.c:1198 sk_stop_timer+0x25/0x60 net/core/sock.c:2845 inet_csk_clear_xmit_timers+0x69/0xa0 net/ipv4/inet_connection_sock.c:523 tcp_clear_xmit_timers include/net/tcp.h:606 [inline] tcp_v4_destroy_sock+0xa3/0x3f0 net/ipv4/tcp_ipv4.c:2096 inet_csk_destroy_sock+0xf4/0x250 net/ipv4/inet_connection_sock.c:836 tcp_close+0x6f3/0x970 net/ipv4/tcp.c:2497 inet_release+0x86/0x100 net/ipv4/af_inet.c:427 __sock_release+0x85/0x160 net/socket.c:590 sock_close+0x24/0x30 net/socket.c:1268 __fput+0x1e1/0x520 fs/file_table.c:280 ____fput+0x1f/0x30 fs/file_table.c:313 task_work_run+0xf6/0x130 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop+0x2b4/0x2c0 arch/x86/entry/common.c:163 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 12060 Comm: syz-executor.5 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, Signed-off-by: Eric Dumazet Cc: Thomas Gleixner [ paulmck: Pulled in Eric's later amendments. ] Signed-off-by: Paul E. McKenney --- include/linux/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 1e6650ed066d..0dc19a8c39c9 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -164,7 +164,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { } */ static inline int timer_pending(const struct timer_list * timer) { - return timer->entry.pprev != NULL; + return !hlist_unhashed_lockless(&timer->entry); } extern void add_timer_on(struct timer_list *timer, int cpu); -- cgit v1.2.3 From 4dfd5cd83dc4458049c7f6eb9c4f361acc4239ea Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Sat, 18 Jan 2020 22:24:18 +0530 Subject: rculist: Add brackets around cond argument in __list_check_rcu macro Passing a complex lockdep condition to __list_check_rcu results in false positive lockdep splat due to incorrect expression evaluation. For example, a lockdep check condition `cond1 || cond2` is evaluated as `!cond1 || cond2 && !rcu_read_lock_any_held()` which, according to operator precedence, evaluates to `!cond1 || (cond2 && !rcu_read_lock_any_held())`. This would result in a lockdep splat when cond1 is false and cond2 is true which is logically incorrect. Signed-off-by: Amol Grover Acked-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 9f313e4999fe..8214cdc715f2 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -60,9 +60,9 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) #define __list_check_rcu(dummy, cond, extra...) \ ({ \ check_arg_count_one(extra); \ - RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(), \ + RCU_LOCKDEP_WARN(!(cond) && !rcu_read_lock_any_held(), \ "RCU-list traversed in non-reader section!"); \ - }) + }) #else #define __list_check_rcu(dummy, cond, extra...) \ ({ check_arg_count_one(extra); }) -- cgit v1.2.3 From 59ee0326ccf712f9a637d5df2465a16a784cbfb0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Nov 2019 18:54:06 -0800 Subject: rcutorture: Suppress forward-progress complaints during early boot Some larger systems can take in excess of 50 seconds to complete their early boot initcalls prior to spawing init. This does not in any way help the forward-progress judgments of built-in rcutorture (when rcutorture is built as a module, the insmod or modprobe command normally cannot happen until some time after boot completes). This commit therefore suppresses such complaints until about the time that init is spawned. This also includes a fix to a stupid error located by kbuild test robot. [ paulmck: Apply kbuild test robot feedback. ] Signed-off-by: Paul E. McKenney [ paulmck: Fix to nohz_full slow-expediting recovery logic, per bpetkov. ] [ paulmck: Restrict splat to CONFIG_PREEMPT_RT=y kernels and simplify. ] Tested-by: Borislav Petkov --- include/linux/rcutiny.h | 1 + include/linux/rcutree.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index b2b2dc990da9..045c28b71f4f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -83,6 +83,7 @@ void rcu_scheduler_starting(void); static inline void rcu_scheduler_starting(void) { } #endif /* #else #ifndef CONFIG_SRCU */ static inline void rcu_end_inkernel_boot(void) { } +static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } static inline void rcu_momentary_dyntick_idle(void) { } static inline void kfree_rcu_scheduler_running(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 2f787b9029d1..45f3f66bb04d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -54,6 +54,7 @@ void exit_rcu(void); void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; void rcu_end_inkernel_boot(void); +bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); #ifndef CONFIG_PREEMPTION void rcu_all_qs(void); -- cgit v1.2.3 From 9ffc1d19fc4a6dfcfe06c91c2861ad6d44fdd92d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 Jan 2020 12:06:07 -0800 Subject: mm/memremap_pages: Introduce memremap_compat_align() The "sub-section memory hotplug" facility allows memremap_pages() users like libnvdimm to compensate for hardware platforms like x86 that have a section size larger than their hardware memory mapping granularity. The compensation that sub-section support affords is being tolerant of physical memory resources shifting by units smaller (64MiB on x86) than the memory-hotplug section size (128 MiB). Where the platform physical-memory mapping granularity is limited by the number and capability of address-decode-registers in the memory controller. While the sub-section support allows memremap_pages() to operate on sub-section (2MiB) granularity, the Power architecture may still require 16MiB alignment on "!radix_enabled()" platforms. In order for libnvdimm to be able to detect and manage this per-arch limitation, introduce memremap_compat_align() as a common minimum alignment across all driver-facing memory-mapping interfaces, and let Power override it to 16MiB in the "!radix_enabled()" case. The assumption / requirement for 16MiB to be a viable memremap_compat_align() value is that Power does not have platforms where its equivalent of address-decode-registers never hardware remaps a persistent memory resource on smaller than 16MiB boundaries. Note that I tried my best to not add a new Kconfig symbol, but header include entanglements defeated the #ifndef memremap_compat_align design pattern and the need to export it defeats the __weak design pattern for arch overrides. Based on an initial patch by Aneesh. Link: http://lore.kernel.org/r/CAPcyv4gBGNP95APYaBcsocEa50tQj9b5h__83vgngjq3ouGX_Q@mail.gmail.com Reported-by: Aneesh Kumar K.V Reported-by: Jeff Moyer Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Reviewed-by: Aneesh Kumar K.V Acked-by: Michael Ellerman (powerpc) Signed-off-by: Dan Williams --- include/linux/memremap.h | 8 ++++++++ include/linux/mmzone.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 6fefb09af7c3..8af1cbd8f293 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -132,6 +132,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); +unsigned long memremap_compat_align(void); #else static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) @@ -165,6 +166,12 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns) { } + +/* when memremap_pages() is disabled all archs can remap a single page */ +static inline unsigned long memremap_compat_align(void) +{ + return PAGE_SIZE; +} #endif /* CONFIG_ZONE_DEVICE */ static inline void put_dev_pagemap(struct dev_pagemap *pgmap) @@ -172,4 +179,5 @@ static inline void put_dev_pagemap(struct dev_pagemap *pgmap) if (pgmap) percpu_ref_put(pgmap->ref); } + #endif /* _LINUX_MEMREMAP_H_ */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 462f6873905a..6b77f7239af5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1170,6 +1170,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) #define SUBSECTION_SHIFT 21 +#define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT) #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT) #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT) -- cgit v1.2.3 From dce05aa6eec977f1472abed95ccd71276b9a3864 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 19 Feb 2020 08:39:43 +0100 Subject: vt: selection, introduce vc_is_sel Avoid global variables (namely sel_cons) by introducing vc_is_sel. It checks whether the parameter is the current selection console. This will help putting sel_cons to a struct later. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20200219073951.16151-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/selection.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/selection.h b/include/linux/selection.h index e2c1f96bf059..5b890ef5b59f 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -11,8 +11,8 @@ #include #include -extern struct vc_data *sel_cons; struct tty_struct; +struct vc_data; extern void clear_selection(void); extern int set_selection_user(const struct tiocl_selection __user *sel, @@ -24,6 +24,8 @@ extern int sel_loadlut(char __user *p); extern int mouse_reporting(void); extern void mouse_report(struct tty_struct * tty, int butt, int mrx, int mry); +bool vc_is_sel(struct vc_data *vc); + extern int console_blanked; extern const unsigned char color_table[]; -- cgit v1.2.3 From 3e27a33932df104f4f9ff811467b0b4ccebde773 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Feb 2020 15:43:42 +0800 Subject: security: remove duplicated include from security.h Remove duplicated include. Signed-off-by: YueHaibing Signed-off-by: James Morris --- include/linux/security.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 9df7547afc0c..896da4429c17 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -30,7 +30,6 @@ #include #include #include -#include struct linux_binprm; struct cred; -- cgit v1.2.3 From b8e202d1d1d0f182f01062804efb523ea9a9008c Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:13 +0000 Subject: net, sk_msg: Annotate lockless access to sk_prot on clone sk_msg and ULP frameworks override protocol callbacks pointer in sk->sk_prot, while tcp accesses it locklessly when cloning the listening socket, that is with neither sk_lock nor sk_callback_lock held. Once we enable use of listening sockets with sockmap (and hence sk_msg), there will be shared access to sk->sk_prot if socket is getting cloned while being inserted/deleted to/from the sockmap from another CPU: Read side: tcp_v4_rcv sk = __inet_lookup_skb(...) tcp_check_req(sk) inet_csk(sk)->icsk_af_ops->syn_recv_sock tcp_v4_syn_recv_sock tcp_create_openreq_child inet_csk_clone_lock sk_clone_lock READ_ONCE(sk->sk_prot) Write side: sock_map_ops->map_update_elem sock_map_update_elem sock_map_update_common sock_map_link_no_progs tcp_bpf_init tcp_bpf_update_sk_prot sk_psock_update_proto WRITE_ONCE(sk->sk_prot, ops) sock_map_ops->map_delete_elem sock_map_delete_elem __sock_map_delete sock_map_unref sk_psock_put sk_psock_drop sk_psock_restore_proto tcp_update_ulp WRITE_ONCE(sk->sk_prot, proto) Mark the shared access with READ_ONCE/WRITE_ONCE annotations. Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200218171023.844439-2-jakub@cloudflare.com --- include/linux/skmsg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index d90ef61712a1..112765bd146d 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -352,7 +352,8 @@ static inline void sk_psock_update_proto(struct sock *sk, psock->saved_write_space = sk->sk_write_space; psock->sk_proto = sk->sk_prot; - sk->sk_prot = ops; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, ops); } static inline void sk_psock_restore_proto(struct sock *sk, -- cgit v1.2.3 From 015d239ac0142ad0e26567fd890ef8d171f13709 Mon Sep 17 00:00:00 2001 From: Kenneth Lee Date: Tue, 11 Feb 2020 15:54:23 +0800 Subject: uacce: add uacce driver Uacce (Unified/User-space-access-intended Accelerator Framework) targets to provide Shared Virtual Addressing (SVA) between accelerators and processes. So accelerator can access any data structure of the main cpu. This differs from the data sharing between cpu and io device, which share only data content rather than address. Since unified address, hardware and user space of process can share the same virtual address in the communication. Uacce create a chrdev for every registration, the queue is allocated to the process when the chrdev is opened. Then the process can access the hardware resource by interact with the queue file. By mmap the queue file space to user space, the process can directly put requests to the hardware without syscall to the kernel space. The IOMMU core only tracks mm<->device bonds at the moment, because it only needs to handle IOTLB invalidation and PASID table entries. However uacce needs a finer granularity since multiple queues from the same device can be bound to an mm. When the mm exits, all bound queues must be stopped so that the IOMMU can safely clear the PASID table entry and reallocate the PASID. An intermediate struct uacce_mm links uacce devices and queues. Note that an mm may be bound to multiple devices but an uacce_mm structure only ever belongs to a single device, because we don't need anything more complex (if multiple devices are bound to one mm, then we'll create one uacce_mm for each bond). uacce_device --+-- uacce_mm --+-- uacce_queue | '-- uacce_queue | '-- uacce_mm --+-- uacce_queue +-- uacce_queue '-- uacce_queue Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jonathan Cameron Signed-off-by: Kenneth Lee Signed-off-by: Zaibo Xu Signed-off-by: Zhou Wang Signed-off-by: Jean-Philippe Brucker Signed-off-by: Zhangfei Gao Signed-off-by: Herbert Xu --- include/linux/uacce.h | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 include/linux/uacce.h (limited to 'include/linux') diff --git a/include/linux/uacce.h b/include/linux/uacce.h new file mode 100644 index 000000000000..904a461591a3 --- /dev/null +++ b/include/linux/uacce.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_UACCE_H +#define _LINUX_UACCE_H + +#include +#include + +#define UACCE_NAME "uacce" +#define UACCE_MAX_REGION 2 +#define UACCE_MAX_NAME_SIZE 64 + +struct uacce_queue; +struct uacce_device; + +/** + * struct uacce_qfile_region - structure of queue file region + * @type: type of the region + */ +struct uacce_qfile_region { + enum uacce_qfrt type; +}; + +/** + * struct uacce_ops - uacce device operations + * @get_available_instances: get available instances left of the device + * @get_queue: get a queue from the device + * @put_queue: free a queue to the device + * @start_queue: make the queue start work after get_queue + * @stop_queue: make the queue stop work before put_queue + * @is_q_updated: check whether the task is finished + * @mmap: mmap addresses of queue to user space + * @ioctl: ioctl for user space users of the queue + */ +struct uacce_ops { + int (*get_available_instances)(struct uacce_device *uacce); + int (*get_queue)(struct uacce_device *uacce, unsigned long arg, + struct uacce_queue *q); + void (*put_queue)(struct uacce_queue *q); + int (*start_queue)(struct uacce_queue *q); + void (*stop_queue)(struct uacce_queue *q); + int (*is_q_updated)(struct uacce_queue *q); + int (*mmap)(struct uacce_queue *q, struct vm_area_struct *vma, + struct uacce_qfile_region *qfr); + long (*ioctl)(struct uacce_queue *q, unsigned int cmd, + unsigned long arg); +}; + +/** + * struct uacce_interface - interface required for uacce_register() + * @name: the uacce device name. Will show up in sysfs + * @flags: uacce device attributes + * @ops: pointer to the struct uacce_ops + */ +struct uacce_interface { + char name[UACCE_MAX_NAME_SIZE]; + unsigned int flags; + const struct uacce_ops *ops; +}; + +enum uacce_q_state { + UACCE_Q_ZOMBIE = 0, + UACCE_Q_INIT, + UACCE_Q_STARTED, +}; + +/** + * struct uacce_queue + * @uacce: pointer to uacce + * @priv: private pointer + * @wait: wait queue head + * @list: index into uacce_mm + * @uacce_mm: the corresponding mm + * @qfrs: pointer of qfr regions + * @state: queue state machine + */ +struct uacce_queue { + struct uacce_device *uacce; + void *priv; + wait_queue_head_t wait; + struct list_head list; + struct uacce_mm *uacce_mm; + struct uacce_qfile_region *qfrs[UACCE_MAX_REGION]; + enum uacce_q_state state; +}; + +/** + * struct uacce_device + * @algs: supported algorithms + * @api_ver: api version + * @ops: pointer to the struct uacce_ops + * @qf_pg_num: page numbers of the queue file regions + * @parent: pointer to the parent device + * @is_vf: whether virtual function + * @flags: uacce attributes + * @dev_id: id of the uacce device + * @cdev: cdev of the uacce + * @dev: dev of the uacce + * @priv: private pointer of the uacce + * @mm_list: list head of uacce_mm->list + * @mm_lock: lock for mm_list + */ +struct uacce_device { + const char *algs; + const char *api_ver; + const struct uacce_ops *ops; + unsigned long qf_pg_num[UACCE_MAX_REGION]; + struct device *parent; + bool is_vf; + u32 flags; + u32 dev_id; + struct cdev *cdev; + struct device dev; + void *priv; + struct list_head mm_list; + struct mutex mm_lock; +}; + +/** + * struct uacce_mm - keep track of queues bound to a process + * @list: index into uacce_device + * @queues: list of queues + * @mm: the mm struct + * @lock: protects the list of queues + * @pasid: pasid of the uacce_mm + * @handle: iommu_sva handle return from iommu_sva_bind_device + */ +struct uacce_mm { + struct list_head list; + struct list_head queues; + struct mm_struct *mm; + struct mutex lock; + int pasid; + struct iommu_sva *handle; +}; + +#if IS_ENABLED(CONFIG_UACCE) + +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface); +int uacce_register(struct uacce_device *uacce); +void uacce_remove(struct uacce_device *uacce); + +#else /* CONFIG_UACCE */ + +static inline +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface) +{ + return ERR_PTR(-ENODEV); +} + +static inline int uacce_register(struct uacce_device *uacce) +{ + return -EINVAL; +} + +static inline void uacce_remove(struct uacce_device *uacce) {} + +#endif /* CONFIG_UACCE */ + +#endif /* _LINUX_UACCE_H */ -- cgit v1.2.3 From 30332eeefec8f83afcea00c360f99ef64b87f220 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:18:55 +0100 Subject: debugfs: regset32: Add Runtime PM support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hardware registers of devices under control of power management cannot be accessed at all times. If such a device is suspended, register accesses may lead to undefined behavior, like reading bogus values, or causing exceptions or system lock-ups. Extend struct debugfs_regset32 with an optional field to let device drivers specify the device the registers in the set belong to. This allows debugfs_show_regset32() to make sure the device is resumed while its registers are being read. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Reviewed-by: Greg Kroah-Hartman Acked-by: Rafael J. Wysocki Signed-off-by: Herbert Xu --- include/linux/debugfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 3d013de64f70..ad416853e722 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -35,6 +35,7 @@ struct debugfs_regset32 { const struct debugfs_reg32 *regs; int nregs; void __iomem *base; + struct device *dev; /* Optional device for Runtime PM */ }; extern struct dentry *arch_debugfs_dir; -- cgit v1.2.3 From 6bc3f3979edce0b11deb685a4c817abb7d74b227 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 20 Feb 2020 07:20:17 -0600 Subject: USB: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200220132017.GA29262@embeddedor Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 4 ++-- include/linux/usb/audio-v2.h | 2 +- include/linux/usb/audio-v3.h | 2 +- include/linux/usb/gadget.h | 2 +- include/linux/usb/hcd.h | 2 +- include/linux/usbdevice_fs.h | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index ca1a5f1e1c5e..9f3c721c70dc 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -325,7 +325,7 @@ struct usb_interface_cache { /* variable-length array of alternate settings for this interface, * stored in no particular order */ - struct usb_host_interface altsetting[0]; + struct usb_host_interface altsetting[]; }; #define ref_to_usb_interface_cache(r) \ container_of(r, struct usb_interface_cache, ref) @@ -1589,7 +1589,7 @@ struct urb { int error_count; /* (return) number of ISO errors */ void *context; /* (in) context for completion */ usb_complete_t complete; /* (in) completion routine */ - struct usb_iso_packet_descriptor iso_frame_desc[0]; + struct usb_iso_packet_descriptor iso_frame_desc[]; /* (in) ISO ONLY */ }; diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index ba4b3e3327ff..5e31740c7e40 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -153,7 +153,7 @@ struct uac2_feature_unit_descriptor { __u8 bSourceID; /* bmaControls is actually u32, * but u8 is needed for the hybrid parser */ - __u8 bmaControls[0]; /* variable length */ + __u8 bmaControls[]; /* variable length */ } __attribute__((packed)); /* 4.9.2 Class-Specific AS Interface Descriptor */ diff --git a/include/linux/usb/audio-v3.h b/include/linux/usb/audio-v3.h index 6b708434b7f9..c69a6f2e6837 100644 --- a/include/linux/usb/audio-v3.h +++ b/include/linux/usb/audio-v3.h @@ -109,7 +109,7 @@ struct uac3_feature_unit_descriptor { __u8 bSourceID; /* bmaControls is actually u32, * but u8 is needed for the hybrid parser */ - __u8 bmaControls[0]; /* variable length */ + __u8 bmaControls[]; /* variable length */ /* wFeatureDescrStr omitted */ } __attribute__((packed)); diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 124462d65eac..9411c08a5c7e 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -767,7 +767,7 @@ struct usb_gadget_strings { struct usb_gadget_string_container { struct list_head list; - u8 *stash[0]; + u8 *stash[]; }; /* put descriptor for string with that id into buf (buflen >= 256) */ diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 712b2a603645..e12105ed3834 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -228,7 +228,7 @@ struct usb_hcd { /* The HC driver's private data is stored at the end of * this structure. */ - unsigned long hcd_priv[0] + unsigned long hcd_priv[] __attribute__ ((aligned(sizeof(s64)))); }; diff --git a/include/linux/usbdevice_fs.h b/include/linux/usbdevice_fs.h index 79aab0065ec8..14ea197ce37f 100644 --- a/include/linux/usbdevice_fs.h +++ b/include/linux/usbdevice_fs.h @@ -69,7 +69,7 @@ struct usbdevfs_urb32 { compat_int_t error_count; compat_uint_t signr; compat_caddr_t usercontext; /* unused */ - struct usbdevfs_iso_packet_desc iso_frame_desc[0]; + struct usbdevfs_iso_packet_desc iso_frame_desc[]; }; struct usbdevfs_ioctl32 { -- cgit v1.2.3 From a7495c28c86ab3b31508c5754bc5fb717ab1169c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Feb 2020 17:02:35 +0100 Subject: efi/libstub: Simplify efi_high_alloc() and rename to efi_allocate_pages() The implementation of efi_high_alloc() uses a complicated way of traversing the memory map to find an available region that is located as close as possible to the provided upper limit, and calls AllocatePages subsequently to create the allocation at that exact address. This is precisely what the EFI_ALLOCATE_MAX_ADDRESS allocation type argument to AllocatePages() does, and considering that EFI_ALLOC_ALIGN only exceeds EFI_PAGE_SIZE on arm64, let's use AllocatePages() directly and implement the alignment using code that the compiler can remove if it does not exceed EFI_PAGE_SIZE. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7efd7072cca5..7e231c3cfb6f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1508,8 +1508,8 @@ efi_status_t efi_low_alloc(unsigned long size, unsigned long align, return efi_low_alloc_above(size, align, addr, 0x8); } -efi_status_t efi_high_alloc(unsigned long size, unsigned long align, - unsigned long *addr, unsigned long max); +efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, + unsigned long max); efi_status_t efi_relocate_kernel(unsigned long *image_addr, unsigned long image_size, -- cgit v1.2.3 From a46a290a01149120f40f83a694d3e6041bcf8f70 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Feb 2020 17:02:37 +0100 Subject: efi/libstub: Use consistent type names for file I/O protocols Align the naming of efi_file_io_interface_t and efi_file_handle_t with the UEFI spec, and call them efi_simple_file_system_protocol_t and efi_file_protocol_t, respectively, using the same convention we use for all other type definitions that originate in the UEFI spec. While at it, move the definitions to efistub.h, so they are only seen by code that needs them. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 60 +---------------------------------------------------- 1 file changed, 1 insertion(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7e231c3cfb6f..2b228df18407 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -796,65 +796,7 @@ struct efi_fdt_params { u32 desc_ver; }; -typedef struct { - u32 revision; - efi_handle_t parent_handle; - efi_system_table_t *system_table; - efi_handle_t device_handle; - void *file_path; - void *reserved; - u32 load_options_size; - void *load_options; - void *image_base; - __aligned_u64 image_size; - unsigned int image_code_type; - unsigned int image_data_type; - efi_status_t ( __efiapi *unload)(efi_handle_t image_handle); -} efi_loaded_image_t; - -typedef struct { - u64 size; - u64 file_size; - u64 phys_size; - efi_time_t create_time; - efi_time_t last_access_time; - efi_time_t modification_time; - __aligned_u64 attribute; - efi_char16_t filename[1]; -} efi_file_info_t; - -typedef struct efi_file_handle efi_file_handle_t; - -struct efi_file_handle { - u64 revision; - efi_status_t (__efiapi *open)(efi_file_handle_t *, - efi_file_handle_t **, - efi_char16_t *, u64, u64); - efi_status_t (__efiapi *close)(efi_file_handle_t *); - void *delete; - efi_status_t (__efiapi *read)(efi_file_handle_t *, - unsigned long *, void *); - void *write; - void *get_position; - void *set_position; - efi_status_t (__efiapi *get_info)(efi_file_handle_t *, - efi_guid_t *, unsigned long *, - void *); - void *set_info; - void *flush; -}; - -typedef struct efi_file_io_interface efi_file_io_interface_t; - -struct efi_file_io_interface { - u64 revision; - int (__efiapi *open_volume)(efi_file_io_interface_t *, - efi_file_handle_t **); -}; - -#define EFI_FILE_MODE_READ 0x0000000000000001 -#define EFI_FILE_MODE_WRITE 0x0000000000000002 -#define EFI_FILE_MODE_CREATE 0x8000000000000000 +typedef struct efi_loaded_image efi_loaded_image_t; typedef struct { u32 version; -- cgit v1.2.3 From 8166ec091573fad528d884ede291fd1ec02d0298 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Feb 2020 17:02:38 +0100 Subject: efi/libstub: Move stub specific declarations into efistub.h Move all the declarations that are only used in stub code from linux/efi.h to efistub.h which is only included locally. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 456 +--------------------------------------------------- 1 file changed, 1 insertion(+), 455 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 2b228df18407..0e047d2738cd 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -56,19 +56,6 @@ typedef void *efi_handle_t; #define __efiapi #endif -#define efi_get_handle_at(array, idx) \ - (efi_is_native() ? (array)[idx] \ - : (efi_handle_t)(unsigned long)((u32 *)(array))[idx]) - -#define efi_get_handle_num(size) \ - ((size) / (efi_is_native() ? sizeof(efi_handle_t) : sizeof(u32))) - -#define for_each_efi_handle(handle, array, size, i) \ - for (i = 0; \ - i < efi_get_handle_num(size) && \ - ((handle = efi_get_handle_at((array), i)) || true); \ - i++) - /* * The UEFI spec and EDK2 reference implementation both define EFI_GUID as * struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment @@ -157,15 +144,6 @@ typedef struct { u32 imagesize; } efi_capsule_header_t; -struct efi_boot_memmap { - efi_memory_desc_t **map; - unsigned long *map_size; - unsigned long *desc_size; - u32 *desc_ver; - unsigned long *key_ptr; - unsigned long *buff_size; -}; - /* * EFI capsule flags */ @@ -187,14 +165,6 @@ struct capsule_info { int __efi_capsule_setup_info(struct capsule_info *cap_info); -/* - * Allocation types for calls to boottime->allocate_pages. - */ -#define EFI_ALLOCATE_ANY_PAGES 0 -#define EFI_ALLOCATE_MAX_ADDRESS 1 -#define EFI_ALLOCATE_ADDRESS 2 -#define EFI_MAX_ALLOCATE_TYPE 3 - typedef int (*efi_freemem_callback_t) (u64 start, u64 end, void *arg); /* @@ -224,291 +194,7 @@ typedef struct { u8 sets_to_zero; } efi_time_cap_t; -typedef struct { - efi_table_hdr_t hdr; - u32 raise_tpl; - u32 restore_tpl; - u32 allocate_pages; - u32 free_pages; - u32 get_memory_map; - u32 allocate_pool; - u32 free_pool; - u32 create_event; - u32 set_timer; - u32 wait_for_event; - u32 signal_event; - u32 close_event; - u32 check_event; - u32 install_protocol_interface; - u32 reinstall_protocol_interface; - u32 uninstall_protocol_interface; - u32 handle_protocol; - u32 __reserved; - u32 register_protocol_notify; - u32 locate_handle; - u32 locate_device_path; - u32 install_configuration_table; - u32 load_image; - u32 start_image; - u32 exit; - u32 unload_image; - u32 exit_boot_services; - u32 get_next_monotonic_count; - u32 stall; - u32 set_watchdog_timer; - u32 connect_controller; - u32 disconnect_controller; - u32 open_protocol; - u32 close_protocol; - u32 open_protocol_information; - u32 protocols_per_handle; - u32 locate_handle_buffer; - u32 locate_protocol; - u32 install_multiple_protocol_interfaces; - u32 uninstall_multiple_protocol_interfaces; - u32 calculate_crc32; - u32 copy_mem; - u32 set_mem; - u32 create_event_ex; -} __packed efi_boot_services_32_t; - -/* - * EFI Boot Services table - */ -typedef union { - struct { - efi_table_hdr_t hdr; - void *raise_tpl; - void *restore_tpl; - efi_status_t (__efiapi *allocate_pages)(int, int, unsigned long, - efi_physical_addr_t *); - efi_status_t (__efiapi *free_pages)(efi_physical_addr_t, - unsigned long); - efi_status_t (__efiapi *get_memory_map)(unsigned long *, void *, - unsigned long *, - unsigned long *, u32 *); - efi_status_t (__efiapi *allocate_pool)(int, unsigned long, - void **); - efi_status_t (__efiapi *free_pool)(void *); - void *create_event; - void *set_timer; - void *wait_for_event; - void *signal_event; - void *close_event; - void *check_event; - void *install_protocol_interface; - void *reinstall_protocol_interface; - void *uninstall_protocol_interface; - efi_status_t (__efiapi *handle_protocol)(efi_handle_t, - efi_guid_t *, void **); - void *__reserved; - void *register_protocol_notify; - efi_status_t (__efiapi *locate_handle)(int, efi_guid_t *, - void *, unsigned long *, - efi_handle_t *); - void *locate_device_path; - efi_status_t (__efiapi *install_configuration_table)(efi_guid_t *, - void *); - void *load_image; - void *start_image; - void *exit; - void *unload_image; - efi_status_t (__efiapi *exit_boot_services)(efi_handle_t, - unsigned long); - void *get_next_monotonic_count; - void *stall; - void *set_watchdog_timer; - void *connect_controller; - efi_status_t (__efiapi *disconnect_controller)(efi_handle_t, - efi_handle_t, - efi_handle_t); - void *open_protocol; - void *close_protocol; - void *open_protocol_information; - void *protocols_per_handle; - void *locate_handle_buffer; - efi_status_t (__efiapi *locate_protocol)(efi_guid_t *, void *, - void **); - void *install_multiple_protocol_interfaces; - void *uninstall_multiple_protocol_interfaces; - void *calculate_crc32; - void *copy_mem; - void *set_mem; - void *create_event_ex; - }; - efi_boot_services_32_t mixed_mode; -} efi_boot_services_t; - -typedef enum { - EfiPciIoWidthUint8, - EfiPciIoWidthUint16, - EfiPciIoWidthUint32, - EfiPciIoWidthUint64, - EfiPciIoWidthFifoUint8, - EfiPciIoWidthFifoUint16, - EfiPciIoWidthFifoUint32, - EfiPciIoWidthFifoUint64, - EfiPciIoWidthFillUint8, - EfiPciIoWidthFillUint16, - EfiPciIoWidthFillUint32, - EfiPciIoWidthFillUint64, - EfiPciIoWidthMaximum -} EFI_PCI_IO_PROTOCOL_WIDTH; - -typedef enum { - EfiPciIoAttributeOperationGet, - EfiPciIoAttributeOperationSet, - EfiPciIoAttributeOperationEnable, - EfiPciIoAttributeOperationDisable, - EfiPciIoAttributeOperationSupported, - EfiPciIoAttributeOperationMaximum -} EFI_PCI_IO_PROTOCOL_ATTRIBUTE_OPERATION; - -typedef struct { - u32 read; - u32 write; -} efi_pci_io_protocol_access_32_t; - -typedef union efi_pci_io_protocol efi_pci_io_protocol_t; - -typedef -efi_status_t (__efiapi *efi_pci_io_protocol_cfg_t)(efi_pci_io_protocol_t *, - EFI_PCI_IO_PROTOCOL_WIDTH, - u32 offset, - unsigned long count, - void *buffer); - -typedef struct { - void *read; - void *write; -} efi_pci_io_protocol_access_t; - -typedef struct { - efi_pci_io_protocol_cfg_t read; - efi_pci_io_protocol_cfg_t write; -} efi_pci_io_protocol_config_access_t; - -union efi_pci_io_protocol { - struct { - void *poll_mem; - void *poll_io; - efi_pci_io_protocol_access_t mem; - efi_pci_io_protocol_access_t io; - efi_pci_io_protocol_config_access_t pci; - void *copy_mem; - void *map; - void *unmap; - void *allocate_buffer; - void *free_buffer; - void *flush; - efi_status_t (__efiapi *get_location)(efi_pci_io_protocol_t *, - unsigned long *segment_nr, - unsigned long *bus_nr, - unsigned long *device_nr, - unsigned long *func_nr); - void *attributes; - void *get_bar_attributes; - void *set_bar_attributes; - uint64_t romsize; - void *romimage; - }; - struct { - u32 poll_mem; - u32 poll_io; - efi_pci_io_protocol_access_32_t mem; - efi_pci_io_protocol_access_32_t io; - efi_pci_io_protocol_access_32_t pci; - u32 copy_mem; - u32 map; - u32 unmap; - u32 allocate_buffer; - u32 free_buffer; - u32 flush; - u32 get_location; - u32 attributes; - u32 get_bar_attributes; - u32 set_bar_attributes; - u64 romsize; - u32 romimage; - } mixed_mode; -}; - -#define EFI_PCI_IO_ATTRIBUTE_ISA_MOTHERBOARD_IO 0x0001 -#define EFI_PCI_IO_ATTRIBUTE_ISA_IO 0x0002 -#define EFI_PCI_IO_ATTRIBUTE_VGA_PALETTE_IO 0x0004 -#define EFI_PCI_IO_ATTRIBUTE_VGA_MEMORY 0x0008 -#define EFI_PCI_IO_ATTRIBUTE_VGA_IO 0x0010 -#define EFI_PCI_IO_ATTRIBUTE_IDE_PRIMARY_IO 0x0020 -#define EFI_PCI_IO_ATTRIBUTE_IDE_SECONDARY_IO 0x0040 -#define EFI_PCI_IO_ATTRIBUTE_MEMORY_WRITE_COMBINE 0x0080 -#define EFI_PCI_IO_ATTRIBUTE_IO 0x0100 -#define EFI_PCI_IO_ATTRIBUTE_MEMORY 0x0200 -#define EFI_PCI_IO_ATTRIBUTE_BUS_MASTER 0x0400 -#define EFI_PCI_IO_ATTRIBUTE_MEMORY_CACHED 0x0800 -#define EFI_PCI_IO_ATTRIBUTE_MEMORY_DISABLE 0x1000 -#define EFI_PCI_IO_ATTRIBUTE_EMBEDDED_DEVICE 0x2000 -#define EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM 0x4000 -#define EFI_PCI_IO_ATTRIBUTE_DUAL_ADDRESS_CYCLE 0x8000 -#define EFI_PCI_IO_ATTRIBUTE_ISA_IO_16 0x10000 -#define EFI_PCI_IO_ATTRIBUTE_VGA_PALETTE_IO_16 0x20000 -#define EFI_PCI_IO_ATTRIBUTE_VGA_IO_16 0x40000 - -struct efi_dev_path; - -typedef union apple_properties_protocol apple_properties_protocol_t; - -union apple_properties_protocol { - struct { - unsigned long version; - efi_status_t (__efiapi *get)(apple_properties_protocol_t *, - struct efi_dev_path *, - efi_char16_t *, void *, u32 *); - efi_status_t (__efiapi *set)(apple_properties_protocol_t *, - struct efi_dev_path *, - efi_char16_t *, void *, u32); - efi_status_t (__efiapi *del)(apple_properties_protocol_t *, - struct efi_dev_path *, - efi_char16_t *); - efi_status_t (__efiapi *get_all)(apple_properties_protocol_t *, - void *buffer, u32 *); - }; - struct { - u32 version; - u32 get; - u32 set; - u32 del; - u32 get_all; - } mixed_mode; -}; - -typedef u32 efi_tcg2_event_log_format; - -typedef union efi_tcg2_protocol efi_tcg2_protocol_t; - -union efi_tcg2_protocol { - struct { - void *get_capability; - efi_status_t (__efiapi *get_event_log)(efi_handle_t, - efi_tcg2_event_log_format, - efi_physical_addr_t *, - efi_physical_addr_t *, - efi_bool_t *); - void *hash_log_extend_event; - void *submit_command; - void *get_active_pcr_banks; - void *set_active_pcr_banks; - void *get_result_of_set_active_pcr_banks; - }; - struct { - u32 get_capability; - u32 get_event_log; - u32 hash_log_extend_event; - u32 submit_command; - u32 get_active_pcr_banks; - u32 set_active_pcr_banks; - u32 get_result_of_set_active_pcr_banks; - } mixed_mode; -}; +typedef union efi_boot_services efi_boot_services_t; /* * Types and defines for EFI ResetSystem @@ -796,8 +482,6 @@ struct efi_fdt_params { u32 desc_ver; }; -typedef struct efi_loaded_image efi_loaded_image_t; - typedef struct { u32 version; u32 length; @@ -1130,13 +814,6 @@ extern int efi_status_to_err(efi_status_t status); */ #define EFI_VARIABLE_GUID_LEN UUID_STRING_LEN -/* - * The type of search to perform when calling boottime->locate_handle - */ -#define EFI_LOCATE_ALL_HANDLES 0 -#define EFI_LOCATE_BY_REGISTER_NOTIFY 1 -#define EFI_LOCATE_BY_PROTOCOL 2 - /* * EFI Device Path information */ @@ -1254,80 +931,6 @@ struct efivar_entry { bool deleting; }; -union efi_simple_text_output_protocol { - struct { - void *reset; - efi_status_t (__efiapi *output_string)(efi_simple_text_output_protocol_t *, - efi_char16_t *); - void *test_string; - }; - struct { - u32 reset; - u32 output_string; - u32 test_string; - } mixed_mode; -}; - -#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 -#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 -#define PIXEL_BIT_MASK 2 -#define PIXEL_BLT_ONLY 3 -#define PIXEL_FORMAT_MAX 4 - -typedef struct { - u32 red_mask; - u32 green_mask; - u32 blue_mask; - u32 reserved_mask; -} efi_pixel_bitmask_t; - -typedef struct { - u32 version; - u32 horizontal_resolution; - u32 vertical_resolution; - int pixel_format; - efi_pixel_bitmask_t pixel_information; - u32 pixels_per_scan_line; -} efi_graphics_output_mode_info_t; - -typedef union efi_graphics_output_protocol_mode efi_graphics_output_protocol_mode_t; - -union efi_graphics_output_protocol_mode { - struct { - u32 max_mode; - u32 mode; - efi_graphics_output_mode_info_t *info; - unsigned long size_of_info; - efi_physical_addr_t frame_buffer_base; - unsigned long frame_buffer_size; - }; - struct { - u32 max_mode; - u32 mode; - u32 info; - u32 size_of_info; - u64 frame_buffer_base; - u32 frame_buffer_size; - } mixed_mode; -}; - -typedef union efi_graphics_output_protocol efi_graphics_output_protocol_t; - -union efi_graphics_output_protocol { - struct { - void *query_mode; - void *set_mode; - void *blt; - efi_graphics_output_protocol_mode_t *mode; - }; - struct { - u32 query_mode; - u32 set_mode; - u32 blt; - u32 mode; - } mixed_mode; -}; - extern struct list_head efivar_sysfs_list; static inline void @@ -1425,52 +1028,6 @@ static inline int efi_runtime_map_copy(void *buf, size_t bufsz) #endif -/* prototypes shared between arch specific and generic stub code */ - -void efi_printk(char *str); - -void efi_free(unsigned long size, unsigned long addr); - -char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len); - -efi_status_t efi_get_memory_map(struct efi_boot_memmap *map); - -efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, - unsigned long *addr, unsigned long min); - -static inline -efi_status_t efi_low_alloc(unsigned long size, unsigned long align, - unsigned long *addr) -{ - /* - * Don't allocate at 0x0. It will confuse code that - * checks pointers against NULL. Skip the first 8 - * bytes so we start at a nice even number. - */ - return efi_low_alloc_above(size, align, addr, 0x8); -} - -efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, - unsigned long max); - -efi_status_t efi_relocate_kernel(unsigned long *image_addr, - unsigned long image_size, - unsigned long alloc_size, - unsigned long preferred_addr, - unsigned long alignment, - unsigned long min_addr); - -efi_status_t handle_cmdline_files(efi_loaded_image_t *image, - char *cmd_line, char *option_string, - unsigned long max_addr, - unsigned long *load_addr, - unsigned long *load_size); - -efi_status_t efi_parse_options(char const *cmdline); - -efi_status_t efi_setup_gop(struct screen_info *si, efi_guid_t *proto, - unsigned long size); - #ifdef CONFIG_EFI extern bool efi_runtime_disabled(void); #else @@ -1548,15 +1105,6 @@ void efi_retrieve_tpm2_eventlog(void); arch_efi_call_virt_teardown(); \ }) -typedef efi_status_t (*efi_exit_boot_map_processing)( - struct efi_boot_memmap *map, - void *priv); - -efi_status_t efi_exit_boot_services(void *handle, - struct efi_boot_memmap *map, - void *priv, - efi_exit_boot_map_processing priv_func); - #define EFI_RANDOM_SEED_SIZE 64U struct linux_efi_random_seed { @@ -1643,6 +1191,4 @@ struct linux_efi_memreserve { #define EFI_MEMRESERVE_COUNT(size) (((size) - sizeof(struct linux_efi_memreserve)) \ / sizeof(((struct linux_efi_memreserve *)0)->entry[0])) -void efi_pci_disable_bridge_busmaster(void); - #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From 2931d526d5674940d916a4b513a681ee3562e574 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Feb 2020 17:02:48 +0100 Subject: efi/libstub: Make the LoadFile EFI protocol accessible Add the protocol definitions, GUIDs and mixed mode glue so that the EFI loadfile protocol can be used from the stub. This will be used in a future patch to load the initrd. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 0e047d2738cd..9ccf313fe9de 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -332,6 +332,8 @@ void efi_native_runtime_setup(void); #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0) #define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f) +#define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) +#define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) -- cgit v1.2.3 From db8952e7094fde3a397321240d5d57ec111258d8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Feb 2020 08:46:57 +0000 Subject: efi/dev-path-parser: Add struct definition for vendor type device path nodes In preparation of adding support for loading the initrd via a special device path, add the struct definition of a vendor GUIDed device path node to efi.h. Since we will be producing these data structures rather than just consumsing the ones instantiated by the firmware, refactor the various device path node definitions so we can take the size of each node using sizeof() rather than having to resort to opaque arithmetic in the static initializers. While at it, drop the #if IS_ENABLED() check for the declaration of efi_get_device_by_path(), which is unnecessary, and constify its first argument as well. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 9ccf313fe9de..0976e57b4caa 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -855,30 +855,40 @@ extern int efi_status_to_err(efi_status_t status); #define EFI_DEV_END_ENTIRE 0xFF struct efi_generic_dev_path { - u8 type; - u8 sub_type; - u16 length; -} __attribute ((packed)); + u8 type; + u8 sub_type; + u16 length; +} __packed; + +struct efi_acpi_dev_path { + struct efi_generic_dev_path header; + u32 hid; + u32 uid; +} __packed; + +struct efi_pci_dev_path { + struct efi_generic_dev_path header; + u8 fn; + u8 dev; +} __packed; + +struct efi_vendor_dev_path { + struct efi_generic_dev_path header; + efi_guid_t vendorguid; + u8 vendordata[]; +} __packed; struct efi_dev_path { - u8 type; /* can be replaced with unnamed */ - u8 sub_type; /* struct efi_generic_dev_path; */ - u16 length; /* once we've moved to -std=c11 */ union { - struct { - u32 hid; - u32 uid; - } acpi; - struct { - u8 fn; - u8 dev; - } pci; + struct efi_generic_dev_path header; + struct efi_acpi_dev_path acpi; + struct efi_pci_dev_path pci; + struct efi_vendor_dev_path vendor; }; -} __attribute ((packed)); +} __packed; -#if IS_ENABLED(CONFIG_EFI_DEV_PATH_PARSER) -struct device *efi_get_device_by_path(struct efi_dev_path **node, size_t *len); -#endif +struct device *efi_get_device_by_path(const struct efi_dev_path **node, + size_t *len); static inline void memrange_efi_to_native(u64 *addr, u64 *npages) { -- cgit v1.2.3 From ec93fc371f014a6fb483e3556061ecad4b40735c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Feb 2020 23:45:14 +0000 Subject: efi/libstub: Add support for loading the initrd from a device path There are currently two ways to specify the initrd to be passed to the Linux kernel when booting via the EFI stub: - it can be passed as a initrd= command line option when doing a pure PE boot (as opposed to the EFI handover protocol that exists for x86) - otherwise, the bootloader or firmware can load the initrd into memory, and pass the address and size via the bootparams struct (x86) or device tree (ARM) In the first case, we are limited to loading from the same file system that the kernel was loaded from, and it is also problematic in a trusted boot context, given that we cannot easily protect the command line from tampering without either adding complicated white/blacklisting of boot arguments or locking down the command line altogether. In the second case, we force the bootloader to duplicate knowledge about the boot protocol which is already encoded in the stub, and which may be subject to change over time, e.g., bootparams struct definitions, memory allocation/alignment requirements for the placement of the initrd etc etc. In the ARM case, it also requires the bootloader to modify the hardware description provided by the firmware, as it is passed in the same file. On systems where the initrd is measured after loading, it creates a time window where the initrd contents might be manipulated in memory before handing over to the kernel. Address these concerns by adding support for loading the initrd into memory by invoking the EFI LoadFile2 protocol installed on a vendor GUIDed device path that specifically designates a Linux initrd. This addresses the above concerns, by putting the EFI stub in charge of placement in memory and of passing the base and size to the kernel proper (via whatever means it desires) while still leaving it up to the firmware or bootloader to obtain the file contents, potentially from other file systems than the one the kernel itself was loaded from. On platforms that implement measured boot, it permits the firmware to take the measurement right before the kernel actually consumes the contents. Acked-by: Laszlo Ersek Tested-by: Ilias Apalodimas Acked-by: Ilias Apalodimas Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 0976e57b4caa..1bf482daa22d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -353,6 +353,7 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) #define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) +#define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) /* OEM GUIDs */ #define DELLEMC_EFI_RCI2_TABLE_GUID EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55) -- cgit v1.2.3 From 50d53c58dd77d3b0b6a5afe391eaac3722fc3153 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 19 Jan 2020 15:29:21 +0100 Subject: efi: Drop handling of 'boot_info' configuration table Some plumbing exists to handle a UEFI configuration table of type BOOT_INFO but since we never match it to a GUID anywhere, we never actually register such a table, or access it, for that matter. So simply drop all mentions of it. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 1bf482daa22d..c517d3b7986b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -536,7 +536,6 @@ extern struct efi { unsigned long acpi20; /* ACPI table (ACPI 2.0) */ unsigned long smbios; /* SMBIOS table (32 bit entry point) */ unsigned long smbios3; /* SMBIOS table (64 bit entry point) */ - unsigned long boot_info; /* boot info table */ unsigned long hcdp; /* HCDP table */ unsigned long uga; /* UGA table */ unsigned long fw_vendor; /* fw_vendor */ -- cgit v1.2.3 From 120540f230d5d2d32846adc0156b58961c8c59d1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 19 Jan 2020 15:43:53 +0100 Subject: efi/ia64: Move HCDP and MPS table handling into IA64 arch code The HCDP and MPS tables are Itanium specific EFI config tables, so move their handling to ia64 arch code. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index c517d3b7986b..45443932104f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -531,12 +531,10 @@ typedef struct { extern struct efi { efi_system_table_t *systab; /* EFI system table */ unsigned int runtime_version; /* Runtime services version */ - unsigned long mps; /* MPS table */ unsigned long acpi; /* ACPI table (IA64 ext 0.71) */ unsigned long acpi20; /* ACPI table (ACPI 2.0) */ unsigned long smbios; /* SMBIOS table (32 bit entry point) */ unsigned long smbios3; /* SMBIOS table (64 bit entry point) */ - unsigned long hcdp; /* HCDP table */ unsigned long uga; /* UGA table */ unsigned long fw_vendor; /* fw_vendor */ unsigned long runtime; /* runtime table */ -- cgit v1.2.3 From fd506e0cf9fd4306aa0eb57cbff5f00514da8179 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 19 Jan 2020 16:17:59 +0100 Subject: efi: Move UGA and PROP table handling to x86 code The UGA table is x86 specific (its handling was introduced when the EFI support code was modified to accommodate IA32), so there is no need to handle it in generic code. The EFI properties table is not strictly x86 specific, but it was deprecated almost immediately after having been introduced, due to implementation difficulties. Only x86 takes it into account today, and this is not going to change, so make this table x86 only as well. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 45443932104f..e091f2aff61d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -535,12 +535,10 @@ extern struct efi { unsigned long acpi20; /* ACPI table (ACPI 2.0) */ unsigned long smbios; /* SMBIOS table (32 bit entry point) */ unsigned long smbios3; /* SMBIOS table (64 bit entry point) */ - unsigned long uga; /* UGA table */ unsigned long fw_vendor; /* fw_vendor */ unsigned long runtime; /* runtime table */ unsigned long config_table; /* config tables */ unsigned long esrt; /* ESRT table */ - unsigned long properties_table; /* properties table */ unsigned long mem_attr_table; /* memory attributes table */ unsigned long rng_seed; /* UEFI firmware random seed */ unsigned long tpm_log; /* TPM2 Event Log table */ -- cgit v1.2.3 From 5d288dbd88606d8f215c7138b10649115d79cadd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 22 Jan 2020 14:58:15 +0100 Subject: efi: Make rng_seed table handling local to efi.c Move the rng_seed table address from struct efi into a static global variable in efi.c, which is the only place we ever refer to it anyway. This reduces the footprint of struct efi, which is a r/w data structure that is shared with the world. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index e091f2aff61d..36380542e054 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -540,7 +540,6 @@ extern struct efi { unsigned long config_table; /* config tables */ unsigned long esrt; /* ESRT table */ unsigned long mem_attr_table; /* memory attributes table */ - unsigned long rng_seed; /* UEFI firmware random seed */ unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ unsigned long mem_reserve; /* Linux EFI memreserve table */ -- cgit v1.2.3 From a17e809ea573e69474064ba2bbff06d212861e19 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 22 Jan 2020 15:05:12 +0100 Subject: efi: Move mem_attr_table out of struct efi The memory attributes table is only used at init time by the core EFI code, so there is no need to carry its address in struct efi that is shared with the world. So move it out, and make it __ro_after_init as well, considering that the value is set during early boot. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 36380542e054..b093fce1cf59 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -539,7 +539,6 @@ extern struct efi { unsigned long runtime; /* runtime table */ unsigned long config_table; /* config tables */ unsigned long esrt; /* ESRT table */ - unsigned long mem_attr_table; /* memory attributes table */ unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ unsigned long mem_reserve; /* Linux EFI memreserve table */ @@ -641,6 +640,8 @@ extern void __init efi_fake_memmap(void); static inline void efi_fake_memmap(void) { } #endif +extern unsigned long efi_mem_attr_table; + /* * efi_memattr_perm_setter - arch specific callback function passed into * efi_memattr_apply_permissions() that updates the -- cgit v1.2.3 From b7846e6be235c4a19337a32168b27ed836a1504e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 22 Jan 2020 15:06:54 +0100 Subject: efi: Make memreserve table handling local to efi.c There is no need for struct efi to carry the address of the memreserve table and share it with the world. So move it out and make it __initdata as well. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index b093fce1cf59..a5e210abe4ca 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -541,7 +541,6 @@ extern struct efi { unsigned long esrt; /* ESRT table */ unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ - unsigned long mem_reserve; /* Linux EFI memreserve table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; -- cgit v1.2.3 From 14fb4209094355928d5a742e35afabdf7b404c17 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Jan 2020 10:49:11 +0100 Subject: efi: Merge EFI system table revision and vendor checks We have three different versions of the code that checks the EFI system table revision and copies the firmware vendor string, and they are mostly equivalent, with the exception of the use of early_memremap_ro vs. __va() and the lowest major revision to warn about. Let's move this into common code and factor out the commonalities. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index a5e210abe4ca..287510e84dfb 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -616,6 +616,10 @@ static inline void efi_esrt_init(void) { } #endif extern int efi_config_parse_tables(void *config_tables, int count, int sz, efi_config_table_type_t *arch_tables); +extern int efi_systab_check_header(const efi_table_hdr_t *systab_hdr, + int min_major_version); +extern void efi_systab_report_header(const efi_table_hdr_t *systab_hdr, + unsigned long fw_vendor); extern u64 efi_get_iobase (void); extern int efi_mem_type(unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); -- cgit v1.2.3 From 3a0701dc7ff8ebe1031a9f64c99c638929cd2d70 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Jan 2020 17:17:27 +0100 Subject: efi: Make efi_config_init() x86 only The efi_config_init() routine is no longer shared with ia64 so let's move it into the x86 arch code before making further x86 specific changes to it. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 287510e84dfb..d61c25fd5824 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -608,7 +608,6 @@ extern int __init efi_memmap_split_count(efi_memory_desc_t *md, extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, void *buf, struct efi_mem_range *mem); -extern int efi_config_init(efi_config_table_type_t *arch_tables); #ifdef CONFIG_EFI_ESRT extern void __init efi_esrt_init(void); #else -- cgit v1.2.3 From 06c0bd93434c5b9b284773f90bb054aff591d5be Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 22 Jan 2020 14:40:57 +0100 Subject: efi: Clean up config_parse_tables() config_parse_tables() is a jumble of pointer arithmetic, due to the fact that on x86, we may be dealing with firmware whose native word size differs from the kernel's. This is not a concern on other architectures, and doesn't quite justify the state of the code, so let's clean it up by adding a non-x86 code path, constifying statically allocated tables and replacing preprocessor conditionals with IS_ENABLED() checks. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d61c25fd5824..99a7fcbe5e9b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -613,8 +613,9 @@ extern void __init efi_esrt_init(void); #else static inline void efi_esrt_init(void) { } #endif -extern int efi_config_parse_tables(void *config_tables, int count, int sz, - efi_config_table_type_t *arch_tables); +extern int efi_config_parse_tables(const efi_config_table_t *config_tables, + int count, + const efi_config_table_type_t *arch_tables); extern int efi_systab_check_header(const efi_table_hdr_t *systab_hdr, int min_major_version); extern void efi_systab_report_header(const efi_table_hdr_t *systab_hdr, -- cgit v1.2.3 From 9cd437ac0ef4f324a92e2579784b03bb487ae7fb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Jan 2020 17:23:21 +0100 Subject: efi/x86: Make fw_vendor, config_table and runtime sysfs nodes x86 specific There is some code that exposes physical addresses of certain parts of the EFI firmware implementation via sysfs nodes. These nodes are only used on x86, and are of dubious value to begin with, so let's move their handling into the x86 arch code. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 99a7fcbe5e9b..a42045568df3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -535,9 +535,6 @@ extern struct efi { unsigned long acpi20; /* ACPI table (ACPI 2.0) */ unsigned long smbios; /* SMBIOS table (32 bit entry point) */ unsigned long smbios3; /* SMBIOS table (64 bit entry point) */ - unsigned long fw_vendor; /* fw_vendor */ - unsigned long runtime; /* runtime table */ - unsigned long config_table; /* config tables */ unsigned long esrt; /* ESRT table */ unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ -- cgit v1.2.3 From 59f2a619a2db86111e8bb30f349aebff6eb75baa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Jan 2020 09:44:43 +0100 Subject: efi: Add 'runtime' pointer to struct efi Instead of going through the EFI system table each time, just copy the runtime services table pointer into struct efi directly. This is the last use of the system table pointer in struct efi, allowing us to drop it in a future patch, along with a fair amount of quirky handling of the translated address. Note that usually, the runtime services pointer changes value during the call to SetVirtualAddressMap(), so grab the updated value as soon as that call returns. (Mixed mode uses a 1:1 mapping, and kexec boot enters with the updated address in the system table, so in those cases, we don't need to do anything here) Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index a42045568df3..1f69c4c2dd5c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -529,6 +529,7 @@ typedef struct { * All runtime access to EFI goes through this structure: */ extern struct efi { + const efi_runtime_services_t *runtime; /* EFI runtime services table */ efi_system_table_t *systab; /* EFI system table */ unsigned int runtime_version; /* Runtime services version */ unsigned long acpi; /* ACPI table (IA64 ext 0.71) */ -- cgit v1.2.3 From fd26830423e5f7442001f090cd4a53f4b6c3d9fa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Jan 2020 10:16:32 +0100 Subject: efi/x86: Drop 'systab' member from struct efi The systab member in struct efi has outlived its usefulness, now that we have better ways to access the only piece of information we are interested in after init, which is the EFI runtime services table address. So instead of instantiating a doctored copy at early boot with lots of mangled values, and switching the pointer when switching into virtual mode, let's grab the values we need directly, and get rid of the systab pointer entirely. Tested-by: Tony Luck # arch/ia64 Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 52 +++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 1f69c4c2dd5c..575e6aa39514 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -530,31 +530,33 @@ typedef struct { */ extern struct efi { const efi_runtime_services_t *runtime; /* EFI runtime services table */ - efi_system_table_t *systab; /* EFI system table */ - unsigned int runtime_version; /* Runtime services version */ - unsigned long acpi; /* ACPI table (IA64 ext 0.71) */ - unsigned long acpi20; /* ACPI table (ACPI 2.0) */ - unsigned long smbios; /* SMBIOS table (32 bit entry point) */ - unsigned long smbios3; /* SMBIOS table (64 bit entry point) */ - unsigned long esrt; /* ESRT table */ - unsigned long tpm_log; /* TPM2 Event Log table */ - unsigned long tpm_final_log; /* TPM2 Final Events Log table */ - efi_get_time_t *get_time; - efi_set_time_t *set_time; - efi_get_wakeup_time_t *get_wakeup_time; - efi_set_wakeup_time_t *set_wakeup_time; - efi_get_variable_t *get_variable; - efi_get_next_variable_t *get_next_variable; - efi_set_variable_t *set_variable; - efi_set_variable_t *set_variable_nonblocking; - efi_query_variable_info_t *query_variable_info; - efi_query_variable_info_t *query_variable_info_nonblocking; - efi_update_capsule_t *update_capsule; - efi_query_capsule_caps_t *query_capsule_caps; - efi_get_next_high_mono_count_t *get_next_high_mono_count; - efi_reset_system_t *reset_system; - struct efi_memory_map memmap; - unsigned long flags; + unsigned int runtime_version; /* Runtime services version */ + + unsigned long acpi; /* ACPI table (IA64 ext 0.71) */ + unsigned long acpi20; /* ACPI table (ACPI 2.0) */ + unsigned long smbios; /* SMBIOS table (32 bit entry point) */ + unsigned long smbios3; /* SMBIOS table (64 bit entry point) */ + unsigned long esrt; /* ESRT table */ + unsigned long tpm_log; /* TPM2 Event Log table */ + unsigned long tpm_final_log; /* TPM2 Final Events Log table */ + + efi_get_time_t *get_time; + efi_set_time_t *set_time; + efi_get_wakeup_time_t *get_wakeup_time; + efi_set_wakeup_time_t *set_wakeup_time; + efi_get_variable_t *get_variable; + efi_get_next_variable_t *get_next_variable; + efi_set_variable_t *set_variable; + efi_set_variable_t *set_variable_nonblocking; + efi_query_variable_info_t *query_variable_info; + efi_query_variable_info_t *query_variable_info_nonblocking; + efi_update_capsule_t *update_capsule; + efi_query_capsule_caps_t *query_capsule_caps; + efi_get_next_high_mono_count_t *get_next_high_mono_count; + efi_reset_system_t *reset_system; + + struct efi_memory_map memmap; + unsigned long flags; } efi; extern struct mm_struct efi_mm; -- cgit v1.2.3 From 3b2e4b4c634cc7dd4730ce3e1c75b8206dcc4b04 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 18 Feb 2020 10:19:34 +0100 Subject: efi/arm: Move FDT specific definitions into fdtparams.c Push the FDT params specific types and definition into fdtparams.c, and instead, pass a reference to the memory map data structure and populate it directly, and return the system table address as the return value. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 575e6aa39514..a0008e3d4e9d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -477,14 +477,6 @@ struct efi_mem_range { u64 attribute; }; -struct efi_fdt_params { - u64 system_table; - u64 mmap; - u32 mmap_size; - u32 desc_size; - u32 desc_ver; -}; - typedef struct { u32 version; u32 length; @@ -631,7 +623,7 @@ extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -extern int efi_get_fdt_params(struct efi_fdt_params *params); +extern u64 efi_get_fdt_params(struct efi_memory_map_data *data); extern struct kobject *efi_kobj; extern int efi_reboot_quirk_mode; -- cgit v1.2.3 From 96a3dd3dece8134ba19b0ded7e6663136d3107b9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Jan 2020 11:17:47 +0100 Subject: efi: Store mask of supported runtime services in struct efi Revision 2.8 of the UEFI spec introduces provisions for firmware to advertise lack of support for certain runtime services at OS runtime. Let's store this mask in struct efi for easy access. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index a0008e3d4e9d..57695f400044 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -523,6 +523,7 @@ typedef struct { extern struct efi { const efi_runtime_services_t *runtime; /* EFI runtime services table */ unsigned int runtime_version; /* Runtime services version */ + unsigned int runtime_supported_mask; unsigned long acpi; /* ACPI table (IA64 ext 0.71) */ unsigned long acpi20; /* ACPI table (ACPI 2.0) */ @@ -551,6 +552,26 @@ extern struct efi { unsigned long flags; } efi; +#define EFI_RT_SUPPORTED_GET_TIME 0x0001 +#define EFI_RT_SUPPORTED_SET_TIME 0x0002 +#define EFI_RT_SUPPORTED_GET_WAKEUP_TIME 0x0004 +#define EFI_RT_SUPPORTED_SET_WAKEUP_TIME 0x0008 +#define EFI_RT_SUPPORTED_GET_VARIABLE 0x0010 +#define EFI_RT_SUPPORTED_GET_NEXT_VARIABLE_NAME 0x0020 +#define EFI_RT_SUPPORTED_SET_VARIABLE 0x0040 +#define EFI_RT_SUPPORTED_SET_VIRTUAL_ADDRESS_MAP 0x0080 +#define EFI_RT_SUPPORTED_CONVERT_POINTER 0x0100 +#define EFI_RT_SUPPORTED_GET_NEXT_HIGH_MONOTONIC_COUNT 0x0200 +#define EFI_RT_SUPPORTED_RESET_SYSTEM 0x0400 +#define EFI_RT_SUPPORTED_UPDATE_CAPSULE 0x0800 +#define EFI_RT_SUPPORTED_QUERY_CAPSULE_CAPABILITIES 0x1000 +#define EFI_RT_SUPPORTED_QUERY_VARIABLE_INFO 0x2000 + +#define EFI_RT_SUPPORTED_ALL 0x3fff + +#define EFI_RT_SUPPORTED_TIME_SERVICES 0x000f +#define EFI_RT_SUPPORTED_VARIABLE_SERVICES 0x0070 + extern struct mm_struct efi_mm; static inline int @@ -761,6 +782,11 @@ static inline bool __pure efi_soft_reserve_enabled(void) return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) && __efi_soft_reserve_enabled(); } + +static inline bool efi_rt_services_supported(unsigned int mask) +{ + return (efi.runtime_supported_mask & mask) == mask; +} #else static inline bool efi_enabled(int feature) { @@ -779,6 +805,11 @@ static inline bool efi_soft_reserve_enabled(void) { return false; } + +static inline bool efi_rt_services_supported(unsigned int mask) +{ + return false; +} #endif extern int efi_status_to_err(efi_status_t status); -- cgit v1.2.3 From fe4db90a80cd12ebe4efe385d40d6636330149ed Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 23 Jan 2020 13:10:25 +0100 Subject: efi: Add support for EFI_RT_PROPERTIES table Take the newly introduced EFI_RT_PROPERTIES_TABLE configuration table into account, which carries a mask of which EFI runtime services are still functional after ExitBootServices() has been called by the OS. Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 57695f400044..2ab33d5d6ca5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -334,6 +334,7 @@ void efi_native_runtime_setup(void); #define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f) #define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d) +#define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) @@ -486,6 +487,14 @@ typedef struct { #define EFI_PROPERTIES_TABLE_VERSION 0x00010000 #define EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA 0x1 +typedef struct { + u16 version; + u16 length; + u32 runtime_services_supported; +} efi_rt_properties_table_t; + +#define EFI_RT_PROPERTIES_TABLE_VERSION 0x1 + #define EFI_INVALID_TABLE_ADDR (~0UL) typedef struct { -- cgit v1.2.3 From 148d3f716c208738d2c7ff8c80be18d89ec8a06e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Feb 2020 11:06:00 +0100 Subject: efi/libstub: Introduce symbolic constants for the stub major/minor version Now that we have added new ways to load the initrd or the mixed mode kernel, we will also need a way to tell the loader about this. Add symbolic constants for the PE/COFF major/minor version numbers (which fortunately have always been 0x0 for all architectures), so that we can bump them later to document the capabilities of the stub. Signed-off-by: Ard Biesheuvel --- include/linux/pe.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pe.h b/include/linux/pe.h index c86bd3a2f70f..e0869f3eadd6 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -10,6 +10,9 @@ #include +#define LINUX_EFISTUB_MAJOR_VERSION 0x0 +#define LINUX_EFISTUB_MINOR_VERSION 0x0 + #define MZ_MAGIC 0x5a4d /* "MZ" */ #define PE_MAGIC 0x00004550 /* "PE\0\0" */ -- cgit v1.2.3 From dc235d62fc60a6549238eda7ff29769457fe5663 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Feb 2020 11:57:20 +0100 Subject: efi: Bump the Linux EFI stub major version number to #1 Now that we have introduced new, generic ways for the OS loader to interface with Linux kernels during boot, we need to record this fact in a way that allows loaders to discover this information, and fall back to the existing methods for older kernels. Signed-off-by: Ard Biesheuvel --- include/linux/pe.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pe.h b/include/linux/pe.h index e0869f3eadd6..8ad71d763a77 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -10,7 +10,25 @@ #include -#define LINUX_EFISTUB_MAJOR_VERSION 0x0 +/* + * Linux EFI stub v1.0 adds the following functionality: + * - Loading initrd from the LINUX_EFI_INITRD_MEDIA_GUID device path, + * - Loading/starting the kernel from firmware that targets a different + * machine type, via the entrypoint exposed in the .compat PE/COFF section. + * + * The recommended way of loading and starting v1.0 or later kernels is to use + * the LoadImage() and StartImage() EFI boot services, and expose the initrd + * via the LINUX_EFI_INITRD_MEDIA_GUID device path. + * + * Versions older than v1.0 support initrd loading via the image load options + * (using initrd=, limited to the volume from which the kernel itself was + * loaded), or via arch specific means (bootparams, DT, etc). + * + * On x86, LoadImage() and StartImage() can be omitted if the EFI handover + * protocol is implemented, which can be inferred from the version, + * handover_offset and xloadflags fields in the bootparams structure. + */ +#define LINUX_EFISTUB_MAJOR_VERSION 0x1 #define LINUX_EFISTUB_MINOR_VERSION 0x0 #define MZ_MAGIC 0x5a4d /* "MZ" */ -- cgit v1.2.3 From 4f929d0877543df8a834afa5b8732d469c05cd84 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 20 Feb 2020 17:56:49 +0200 Subject: firmware: imx: Remove IMX_SC_RPC_SVC_ABORT This is not used by linux and not supported as part of imx SCU api, it was added by mistake. The constant value "9" has since been reassigned in firmware to a different service. Signed-off-by: Leonard Crestez Signed-off-by: Shawn Guo --- include/linux/firmware/imx/ipc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/imx/ipc.h b/include/linux/firmware/imx/ipc.h index 6312c8cb084a..891057434858 100644 --- a/include/linux/firmware/imx/ipc.h +++ b/include/linux/firmware/imx/ipc.h @@ -25,7 +25,6 @@ enum imx_sc_rpc_svc { IMX_SC_RPC_SVC_PAD = 6, IMX_SC_RPC_SVC_MISC = 7, IMX_SC_RPC_SVC_IRQ = 8, - IMX_SC_RPC_SVC_ABORT = 9 }; struct imx_sc_rpc_msg { -- cgit v1.2.3 From 5779dd0a7dbd71e82478fb0bf125cc6cd3c43266 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 24 Feb 2020 15:23:34 +0530 Subject: PCI: endpoint: Use notification chain mechanism to notify EPC events to EPF Use atomic_notifier_call_chain() to notify EPC events like linkup to EPF driver instead of using linkup ops in EPF driver. This is in preparation for adding proper locking mechanism to EPF ops. This will also enable to add more events (in addition to linkup) in the future. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi Tested-by: Vidya Sagar --- include/linux/pci-epc.h | 8 ++++++++ include/linux/pci-epf.h | 6 ++---- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 56f1846b9d39..36644ccd32ac 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -89,6 +89,7 @@ struct pci_epc_mem { * @max_functions: max number of functions that can be configured in this EPC * @group: configfs group representing the PCI EPC device * @lock: spinlock to protect pci_epc ops + * @notifier: used to notify EPF of any EPC events (like linkup) */ struct pci_epc { struct device dev; @@ -99,6 +100,7 @@ struct pci_epc { struct config_group *group; /* spinlock to protect against concurrent access of EP controller */ spinlock_t lock; + struct atomic_notifier_head notifier; }; /** @@ -141,6 +143,12 @@ static inline void *epc_get_drvdata(struct pci_epc *epc) return dev_get_drvdata(&epc->dev); } +static inline int +pci_epc_register_notifier(struct pci_epc *epc, struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&epc->notifier, nb); +} + struct pci_epc * __devm_pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, struct module *owner); diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 2d6f07556682..4993f7f6439b 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -55,13 +55,10 @@ struct pci_epf_header { * @bind: ops to perform when a EPC device has been bound to EPF device * @unbind: ops to perform when a binding has been lost between a EPC device * and EPF device - * @linkup: ops to perform when the EPC device has established a connection with - * a host system */ struct pci_epf_ops { int (*bind)(struct pci_epf *epf); void (*unbind)(struct pci_epf *epf); - void (*linkup)(struct pci_epf *epf); }; /** @@ -112,6 +109,7 @@ struct pci_epf_bar { * @epc: the EPC device to which this EPF device is bound * @driver: the EPF driver to which this EPF device is bound * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc + * @nb: notifier block to notify EPF of any EPC events (like linkup) */ struct pci_epf { struct device dev; @@ -125,6 +123,7 @@ struct pci_epf { struct pci_epc *epc; struct pci_epf_driver *driver; struct list_head list; + struct notifier_block nb; }; #define to_pci_epf(epf_dev) container_of((epf_dev), struct pci_epf, dev) @@ -154,5 +153,4 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar); int pci_epf_bind(struct pci_epf *epf); void pci_epf_unbind(struct pci_epf *epf); -void pci_epf_linkup(struct pci_epf *epf); #endif /* __LINUX_PCI_EPF_H */ -- cgit v1.2.3 From 3d3248dbd018502f654064c78efcd2e165ab3486 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 24 Feb 2020 15:23:35 +0530 Subject: PCI: endpoint: Replace spinlock with mutex The pci_epc_ops is not intended to be invoked from interrupt context. Hence replace spin_lock_irqsave and spin_unlock_irqrestore with mutex_lock and mutex_unlock respectively. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi --- include/linux/pci-epc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 36644ccd32ac..9dd60f2e9705 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -88,7 +88,7 @@ struct pci_epc_mem { * @mem: address space of the endpoint controller * @max_functions: max number of functions that can be configured in this EPC * @group: configfs group representing the PCI EPC device - * @lock: spinlock to protect pci_epc ops + * @lock: mutex to protect pci_epc ops * @notifier: used to notify EPF of any EPC events (like linkup) */ struct pci_epc { @@ -98,8 +98,8 @@ struct pci_epc { struct pci_epc_mem *mem; u8 max_functions; struct config_group *group; - /* spinlock to protect against concurrent access of EP controller */ - spinlock_t lock; + /* mutex to protect against concurrent access of EP controller */ + struct mutex lock; struct atomic_notifier_head notifier; }; -- cgit v1.2.3 From 04e046ca57ebed3943422dee10eec9e73aec081e Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 24 Feb 2020 15:23:36 +0530 Subject: PCI: endpoint: Fix for concurrent memory allocation in OB address region pci-epc-mem uses a bitmap to manage the Endpoint outbound (OB) address region. This address region will be shared by multiple endpoint functions (in the case of multi function endpoint) and it has to be protected from concurrent access to avoid updating an inconsistent state. Use a mutex to protect bitmap updates to prevent the memory allocation API from returning incorrect addresses. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org # v4.14+ --- include/linux/pci-epc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 9dd60f2e9705..4e3e527c49d1 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -71,6 +71,7 @@ struct pci_epc_ops { * @bitmap: bitmap to manage the PCI address space * @pages: number of bits representing the address region * @page_size: size of each page + * @lock: mutex to protect bitmap */ struct pci_epc_mem { phys_addr_t phys_base; @@ -78,6 +79,8 @@ struct pci_epc_mem { unsigned long *bitmap; size_t page_size; int pages; + /* mutex to protect against concurrent access for memory allocation*/ + struct mutex lock; }; /** -- cgit v1.2.3 From 07301c982643a432212840a4b648b5d3f5a061fa Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 24 Feb 2020 15:23:37 +0530 Subject: PCI: endpoint: Protect concurrent access to pci_epf_ops with mutex Protect concurrent access to pci_epf_ops with a mutex. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi --- include/linux/pci-epf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 4993f7f6439b..bcdf4f07bde7 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -110,6 +110,7 @@ struct pci_epf_bar { * @driver: the EPF driver to which this EPF device is bound * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc * @nb: notifier block to notify EPF of any EPC events (like linkup) + * @lock: mutex to protect pci_epf_ops */ struct pci_epf { struct device dev; @@ -124,6 +125,8 @@ struct pci_epf { struct pci_epf_driver *driver; struct list_head list; struct notifier_block nb; + /* mutex to protect against concurrent access of pci_epf_ops */ + struct mutex lock; }; #define to_pci_epf(epf_dev) container_of((epf_dev), struct pci_epf, dev) -- cgit v1.2.3 From 2499ee84e02774a8573b7b4c76c8f2ea38669313 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 24 Feb 2020 15:23:38 +0530 Subject: PCI: endpoint: Assign function number for each PF in EPC core The PCIe endpoint core relies on the drivers that invoke the pci_epc_add_epf() API to allocate and assign a function number to each physical function (PF). Since endpoint function device can be created by multiple mechanisms (configfs, devicetree, etc..), allowing each of these mechanisms to assign a function number would result in mutliple endpoint function devices having the same function number. In order to avoid this, let EPC core assign a function number to the endpoint device. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi --- include/linux/pci-epc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 4e3e527c49d1..ccaf6e3fa931 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -92,6 +92,7 @@ struct pci_epc_mem { * @max_functions: max number of functions that can be configured in this EPC * @group: configfs group representing the PCI EPC device * @lock: mutex to protect pci_epc ops + * @function_num_map: bitmap to manage physical function number * @notifier: used to notify EPF of any EPC events (like linkup) */ struct pci_epc { @@ -103,6 +104,7 @@ struct pci_epc { struct config_group *group; /* mutex to protect against concurrent access of EP controller */ struct mutex lock; + unsigned long function_num_map; struct atomic_notifier_head notifier; }; -- cgit v1.2.3 From 0dacee1bfa70e171be3a12a30414c228453048d2 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 24 Feb 2020 09:52:17 +0000 Subject: sched/pelt: Remove unused runnable load average Now that runnable_load_avg is no more used, we can remove it to make space for a new signal. Signed-off-by: Vincent Guittot Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Reviewed-by: "Dietmar Eggemann " Acked-by: Peter Zijlstra Cc: Juri Lelli Cc: Valentin Schneider Cc: Phil Auld Cc: Hillf Danton Link: https://lore.kernel.org/r/20200224095223.13361-8-mgorman@techsingularity.net --- include/linux/sched.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 04278493bf15..037eaffabc24 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -357,7 +357,7 @@ struct util_est { /* * The load_avg/util_avg accumulates an infinite geometric series - * (see __update_load_avg() in kernel/sched/fair.c). + * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c). * * [load_avg definition] * @@ -401,11 +401,9 @@ struct util_est { struct sched_avg { u64 last_update_time; u64 load_sum; - u64 runnable_load_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; - unsigned long runnable_load_avg; unsigned long util_avg; struct util_est util_est; } ____cacheline_aligned; @@ -449,7 +447,6 @@ struct sched_statistics { struct sched_entity { /* For load-balancing: */ struct load_weight load; - unsigned long runnable_weight; struct rb_node run_node; struct list_head group_node; unsigned int on_rq; -- cgit v1.2.3 From 9f68395333ad7f5bfe2f83473fed363d4229f11c Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 24 Feb 2020 09:52:18 +0000 Subject: sched/pelt: Add a new runnable average signal Now that runnable_load_avg has been removed, we can replace it by a new signal that will highlight the runnable pressure on a cfs_rq. This signal track the waiting time of tasks on rq and can help to better define the state of rqs. At now, only util_avg is used to define the state of a rq: A rq with more that around 80% of utilization and more than 1 tasks is considered as overloaded. But the util_avg signal of a rq can become temporaly low after that a task migrated onto another rq which can bias the classification of the rq. When tasks compete for the same rq, their runnable average signal will be higher than util_avg as it will include the waiting time and we can use this signal to better classify cfs_rqs. The new runnable_avg will track the runnable time of a task which simply adds the waiting time to the running time. The runnable _avg of cfs_rq will be the /Sum of se's runnable_avg and the runnable_avg of group entity will follow the one of the rq similarly to util_avg. Signed-off-by: Vincent Guittot Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Reviewed-by: "Dietmar Eggemann " Acked-by: Peter Zijlstra Cc: Juri Lelli Cc: Valentin Schneider Cc: Phil Auld Cc: Hillf Danton Link: https://lore.kernel.org/r/20200224095223.13361-9-mgorman@techsingularity.net --- include/linux/sched.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 037eaffabc24..2e9199bf947b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -356,28 +356,30 @@ struct util_est { } __attribute__((__aligned__(sizeof(u64)))); /* - * The load_avg/util_avg accumulates an infinite geometric series + * The load/runnable/util_avg accumulates an infinite geometric series * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c). * * [load_avg definition] * * load_avg = runnable% * scale_load_down(load) * - * where runnable% is the time ratio that a sched_entity is runnable. - * For cfs_rq, it is the aggregated load_avg of all runnable and - * blocked sched_entities. + * [runnable_avg definition] + * + * runnable_avg = runnable% * SCHED_CAPACITY_SCALE * * [util_avg definition] * * util_avg = running% * SCHED_CAPACITY_SCALE * - * where running% is the time ratio that a sched_entity is running on - * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable - * and blocked sched_entities. + * where runnable% is the time ratio that a sched_entity is runnable and + * running% the time ratio that a sched_entity is running. + * + * For cfs_rq, they are the aggregated values of all runnable and blocked + * sched_entities. * - * load_avg and util_avg don't direcly factor frequency scaling and CPU - * capacity scaling. The scaling is done through the rq_clock_pelt that - * is used for computing those signals (see update_rq_clock_pelt()) + * The load/runnable/util_avg doesn't direcly factor frequency scaling and CPU + * capacity scaling. The scaling is done through the rq_clock_pelt that is used + * for computing those signals (see update_rq_clock_pelt()) * * N.B., the above ratios (runnable% and running%) themselves are in the * range of [0, 1]. To do fixed point arithmetics, we therefore scale them @@ -401,9 +403,11 @@ struct util_est { struct sched_avg { u64 last_update_time; u64 load_sum; + u64 runnable_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; + unsigned long runnable_avg; unsigned long util_avg; struct util_est util_est; } ____cacheline_aligned; @@ -467,6 +471,8 @@ struct sched_entity { struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; + /* cached value of my_q->h_nr_running */ + unsigned long runnable_weight; #endif #ifdef CONFIG_SMP -- cgit v1.2.3 From 4e52889f48fedfa5f2e443dc8490550cb49fbdc3 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 13 Jan 2020 10:48:53 +0100 Subject: media: atmel: atmel-isc-base: expose white balance as v4l2 controls This exposes the white balance configuration of the ISC as v4l2 controls into userspace. There are 8 controls available: 4 gain controls, sliders, for each of the BAYER components: R, B, GR, GB. These gains are multipliers for each component, in format unsigned 0:4:9 with a default value of 512 (1.0 multiplier). 4 offset controls, sliders, for each of the BAYER components: R, B, GR, GB. These offsets are added/substracted from each component, in format signed 1:12:0 with a default value of 0 (+/- 0) To expose this to userspace, added 8 custom controls, in an auto cluster. To summarize the functionality: The auto cluster switch is the auto white balance control, and it works like this: AWB == 1: autowhitebalance is on, the do_white_balance button is inactive, the gains/offsets are inactive, but volatile and readable. Thus, the results of the whitebalance algorithm are available to userspace to read at any time. AWB == 0: autowhitebalance is off, cluster is in manual mode, user can configure the gain/offsets directly. More than that, if the do_white_balance button is pressed, the driver will perform one-time-adjustment, (preferably with color checker card) and the userspace can read again the new values. With this feature, the userspace can save the coefficients and reinstall them for example after reboot or reprobing the driver. [hverkuil: fix checkpatch warning] [hverkuil: minor spacing adjustments in the functionality description] Signed-off-by: Eugen Hristev Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/atmel-isc-media.h | 58 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 include/linux/atmel-isc-media.h (limited to 'include/linux') diff --git a/include/linux/atmel-isc-media.h b/include/linux/atmel-isc-media.h new file mode 100644 index 000000000000..79a320fb724e --- /dev/null +++ b/include/linux/atmel-isc-media.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2019 Microchip Technology Inc. and its subsidiaries + * + * Author: Eugen Hristev + */ + +#ifndef __LINUX_ATMEL_ISC_MEDIA_H__ +#define __LINUX_ATMEL_ISC_MEDIA_H__ + +/* + * There are 8 controls available: + * 4 gain controls, sliders, for each of the BAYER components: R, B, GR, GB. + * These gains are multipliers for each component, in format unsigned 0:4:9 with + * a default value of 512 (1.0 multiplier). + * 4 offset controls, sliders, for each of the BAYER components: R, B, GR, GB. + * These offsets are added/substracted from each component, in format signed + * 1:12:0 with a default value of 0 (+/- 0) + * + * To expose this to userspace, added 8 custom controls, in an auto cluster. + * + * To summarize the functionality: + * The auto cluster switch is the auto white balance control, and it works + * like this: + * AWB == 1: autowhitebalance is on, the do_white_balance button is inactive, + * the gains/offsets are inactive, but volatile and readable. + * Thus, the results of the whitebalance algorithm are available to userspace to + * read at any time. + * AWB == 0: autowhitebalance is off, cluster is in manual mode, user can + * configure the gain/offsets directly. + * More than that, if the do_white_balance button is + * pressed, the driver will perform one-time-adjustment, (preferably with color + * checker card) and the userspace can read again the new values. + * + * With this feature, the userspace can save the coefficients and reinstall them + * for example after reboot or reprobing the driver. + */ + +enum atmel_isc_ctrl_id { + /* Red component gain control */ + ISC_CID_R_GAIN = (V4L2_CID_USER_ATMEL_ISC_BASE + 0), + /* Blue component gain control */ + ISC_CID_B_GAIN, + /* Green Red component gain control */ + ISC_CID_GR_GAIN, + /* Green Blue gain control */ + ISC_CID_GB_GAIN, + /* Red component offset control */ + ISC_CID_R_OFFSET, + /* Blue component offset control */ + ISC_CID_B_OFFSET, + /* Green Red component offset control */ + ISC_CID_GR_OFFSET, + /* Green Blue component offset control */ + ISC_CID_GB_OFFSET, +}; + +#endif -- cgit v1.2.3 From 7bc3e6e55acf065500a24621f3b313e7e5998acf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 19 Feb 2020 18:22:26 -0600 Subject: proc: Use a list of inodes to flush from proc Rework the flushing of proc to use a list of directory inodes that need to be flushed. The list is kept on struct pid not on struct task_struct, as there is a fixed connection between proc inodes and pids but at least for the case of de_thread the pid of a task_struct changes. This removes the dependency on proc_mnt which allows for different mounts of proc having different mount options even in the same pid namespace and this allows for the removal of proc_mnt which will trivially the first mount of proc to honor it's mount options. This flushing remains an optimization. The functions pid_delete_dentry and pid_revalidate ensure that ordinary dcache management will not attempt to use dentries past the point their respective task has died. When unused the shrinker will eventually be able to remove these dentries. There is a case in de_thread where proc_flush_pid can be called early for a given pid. Which winds up being safe (if suboptimal) as this is just an optiimization. Only pid directories are put on the list as the other per pid files are children of those directories and d_invalidate on the directory will get them as well. So that the pid can be used during flushing it's reference count is taken in release_task and dropped in proc_flush_pid. Further the call of proc_flush_pid is moved after the tasklist_lock is released in release_task so that it is certain that the pid has already been unhashed when flushing it taking place. This removes a small race where a dentry could recreated. As struct pid is supposed to be small and I need a per pid lock I reuse the only lock that currently exists in struct pid the the wait_pidfd.lock. The net result is that this adds all of this functionality with just a little extra list management overhead and a single extra pointer in struct pid. v2: Initialize pid->inodes. I somehow failed to get that initialization into the initial version of the patch. A boot failure was reported by "kernel test robot ", and failure to initialize that pid->inodes matches all of the reported symptoms. Signed-off-by: Eric W. Biederman --- include/linux/pid.h | 1 + include/linux/proc_fs.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 998ae7d24450..01a0d4e28506 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -62,6 +62,7 @@ struct pid unsigned int level; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; + struct hlist_head inodes; /* wait queue for pidfd notifications */ wait_queue_head_t wait_pidfd; struct rcu_head rcu; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3dfa92633af3..40a7982b7285 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -32,7 +32,7 @@ struct proc_ops { typedef int (*proc_write_t)(struct file *, char *, size_t); extern void proc_root_init(void); -extern void proc_flush_task(struct task_struct *); +extern void proc_flush_pid(struct pid *); extern struct proc_dir_entry *proc_symlink(const char *, struct proc_dir_entry *, const char *); @@ -105,7 +105,7 @@ static inline void proc_root_init(void) { } -static inline void proc_flush_task(struct task_struct *task) +static inline void proc_flush_pid(struct pid *pid) { } -- cgit v1.2.3 From ce69e2162f158d9d4a0e513971d02dabc7d14cb7 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 24 Feb 2020 16:53:58 -0600 Subject: mdio_bus: Add generic mdio_find_bus() It appears most ethernet drivers follow one of two main strategies for mdio bus/phy management. A monolithic model where the net driver itself creates, probes and uses the phy, and one where an external mdio/phy driver instantiates the mdio bus/phy and the net driver only attaches to a known phy. Usually in this latter model the phys are discovered via DT relationships or simply phy name/address hardcoding. This is a shame because modern well behaved mdio buses are self describing and can be probed. The mdio layer itself is fully capable of this, yet there isn't a clean way for a standalone net driver to attach and enumerate the discovered devices. This is because outside of of_mdio_find_bus() there isn't a straightforward way to acquire the mii_bus pointer. So, lets add a mdio_find_bus which can return the mii_bus based only on its name. Signed-off-by: Jeremy Linton Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 80f8b2158271..e72dbd0d2d6a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -289,6 +289,7 @@ static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) return devm_mdiobus_alloc_size(dev, 0); } +struct mii_bus *mdio_find_bus(const char *mdio_name); void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); -- cgit v1.2.3 From 3c58482a382bae89410439247152eb342e9872f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Feb 2020 15:01:41 +0100 Subject: bpf: Provide bpf_prog_run_pin_on_cpu() helper BPF programs require to run on one CPU to completion as they use per CPU storage, but according to Alexei they don't need reentrancy protection as obviously BPF programs running in thread context can always be 'preempted' by hard and soft interrupts and instrumentation and the same program can run concurrently on a different CPU. The currently used mechanism to ensure CPUness is to wrap the invocation into a preempt_disable/enable() pair. Disabling preemption is also disabling migration for a task. preempt_disable/enable() is used because there is no explicit way to reliably disable only migration. Provide a separate macro to invoke a BPF program which can be used in migrateable task context. It wraps BPF_PROG_RUN() in a migrate_disable/enable() pair which maps on non RT enabled kernels to preempt_disable/enable(). On RT enabled kernels this merely disables migration. Both methods ensure that the invoked BPF program runs on one CPU to completion. Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.474592620@linutronix.de --- include/linux/filter.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index f349e2c0884c..38f60188bb26 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -576,8 +576,30 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); } \ ret; }) -#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \ - bpf_dispatcher_nopfunc) +#define BPF_PROG_RUN(prog, ctx) \ + __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc) + +/* + * Use in preemptible and therefore migratable context to make sure that + * the execution of the BPF program runs on one CPU. + * + * This uses migrate_disable/enable() explicitly to document that the + * invocation of a BPF program does not require reentrancy protection + * against a BPF program which is invoked from a preempting task. + * + * For non RT enabled kernels migrate_disable/enable() maps to + * preempt_disable/enable(), i.e. it disables also preemption. + */ +static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, + const void *ctx) +{ + u32 ret; + + migrate_disable(); + ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc); + migrate_enable(); + return ret; +} #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN -- cgit v1.2.3 From 37e1d9202225635772b32e340294208367279c2b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Feb 2020 15:01:42 +0100 Subject: bpf: Replace cant_sleep() with cant_migrate() As already discussed in the previous change which introduced BPF_RUN_PROG_PIN_ON_CPU() BPF only requires to disable migration to guarantee per CPUness. If RT substitutes the preempt disable based migration protection then the cant_sleep() check will obviously trigger as preemption is not disabled. Replace it by cant_migrate() which maps to cant_sleep() on a non RT kernel and will verify that migration is disabled on a full RT kernel. Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.583038889@linutronix.de --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 38f60188bb26..1982a52eb4c9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -561,7 +561,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); #define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ u32 ret; \ - cant_sleep(); \ + cant_migrate(); \ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ struct bpf_prog_stats *stats; \ u64 start = sched_clock(); \ -- cgit v1.2.3 From 3d9f773cf2876c01a505b9fe27270901d464e90a Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 24 Feb 2020 15:01:43 +0100 Subject: bpf: Use bpf_prog_run_pin_on_cpu() at simple call sites. All of these cases are strictly of the form: preempt_disable(); BPF_PROG_RUN(...); preempt_enable(); Replace this with bpf_prog_run_pin_on_cpu() which wraps BPF_PROG_RUN() with: migrate_disable(); BPF_PROG_RUN(...); migrate_enable(); On non RT enabled kernels this maps to preempt_disable/enable() and on RT enabled kernels this solely prevents migration, which is sufficient as there is no requirement to prevent reentrancy to any BPF program from a preempting task. The only requirement is that the program stays on the same CPU. Therefore, this is a trivially correct transformation. The seccomp loop does not need protection over the loop. It only needs protection per BPF filter program [ tglx: Converted to bpf_prog_run_pin_on_cpu() ] Signed-off-by: David S. Miller Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.691493094@linutronix.de --- include/linux/filter.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1982a52eb4c9..9270de2a0df8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -717,9 +717,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN); - preempt_disable(); - res = BPF_PROG_RUN(prog, skb); - preempt_enable(); + res = bpf_prog_run_pin_on_cpu(prog, skb); return res; } -- cgit v1.2.3 From 2a916f2f546ca1c1e3323e2a4269307f6d9890eb Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 24 Feb 2020 15:01:46 +0100 Subject: bpf: Use migrate_disable/enable in array macros and cgroup/lirc code. Replace the preemption disable/enable with migrate_disable/enable() to reflect the actual requirement and to allow PREEMPT_RT to substitute it with an actual migration disable mechanism which does not disable preemption. Including the code paths that go via __bpf_prog_run_save_cb(). Signed-off-by: David S. Miller Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145643.998293311@linutronix.de --- include/linux/bpf.h | 8 ++++---- include/linux/filter.h | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 49b1a70e12c8..76b3a0eb1502 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -885,7 +885,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *_prog; \ struct bpf_prog_array *_array; \ u32 _ret = 1; \ - preempt_disable(); \ + migrate_disable(); \ rcu_read_lock(); \ _array = rcu_dereference(array); \ if (unlikely(check_non_null && !_array))\ @@ -898,7 +898,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, } \ _out: \ rcu_read_unlock(); \ - preempt_enable(); \ + migrate_enable(); \ _ret; \ }) @@ -932,7 +932,7 @@ _out: \ u32 ret; \ u32 _ret = 1; \ u32 _cn = 0; \ - preempt_disable(); \ + migrate_disable(); \ rcu_read_lock(); \ _array = rcu_dereference(array); \ _item = &_array->items[0]; \ @@ -944,7 +944,7 @@ _out: \ _item++; \ } \ rcu_read_unlock(); \ - preempt_enable(); \ + migrate_enable(); \ if (_ret) \ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ else \ diff --git a/include/linux/filter.h b/include/linux/filter.h index 9270de2a0df8..43b5e455d2f5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -677,6 +677,7 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb) return qdisc_skb_cb(skb)->data; } +/* Must be invoked with migration disabled */ static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, struct sk_buff *skb) { @@ -702,9 +703,9 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, { u32 res; - preempt_disable(); + migrate_disable(); res = __bpf_prog_run_save_cb(prog, skb); - preempt_enable(); + migrate_enable(); return res; } -- cgit v1.2.3 From c518cfa0c5ad75ddf3d743f1e35b9cf5fc2c346e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Feb 2020 15:01:47 +0100 Subject: bpf: Provide recursion prevention helpers The places which need to prevent the execution of trace type BPF programs to prevent deadlocks on the hash bucket lock do this open coded. Provide two inline functions, bpf_disable/enable_instrumentation() to replace these open coded protection constructs. Use migrate_disable/enable() instead of preempt_disable/enable() right away so this works on RT enabled kernels. On a !RT kernel migrate_disable / enable() are mapped to preempt_disable/enable(). These helpers use this_cpu_inc/dec() instead of __this_cpu_inc/dec() on an RT enabled kernel because migrate disabled regions are preemptible and preemption might hit in the middle of a RMW operation which can lead to inconsistent state. Signed-off-by: Thomas Gleixner Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200224145644.103910133@linutronix.de --- include/linux/bpf.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 76b3a0eb1502..1acd5bf70350 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -961,6 +961,36 @@ _out: \ #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +/* + * Block execution of BPF programs attached to instrumentation (perf, + * kprobes, tracepoints) to prevent deadlocks on map operations as any of + * these events can happen inside a region which holds a map bucket lock + * and can deadlock on it. + * + * Use the preemption safe inc/dec variants on RT because migrate disable + * is preemptible on RT and preemption in the middle of the RMW operation + * might lead to inconsistent state. Use the raw variants for non RT + * kernels as migrate_disable() maps to preempt_disable() so the slightly + * more expensive save operation can be avoided. + */ +static inline void bpf_disable_instrumentation(void) +{ + migrate_disable(); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_inc(bpf_prog_active); + else + __this_cpu_inc(bpf_prog_active); +} + +static inline void bpf_enable_instrumentation(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_dec(bpf_prog_active); + else + __this_cpu_dec(bpf_prog_active); + migrate_enable(); +} + extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; -- cgit v1.2.3 From 0231453bc08f63584545dda1c05d61b19755d3a9 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Tue, 14 Jan 2020 18:08:43 -0600 Subject: soundwire: bus: add clock stop helpers SoundWire supports two clock stop modes. Add support to handle the clock stop modes and add pm_runtime calls in the bus. Credits: this patch is based on an earlier internal contribution by Vinod Koul, Sanyog Kale, Shreyas Nc and Hardik Shah. Signed-off-by: Bard Liao Signed-off-by: Rander Wang Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200115000844.14695-10-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index b451bb622335..b8427df034ce 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -79,6 +79,21 @@ enum sdw_slave_status { SDW_SLAVE_RESERVED = 3, }; +/** + * enum sdw_clk_stop_type: clock stop operations + * + * @SDW_CLK_PRE_PREPARE: pre clock stop prepare + * @SDW_CLK_POST_PREPARE: post clock stop prepare + * @SDW_CLK_PRE_DEPREPARE: pre clock stop de-prepare + * @SDW_CLK_POST_DEPREPARE: post clock stop de-prepare + */ +enum sdw_clk_stop_type { + SDW_CLK_PRE_PREPARE = 0, + SDW_CLK_POST_PREPARE, + SDW_CLK_PRE_DEPREPARE, + SDW_CLK_POST_DEPREPARE, +}; + /** * enum sdw_command_response - Command response as defined by SDW spec * @SDW_CMD_OK: cmd was successful @@ -533,6 +548,11 @@ struct sdw_slave_ops { int (*port_prep)(struct sdw_slave *slave, struct sdw_prepare_ch *prepare_ch, enum sdw_port_prep_ops pre_ops); + int (*get_clk_stop_mode)(struct sdw_slave *slave); + int (*clk_stop)(struct sdw_slave *slave, + enum sdw_clk_stop_mode mode, + enum sdw_clk_stop_type type); + }; /** @@ -575,6 +595,7 @@ struct sdw_slave { #endif struct list_head node; struct completion *port_ready; + enum sdw_clk_stop_mode curr_clk_stop_mode; u16 dev_num; u16 dev_num_sticky; bool probed; @@ -892,6 +913,9 @@ int sdw_prepare_stream(struct sdw_stream_runtime *stream); int sdw_enable_stream(struct sdw_stream_runtime *stream); int sdw_disable_stream(struct sdw_stream_runtime *stream); int sdw_deprepare_stream(struct sdw_stream_runtime *stream); +int sdw_bus_prep_clk_stop(struct sdw_bus *bus); +int sdw_bus_clk_stop(struct sdw_bus *bus); +int sdw_bus_exit_clk_stop(struct sdw_bus *bus); /* messaging and data APIs */ -- cgit v1.2.3 From 3d5f7d9f6a38ddcc105ebfb23b640630bbabba65 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 17 Feb 2020 17:40:32 +0530 Subject: PCI: endpoint: Add core init notifying feature Add a new feature core_init_notifier for cores that can notify about their availability for initialization. Signed-off-by: Vidya Sagar Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index ccaf6e3fa931..9ffe6bd081ae 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -120,6 +120,7 @@ struct pci_epc { */ struct pci_epc_features { unsigned int linkup_notifier : 1; + unsigned int core_init_notifier : 1; unsigned int msi_capable : 1; unsigned int msix_capable : 1; u8 reserved_bar; -- cgit v1.2.3 From 0ef22dcf0c1871888c4c0ee46a9d9c494f2fe997 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 17 Feb 2020 17:40:34 +0530 Subject: PCI: endpoint: Add notification for core init completion Add support to send notifications to EPF from EPC once the core registers initialization is complete. Signed-off-by: Vidya Sagar Signed-off-by: Lorenzo Pieralisi Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 1 + include/linux/pci-epf.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 9ffe6bd081ae..0d7e91bad91e 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -165,6 +165,7 @@ void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc); void pci_epc_destroy(struct pci_epc *epc); int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf); void pci_epc_linkup(struct pci_epc *epc); +void pci_epc_init_notify(struct pci_epc *epc); void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf); int pci_epc_write_header(struct pci_epc *epc, u8 func_no, struct pci_epf_header *hdr); diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index bcdf4f07bde7..0c628e30c582 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -15,6 +15,11 @@ struct pci_epf; +enum pci_notify_event { + CORE_INIT, + LINK_UP, +}; + enum pci_barno { BAR_0, BAR_1, -- cgit v1.2.3 From 2c6cff682d6681fb1cdb03b3cdbbecd3fb0e4c89 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 25 Feb 2020 11:00:41 -0600 Subject: soundwire: add helper macros for devID fields Move bit extractors to macros, so that the definitions can be used by other drivers parsing the MIPI definitions extracted from firmware tables (ACPI or DT). Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200225170041.23644-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index b451bb622335..56273c5c1f6b 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -424,6 +424,29 @@ struct sdw_slave_id { __u8 sdw_version:4; }; +/* + * Helper macros to extract the MIPI-defined IDs + * + * Spec definition + * Register Bit Contents + * DevId_0 [7:4] 47:44 sdw_version + * DevId_0 [3:0] 43:40 unique_id + * DevId_1 39:32 mfg_id [15:8] + * DevId_2 31:24 mfg_id [7:0] + * DevId_3 23:16 part_id [15:8] + * DevId_4 15:08 part_id [7:0] + * DevId_5 07:00 class_id + * + * The MIPI DisCo for SoundWire defines in addition the link_id as bits 51:48 + */ + +#define SDW_DISCO_LINK_ID(adr) (((adr) >> 48) & GENMASK(3, 0)) +#define SDW_VERSION(adr) (((adr) >> 44) & GENMASK(3, 0)) +#define SDW_UNIQUE_ID(adr) (((adr) >> 40) & GENMASK(3, 0)) +#define SDW_MFG_ID(adr) (((adr) >> 24) & GENMASK(15, 0)) +#define SDW_PART_ID(adr) (((adr) >> 8) & GENMASK(15, 0)) +#define SDW_CLASS_ID(adr) ((adr) & GENMASK(7, 0)) + /** * struct sdw_slave_intr_status - Slave interrupt status * @control_port: control port status -- cgit v1.2.3 From 5ee0caf161a9a21b9410b29f3cae8e1e98d1b100 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 26 Feb 2020 13:24:21 +0200 Subject: video: hdmi: Change return type of hdmi_avi_infoframe_init() to void The hdmi_avi_infoframe_init() never needs to return an error, change its return type to void. Signed-off-by: Laurent Pinchart Reviewed-by: Andrzej Hajda Acked-by: Bartlomiej Zolnierkiewicz Reviewed-by: Boris Brezillon Acked-by: Sam Ravnborg Tested-by: Sebastian Reichel Reviewed-by: Sebastian Reichel Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20200226112514.12455-2-laurent.pinchart@ideasonboard.com --- include/linux/hdmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 9918a6c910c5..9613d796cfb1 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -207,7 +207,7 @@ struct hdmi_drm_infoframe { u16 max_fall; }; -int hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame); +void hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame); ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer, size_t size); ssize_t hdmi_avi_infoframe_pack_only(const struct hdmi_avi_infoframe *frame, -- cgit v1.2.3 From c7896490dd1a4e6b346e8a475e4a433356362770 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 7 Jan 2020 15:10:28 +0100 Subject: leds: ns2: Absorb platform data Nothing in the kernel includes the external header so just push the contents into the ns2 leds driver. If someone wants to use platform data or board files to describe this device they should be able to do so using GPIO machine descriptors but in any case device tree should be the way forward for these systems in all cases I can think of, and the driver already supports that. Cc: Simon Guinot Cc: Vincent Donnefort Signed-off-by: Linus Walleij Tested-by: Simon Guinot Signed-off-by: Pavel Machek --- include/linux/platform_data/leds-kirkwood-ns2.h | 38 ------------------------- 1 file changed, 38 deletions(-) delete mode 100644 include/linux/platform_data/leds-kirkwood-ns2.h (limited to 'include/linux') diff --git a/include/linux/platform_data/leds-kirkwood-ns2.h b/include/linux/platform_data/leds-kirkwood-ns2.h deleted file mode 100644 index eb8a6860e816..000000000000 --- a/include/linux/platform_data/leds-kirkwood-ns2.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Platform data structure for Network Space v2 LED driver - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef __LEDS_KIRKWOOD_NS2_H -#define __LEDS_KIRKWOOD_NS2_H - -enum ns2_led_modes { - NS_V2_LED_OFF, - NS_V2_LED_ON, - NS_V2_LED_SATA, -}; - -struct ns2_led_modval { - enum ns2_led_modes mode; - int cmd_level; - int slow_level; -}; - -struct ns2_led { - const char *name; - const char *default_trigger; - unsigned cmd; - unsigned slow; - int num_modes; - struct ns2_led_modval *modval; -}; - -struct ns2_led_platform_data { - int num_leds; - struct ns2_led *leds; -}; - -#endif /* __LEDS_KIRKWOOD_NS2_H */ -- cgit v1.2.3 From feaa8baee82ababa46af95b03cfc28680ad647a6 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 24 Feb 2020 12:58:03 -0800 Subject: bus: ti-sysc: Implement SoC revision handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to know SoC type and features for cases where the same SoC may be installed in various versions on the same board and would need a separate dts file otherwise for the different variants. For example, am3703 is pin compatible with omap3630, but has sgx and iva accelerators disabled. We must not try to access the sgx or iva module registers on am3703, and need to set the unavailable devices disabled early. Let's also detect omap3430 as that is needed for display subsystem (DSS) reset later on, and GP vs EMU or HS devices. Further SoC specific disabled device detection can be added as needed, such as dra71x vs dra76x rtc and usb4. Cc: Adam Ford Cc: André Hentschel Cc: H. Nikolaus Schaller Cc: Keerthy Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 2cbde6542849..accab5325cf3 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -141,6 +141,7 @@ struct clk; struct ti_sysc_platform_data { struct of_dev_auxdata *auxdata; + bool (*soc_type_gp)(void); int (*init_clockdomain)(struct device *dev, struct clk *fck, struct clk *ick, struct ti_sysc_cookie *cookie); void (*clkdm_deny_idle)(struct device *dev, -- cgit v1.2.3 From e8639e1c986a8a9d0f94549170f6db579376c3ae Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 24 Feb 2020 12:58:03 -0800 Subject: bus: ti-sysc: Handle module unlock quirk needed for some RTC The RTC modules on am3 and am4 need quirk handling to unlock and lock them for reset so let's add the quirk handling based on what we already have for legacy platform data. In later patches we will simply drop the RTC related platform data and the old quirk handling. Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index accab5325cf3..0b33c3b7302f 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -49,6 +49,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_MODULE_QUIRK_RTC_UNLOCK BIT(22) #define SYSC_QUIRK_CLKDM_NOAUTO BIT(21) #define SYSC_QUIRK_FORCE_MSTANDBY BIT(20) #define SYSC_MODULE_QUIRK_AESS BIT(19) -- cgit v1.2.3 From 7324a7a0d5e232551eedad69fea3e4b91973d7c6 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 24 Feb 2020 12:58:03 -0800 Subject: bus: ti-sysc: Implement display subsystem reset quirk The display subsystem (DSS) needs the child outputs disabled for reset. In order to prepare to probe DSS without legacy platform data, let's implement sysc_pre_reset_quirk_dss() similar to what we have for the platform data with omap_dss_reset(). Note that we cannot directly use the old omap_dss_reset() without platform data callbacks and updating omap_dss_reset() to understand struct device. And we will be dropping omap_dss_reset() anyways when all the SoCs are probing with device tree, so let's not mess with the legacy code at all. Cc: Jyri Sarha Cc: Laurent Pinchart Cc: Tomi Valkeinen Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 0b33c3b7302f..ecd3a979a14d 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -49,6 +49,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_MODULE_QUIRK_DSS_RESET BIT(23) #define SYSC_MODULE_QUIRK_RTC_UNLOCK BIT(22) #define SYSC_QUIRK_CLKDM_NOAUTO BIT(21) #define SYSC_QUIRK_FORCE_MSTANDBY BIT(20) -- cgit v1.2.3 From f70ce185687bbe4e2d7ff126a8c890631f5fc2af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:11 +0100 Subject: sysfs: add sysfs_file_change_owner() Add helpers to change the owner of a sysfs files. This function will be used to correctly account for kobject ownership changes, e.g. when moving network devices between network namespaces. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/sysfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fa7ee503fb76..a7884024a911 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -310,6 +310,9 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) return kernfs_enable_ns(kn); } +int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, + kgid_t kgid); + #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) @@ -522,6 +525,13 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) { } +static inline int sysfs_file_change_owner(struct kobject *kobj, + const char *name, kuid_t kuid, + kgid_t kgid) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From 0666a3aee762cd4f7981c2eed0fd8cab87533539 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:12 +0100 Subject: sysfs: add sysfs_link_change_owner() Add a helper to change the owner of a sysfs link. This function will be used to correctly account for kobject ownership changes, e.g. when moving network devices between network namespaces. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/sysfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index a7884024a911..7e15ebfd750e 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -312,6 +312,8 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid); +int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, + const char *name, kuid_t kuid, kgid_t kgid); #else /* CONFIG_SYSFS */ @@ -532,6 +534,14 @@ static inline int sysfs_file_change_owner(struct kobject *kobj, return 0; } +static inline int sysfs_link_change_owner(struct kobject *kobj, + struct kobject *targ, + const char *name, kuid_t kuid, + kgid_t kgid) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From 303a42769c4c4d8e5e3ad928df87eb36f8c1fa60 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:13 +0100 Subject: sysfs: add sysfs_group{s}_change_owner() Add helpers to change the owner of sysfs groups. This function will be used to correctly account for kobject ownership changes, e.g. when moving network devices between network namespaces. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/sysfs.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 7e15ebfd750e..3fcaabdb05ef 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -314,6 +314,12 @@ int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid); +int sysfs_groups_change_owner(struct kobject *kobj, + const struct attribute_group **groups, + kuid_t kuid, kgid_t kgid); +int sysfs_group_change_owner(struct kobject *kobj, + const struct attribute_group *groups, kuid_t kuid, + kgid_t kgid); #else /* CONFIG_SYSFS */ @@ -542,6 +548,20 @@ static inline int sysfs_link_change_owner(struct kobject *kobj, return 0; } +static inline int sysfs_groups_change_owner(struct kobject *kobj, + const struct attribute_group **groups, + kuid_t kuid, kgid_t kgid) +{ + return 0; +} + +static inline int sysfs_group_change_owner(struct kobject *kobj, + const struct attribute_group **groups, + kuid_t kuid, kgid_t kgid) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From 2c4f9401ceb00167a3bfd322a28aa87b646a253f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:14 +0100 Subject: sysfs: add sysfs_change_owner() Add a helper to change the owner of sysfs objects. This function will be used to correctly account for kobject ownership changes, e.g. when moving network devices between network namespaces. This mirrors how a kobject is added through driver core which in its guts is done via kobject_add_internal() which in summary creates the main directory via create_dir(), populates that directory with the groups associated with the ktype of the kobject (if any) and populates the directory with the basic attributes associated with the ktype of the kobject (if any). These are the basic steps that are associated with adding a kobject in sysfs. Any additional properties are added by the specific subsystem itself (not by driver core) after it has registered the device. So for the example of network devices, a network device will e.g. register a queue subdirectory under the basic sysfs directory for the network device and than further subdirectories within that queues subdirectory. But that is all specific to network devices and they call the corresponding sysfs functions to do that directly when they create those queue objects. So anything that a subsystem adds outside of what driver core does must also be changed by it (That's already true for removal of files it created outside of driver core.) and it's the same for ownership changes. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/sysfs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 3fcaabdb05ef..9e531ec76274 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -312,6 +312,7 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) int sysfs_file_change_owner(struct kobject *kobj, const char *name, kuid_t kuid, kgid_t kgid); +int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid); int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_groups_change_owner(struct kobject *kobj, @@ -548,6 +549,11 @@ static inline int sysfs_link_change_owner(struct kobject *kobj, return 0; } +static inline int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid) +{ + return 0; +} + static inline int sysfs_groups_change_owner(struct kobject *kobj, const struct attribute_group **groups, kuid_t kuid, kgid_t kgid) -- cgit v1.2.3 From b8f33e5d76a7a1b87e0cc760d05bf2477b4e91d6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Feb 2020 04:37:15 +0100 Subject: device: add device_change_owner() Add a helper to change the owner of a device's sysfs entries. This needs to happen when the ownership of a device is changed, e.g. when moving network devices between network namespaces. This function will be used to correctly account for ownership changes, e.g. when moving network devices between network namespaces. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 0cd7c647c16c..3e40533d2037 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -817,6 +817,7 @@ extern struct device *device_find_child_by_name(struct device *parent, extern int device_rename(struct device *dev, const char *new_name); extern int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); +extern int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); extern const char *device_get_devnode(struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid, const char **tmp); -- cgit v1.2.3 From cc2550b421aa30e3da67e5a7f6d14f3ecd3527b3 Mon Sep 17 00:00:00 2001 From: afzal mohammed Date: Thu, 27 Feb 2020 16:29:02 +0530 Subject: clocksource: Replace setup_irq() by request_irq() request_irq() is preferred over setup_irq(). The early boot setup_irq() invocations happen either via 'init_IRQ()' or 'time_init()', while memory allocators are ready by 'mm_init()'. Per tglx[1], setup_irq() existed in olden days when allocators were not ready by the time early interrupts were initialized. Hence replace setup_irq() by request_irq(). Seldom remove_irq() usage has been observed coupled with setup_irq(), wherever that has been found, it too has been replaced by free_irq(). A build error that was reported by kbuild test robot in the previous version of the patch also has been fixed. [1] https://lkml.kernel.org/r/alpine.DEB.2.20.1710191609480.1971@nanos Signed-off-by: afzal mohammed Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/91961c77c1cf93d41523f5e1ac52043f32f97077.1582799709.git.afzal.mohd.ma@gmail.com --- include/linux/dw_apb_timer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h index 14f072edbca5..82ebf9223948 100644 --- a/include/linux/dw_apb_timer.h +++ b/include/linux/dw_apb_timer.h @@ -25,7 +25,6 @@ struct dw_apb_timer { struct dw_apb_clock_event_device { struct clock_event_device ced; struct dw_apb_timer timer; - struct irqaction irqaction; void (*eoi)(struct dw_apb_timer *); }; -- cgit v1.2.3 From bb42df4662a4476531147c3d9caa1940f7ad36bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 3 Jul 2018 16:42:26 +0200 Subject: dma-buf: add dynamic DMA-buf handling v15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the exporter side we add optional explicit pinning callbacks. Which are called when the importer doesn't implement dynamic handling, move notification or need the DMA-buf locked in place for its use case. On the importer side we add an optional move_notify callback. This callback is used by the exporter to inform the importers that their mappings should be destroyed as soon as possible. This allows the exporter to provide the mappings without the need to pin the backing store. v2: don't try to invalidate mappings when the callback is NULL, lock the reservation obj while using the attachments, add helper to set the callback v3: move flag for invalidation support into the DMA-buf, use new attach_info structure to set the callback v4: use importer_priv field instead of mangling exporter priv. v5: drop invalidation_supported flag v6: squash together with pin/unpin changes v7: pin/unpin takes an attachment now v8: nuke dma_buf_attachment_(map|unmap)_locked, everything is now handled backward compatible v9: always cache when export/importer don't agree on dynamic handling v10: minimal style cleanup v11: drop automatically re-entry avoidance v12: rename callback to move_notify v13: add might_lock in appropriate places v14: rebase on separated locking change v15: add EXPERIMENTAL flag, some more code comments Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/353993/?series=73646&rev=1 --- include/linux/dma-buf.h | 82 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index abf5459a5b9d..b38cea240b67 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -93,14 +93,41 @@ struct dma_buf_ops { */ void (*detach)(struct dma_buf *, struct dma_buf_attachment *); + /** + * @pin: + * + * This is called by dma_buf_pin and lets the exporter know that the + * DMA-buf can't be moved any more. + * + * This is called with the dmabuf->resv object locked. + * + * This callback is optional and should only be used in limited use + * cases like scanout and not for temporary pin operations. + * + * Returns: + * + * 0 on success, negative error code on failure. + */ + int (*pin)(struct dma_buf_attachment *attach); + + /** + * @unpin: + * + * This is called by dma_buf_unpin and lets the exporter know that the + * DMA-buf can be moved again. + * + * This is called with the dmabuf->resv object locked. + * + * This callback is optional. + */ + void (*unpin)(struct dma_buf_attachment *attach); + /** * @map_dma_buf: * * This is called by dma_buf_map_attachment() and is used to map a * shared &dma_buf into device address space, and it is mandatory. It - * can only be called if @attach has been called successfully. This - * essentially pins the DMA buffer into place, and it cannot be moved - * any more + * can only be called if @attach has been called successfully. * * This call may sleep, e.g. when the backing storage first needs to be * allocated, or moved to a location suitable for all currently attached @@ -141,9 +168,8 @@ struct dma_buf_ops { * * This is called by dma_buf_unmap_attachment() and should unmap and * release the &sg_table allocated in @map_dma_buf, and it is mandatory. - * It should also unpin the backing storage if this is the last mapping - * of the DMA buffer, it the exporter supports backing storage - * migration. + * For static dma_buf handling this might also unpins the backing + * storage if this is the last mapping of the DMA buffer. */ void (*unmap_dma_buf)(struct dma_buf_attachment *, struct sg_table *, @@ -311,6 +337,34 @@ struct dma_buf { } cb_excl, cb_shared; }; +/** + * struct dma_buf_attach_ops - importer operations for an attachment + * @move_notify: [optional] notification that the DMA-buf is moving + * + * Attachment operations implemented by the importer. + */ +struct dma_buf_attach_ops { + /** + * @move_notify + * + * If this callback is provided the framework can avoid pinning the + * backing store while mappings exists. + * + * This callback is called with the lock of the reservation object + * associated with the dma_buf held and the mapping function must be + * called with this lock held as well. This makes sure that no mapping + * is created concurrently with an ongoing move operation. + * + * Mappings stay valid and are not directly affected by this callback. + * But the DMA-buf can now be in a different physical location, so all + * mappings should be destroyed and re-created as soon as possible. + * + * New mappings can be created after this callback returns, and will + * point to the new location of the DMA-buf. + */ + void (*move_notify)(struct dma_buf_attachment *attach); +}; + /** * struct dma_buf_attachment - holds device-buffer attachment data * @dmabuf: buffer for this attachment. @@ -319,8 +373,9 @@ struct dma_buf { * @sgt: cached mapping. * @dir: direction of cached mapping. * @priv: exporter specific attachment data. - * @dynamic_mapping: true if dma_buf_map/unmap_attachment() is called with the - * dma_resv lock held. + * @importer_ops: importer operations for this attachment, if provided + * dma_buf_map/unmap_attachment() must be called with the dma_resv lock held. + * @importer_priv: importer specific attachment data. * * This structure holds the attachment information between the dma_buf buffer * and its user device(s). The list contains one attachment struct per device @@ -337,7 +392,8 @@ struct dma_buf_attachment { struct list_head node; struct sg_table *sgt; enum dma_data_direction dir; - bool dynamic_mapping; + const struct dma_buf_attach_ops *importer_ops; + void *importer_priv; void *priv; }; @@ -399,6 +455,7 @@ static inline void get_dma_buf(struct dma_buf *dmabuf) */ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) { + /* TODO: switch to using pin/unpin functions as indicator. */ return dmabuf->ops->dynamic_mapping; } @@ -413,16 +470,19 @@ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) static inline bool dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach) { - return attach->dynamic_mapping; + return !!attach->importer_ops; } struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, struct device *dev); struct dma_buf_attachment * dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, - bool dynamic_mapping); + const struct dma_buf_attach_ops *importer_ops, + void *importer_priv); void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach); +int dma_buf_pin(struct dma_buf_attachment *attach); +void dma_buf_unpin(struct dma_buf_attachment *attach); struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info); -- cgit v1.2.3 From bd2275eeed5b2d33eb7718e3562bf39e46ee64d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 18 Feb 2020 16:57:24 +0100 Subject: dma-buf: drop dynamic_mapping flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead use the pin() callback to detect dynamic DMA-buf handling. Since amdgpu is now migrated it doesn't make much sense to keep the extra flag. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/353997/?series=73646&rev=1 --- include/linux/dma-buf.h | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index b38cea240b67..1ade486fc2bb 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -42,18 +42,6 @@ struct dma_buf_ops { */ bool cache_sgt_mapping; - /** - * @dynamic_mapping: - * - * If true the framework makes sure that the map/unmap_dma_buf - * callbacks are always called with the dma_resv object locked. - * - * If false the framework makes sure that the map/unmap_dma_buf - * callbacks are always called without the dma_resv object locked. - * Mutual exclusive with @cache_sgt_mapping. - */ - bool dynamic_mapping; - /** * @attach: * @@ -99,7 +87,8 @@ struct dma_buf_ops { * This is called by dma_buf_pin and lets the exporter know that the * DMA-buf can't be moved any more. * - * This is called with the dmabuf->resv object locked. + * This is called with the dmabuf->resv object locked and is mutual + * exclusive with @cache_sgt_mapping. * * This callback is optional and should only be used in limited use * cases like scanout and not for temporary pin operations. @@ -116,7 +105,8 @@ struct dma_buf_ops { * This is called by dma_buf_unpin and lets the exporter know that the * DMA-buf can be moved again. * - * This is called with the dmabuf->resv object locked. + * This is called with the dmabuf->resv object locked and is mutual + * exclusive with @cache_sgt_mapping. * * This callback is optional. */ @@ -455,8 +445,7 @@ static inline void get_dma_buf(struct dma_buf *dmabuf) */ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) { - /* TODO: switch to using pin/unpin functions as indicator. */ - return dmabuf->ops->dynamic_mapping; + return !!dmabuf->ops->pin; } /** -- cgit v1.2.3 From 06ee7a950b6a342cd79590e7243bdda850141967 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 12 Dec 2019 21:07:52 -0600 Subject: ARM: OMAP2+: pm33xx-core: Add cpuidle_ops for am335x/am437x am335x and am437x can now make use of the generic cpuidle-arm driver. This requires that we define init and suspend ops to be passed set as the cpuidle ops for the SoC. These ops are invoked directly at the last stage of the cpuidle-arm driver in order to allow low level platform code to run and bring the CPU the rest of the way into it's desired idle state. It is required that the CPUIDLE_METHOD_OF_DECLARE be called from code that is built in so define these ops in pm33xx-core where the always built-in portion of the PM code for these SoCs lives. Additionally, although an soc_suspend function is already exposed by the pm33xx platform code, it contains additional operations needed for full SoC suspend beyond what is needed for a relatively simple CPU suspend needed during cpuidle. To get around this introduce cpu_suspend ops to be used by the am335x and am437x PM driver for the last stage of cpuidle path. Acked-by: Santosh Shilimkar Signed-off-by: Dave Gerlach Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren --- include/linux/platform_data/pm33xx.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h index dd5971937a64..8e59f2db2adc 100644 --- a/include/linux/platform_data/pm33xx.h +++ b/include/linux/platform_data/pm33xx.h @@ -49,6 +49,9 @@ struct am33xx_pm_platform_data { int (*init)(void); int (*soc_suspend)(unsigned int state, int (*fn)(unsigned long), unsigned long args); + int (*cpu_suspend)(int (*fn)(unsigned long), unsigned long args); + void (*begin_suspend)(void); + void (*finish_suspend)(void); struct am33xx_pm_sram_addr *(*get_sram_addrs)(void); void __iomem *(*get_rtc_base_addr)(void); void (*save_context)(void); -- cgit v1.2.3 From 65880ab160838e0764138894ef4450abdbed4af5 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 12 Dec 2019 21:07:53 -0600 Subject: ARM: OMAP2+: pm33xx-core: Extend platform_data ops for cpuidle In order for am335x and am437x to properly enter deeper c-states in cpuidle they must always call into the sleep33/43xx suspend code and also sometimes invoke the wkup_m3_ipc driver. These are both controlled by the pm33xx module so we must provide a method for the platform code to call back into the module when it is available as the core cpuidle ops that are invoked by the cpuidle-arm driver must remain as built in. Extend the init platform op to take an idle function as an argument so that we can use this to call into the pm33xx module for c-states that need it. Also add a deinit op so we can unregister this idle function from the PM core when the pm33xx module gets unloaded. Acked-by: Santosh Shilimkar Signed-off-by: Dave Gerlach Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren --- include/linux/platform_data/pm33xx.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h index 8e59f2db2adc..644af1d89cfa 100644 --- a/include/linux/platform_data/pm33xx.h +++ b/include/linux/platform_data/pm33xx.h @@ -46,7 +46,8 @@ struct am33xx_pm_sram_addr { }; struct am33xx_pm_platform_data { - int (*init)(void); + int (*init)(int (*idle)(u32 wfi_flags)); + int (*deinit)(void); int (*soc_suspend)(unsigned int state, int (*fn)(unsigned long), unsigned long args); int (*cpu_suspend)(int (*fn)(unsigned long), unsigned long args); -- cgit v1.2.3 From 91a208f2185ad4855ff03c342d0b7e4f5fc6f5df Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Feb 2020 10:23:41 +0000 Subject: net: phylink: propagate resolved link config via mac_link_up() Propagate the resolved link parameters via the mac_link_up() call for MACs that do not automatically track their PCS state. We propagate the link parameters via function arguments so that inappropriate members of struct phylink_link_state can't be accessed, and creating a new structure just for this adds needless complexity to the API. Tested-by: Andre Przywara Tested-by: Alexandre Belloni Tested-by: Vladimir Oltean Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 57 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 812357c03df4..2180eb1aa254 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -91,9 +91,10 @@ struct phylink_mac_ops { void (*mac_an_restart)(struct phylink_config *config); void (*mac_link_down)(struct phylink_config *config, unsigned int mode, phy_interface_t interface); - void (*mac_link_up)(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, - struct phy_device *phy); + void (*mac_link_up)(struct phylink_config *config, + struct phy_device *phy, unsigned int mode, + phy_interface_t interface, int speed, int duplex, + bool tx_pause, bool rx_pause); }; #if 0 /* For kernel-doc purposes only. */ @@ -152,6 +153,9 @@ void mac_pcs_get_state(struct phylink_config *config, * guaranteed to be correct, and so any mac_config() implementation must * never reference these fields. * + * (this requires a rewrite - please refer to mac_link_up() for situations + * where the PCS and MAC are not tightly integrated.) + * * In all negotiation modes, as defined by @mode, @state->pause indicates the * pause settings which should be applied as follows. If %MLO_PAUSE_AN is not * set, %MLO_PAUSE_TX and %MLO_PAUSE_RX indicate whether the MAC should send @@ -162,12 +166,20 @@ void mac_pcs_get_state(struct phylink_config *config, * The action performed depends on the currently selected mode: * * %MLO_AN_FIXED, %MLO_AN_PHY: - * Configure the specified @state->speed and @state->duplex over a link - * specified by @state->interface. @state->advertising may be used, but - * is not required. Pause modes as above. Other members of @state must - * be ignored. + * Configure for non-inband negotiation mode, where the link settings + * are completely communicated via mac_link_up(). The physical link + * protocol from the MAC is specified by @state->interface. + * + * @state->advertising may be used, but is not required. + * + * Older drivers (prior to the mac_link_up() change) may use @state->speed, + * @state->duplex and @state->pause to configure the MAC, but this is + * deprecated; such drivers should be converted to use mac_link_up(). * - * Valid state members: interface, speed, duplex, pause, advertising. + * Other members of @state must be ignored. + * + * Valid state members: interface, advertising. + * Deprecated state members: speed, duplex, pause. * * %MLO_AN_INBAND: * place the link in an inband negotiation mode (such as 802.3z @@ -228,19 +240,34 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, /** * mac_link_up() - allow the link to come up * @config: a pointer to a &struct phylink_config. + * @phy: any attached phy * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode - * @phy: any attached phy + * @speed: link speed + * @duplex: link duplex + * @tx_pause: link transmit pause enablement status + * @rx_pause: link receive pause enablement status * - * If @mode is not an in-band negotiation mode (as defined by - * phylink_autoneg_inband()), allow the link to come up. If @phy - * is non-%NULL, configure Energy Efficient Ethernet by calling + * Configure the MAC for an established link. + * + * @speed, @duplex, @tx_pause and @rx_pause indicate the finalised link + * settings, and should be used to configure the MAC block appropriately + * where these settings are not automatically conveyed from the PCS block, + * or if in-band negotiation (as defined by phylink_autoneg_inband(@mode)) + * is disabled. + * + * Note that when 802.3z in-band negotiation is in use, it is possible + * that the user wishes to override the pause settings, and this should + * be allowed when considering the implementation of this method. + * + * If in-band negotiation mode is disabled, allow the link to come up. If + * @phy is non-%NULL, configure Energy Efficient Ethernet by calling * phy_init_eee() and perform appropriate MAC configuration for EEE. * Interface type selection must be done in mac_config(). */ -void mac_link_up(struct phylink_config *config, unsigned int mode, - phy_interface_t interface, - struct phy_device *phy); +void mac_link_up(struct phylink_config *config, struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, bool rx_pause); #endif struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, -- cgit v1.2.3 From d7f10df86202273155a9d8f8553bc2ad28e0dd46 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 26 Feb 2020 18:17:44 -0600 Subject: bpf: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200227001744.GA3317@embeddedor --- include/linux/bpf-cgroup.h | 2 +- include/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a11d5b7dbbf3..a7cd5c7a2509 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -36,7 +36,7 @@ struct bpf_cgroup_storage_map; struct bpf_storage_buffer { struct rcu_head rcu; - char data[0]; + char data[]; }; struct bpf_cgroup_storage { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1acd5bf70350..9aa33b8f3d55 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -859,7 +859,7 @@ struct bpf_prog_array_item { struct bpf_prog_array { struct rcu_head rcu; - struct bpf_prog_array_item items[0]; + struct bpf_prog_array_item items[]; }; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); -- cgit v1.2.3 From 057a0fb649f3bfcd3048e22e15fa613026076119 Mon Sep 17 00:00:00 2001 From: Kalyani Akula Date: Mon, 17 Feb 2020 15:56:41 +0530 Subject: firmware: xilinx: Add ZynqMP aes API for AES functionality Add ZynqMP firmware AES API to perform encryption/decryption of given data. Signed-off-by: Kalyani Akula Acked-by: Michal Simek Signed-off-by: Herbert Xu --- include/linux/firmware/xlnx-zynqmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 2cd12ebd6826..8acace459351 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -82,6 +82,7 @@ enum pm_api_id { PM_CLOCK_GETRATE, PM_CLOCK_SETPARENT, PM_CLOCK_GETPARENT, + PM_SECURE_AES = 47, PM_FEATURE_CHECK = 63, PM_API_MAX, }; @@ -315,6 +316,7 @@ struct zynqmp_eemi_ops { const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); + int (*aes)(const u64 address, u32 *out); }; int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, -- cgit v1.2.3 From 96e326878fa5e2727d14e9a23644119374619010 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Jan 2020 17:30:41 +0200 Subject: net/mlx5e: Eswitch, Use per vport tables for mirroring When using port mirroring, we forward the traffic to another table and use that table to forward to the mirrored vport. Since the hardware loses the values of reg c, and in particular reg c0, we fail the match on the input vport which previously existed in reg c0. To overcome this situation, we use a set of per vport tables, positioned at the lowest priority, and forward traffic to those tables. Since these tables are per vport, we can avoid matching on reg c0. Fixes: c01cfd0f1115 ("net/mlx5: E-Switch, Add match on vport metadata for rule in fast path") Signed-off-by: Eli Cohen Reviewed-by: Mark Bloch Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4cae16016b2b..a5cf5c76f348 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -84,6 +84,7 @@ enum { FDB_TC_OFFLOAD, FDB_FT_OFFLOAD, FDB_SLOW_PATH, + FDB_PER_VPORT, }; struct mlx5_pkt_reformat; -- cgit v1.2.3 From 5682d393b40e1fe7426a7b8c3471f05262f42010 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:09 -0800 Subject: inet_diag: Refactor inet_sk_diag_fill(), dump(), and dump_one() In a latter patch, there is a need to update "cb->min_dump_alloc" in inet_sk_diag_fill() as it learns the diffierent bpf_sk_storages stored in a sk while dumping all sk(s) (e.g. tcp_hashinfo). The inet_sk_diag_fill() currently does not take the "cb" as an argument. One of the reason is inet_sk_diag_fill() is used by both dump_one() and dump() (which belong to the "struct inet_diag_handler". The dump_one() interface does not pass the "cb" along. This patch is to make dump_one() pass a "cb". The "cb" is created in inet_diag_cmd_exact(). The "nlh" and "in_skb" are stored in "cb" as the dump() interface does. The total number of args in inet_sk_diag_fill() is also cut from 10 to 7 and that helps many callers to pass fewer args. In particular, "struct user_namespace *user_ns", "u32 pid", and "u32 seq" can be replaced by accessing "cb->nlh" and "cb->skb". A similar argument reduction is also made to inet_twsk_diag_fill() and inet_req_diag_fill(). inet_csk_diag_dump() and inet_csk_diag_fill() are also removed. They are mostly equivalent to inet_sk_diag_fill(). Their repeated usages are very limited. Thus, inet_sk_diag_fill() is directly used in those occasions. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230409.1975173-1-kafai@fb.com --- include/linux/inet_diag.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 39faaaf843e1..6b157ce07d74 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -18,8 +18,7 @@ struct inet_diag_handler { const struct inet_diag_req_v2 *r, struct nlattr *bc); - int (*dump_one)(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, + int (*dump_one)(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); void (*idiag_get_info)(struct sock *sk, @@ -42,16 +41,15 @@ struct inet_diag_handler { struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, const struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, bool net_admin); + struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + u16 nlmsg_flags, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct netlink_callback *cb, const struct inet_diag_req_v2 *req); struct sock *inet_diag_find_one_icsk(struct net *net, -- cgit v1.2.3 From 0df6d32842b9a5f97a29ea90c8adc5cfac38341d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:15 -0800 Subject: inet_diag: Move the INET_DIAG_REQ_BYTECODE nlattr to cb->data The INET_DIAG_REQ_BYTECODE nlattr is currently re-found every time when the "dump()" is re-started. In a latter patch, it will also need to parse the new INET_DIAG_REQ_SK_BPF_STORAGES nlattr to learn the map_fds. Thus, this patch takes this chance to store the parsed nlattr in cb->data during the "start" time of a dump. By doing this, the "bc" argument also becomes unnecessary and is removed. Also, the two copies of the INET_DIAG_REQ_BYTECODE parsing-audit logic between compat/current version can be consolidated to one. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230415.1975555-1-kafai@fb.com --- include/linux/inet_diag.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 6b157ce07d74..1bb94cac265f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -15,8 +15,7 @@ struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc); + const struct inet_diag_req_v2 *r); int (*dump_one)(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); @@ -39,6 +38,11 @@ struct inet_diag_handler { __u16 idiag_info_size; }; +struct inet_diag_dump_data { + struct nlattr *req_nlas[__INET_DIAG_REQ_MAX]; +#define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE] +}; + struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, @@ -46,8 +50,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, u16 nlmsg_flags, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc); + const struct inet_diag_req_v2 *r); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct netlink_callback *cb, const struct inet_diag_req_v2 *req); -- cgit v1.2.3 From 1ed4d92458a969e71e7914550b6f0c730c14d84e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:21 -0800 Subject: bpf: INET_DIAG support in bpf_sk_storage This patch adds INET_DIAG support to bpf_sk_storage. 1. Although this series adds bpf_sk_storage diag capability to inet sk, bpf_sk_storage is in general applicable to all fullsock. Hence, the bpf_sk_storage logic will operate on SK_DIAG_* nlattr. The caller will pass in its specific nesting nlattr (e.g. INET_DIAG_*) as the argument. 2. The request will be like: INET_DIAG_REQ_SK_BPF_STORAGES (nla_nest) (defined in latter patch) SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32) SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32) ...... Considering there could have multiple bpf_sk_storages in a sk, instead of reusing INET_DIAG_INFO ("ss -i"), the user can select some specific bpf_sk_storage to dump by specifying an array of SK_DIAG_BPF_STORAGE_REQ_MAP_FD. If no SK_DIAG_BPF_STORAGE_REQ_MAP_FD is specified (i.e. an empty INET_DIAG_REQ_SK_BPF_STORAGES), it will dump all bpf_sk_storages of a sk. 3. The reply will be like: INET_DIAG_BPF_SK_STORAGES (nla_nest) (defined in latter patch) SK_DIAG_BPF_STORAGE (nla_nest) SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) SK_DIAG_BPF_STORAGE (nla_nest) SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) ...... 4. Unlike other INET_DIAG info of a sk which is pretty static, the size required to dump the bpf_sk_storage(s) of a sk is dynamic as the system adding more bpf_sk_storage_map. It is hard to set a static min_dump_alloc size. Hence, this series learns it at the runtime and adjust the cb->min_dump_alloc as it iterates all sk(s) of a system. The "unsigned int *res_diag_size" in bpf_sk_storage_diag_put() is for this purpose. The next patch will update the cb->min_dump_alloc as it iterates the sk(s). Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230421.1975729-1-kafai@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9aa33b8f3d55..6015a4daf118 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1023,6 +1023,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); +struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_inc(struct bpf_map *map); -- cgit v1.2.3 From 085c20cacf2b72991ce1c9d99a5e2f1d9e73bb68 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:27 -0800 Subject: bpf: inet_diag: Dump bpf_sk_storages in inet_diag_dump() This patch will dump out the bpf_sk_storages of a sk if the request has the INET_DIAG_REQ_SK_BPF_STORAGES nlattr. An array of SK_DIAG_BPF_STORAGE_REQ_MAP_FD can be specified in INET_DIAG_REQ_SK_BPF_STORAGES to select which bpf_sk_storage to dump. If no map_fd is specified, all bpf_sk_storages of a sk will be dumped. bpf_sk_storages can be added to the system at runtime. It is difficult to find a proper static value for cb->min_dump_alloc. This patch learns the nlattr size required to dump the bpf_sk_storages of a sk. If it happens to be the very first nlmsg of a dump and it cannot fit the needed bpf_sk_storages, it will try to expand the skb by "pskb_expand_head()". Instead of expanding it in inet_sk_diag_fill(), it is expanded at a sleepable context in __inet_diag_dump() so __GFP_DIRECT_RECLAIM can be used. In __inet_diag_dump(), it will retry as long as the skb is empty and the cb->min_dump_alloc becomes larger than before. cb->min_dump_alloc is bounded by KMALLOC_MAX_SIZE. The min_dump_alloc is also changed from 'u16' to 'u32' to accommodate a sk that may have a few large bpf_sk_storages. The updated cb->min_dump_alloc will also be used to allocate the skb in the next dump. This logic already exists in netlink_dump(). Here is the sample output of a locally modified 'ss' and it could be made more readable by using BTF later: [root@arch-fb-vm1 ~]# ss --bpf-map-id 14 --bpf-map-id 13 -t6an 'dst [::1]:8989' State Recv-Q Send-Q Local Address:Port Peer Address:PortProcess ESTAB 0 0 [::1]:51072 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] ESTAB 0 0 [::1]:51070 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] [root@arch-fb-vm1 ~]# ~/devshare/github/iproute2/misc/ss --bpf-maps -t6an 'dst [::1]:8989' State Recv-Q Send-Q Local Address:Port Peer Address:Port Process ESTAB 0 0 [::1]:51072 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ] ESTAB 0 0 [::1]:51070 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ] Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230427.1976129-1-kafai@fb.com --- include/linux/inet_diag.h | 4 ++++ include/linux/netlink.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 1bb94cac265f..e4ba25d63913 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -38,9 +38,13 @@ struct inet_diag_handler { __u16 idiag_info_size; }; +struct bpf_sk_storage_diag; struct inet_diag_dump_data { struct nlattr *req_nlas[__INET_DIAG_REQ_MAX]; #define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE] +#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES] + + struct bpf_sk_storage_diag *bpf_stg_diag; }; struct inet_connection_sock; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 205fa7b1f07a..788969ccbbde 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -188,10 +188,10 @@ struct netlink_callback { struct module *module; struct netlink_ext_ack *extack; u16 family; - u16 min_dump_alloc; - bool strict_check; u16 answer_flags; + u32 min_dump_alloc; unsigned int prev_seq, seq; + bool strict_check; union { u8 ctx[48]; -- cgit v1.2.3 From 253d3194c2b58152fe830fd27c2fd83ebc6fe5ee Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2020 13:00:13 +0000 Subject: random: add arch_get_random_*long_early() Some architectures (e.g. arm64) can have heterogeneous CPUs, and the boot CPU may be able to provide entropy while secondary CPUs cannot. On such systems, arch_get_random_long() and arch_get_random_seed_long() will fail unless support for RNG instructions has been detected on all CPUs. This prevents the boot CPU from being able to provide (potentially) trusted entropy when seeding the primary CRNG. To make it possible to seed the primary CRNG from the boot CPU without adversely affecting the runtime versions of arch_get_random_long() and arch_get_random_seed_long(), this patch adds new early versions of the functions used when initializing the primary CRNG. Default implementations are provided atop of the existing arch_get_random_long() and arch_get_random_seed_long() so that only architectures with such constraints need to provide the new helpers. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Mark Brown Cc: Theodore Ts'o Link: https://lore.kernel.org/r/20200210130015.17664-3-mark.rutland@arm.com Signed-off-by: Theodore Ts'o --- include/linux/random.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index d319f9a1e429..45e1f8fa742b 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -7,6 +7,8 @@ #ifndef _LINUX_RANDOM_H #define _LINUX_RANDOM_H +#include +#include #include #include @@ -185,6 +187,26 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) } #endif +/* + * Called from the boot CPU during startup; not valid to call once + * secondary CPUs are up and preemption is possible. + */ +#ifndef arch_get_random_seed_long_early +static inline bool __init arch_get_random_seed_long_early(unsigned long *v) +{ + WARN_ON(system_state != SYSTEM_BOOTING); + return arch_get_random_seed_long(v); +} +#endif + +#ifndef arch_get_random_long_early +static inline bool __init arch_get_random_long_early(unsigned long *v) +{ + WARN_ON(system_state != SYSTEM_BOOTING); + return arch_get_random_long(v); +} +#endif + /* Pseudo random number generator from numerical recipes. */ static inline u32 next_pseudo_random32(u32 seed) { -- cgit v1.2.3 From 59a135f6fb669f4f79f43160c7b8c8d6bfb37f75 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 7 Nov 2019 11:42:49 +0100 Subject: tee: remove linked list of struct tee_shm Removes list_shm from struct tee_context since the linked list isn't used any longer. Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 7a03f68fb982..cbddb883a7f8 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -49,7 +49,6 @@ struct tee_shm_pool; */ struct tee_context { struct tee_device *teedev; - struct list_head list_shm; void *data; struct kref refcount; bool releasing; @@ -170,7 +169,6 @@ void tee_device_unregister(struct tee_device *teedev); * struct tee_shm - shared memory object * @teedev: device used to allocate the object * @ctx: context using the object, if NULL the context is gone - * @link link element * @paddr: physical address of the shared memory * @kaddr: virtual address of the shared memory * @size: size of shared memory @@ -187,7 +185,6 @@ void tee_device_unregister(struct tee_device *teedev); struct tee_shm { struct tee_device *teedev; struct tee_context *ctx; - struct list_head link; phys_addr_t paddr; void *kaddr; size_t size; -- cgit v1.2.3 From c180f9bbe29a403459dd76422f435382aec6adaa Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 7 Nov 2019 11:42:52 +0100 Subject: tee: remove unused tee_shm_priv_alloc() tee_shm_priv_alloc() isn't useful in the current state and it's also not not used so remove it. Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index cbddb883a7f8..42687f6c546d 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -315,18 +315,6 @@ void *tee_get_drvdata(struct tee_device *teedev); */ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); -/** - * tee_shm_priv_alloc() - Allocate shared memory privately - * @dev: Device that allocates the shared memory - * @size: Requested size of shared memory - * - * Allocates shared memory buffer that is not associated with any client - * context. Such buffers are owned by TEE driver and used for internal calls. - * - * @returns a pointer to 'struct tee_shm' - */ -struct tee_shm *tee_shm_priv_alloc(struct tee_device *teedev, size_t size); - /** * tee_shm_register() - Register shared memory buffer * @ctx: Context that registers the shared memory -- cgit v1.2.3 From 5271b2011e448f1be7433554e4684e91951476fa Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 7 Nov 2019 11:42:59 +0100 Subject: tee: remove redundant teedev in struct tee_shm The ctx element in struct tee_shm is always valid. So remove the now redundant teedev element. Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 42687f6c546d..1412e9cc79ce 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -167,8 +167,7 @@ void tee_device_unregister(struct tee_device *teedev); /** * struct tee_shm - shared memory object - * @teedev: device used to allocate the object - * @ctx: context using the object, if NULL the context is gone + * @ctx: context using the object * @paddr: physical address of the shared memory * @kaddr: virtual address of the shared memory * @size: size of shared memory @@ -183,7 +182,6 @@ void tee_device_unregister(struct tee_device *teedev); * subsystem and from drivers that implements their own shm pool manager. */ struct tee_shm { - struct tee_device *teedev; struct tee_context *ctx; phys_addr_t paddr; void *kaddr; -- cgit v1.2.3 From 098accf2da940189f4d62d3514d17f8bb05dc6e1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 13 Feb 2020 14:00:21 +0000 Subject: iommu: Use C99 flexible array in fwspec Although the 1-element array was a typical pre-C99 way to implement variable-length structures, and indeed is a fundamental construct in the APIs of certain other popular platforms, there's no good reason for it here (and in particular the sizeof() trick is far too "clever" for its own good). We can just as easily implement iommu_fwspec's preallocation behaviour using a standard flexible array member, so let's make it look the way most readers would expect. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d1b5f4d98569..4d1ba76c9a64 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -592,7 +592,7 @@ struct iommu_fwspec { u32 flags; u32 num_pasid_bits; unsigned int num_ids; - u32 ids[1]; + u32 ids[]; }; /* ATS is supported */ -- cgit v1.2.3 From ea23578611dce2eeaf31dcfe12cd7130cf3d1411 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 28 Feb 2020 23:18:49 +0800 Subject: spi: Allow SPI controller override device buswidth Currently ACPI firmware description for a SPI device does not have any method to describe the data buswidth on the board. So even through the controller and device may support higher modes than standard SPI, it cannot be assumed that the board does - as such, that device is limited to standard SPI in such a circumstance. As a workaround, allow the controller driver supply buswidth override bits, which are used inform the core code that the controller driver knows the buswidth supported on that board for that device. A host controller driver might know this info from DMI tables, for example. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1582903131-160033-2-git-send-email-john.garry@huawei.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 6d16ba01ff5a..600e3793303e 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -481,6 +481,9 @@ struct spi_controller { /* spi_device.mode flags understood by this controller driver */ u32 mode_bits; + /* spi_device.mode flags override flags for this controller */ + u32 buswidth_override_bits; + /* bitmask of supported bits_per_word for transfers */ u32 bits_per_word_mask; #define SPI_BPW_MASK(bits) BIT((bits) - 1) -- cgit v1.2.3 From 69879c01a0c3f70e0887cfb4d9ff439814361e46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 20 Feb 2020 08:08:20 -0600 Subject: proc: Remove the now unnecessary internal mount of proc There remains no more code in the kernel using pids_ns->proc_mnt, therefore remove it from the kernel. The big benefit of this change is that one of the most error prone and tricky parts of the pid namespace implementation, maintaining kernel mounts of proc is removed. In addition removing the unnecessary complexity of the kernel mount fixes a regression that caused the proc mount options to be ignored. Now that the initial mount of proc comes from userspace, those mount options are again honored. This fixes Android's usage of the proc hidepid option. Reported-by: Alistair Strachan Fixes: e94591d0d90c ("proc: Convert proc_mount to use mount_ns.") Signed-off-by: "Eric W. Biederman" --- include/linux/pid_namespace.h | 2 -- include/linux/proc_ns.h | 5 ----- 2 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 2ed6af88794b..4956e362e55e 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -33,7 +33,6 @@ struct pid_namespace { unsigned int level; struct pid_namespace *parent; #ifdef CONFIG_PROC_FS - struct vfsmount *proc_mnt; struct dentry *proc_self; struct dentry *proc_thread_self; #endif @@ -42,7 +41,6 @@ struct pid_namespace { #endif struct user_namespace *user_ns; struct ucounts *ucounts; - struct work_struct proc_work; kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 4626b1ac3b6c..e1106a077c1a 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -50,16 +50,11 @@ enum { #ifdef CONFIG_PROC_FS -extern int pid_ns_prepare_proc(struct pid_namespace *ns); -extern void pid_ns_release_proc(struct pid_namespace *ns); extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); #else /* CONFIG_PROC_FS */ -static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; } -static inline void pid_ns_release_proc(struct pid_namespace *ns) {} - static inline int proc_alloc_inum(unsigned int *inum) { *inum = 1; -- cgit v1.2.3 From 8402a31dd803e091fd2ec9cd22040b34a0b07085 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 Feb 2020 07:33:37 -0600 Subject: net: dccp: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6b64b6cc2175..07e547c02fd8 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -198,7 +198,7 @@ enum dccp_role { struct dccp_service_list { __u32 dccpsl_nr; - __be32 dccpsl_list[0]; + __be32 dccpsl_list[]; }; #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) -- cgit v1.2.3 From e427cad6eee47e2daf207cd7a4156ae72496ee07 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 28 Feb 2020 14:45:22 +0100 Subject: net: datagram: drop 'destructor' argument from several helpers The only users for such argument are the UDP protocol and the UNIX socket family. We can safely reclaim the accounted memory directly from the UDP code and, after the previous patch, we can do scm stats accounting outside the datagram helpers. Overall this cleans up a bit some datagram-related helpers, and avoids an indirect call per packet in the UDP receive path. v1 -> v2: - call scm_stat_del() only when not peeking - Kirill - fix build issue with CONFIG_INET_ESPINTCP Signed-off-by: Paolo Abeni Reviewed-by: Kirill Tkhai Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5b50278c4bc8..21749b2cdc9b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3514,23 +3514,15 @@ int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff_head *queue, - unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *off, int *err, + unsigned int flags, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, - unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *off, int *err); + unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, -- cgit v1.2.3 From badc61982adb6018a48ed8fe32087b9754cae14b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 28 Feb 2020 13:14:04 +0100 Subject: efi/x86: Add RNG seed EFI table to unencrypted mapping check When booting with SME active, EFI tables must be mapped unencrypted since they were built by UEFI in unencrypted memory. Update the list of tables to be checked during early_memremap() processing to account for the EFI RNG seed table. Signed-off-by: Tom Lendacky Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: linux-efi@vger.kernel.org Cc: Ingo Molnar Cc: Thomas Gleixner Cc: David Hildenbrand Cc: Heinrich Schuchardt Link: https://lore.kernel.org/r/b64385fc13e5d7ad4b459216524f138e7879234f.1582662842.git.thomas.lendacky@amd.com Link: https://lore.kernel.org/r/20200228121408.9075-3-ardb@kernel.org --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 2ab33d5d6ca5..e8a08a499131 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -526,6 +526,8 @@ typedef struct { efi_time_t time_of_revocation; } efi_cert_x509_sha256_t; +extern unsigned long __ro_after_init efi_rng_seed; /* RNG Seed table */ + /* * All runtime access to EFI goes through this structure: */ -- cgit v1.2.3 From 70ae1e127b486704c62d3537d69ca65c446d4d83 Mon Sep 17 00:00:00 2001 From: Cris Forno Date: Fri, 28 Feb 2020 14:12:04 -0600 Subject: ethtool: Factored out similar ethtool link settings for virtual devices to core Three virtual devices (ibmveth, virtio_net, and netvsc) all have similar code to set link settings and validate ethtool command. To eliminate duplication of code, it is factored out into core/ethtool.c. Signed-off-by: Cris Forno Signed-off-by: David S. Miller --- include/linux/ethtool.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 95991e4300bf..23373978cb3c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -420,4 +420,10 @@ struct ethtool_rx_flow_rule * ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input); void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *rule); +bool ethtool_virtdev_validate_cmd(const struct ethtool_link_ksettings *cmd); +int ethtool_virtdev_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd, + u32 *dev_speed, u8 *dev_duplex); + + #endif /* _LINUX_ETHTOOL_H */ -- cgit v1.2.3 From 88ac039cbed125bd9ed132d27ec9f689c6442748 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Feb 2020 12:18:38 +0200 Subject: dmaengine: Refactor dmaengine_check_align() to be bit operations only There is no need to have branch and temporary variable in the function. Simple convert it to be a set of bit and arithmetic operations. Signed-off-by: Andy Shevchenko Reviewed-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200226101842.29426-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 64461fc64e1b..9f3f5582816a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1155,14 +1155,7 @@ static inline dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc static inline bool dmaengine_check_align(enum dmaengine_alignment align, size_t off1, size_t off2, size_t len) { - size_t mask; - - if (!align) - return true; - mask = (1 << align) - 1; - if (mask & (off1 | off2 | len)) - return false; - return true; + return !(((1 << align) - 1) & (off1 | off2 | len)); } static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1, -- cgit v1.2.3 From 3a92063be16873a10648a81be0b1be42a9d54ee9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Feb 2020 12:18:39 +0200 Subject: dmaengine: Use negative condition for better readability When negative condition is in use we may decrease indentation level and make the main part of logic better visible. Signed-off-by: Andy Shevchenko Reviewed-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200226101842.29426-2-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 9f3f5582816a..ae56a91c2a05 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -618,10 +618,11 @@ static inline void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap) static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx) { - if (tx->unmap) { - dmaengine_unmap_put(tx->unmap); - tx->unmap = NULL; - } + if (!tx->unmap) + return; + + dmaengine_unmap_put(tx->unmap); + tx->unmap = NULL; } #ifndef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH @@ -1408,11 +1409,12 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, static inline void dma_set_tx_state(struct dma_tx_state *st, dma_cookie_t last, dma_cookie_t used, u32 residue) { - if (st) { - st->last = last; - st->used = used; - st->residue = residue; - } + if (!st) + return; + + st->last = last; + st->used = used; + st->residue = residue; } #ifdef CONFIG_DMA_ENGINE @@ -1489,12 +1491,11 @@ static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx) if (ret) return ret; - if (caps.descriptor_reuse) { - tx->flags |= DMA_CTRL_REUSE; - return 0; - } else { + if (!caps.descriptor_reuse) return -EPERM; - } + + tx->flags |= DMA_CTRL_REUSE; + return 0; } static inline void dmaengine_desc_clear_reuse(struct dma_async_tx_descriptor *tx) @@ -1510,10 +1511,10 @@ static inline bool dmaengine_desc_test_reuse(struct dma_async_tx_descriptor *tx) static inline int dmaengine_desc_free(struct dma_async_tx_descriptor *desc) { /* this is supported for reusable desc, so check that */ - if (dmaengine_desc_test_reuse(desc)) - return desc->desc_free(desc); - else + if (!dmaengine_desc_test_reuse(desc)) return -EPERM; + + return desc->desc_free(desc); } /* --- DMA device --- */ -- cgit v1.2.3 From 5f77dd850c0a32d4d5047d139077718ee7f1a8fe Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Feb 2020 12:18:40 +0200 Subject: dmaengine: Drop redundant 'else' keyword It's obvious that 'else' keyword is redundant in the code like if (foo) return bar; else if (baz) ... Drop it for good. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200226101842.29426-3-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ae56a91c2a05..1bb5477ef7ec 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1230,9 +1230,9 @@ static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) { if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) return dma_dev_to_maxpq(dma); - else if (dmaf_p_disabled_continue(flags)) + if (dmaf_p_disabled_continue(flags)) return dma_dev_to_maxpq(dma) - 1; - else if (dmaf_continue(flags)) + if (dmaf_continue(flags)) return dma_dev_to_maxpq(dma) - 3; BUG(); } @@ -1243,7 +1243,7 @@ static inline size_t dmaengine_get_icg(bool inc, bool sgl, size_t icg, if (inc) { if (dir_icg) return dir_icg; - else if (sgl) + if (sgl) return icg; } -- cgit v1.2.3 From 1873300afa6147a1882aeba1e8bc9a13c5487571 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 26 Feb 2020 12:18:41 +0200 Subject: dmaengine: consistently return string literal from switch-case There is no need to have 'break;' statement in the default case followed by return certain string literal when all other cases have returned the string literals. So, refactor it accordingly. Signed-off-by: Andy Shevchenko Reviewed-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200226101842.29426-4-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 1bb5477ef7ec..d3672f065a64 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1560,9 +1560,7 @@ dmaengine_get_direction_text(enum dma_transfer_direction dir) case DMA_DEV_TO_DEV: return "DEV_TO_DEV"; default: - break; + return "invalid"; } - - return "invalid"; } #endif /* DMAENGINE_H */ -- cgit v1.2.3 From 9e553376d3d44cecab2460a57085a82dbb3bc297 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Feb 2020 18:21:14 +0200 Subject: fbdev: simplefb: Platform data shan't include kernel.h Replace with appropriate types.h. Signed-off-by: Andy Shevchenko Acked-by: Hans de Goede Signed-off-by: Bartlomiej Zolnierkiewicz Link: https://patchwork.freedesktop.org/patch/msgid/20200204162114.28937-1-andriy.shevchenko@linux.intel.com --- include/linux/platform_data/simplefb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/simplefb.h b/include/linux/platform_data/simplefb.h index 4f733a411d18..ca8337695c2a 100644 --- a/include/linux/platform_data/simplefb.h +++ b/include/linux/platform_data/simplefb.h @@ -10,7 +10,7 @@ #include #include -#include +#include /* format array, use it to initialize a "struct simplefb_format" array */ #define SIMPLEFB_FORMATS \ -- cgit v1.2.3 From bb4cf02d4c74f0db60f58c406ddfdfed16d14f84 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 2 Mar 2020 05:59:33 -0600 Subject: netdevice: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6c3f7032e8d9..b6fedd54cd8e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -664,7 +664,7 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node struct rps_map { unsigned int len; struct rcu_head rcu; - u16 cpus[0]; + u16 cpus[]; }; #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) @@ -686,7 +686,7 @@ struct rps_dev_flow { struct rps_dev_flow_table { unsigned int mask; struct rcu_head rcu; - struct rps_dev_flow flows[0]; + struct rps_dev_flow flows[]; }; #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ ((_num) * sizeof(struct rps_dev_flow))) @@ -704,7 +704,7 @@ struct rps_dev_flow_table { struct rps_sock_flow_table { u32 mask; - u32 ents[0] ____cacheline_aligned_in_smp; + u32 ents[] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) @@ -767,7 +767,7 @@ struct xps_map { unsigned int len; unsigned int alloc_len; struct rcu_head rcu; - u16 queues[0]; + u16 queues[]; }; #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) #define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \ @@ -778,7 +778,7 @@ struct xps_map { */ struct xps_dev_maps { struct rcu_head rcu; - struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */ + struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */ }; #define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ -- cgit v1.2.3 From 444ebb5768c5c43aadfc60111fecd6c4f946e77b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 24 Feb 2020 11:32:43 +0300 Subject: splice: make do_splice public Make do_splice(), so other kernel parts can reuse it Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/splice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/splice.h b/include/linux/splice.h index 74b4911ac16d..ebbbfea48aa0 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -78,6 +78,9 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *, struct pipe_buffer *); extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, splice_direct_actor *); +extern long do_splice(struct file *in, loff_t __user *off_in, + struct file *out, loff_t __user *off_out, + size_t len, unsigned int flags); /* * for dynamic pipe sizing -- cgit v1.2.3 From 70ed506c3bbcfa846d4636b23051ca79fa4781f7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 2 Mar 2020 20:31:57 -0800 Subject: bpf: Introduce pinnable bpf_link abstraction Introduce bpf_link abstraction, representing an attachment of BPF program to a BPF hook point (e.g., tracepoint, perf event, etc). bpf_link encapsulates ownership of attached BPF program, reference counting of a link itself, when reference from multiple anonymous inodes, as well as ensures that release callback will be called from a process context, so that users can safely take mutex locks and sleep. Additionally, with a new abstraction it's now possible to generalize pinning of a link object in BPF FS, allowing to explicitly prevent BPF program detachment on process exit by pinning it in a BPF FS and let it open from independent other process to keep working with it. Convert two existing bpf_link-like objects (raw tracepoint and tracing BPF program attachments) into utilizing bpf_link framework, making them pinnable in BPF FS. More FD-based bpf_links will be added in follow up patches. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200303043159.323675-2-andriin@fb.com --- include/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6015a4daf118..f13c78c6f29d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1056,6 +1056,19 @@ extern int sysctl_unprivileged_bpf_disabled; int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); +struct bpf_link; + +struct bpf_link_ops { + void (*release)(struct bpf_link *link); +}; + +void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, + struct bpf_prog *prog); +void bpf_link_inc(struct bpf_link *link); +void bpf_link_put(struct bpf_link *link); +int bpf_link_new_fd(struct bpf_link *link); +struct bpf_link *bpf_link_get_from_fd(u32 ufd); + int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); -- cgit v1.2.3 From 0e72a6a3cfc3a32273f5e99bfaa4407f4917d343 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 15 Jan 2020 17:35:45 +0100 Subject: efi: Export boot-services code and data as debugfs-blobs Sometimes it is useful to be able to dump the efi boot-services code and data. This commit adds these as debugfs-blobs to /sys/kernel/debug/efi, but only if efi=debug is passed on the kernel-commandline as this requires not freeing those memory-regions, which costs 20+ MB of RAM. Reviewed-by: Greg Kroah-Hartman Acked-by: Ard Biesheuvel Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200115163554.101315-2-hdegoede@redhat.com Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7efd7072cca5..a6e1a2d8511e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1124,6 +1124,7 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ #define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ +#define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */ #ifdef CONFIG_EFI /* -- cgit v1.2.3 From f0df68d5bae8825ee5b62f00af237ae82247f045 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 15 Jan 2020 17:35:46 +0100 Subject: efi: Add embedded peripheral firmware support Just like with PCI options ROMs, which we save in the setup_efi_pci* functions from arch/x86/boot/compressed/eboot.c, the EFI code / ROM itself sometimes may contain data which is useful/necessary for peripheral drivers to have access to. Specifically the EFI code may contain an embedded copy of firmware which needs to be (re)loaded into the peripheral. Normally such firmware would be part of linux-firmware, but in some cases this is not feasible, for 2 reasons: 1) The firmware is customized for a specific use-case of the chipset / use with a specific hardware model, so we cannot have a single firmware file for the chipset. E.g. touchscreen controller firmwares are compiled specifically for the hardware model they are used with, as they are calibrated for a specific model digitizer. 2) Despite repeated attempts we have failed to get permission to redistribute the firmware. This is especially a problem with customized firmwares, these get created by the chip vendor for a specific ODM and the copyright may partially belong with the ODM, so the chip vendor cannot give a blanket permission to distribute these. This commit adds support for finding peripheral firmware embedded in the EFI code and makes the found firmware available through the new efi_get_embedded_fw() function. Support for loading these firmwares through the standard firmware loading mechanism is added in a follow-up commit in this patch-series. Note we check the EFI_BOOT_SERVICES_CODE for embedded firmware near the end of start_kernel(), just before calling rest_init(), this is on purpose because the typical EFI_BOOT_SERVICES_CODE memory-segment is too large for early_memremap(), so the check must be done after mm_init(). This relies on EFI_BOOT_SERVICES_CODE not being free-ed until efi_free_boot_services() is called, which means that this will only work on x86 for now. Reported-by: Dave Olsthoorn Suggested-by: Peter Jones Acked-by: Ard Biesheuvel Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200115163554.101315-3-hdegoede@redhat.com Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 6 ++++++ include/linux/efi_embedded_fw.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 include/linux/efi_embedded_fw.h (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index a6e1a2d8511e..23392b88bcc0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1554,6 +1554,12 @@ static inline void efi_enable_reset_attack_mitigation(void) { } #endif +#ifdef CONFIG_EFI_EMBEDDED_FIRMWARE +void efi_check_for_embedded_firmwares(void); +#else +static inline void efi_check_for_embedded_firmwares(void) { } +#endif + efi_status_t efi_random_get_seed(void); void efi_retrieve_tpm2_eventlog(void); diff --git a/include/linux/efi_embedded_fw.h b/include/linux/efi_embedded_fw.h new file mode 100644 index 000000000000..3d066c6370c6 --- /dev/null +++ b/include/linux/efi_embedded_fw.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_EFI_EMBEDDED_FW_H +#define _LINUX_EFI_EMBEDDED_FW_H + +#include +#include + +#define EFI_EMBEDDED_FW_PREFIX_LEN 8 + +/* + * This struct and efi_embedded_fw_list are private to the efi-embedded fw + * implementation they are in this header for use by lib/test_firmware.c only! + */ +struct efi_embedded_fw { + struct list_head list; + const char *name; + const u8 *data; + size_t length; +}; + +extern struct list_head efi_embedded_fw_list; + +/** + * struct efi_embedded_fw_desc - This struct is used by the EFI embedded-fw + * code to search for embedded firmwares. + * + * @name: Name to register the firmware with if found + * @prefix: First 8 bytes of the firmware + * @length: Length of the firmware in bytes including prefix + * @sha256: SHA256 of the firmware + */ +struct efi_embedded_fw_desc { + const char *name; + u8 prefix[EFI_EMBEDDED_FW_PREFIX_LEN]; + u32 length; + u8 sha256[32]; +}; + +int efi_get_embedded_fw(const char *name, const u8 **dat, size_t *sz); + +#endif -- cgit v1.2.3 From 89b74cac7834734d6b2733204c639917d3826083 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Mar 2020 20:24:50 +0900 Subject: tools/bootconfig: Show line and column in parse error Show line and column when we got a parse error in bootconfig tool. Current lib/bootconfig shows the parse error with byte offset, but that is not human readable. This makes xbc_init() not showing error message itself but able to pass the error message and position to caller, so that the caller can decode it and show the error message with line number and columns. With this patch, bootconfig tool shows an error with line:column as below. $ cat samples/bad-dotword.bconf # do not start keyword with . key { .word = 1 } $ ./bootconfig -a samples/bad-dotword.bconf initrd Parse Error: Invalid keyword at 3:3 Link: http://lkml.kernel.org/r/158323469002.10560.4023923847704522760.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index d11e183fcb54..9903088891fa 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -216,7 +216,8 @@ static inline int __init xbc_node_compose_key(struct xbc_node *node, } /* XBC node initializer */ -int __init xbc_init(char *buf); +int __init xbc_init(char *buf, const char **emsg, int *epos); + /* XBC cleanup data structures */ void __init xbc_destroy_all(void); -- cgit v1.2.3 From 55e8c8eb2c7b6bf30e99423ccfe7ca032f498f59 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2020 11:11:06 -0600 Subject: posix-cpu-timers: Store a reference to a pid not a task posix cpu timers do not handle the death of a process well. This is most clearly seen when a multi-threaded process calls exec from a thread that is not the leader of the thread group. The posix cpu timer code continues to pin the old thread group leader and is unable to find the siglock from there. This results in posix_cpu_timer_del being unable to delete a timer, posix_cpu_timer_set being unable to set a timer. Further to compensate for the problems in posix_cpu_timer_del on a multi-threaded exec all timers that point at the multi-threaded task are stopped. The code for the timers fundamentally needs to check if the target process/thread is alive. This needs an extra level of indirection. This level of indirection is already available in struct pid. So replace cpu.task with cpu.pid to get the needed extra layer of indirection. In addition to handling things more cleanly this reduces the amount of memory a timer can pin when a process exits and then is reaped from a task_struct to the vastly smaller struct pid. Fixes: e0a70217107e ("posix-cpu-timers: workaround to suppress the problems with mt exec") Signed-off-by: "Eric W. Biederman" Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/87wo86tz6d.fsf@x220.int.ebiederm.org --- include/linux/posix-timers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 3d10c84a97a9..e3f0f8585da4 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -69,7 +69,7 @@ static inline int clockid_to_fd(const clockid_t clk) struct cpu_timer { struct timerqueue_node node; struct timerqueue_head *head; - struct task_struct *task; + struct pid *pid; struct list_head elist; int firing; }; -- cgit v1.2.3 From 75f81a7ffe4d45b97a14f6d8075bacb8323ac10e Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 26 Feb 2020 11:57:58 -0800 Subject: usb: typec: Add sysfs node to show cc orientation Export Type-C orientation information when available. - "normal": CC1 orientation - "reverse": CC2 orientation - "unknown": Orientation cannot be determined. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200226195758.150477-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 44d28387ced4..b00a2642a9cd 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -211,6 +211,7 @@ struct typec_capability { u16 pd_revision; /* 0300H = "3.0" */ int prefer_role; enum typec_accessory accessory[TYPEC_MAX_ACCESSORY]; + unsigned int orientation_aware:1; struct fwnode_handle *fwnode; void *driver_data; -- cgit v1.2.3 From c111566bea7ccd8a05e2c56f1fb3cbb6f4b7b441 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 25 Feb 2020 11:31:02 +0200 Subject: PM: runtime: Add pm_runtime_get_if_active() pm_runtime_get_if_in_use() bumps up the PM-runtime usage count if it is not equal to zero and the device's PM-runtime status is 'active'. This works for drivers that do not use autoidle, but for those that do, the function returns zero even when the device is active. In order to maintain sane device state while the device is powered on in the hope that it'll be needed, pm_runtime_get_if_active(dev, true) returns a positive value if the device's PM-runtime status is 'active' when it is called, in which case it also increments the device's usage count. If the second argument of pm_runtime_get_if_active() is 'false', the function behaves just like pm_runtime_get_if_in_use(), so redefine the latter as a wrapper around the former. Signed-off-by: Sakari Ailus [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 22af69d237a6..3bdcbce8141a 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -38,7 +38,7 @@ extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); -extern int pm_runtime_get_if_in_use(struct device *dev); +extern int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); @@ -60,6 +60,11 @@ extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device *dev); +static inline int pm_runtime_get_if_in_use(struct device *dev) +{ + return pm_runtime_get_if_active(dev, false); +} + static inline void pm_suspend_ignore_children(struct device *dev, bool enable) { dev->power.ignore_children = enable; @@ -143,6 +148,11 @@ static inline int pm_runtime_get_if_in_use(struct device *dev) { return -EINVAL; } +static inline int pm_runtime_get_if_active(struct device *dev, + bool ign_usage_count) +{ + return -EINVAL; +} static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } -- cgit v1.2.3 From ef441dd6af91e1f4265e890021ac3ad631b2b10e Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 Mar 2020 16:53:45 +0300 Subject: usb: typec: mux: Allow the muxes to be named The mux devices have been named by using the name of the parent device as base until now, but if for example the parent device has multiple muxes that will not work. This makes it possible to supply the name for a mux during registration. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200302135353.56659-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_mux.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h index be7292c0be5e..47ab5a828b07 100644 --- a/include/linux/usb/typec_mux.h +++ b/include/linux/usb/typec_mux.h @@ -17,6 +17,7 @@ typedef int (*typec_switch_set_fn_t)(struct typec_switch *sw, struct typec_switch_desc { struct fwnode_handle *fwnode; typec_switch_set_fn_t set; + const char *name; void *drvdata; }; @@ -42,6 +43,7 @@ typedef int (*typec_mux_set_fn_t)(struct typec_mux *mux, struct typec_mux_desc { struct fwnode_handle *fwnode; typec_mux_set_fn_t set; + const char *name; void *drvdata; }; -- cgit v1.2.3 From 774a9df6aeac236282dc0ec711b73865b64ef6a1 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 Mar 2020 16:53:46 +0300 Subject: usb: typec: mux: Add helpers for setting the mux state Adding helpers typec_switch_set() and typec_mux_set() that simply call the ->set callback function of the mux. These functions make it possible to set the mux states also from outside the class code. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200302135353.56659-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_mux.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h index 47ab5a828b07..4991c93df5d0 100644 --- a/include/linux/usb/typec_mux.h +++ b/include/linux/usb/typec_mux.h @@ -23,6 +23,9 @@ struct typec_switch_desc { struct typec_switch *typec_switch_get(struct device *dev); void typec_switch_put(struct typec_switch *sw); +int typec_switch_set(struct typec_switch *sw, + enum typec_orientation orientation); + struct typec_switch * typec_switch_register(struct device *parent, const struct typec_switch_desc *desc); @@ -50,6 +53,8 @@ struct typec_mux_desc { struct typec_mux * typec_mux_get(struct device *dev, const struct typec_altmode_desc *desc); void typec_mux_put(struct typec_mux *mux); +int typec_mux_set(struct typec_mux *mux, struct typec_mux_state *state); + struct typec_mux * typec_mux_register(struct device *parent, const struct typec_mux_desc *desc); void typec_mux_unregister(struct typec_mux *mux); -- cgit v1.2.3 From d1c6a769cdf466053ae211789f2b0671c8a72331 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 Mar 2020 16:53:47 +0300 Subject: usb: typec: mux: Allow the mux handles to be requested with fwnode Introducing fwnode_typec_switch_get() and fwnode_typec_mux_get() functions that work just like typec_switch_get() and typec_mux_get() but they take struct fwnode_handle as the first parameter instead of struct device. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200302135353.56659-4-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_mux.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h index 4991c93df5d0..a9d9957933dc 100644 --- a/include/linux/usb/typec_mux.h +++ b/include/linux/usb/typec_mux.h @@ -3,6 +3,7 @@ #ifndef __USB_TYPEC_MUX #define __USB_TYPEC_MUX +#include #include struct device; @@ -21,11 +22,16 @@ struct typec_switch_desc { void *drvdata; }; -struct typec_switch *typec_switch_get(struct device *dev); +struct typec_switch *fwnode_typec_switch_get(struct fwnode_handle *fwnode); void typec_switch_put(struct typec_switch *sw); int typec_switch_set(struct typec_switch *sw, enum typec_orientation orientation); +static inline struct typec_switch *typec_switch_get(struct device *dev) +{ + return fwnode_typec_switch_get(dev_fwnode(dev)); +} + struct typec_switch * typec_switch_register(struct device *parent, const struct typec_switch_desc *desc); @@ -50,11 +56,17 @@ struct typec_mux_desc { void *drvdata; }; -struct typec_mux * -typec_mux_get(struct device *dev, const struct typec_altmode_desc *desc); +struct typec_mux *fwnode_typec_mux_get(struct fwnode_handle *fwnode, + const struct typec_altmode_desc *desc); void typec_mux_put(struct typec_mux *mux); int typec_mux_set(struct typec_mux *mux, struct typec_mux_state *state); +static inline struct typec_mux * +typec_mux_get(struct device *dev, const struct typec_altmode_desc *desc) +{ + return fwnode_typec_mux_get(dev_fwnode(dev), desc); +} + struct typec_mux * typec_mux_register(struct device *parent, const struct typec_mux_desc *desc); void typec_mux_unregister(struct typec_mux *mux); -- cgit v1.2.3 From 69af044a7700552512a147e2ce3520741b65df41 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 Mar 2020 16:53:48 +0300 Subject: usb: roles: Leave the private driver data pointer to the drivers Adding usb_role_switch_get/set_drvdata() functions that the switch drivers can use for setting and getting private data pointer that is associated with the switch. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200302135353.56659-5-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/role.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index efac3af83d6b..02dae936cebd 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -25,6 +25,7 @@ typedef enum usb_role (*usb_role_switch_get_t)(struct device *dev); * @set: Callback for setting the role * @get: Callback for getting the role (optional) * @allow_userspace_control: If true userspace may change the role through sysfs + * @driver_data: Private data pointer * * @usb2_port and @usb3_port will point to the USB host port and @udc to the USB * device controller behind the USB connector with the role switch. If @@ -40,6 +41,7 @@ struct usb_role_switch_desc { usb_role_switch_set_t set; usb_role_switch_get_t get; bool allow_userspace_control; + void *driver_data; }; @@ -57,6 +59,9 @@ struct usb_role_switch * usb_role_switch_register(struct device *parent, const struct usb_role_switch_desc *desc); void usb_role_switch_unregister(struct usb_role_switch *sw); + +void usb_role_switch_set_drvdata(struct usb_role_switch *sw, void *data); +void *usb_role_switch_get_drvdata(struct usb_role_switch *sw); #else static inline int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role) @@ -90,6 +95,17 @@ usb_role_switch_register(struct device *parent, } static inline void usb_role_switch_unregister(struct usb_role_switch *sw) { } + +static inline void +usb_role_switch_set_drvdata(struct usb_role_switch *sw, void *data) +{ +} + +static inline void *usb_role_switch_get_drvdata(struct usb_role_switch *sw) +{ + return NULL; +} + #endif #endif /* __LINUX_USB_ROLE_H */ -- cgit v1.2.3 From bce3052f0c165685a074e50136e4d341bcd59f4a Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 Mar 2020 16:53:49 +0300 Subject: usb: roles: Provide the switch drivers handle to the switch in the API The USB role callback functions had a parameter pointing to the parent device (struct device) of the switch. The assumption was that the switch parent is always the controller. Firstly, that may not be true in every case, and secondly, it prevents us from supporting devices that supply multiple muxes. Changing the first parameter of usb_role_switch_set_t and usb_role_switch_get_t from struct device to struct usb_role_switch. Cc: Peter Chen Cc: Felipe Balbi Cc: Chunfeng Yun Cc: Bin Liu Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200302135353.56659-6-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/role.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index 02dae936cebd..c028ba8029ad 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -13,8 +13,9 @@ enum usb_role { USB_ROLE_DEVICE, }; -typedef int (*usb_role_switch_set_t)(struct device *dev, enum usb_role role); -typedef enum usb_role (*usb_role_switch_get_t)(struct device *dev); +typedef int (*usb_role_switch_set_t)(struct usb_role_switch *sw, + enum usb_role role); +typedef enum usb_role (*usb_role_switch_get_t)(struct usb_role_switch *sw); /** * struct usb_role_switch_desc - USB Role Switch Descriptor -- cgit v1.2.3 From e5256194cb51ced7a2f656590f1e2132235d442c Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 Mar 2020 16:53:50 +0300 Subject: usb: roles: Allow the role switches to be named The switch devices have been named by using the name of the parent device as base for now, but if for example the parent device controls multiple muxes, that will not work. Adding an optional member "name" to the switch descriptor that can be used for naming the switch during registration. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200302135353.56659-7-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/role.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index c028ba8029ad..0164fed31b06 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -27,6 +27,7 @@ typedef enum usb_role (*usb_role_switch_get_t)(struct usb_role_switch *sw); * @get: Callback for getting the role (optional) * @allow_userspace_control: If true userspace may change the role through sysfs * @driver_data: Private data pointer + * @name: Name for the switch (optional) * * @usb2_port and @usb3_port will point to the USB host port and @udc to the USB * device controller behind the USB connector with the role switch. If @@ -43,6 +44,7 @@ struct usb_role_switch_desc { usb_role_switch_get_t get; bool allow_userspace_control; void *driver_data; + const char *name; }; -- cgit v1.2.3 From ca469c292edc2248f1eaaef1ecec4576192fe743 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 Mar 2020 16:53:52 +0300 Subject: usb: typec: Add definitions for Thunderbolt 3 Alternate Mode This adds separate header file for the Thunderbolt 3 Alternate Mode (aka. TBT). The header supplies definitions for all the Thunderbolt specific VDOs (Vendor Defined Objects) that are described in the USB Type-C Connector specification v2.0, as well as definition for the Thunderbolt 3 Standard ID (SID). There is also a new connector state value for the Thunderbolt 3 Alternate Mode that can be used with the mux drivers. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200302135353.56659-9-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_tbt.h | 53 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/usb/typec_tbt.h (limited to 'include/linux') diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h new file mode 100644 index 000000000000..47c2d501ddce --- /dev/null +++ b/include/linux/usb/typec_tbt.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __USB_TYPEC_TBT_H +#define __USB_TYPEC_TBT_H + +#include + +#define USB_TYPEC_VENDOR_INTEL 0x8087 +/* Alias for convenience */ +#define USB_TYPEC_TBT_SID USB_TYPEC_VENDOR_INTEL + +/* Connector state for Thunderbolt3 */ +#define TYPEC_TBT_MODE TYPEC_STATE_MODAL + +/** + * struct typec_thunderbolt_data - Thundebolt3 Alt Mode specific data + * @device_mode: Device Discover Mode VDO + * @cable_mode: Cable Discover Mode VDO + * @enter_vdo: Enter Mode VDO + */ +struct typec_thunderbolt_data { + u32 device_mode; + u32 cable_mode; + u32 enter_vdo; +}; + +/* TBT3 Device Discover Mode VDO bits */ +#define TBT_MODE BIT(0) +#define TBT_ADAPTER(_vdo_) (((_vdo_) & BIT(16)) >> 16) +#define TBT_ADAPTER_LEGACY 0 +#define TBT_ADAPTER_TBT3 1 +#define TBT_INTEL_SPECIFIC_B0 BIT(26) +#define TBT_VENDOR_SPECIFIC_B0 BIT(30) +#define TBT_VENDOR_SPECIFIC_B1 BIT(31) + +#define TBT_SET_ADAPTER(a) (((a) & 1) << 16) + +/* TBT3 Cable Discover Mode VDO bits */ +#define TBT_CABLE_SPEED(_vdo_) (((_vdo_) & GENMASK(18, 16)) >> 16) +#define TBT_CABLE_USB3_GEN1 1 +#define TBT_CABLE_USB3_PASSIVE 2 +#define TBT_CABLE_10_AND_20GBPS 3 +#define TBT_CABLE_ROUNDED BIT(19) +#define TBT_CABLE_OPTICAL BIT(21) +#define TBT_CABLE_RETIMER BIT(22) +#define TBT_CABLE_LINK_TRAINING BIT(23) + +#define TBT_SET_CABLE_SPEED(_s_) (((_s_) & GENMASK(2, 0)) << 16) + +/* TBT3 Device Enter Mode VDO bits */ +#define TBT_ENTER_MODE_CABLE_SPEED(s) TBT_SET_CABLE_SPEED(s) +#define TBT_ENTER_MODE_ACTIVE_CABLE BIT(24) + +#endif /* __USB_TYPEC_TBT_H */ -- cgit v1.2.3 From b2745d92bb015cc4454d4195c4ce6e2852db397e Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Thu, 27 Feb 2020 16:28:34 -0600 Subject: bus: ti-sysc: Add support for PRUSS SYSC type The PRU-ICSS present on AM33xx/AM43xx/AM57xx has a very unique SYSCFG register. The register follows the OMAP4-style SYSC_TYPE3 for Master Standby and Slave Idle, but also has two additional unique fields - STANDBY_INIT and SUB_MWAIT. The STANDBY_INIT is a control bit that is used to initiate a Standby sequence (when set) and trigger a MStandby request to the SoC's PRCM module. This same bit is also used to enable the OCP master ports (when cleared) to allow the PRU cores to access any peripherals or memory beyond the PRU subsystem. The SUB_MWAIT is a ready status field for the external access. Add support for this SYSC type. The STANDBY_INIT has to be set during suspend, without which it results in a hang in the resume sequence on AM33xx/AM43xx boards and requires a board reset to come out of the hang. Any PRU applications requiring external access are supposed to clear the STANDBY_INIT bit. Note that the PRUSS context is lost during a suspend sequence because the PRUSS module is reset and/or disabled. Signed-off-by: Suman Anna Signed-off-by: Roger Quadros [tony@atomide.com: updated quirk define number and to use -ENODEV] Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index ecd3a979a14d..c59999ce044e 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -17,6 +17,7 @@ enum ti_sysc_module_type { TI_SYSC_OMAP4_MCASP, TI_SYSC_OMAP4_USB_HOST_FS, TI_SYSC_DRA7_MCAN, + TI_SYSC_PRUSS, }; struct ti_sysc_cookie { @@ -49,6 +50,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_MODULE_QUIRK_PRUSS BIT(24) #define SYSC_MODULE_QUIRK_DSS_RESET BIT(23) #define SYSC_MODULE_QUIRK_RTC_UNLOCK BIT(22) #define SYSC_QUIRK_CLKDM_NOAUTO BIT(21) -- cgit v1.2.3 From 2333e829952fb437db915bbb17f4d8c43127d438 Mon Sep 17 00:00:00 2001 From: Yu Chen Date: Sun, 23 Feb 2020 15:28:52 +0800 Subject: workqueue: Make workqueue_init*() return void The return values of workqueue_init() and workqueue_early_int() are always 0, and there is no usage of their return value. So just make them return void. Signed-off-by: Yu Chen Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4261d1c6e87b..c86a7691e13c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -649,7 +649,7 @@ int workqueue_online_cpu(unsigned int cpu); int workqueue_offline_cpu(unsigned int cpu); #endif -int __init workqueue_init_early(void); -int __init workqueue_init(void); +void __init workqueue_init_early(void); +void __init workqueue_init(void); #endif -- cgit v1.2.3 From 8375e74f2bca9802a4ddf431a6d7bd2ab9950f27 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 21 Feb 2020 17:40:35 -0800 Subject: driver core: Add fw_devlink kernel commandline option fwnode_operations.add_links allows creating device links from information provided by firmware. fwnode_operations.add_links is currently implemented only by OF/devicetree code and a specific case of efi. However, there's nothing preventing ACPI or other firmware types from implementing it. The OF implementation is currently controlled by a kernel commandline parameter called of_devlink. Since this feature is generic isn't limited to OF, add a generic fw_devlink kernel commandline parameter to control this feature across firmware types. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20200222014038.180923-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 8feeb94b8acc..e0abafbb17f8 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -170,4 +170,6 @@ struct fwnode_operations { } while (false) #define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) +extern u32 fw_devlink_get_flags(void); + #endif -- cgit v1.2.3 From 0e9f8d09d2806aa2e10a04a78f82544e4ee737a1 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 25 Feb 2020 05:08:26 +0000 Subject: driver core: Remove driver_deferred_probe_check_state_continue() Now that driver_deferred_probe_check_state() works better, and we've converted the only user of driver_deferred_probe_check_state_continue() we can simply remove it and simplify some of the logic. Cc: linux-pm@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Linus Walleij Cc: Thierry Reding Cc: Mark Brown Cc: Liam Girdwood Cc: Bjorn Andersson Cc: Saravana Kannan Cc: Todd Kjos Cc: Len Brown Cc: Pavel Machek Cc: Ulf Hansson Cc: Kevin Hilman Cc: "Rafael J. Wysocki" Cc: Rob Herring Reviewed-by: Bjorn Andersson Reviewed-by: Rafael J. Wysocki Signed-off-by: John Stultz Link: https://lore.kernel.org/r/20200225050828.56458-5-john.stultz@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 1188260f9a02..5242afabfaba 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -238,7 +238,6 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) void driver_deferred_probe_add(struct device *dev); int driver_deferred_probe_check_state(struct device *dev); -int driver_deferred_probe_check_state_continue(struct device *dev); void driver_init(void); /** -- cgit v1.2.3 From 64c775fb4b21ab2532555e833edf52055df5fb1c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 25 Feb 2020 05:08:27 +0000 Subject: driver core: Rename deferred_probe_timeout and make it global Since other subsystems (like regulator) have similar arbitrary timeouts for how long they try to resolve driver dependencies, rename deferred_probe_timeout to driver_deferred_probe_timeout and set it as global, so it can be shared. Cc: linux-pm@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Linus Walleij Cc: Thierry Reding Cc: Mark Brown Cc: Liam Girdwood Cc: Bjorn Andersson Cc: Saravana Kannan Cc: Todd Kjos Cc: Len Brown Cc: Pavel Machek Cc: Ulf Hansson Cc: Kevin Hilman Cc: "Rafael J. Wysocki" Cc: Rob Herring Reviewed-by: Bjorn Andersson Reviewed-by: Rafael J. Wysocki Signed-off-by: John Stultz Link: https://lore.kernel.org/r/20200225050828.56458-6-john.stultz@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 5242afabfaba..ee7ba5b5417e 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -236,6 +236,7 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) } #endif +extern int driver_deferred_probe_timeout; void driver_deferred_probe_add(struct device *dev); int driver_deferred_probe_check_state(struct device *dev); void driver_init(void); -- cgit v1.2.3 From 88fd9e5352fe05f7fe57778293aebd4cd106960b Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:47 +0100 Subject: bpf: Refactor trampoline update code As we need to introduce a third type of attachment for trampolines, the flattened signature of arch_prepare_bpf_trampoline gets even more complicated. Refactor the prog and count argument to arch_prepare_bpf_trampoline to use bpf_tramp_progs to simplify the addition and accounting for new attachment types. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-2-kpsingh@chromium.org --- include/linux/bpf.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f13c78c6f29d..98ec10b23dbb 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -433,6 +433,16 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) +/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 + * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 + */ +#define BPF_MAX_TRAMP_PROGS 40 + +struct bpf_tramp_progs { + struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; + int nr_progs; +}; + /* Different use cases for BPF trampoline: * 1. replace nop at the function entry (kprobe equivalent) * flags = BPF_TRAMP_F_RESTORE_REGS @@ -455,8 +465,7 @@ struct btf_func_model { */ int arch_prepare_bpf_trampoline(void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_prog **fentry_progs, int fentry_cnt, - struct bpf_prog **fexit_progs, int fexit_cnt, + struct bpf_tramp_progs *tprogs, void *orig_call); /* these two functions are called from generated trampoline */ u64 notrace __bpf_prog_enter(void); -- cgit v1.2.3 From ae24082331d9bbaae283aafbe930a8f0eb85605a Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:49 +0100 Subject: bpf: Introduce BPF_MODIFY_RETURN When multiple programs are attached, each program receives the return value from the previous program on the stack and the last program provides the return value to the attached function. The fmod_ret bpf programs are run after the fentry programs and before the fexit programs. The original function is only called if all the fmod_ret programs return 0 to avoid any unintended side-effects. The success value, i.e. 0 is not currently configurable but can be made so where user-space can specify it at load time. For example: int func_to_be_attached(int a, int b) { <--- do_fentry do_fmod_ret: if (ret != 0) goto do_fexit; original_function: } <--- do_fexit The fmod_ret program attached to this function can be defined as: SEC("fmod_ret/func_to_be_attached") int BPF_PROG(func_name, int a, int b, int ret) { // This will skip the original function logic. return 1; } The first fmod_ret program is passed 0 in its return argument. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-4-kpsingh@chromium.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 98ec10b23dbb..f748b31e5888 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -474,6 +474,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, + BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ }; -- cgit v1.2.3 From da00d2f117a08fbca262db5ea422c80a568b112b Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:52 +0100 Subject: bpf: Add test ops for BPF_PROG_TYPE_TRACING The current fexit and fentry tests rely on a different program to exercise the functions they attach to. Instead of doing this, implement the test operations for tracing which will also be used for BPF_MODIFY_RETURN in a subsequent patch. Also, clean up the fexit test to use the generated skeleton. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-7-kpsingh@chromium.org --- include/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f748b31e5888..40c53924571d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1156,6 +1156,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -1313,6 +1316,13 @@ static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) -- cgit v1.2.3 From d6e055e8733da5ce53fc69c77e379915400068c5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 29 Feb 2020 23:23:44 +0100 Subject: PCI: Add constant PCI_STATUS_ERROR_BITS This collection of PCI error bits is used in more than one driver, so move it to the PCI core. Signed-off-by: Heiner Kallweit Acked-by: Bjorn Helgaas Signed-off-by: David S. Miller --- include/linux/pci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 3840a541a9de..101d71e0ad0d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -42,6 +42,13 @@ #include +#define PCI_STATUS_ERROR_BITS (PCI_STATUS_DETECTED_PARITY | \ + PCI_STATUS_SIG_SYSTEM_ERROR | \ + PCI_STATUS_REC_MASTER_ABORT | \ + PCI_STATUS_REC_TARGET_ABORT | \ + PCI_STATUS_SIG_TARGET_ABORT | \ + PCI_STATUS_PARITY) + /* * The PCI interface treats multi-function devices as independent * devices. The slot/function address of each device is encoded -- cgit v1.2.3 From ec5d9e87842a43be3a10ada0d5f560bbd3f31d5d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 29 Feb 2020 23:24:23 +0100 Subject: PCI: Add pci_status_get_and_clear_errors Several drivers use the following code sequence: 1. Read PCI_STATUS 2. Mask out non-error bits 3. Action based on error bits set 4. Write back set error bits to clear them As this is a repeated pattern, add a helper to the PCI core. Signed-off-by: Heiner Kallweit Acked-by: Bjorn Helgaas Signed-off-by: David S. Miller --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 101d71e0ad0d..7beaf51e98ec 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1210,6 +1210,7 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); void pci_ignore_hotplug(struct pci_dev *dev); struct pci_dev *pci_real_dma_dev(struct pci_dev *dev); +int pci_status_get_and_clear_errors(struct pci_dev *pdev); int __printf(6, 7) pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler, irq_handler_t thread_fn, void *dev_id, -- cgit v1.2.3 From 51891498f2da78ee64dfad88fa53c9e85fb50abf Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Wed, 4 Mar 2020 11:05:17 -0700 Subject: seccomp: allow TSYNC and USER_NOTIF together The restriction introduced in 7a0df7fbc145 ("seccomp: Make NEW_LISTENER and TSYNC flags exclusive") is mostly artificial: there is enough information in a seccomp user notification to tell which thread triggered a notification. The reason it was introduced is because TSYNC makes the syscall return a thread-id on failure, and NEW_LISTENER returns an fd, and there's no way to distinguish between these two cases (well, I suppose the caller could check all fds it has, then do the syscall, and if the return value was an fd that already existed, then it must be a thread id, but bleh). Matthew would like to use these two flags together in the Chrome sandbox which wants to use TSYNC for video drivers and NEW_LISTENER to proxy syscalls. So, let's fix this ugliness by adding another flag, TSYNC_ESRCH, which tells the kernel to just return -ESRCH on a TSYNC error. This way, NEW_LISTENER (and any subsequent seccomp() commands that want to return positive values) don't conflict with each other. Suggested-by: Matthew Denton Signed-off-by: Tycho Andersen Link: https://lore.kernel.org/r/20200304180517.23867-1-tycho@tycho.ws Signed-off-by: Kees Cook --- include/linux/seccomp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 03583b6d1416..4192369b8418 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -7,7 +7,8 @@ #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ SECCOMP_FILTER_FLAG_LOG | \ SECCOMP_FILTER_FLAG_SPEC_ALLOW | \ - SECCOMP_FILTER_FLAG_NEW_LISTENER) + SECCOMP_FILTER_FLAG_NEW_LISTENER | \ + SECCOMP_FILTER_FLAG_TSYNC_ESRCH) #ifdef CONFIG_SECCOMP -- cgit v1.2.3 From 1326034b3ce7073e3ed74bd0f4d24afee96a9e07 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 19 Feb 2020 21:05:17 +0200 Subject: net/mlx5: Expose raw packet pacing APIs Expose raw packet pacing APIs to be used by DEVX based applications. The existing code was refactored to have a single flow with the new raw APIs. The new raw APIs considered the input of 'pp_rate_limit_context', uid, 'dedicated', upon looking for an existing entry. This raw mode enables future device specification data in the raw context without changing the existing logic and code. The ability to ask for a dedicated entry gives control for application to allocate entries according to its needs. A dedicated entry may not be used by some other process and it also enables the process spreading its resources to some different entries for use different hardware resources as part of enforcing the rate. The counter per entry was changed to be u64 to prevent any option to overflow. Signed-off-by: Yishai Hadas Acked-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 11 ++++++++--- include/linux/mlx5/mlx5_ifc.h | 26 ++++++++++++++++---------- 2 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 277a51d3ec40..f2b4225ed650 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -518,9 +518,11 @@ struct mlx5_rate_limit { }; struct mlx5_rl_entry { - struct mlx5_rate_limit rl; - u16 index; - u16 refcount; + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)]; + u16 index; + u64 refcount; + u16 uid; + u8 dedicated : 1; }; struct mlx5_rl_table { @@ -1007,6 +1009,9 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, struct mlx5_rate_limit *rl); void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl); bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate); +int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid, + bool dedicated_entry, u16 *index); +void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index); bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, struct mlx5_rate_limit *rl_1); int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ff8c9d527bb4..7d89ab64b372 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -810,7 +810,9 @@ struct mlx5_ifc_qos_cap_bits { u8 reserved_at_4[0x1]; u8 packet_pacing_burst_bound[0x1]; u8 packet_pacing_typical_size[0x1]; - u8 reserved_at_7[0x19]; + u8 reserved_at_7[0x4]; + u8 packet_pacing_uid[0x1]; + u8 reserved_at_c[0x14]; u8 reserved_at_20[0x20]; @@ -8262,9 +8264,20 @@ struct mlx5_ifc_set_pp_rate_limit_out_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_set_pp_rate_limit_context_bits { + u8 rate_limit[0x20]; + + u8 burst_upper_bound[0x20]; + + u8 reserved_at_40[0x10]; + u8 typical_packet_size[0x10]; + + u8 reserved_at_60[0x120]; +}; + struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -8274,14 +8287,7 @@ struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 reserved_at_60[0x20]; - u8 rate_limit[0x20]; - - u8 burst_upper_bound[0x20]; - - u8 reserved_at_c0[0x10]; - u8 typical_packet_size[0x10]; - - u8 reserved_at_e0[0x120]; + struct mlx5_ifc_set_pp_rate_limit_context_bits ctx; }; struct mlx5_ifc_access_register_out_bits { -- cgit v1.2.3 From 6a726824aaa3adaaf3bcfca3b471408e225f33d6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Mar 2020 00:00:39 +0200 Subject: spi: Do spi_take_timestamp_pre for as many times as necessary When dealing with a SPI controller driver that is sending more than 1 byte at once (or the entire buffer at once), and the SPI peripheral driver has requested timestamping for a byte in the middle of the buffer, we find that spi_take_timestamp_pre never records a "pre" timestamp. This happens because the function currently expects to be called with the "progress" argument >= to what the peripheral has requested to be timestamped. But clearly there are cases when that isn't going to fly. And since we can't change the past when we realize that the opportunity to take a "pre" timestamp has just passed and there isn't going to be another one, the approach taken is to keep recording the "pre" timestamp on each call, overwriting the previously recorded one until the "post" timestamp is also taken. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200304220044.11193-8-olteanv@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 600e3793303e..87105272879b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -933,8 +933,7 @@ struct spi_transfer { struct ptp_system_timestamp *ptp_sts; - bool timestamped_pre; - bool timestamped_post; + bool timestamped; struct list_head transfer_list; }; -- cgit v1.2.3 From 95cddcb5cc202d3f2499596b9af5b77536c5f86a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Mar 2020 21:15:31 -0800 Subject: ethtool: add infrastructure for centralized checking of coalescing parameters Linux supports 22 different interrupt coalescing parameters. No driver implements them all. Some drivers just ignore the ones they don't support, while others have to carry a long list of checks to reject unsupported settings. To simplify the drivers add the ability to specify inside ethtool_ops which parameters are supported and let the core reject attempts to set any other one. This commit makes the mechanism an opt-in, only drivers which set ethtool_opts->coalesce_types to a non-zero value will have the checks enforced. The same mask is used for global and per queue settings. v3: - move the (temporary) check if driver defines types earlier (Michal) - rename used_types -> nonzero_params, and coalesce_types -> supported_coalesce_params (Alex) - use EOPNOTSUPP instead of EINVAL (Andrew, Michal) Leaving the long series of ifs for now, it seems nice to be able to grep for the field and flag names. This will probably have to be revisited once netlink support lands. Signed-off-by: Jakub Kicinski Reviewed-by: Jacob Keller Reviewed-by: Michal Kubecek Reviewed-by: Andrew Lunn Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/ethtool.h | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 23373978cb3c..e464c946bca4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -177,8 +177,44 @@ void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, const unsigned long *src); +#define ETHTOOL_COALESCE_RX_USECS BIT(0) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES BIT(1) +#define ETHTOOL_COALESCE_RX_USECS_IRQ BIT(2) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ BIT(3) +#define ETHTOOL_COALESCE_TX_USECS BIT(4) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES BIT(5) +#define ETHTOOL_COALESCE_TX_USECS_IRQ BIT(6) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ BIT(7) +#define ETHTOOL_COALESCE_STATS_BLOCK_USECS BIT(8) +#define ETHTOOL_COALESCE_USE_ADAPTIVE_RX BIT(9) +#define ETHTOOL_COALESCE_USE_ADAPTIVE_TX BIT(10) +#define ETHTOOL_COALESCE_PKT_RATE_LOW BIT(11) +#define ETHTOOL_COALESCE_RX_USECS_LOW BIT(12) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES_LOW BIT(13) +#define ETHTOOL_COALESCE_TX_USECS_LOW BIT(14) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES_LOW BIT(15) +#define ETHTOOL_COALESCE_PKT_RATE_HIGH BIT(16) +#define ETHTOOL_COALESCE_RX_USECS_HIGH BIT(17) +#define ETHTOOL_COALESCE_RX_MAX_FRAMES_HIGH BIT(18) +#define ETHTOOL_COALESCE_TX_USECS_HIGH BIT(19) +#define ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH BIT(20) +#define ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL BIT(21) + +#define ETHTOOL_COALESCE_USECS \ + (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) +#define ETHTOOL_COALESCE_MAX_FRAMES \ + (ETHTOOL_COALESCE_RX_MAX_FRAMES | ETHTOOL_COALESCE_TX_MAX_FRAMES) +#define ETHTOOL_COALESCE_USECS_IRQ \ + (ETHTOOL_COALESCE_RX_USECS_IRQ | ETHTOOL_COALESCE_TX_USECS_IRQ) +#define ETHTOOL_COALESCE_MAX_FRAMES_IRQ \ + (ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ | \ + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ) +#define ETHTOOL_COALESCE_USE_ADAPTIVE \ + (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_ADAPTIVE_TX) + /** * struct ethtool_ops - optional netdev operations + * @supported_coalesce_params: supported types of interrupt coalescing. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not * implemented, the @driver and @bus_info fields will be filled in @@ -207,8 +243,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * or zero. * @get_coalesce: Get interrupt coalescing parameters. Returns a negative * error code or zero. - * @set_coalesce: Set interrupt coalescing parameters. Returns a negative - * error code or zero. + * @set_coalesce: Set interrupt coalescing parameters. Supported coalescing + * types should be set in @supported_coalesce_params. + * Returns a negative error code or zero. * @get_ringparam: Report ring sizes * @set_ringparam: Set ring sizes. Returns a negative error code or zero. * @get_pauseparam: Report pause parameters @@ -292,7 +329,8 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * @set_per_queue_coalesce: Set interrupt coalescing parameters per queue. * It must check that the given queue number is valid. If neither a RX nor * a TX queue has this number, return -EINVAL. If only a RX queue or a TX - * queue has this number, ignore the inapplicable fields. + * queue has this number, ignore the inapplicable fields. Supported + * coalescing types should be set in @supported_coalesce_params. * Returns a negative error code or zero. * @get_link_ksettings: Get various device settings including Ethernet link * settings. The %cmd and %link_mode_masks_nwords fields should be @@ -323,6 +361,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * of the generic netdev features interface. */ struct ethtool_ops { + u32 supported_coalesce_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); -- cgit v1.2.3 From 780d2a9c86dc12594e263752cd8426a5794f1cc8 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Mar 2020 15:09:19 +0100 Subject: include/bitmap.h: add missing parameter in docs bitmap_find_next_zero_area_off() has an additional parameter which was not specified in the list of functions. Add it. Fixes: 5e19b013f55a ("lib: bitmap: add alignment offset for bitmap_find_next_zero_area()") Signed-off-by: Wolfram Sang Signed-off-by: Dennis Zhou --- include/linux/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index e52ceb1a73d3..804600f7dc35 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -50,7 +50,7 @@ * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area - * bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above + * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest -- cgit v1.2.3 From a392d26f32cdd87e09b1ea3849db79cfc4eae745 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 Mar 2020 15:09:20 +0100 Subject: include/bitmap.h: add new functions to documentation I found these functions only by chance although I was looking exactly for something like them. So, add them to the list of functions to make them more visible. Fixes: e837dfde15a4 ("bitmap: genericize percpu bitmap region iterators") Signed-off-by: Wolfram Sang Signed-off-by: Dennis Zhou --- include/linux/bitmap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 804600f7dc35..99058eb81042 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -51,6 +51,12 @@ * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above + * bitmap_next_clear_region(map, &start, &end, nbits) Find next clear region + * bitmap_next_set_region(map, &start, &end, nbits) Find next set region + * bitmap_for_each_clear_region(map, rs, re, start, end) + * Iterate over all clear regions + * bitmap_for_each_set_region(map, rs, re, start, end) + * Iterate over all set regions * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest -- cgit v1.2.3 From aaca9408078914380fbfd8aef3c38a34b515a654 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Mar 2020 09:16:40 +0200 Subject: net: sched: Make FIFO Qdisc offloadable Invoke ndo_setup_tc() as appropriate to signal init / replacement, destroying and dumping of pFIFO / bFIFO Qdisc. A lot of the FIFO logic is used for pFIFO_head_drop as well, but that's a semantically very different Qdisc that isn't really in the same boat as pFIFO / bFIFO. Split some of the functions to keep the Qdisc intact. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b6fedd54cd8e..654808bfad83 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -853,6 +853,7 @@ enum tc_setup_type { TC_SETUP_FT, TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, + TC_SETUP_QDISC_FIFO, }; /* These structures hold the attributes of bpf state that are being passed -- cgit v1.2.3 From acc670dba9f57298a6afd7cdfc9a7254292bbec9 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Wed, 26 Feb 2020 15:12:06 +0800 Subject: uacce: unmap remaining mmapping from user space When uacce parent device module is removed, user app may still keep the mmaped area, which can be accessed unsafely. When rmmod, Parent device driver will call uacce_remove, which unmap all remaining mapping from user space for safety. VM_FAULT_SIGBUS is also reported to user space accordingly. Suggested-by: Dave Jiang Signed-off-by: Zhangfei Gao Signed-off-by: Herbert Xu --- include/linux/uacce.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uacce.h b/include/linux/uacce.h index 904a461591a3..0e215e6d0534 100644 --- a/include/linux/uacce.h +++ b/include/linux/uacce.h @@ -98,6 +98,7 @@ struct uacce_queue { * @priv: private pointer of the uacce * @mm_list: list head of uacce_mm->list * @mm_lock: lock for mm_list + * @inode: core vfs */ struct uacce_device { const char *algs; @@ -113,6 +114,7 @@ struct uacce_device { void *priv; struct list_head mm_list; struct mutex mm_lock; + struct inode *inode; }; /** -- cgit v1.2.3 From 70c0923b0ef10b1c8d8f78fb50fcaef8eaae619d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 2 Mar 2020 18:25:00 -0800 Subject: PCI: Introduce pci_get_dsn Several device drivers read their Device Serial Number from the PCIe extended config space. Introduce a new helper function, pci_get_dsn(). This function reads the eight bytes of the DSN and returns them as a u64. If the capability does not exist for the device, the function returns 0. Signed-off-by: Jacob Keller Cc: Bjorn Helgaas Cc: Jeff Kirsher Cc: Michael Chan Acked-by: Bjorn Helgaas Signed-off-by: David S. Miller --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7beaf51e98ec..fc54b8922e66 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1052,6 +1052,8 @@ int pci_find_ht_capability(struct pci_dev *dev, int ht_cap); int pci_find_next_ht_capability(struct pci_dev *dev, int pos, int ht_cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); +u64 pci_get_dsn(struct pci_dev *dev); + struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from); struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, @@ -1707,6 +1709,9 @@ static inline int pci_find_next_capability(struct pci_dev *dev, u8 post, static inline int pci_find_ext_capability(struct pci_dev *dev, int cap) { return 0; } +static inline u64 pci_get_dsn(struct pci_dev *dev) +{ return 0; } + /* Power management related routines */ static inline int pci_save_state(struct pci_dev *dev) { return 0; } static inline void pci_restore_state(struct pci_dev *dev) { } -- cgit v1.2.3 From f400991bf872debffb01c46da882dc97d7e3248e Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 19 Feb 2020 08:39:48 +0100 Subject: vt: switch vt_dont_switch to bool vt_dont_switch is pure boolean, no need for whole char. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20200219073951.16151-6-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/vt_kern.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 8dc77e40bc03..ded5c48598f3 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -135,7 +135,7 @@ extern int do_unbind_con_driver(const struct consw *csw, int first, int last, int deflt); int vty_init(const struct file_operations *console_fops); -extern char vt_dont_switch; +extern bool vt_dont_switch; extern int default_utf8; extern int global_cursor_default; -- cgit v1.2.3 From a10df4910cdbce3c606846467c7e6fc2e469951d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 19 Feb 2020 08:39:49 +0100 Subject: vt: vt_kern.h, remove extern from functions Unify the declarations of functions in vt_kern.h: some are with extern, some are not. Remove extern from the former as it is not needed for functions. Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20200219073951.16151-7-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/vt_kern.h | 62 ++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index ded5c48598f3..abf5bccf906a 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -28,8 +28,9 @@ #define BROKEN_GRAPHICS_PROGRAMS 1 #endif -extern void kd_mksound(unsigned int hz, unsigned int ticks); -extern int kbd_rate(struct kbd_repeat *rep); +void kd_mksound(unsigned int hz, unsigned int ticks); +int kbd_rate(struct kbd_repeat *rep); + extern int fg_console, last_console, want_console; /* console.c */ @@ -131,8 +132,8 @@ void vt_event_post(unsigned int event, unsigned int old, unsigned int new); int vt_waitactive(int n); void change_console(struct vc_data *new_vc); void reset_vc(struct vc_data *vc); -extern int do_unbind_con_driver(const struct consw *csw, int first, int last, - int deflt); +int do_unbind_con_driver(const struct consw *csw, int first, int last, + int deflt); int vty_init(const struct file_operations *console_fops); extern bool vt_dont_switch; @@ -146,7 +147,7 @@ struct vt_spawn_console { }; extern struct vt_spawn_console vt_spawn_con; -extern int vt_move_to_console(unsigned int vt, int alloc); +int vt_move_to_console(unsigned int vt, int alloc); /* Interfaces for VC notification of character events (for accessibility etc) */ @@ -155,35 +156,34 @@ struct vt_notifier_param { unsigned int c; /* Printed char */ }; -extern int register_vt_notifier(struct notifier_block *nb); -extern int unregister_vt_notifier(struct notifier_block *nb); +int register_vt_notifier(struct notifier_block *nb); +int unregister_vt_notifier(struct notifier_block *nb); -extern void hide_boot_cursor(bool hide); +void hide_boot_cursor(bool hide); /* keyboard provided interfaces */ -extern int vt_do_diacrit(unsigned int cmd, void __user *up, int eperm); -extern int vt_do_kdskbmode(int console, unsigned int arg); -extern int vt_do_kdskbmeta(int console, unsigned int arg); -extern int vt_do_kbkeycode_ioctl(int cmd, struct kbkeycode __user *user_kbkc, - int perm); -extern int vt_do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, - int perm, int console); -extern int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, - int perm); -extern int vt_do_kdskled(int console, int cmd, unsigned long arg, int perm); -extern int vt_do_kdgkbmode(int console); -extern int vt_do_kdgkbmeta(int console); -extern void vt_reset_unicode(int console); -extern int vt_get_shift_state(void); -extern void vt_reset_keyboard(int console); -extern int vt_get_leds(int console, int flag); -extern int vt_get_kbd_mode_bit(int console, int bit); -extern void vt_set_kbd_mode_bit(int console, int bit); -extern void vt_clr_kbd_mode_bit(int console, int bit); -extern void vt_set_led_state(int console, int leds); -extern void vt_set_led_state(int console, int leds); -extern void vt_kbd_con_start(int console); -extern void vt_kbd_con_stop(int console); +int vt_do_diacrit(unsigned int cmd, void __user *up, int eperm); +int vt_do_kdskbmode(int console, unsigned int arg); +int vt_do_kdskbmeta(int console, unsigned int arg); +int vt_do_kbkeycode_ioctl(int cmd, struct kbkeycode __user *user_kbkc, + int perm); +int vt_do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, int perm, + int console); +int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm); +int vt_do_kdskled(int console, int cmd, unsigned long arg, int perm); +int vt_do_kdgkbmode(int console); +int vt_do_kdgkbmeta(int console); +void vt_reset_unicode(int console); +int vt_get_shift_state(void); +void vt_reset_keyboard(int console); +int vt_get_leds(int console, int flag); +int vt_get_kbd_mode_bit(int console, int bit); +void vt_set_kbd_mode_bit(int console, int bit); +void vt_clr_kbd_mode_bit(int console, int bit); +void vt_set_led_state(int console, int leds); +void vt_set_led_state(int console, int leds); +void vt_kbd_con_start(int console); +void vt_kbd_con_stop(int console); void vc_scrolldelta_helper(struct vc_data *c, int lines, unsigned int rolled_over, void *_base, unsigned int size); -- cgit v1.2.3 From 6e24628d78e4785385876125cba62315ca3b04b9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Feb 2020 23:51:29 -0800 Subject: lib: Introduce generic min-heap Supports push, pop and converting an array into a heap. If the sense of the compare function is inverted then it can provide a max-heap. Based-on-work-by: Peter Zijlstra (Intel) Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200214075133.181299-3-irogers@google.com --- include/linux/min_heap.h | 134 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 include/linux/min_heap.h (limited to 'include/linux') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h new file mode 100644 index 000000000000..44077837385f --- /dev/null +++ b/include/linux/min_heap.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MIN_HEAP_H +#define _LINUX_MIN_HEAP_H + +#include +#include +#include + +/** + * struct min_heap - Data structure to hold a min-heap. + * @data: Start of array holding the heap elements. + * @nr: Number of elements currently in the heap. + * @size: Maximum number of elements that can be held in current storage. + */ +struct min_heap { + void *data; + int nr; + int size; +}; + +/** + * struct min_heap_callbacks - Data/functions to customise the min_heap. + * @elem_size: The nr of each element in bytes. + * @less: Partial order function for this heap. + * @swp: Swap elements function. + */ +struct min_heap_callbacks { + int elem_size; + bool (*less)(const void *lhs, const void *rhs); + void (*swp)(void *lhs, void *rhs); +}; + +/* Sift the element at pos down the heap. */ +static __always_inline +void min_heapify(struct min_heap *heap, int pos, + const struct min_heap_callbacks *func) +{ + void *left, *right, *parent, *smallest; + void *data = heap->data; + + for (;;) { + if (pos * 2 + 1 >= heap->nr) + break; + + left = data + ((pos * 2 + 1) * func->elem_size); + parent = data + (pos * func->elem_size); + smallest = parent; + if (func->less(left, smallest)) + smallest = left; + + if (pos * 2 + 2 < heap->nr) { + right = data + ((pos * 2 + 2) * func->elem_size); + if (func->less(right, smallest)) + smallest = right; + } + if (smallest == parent) + break; + func->swp(smallest, parent); + if (smallest == left) + pos = (pos * 2) + 1; + else + pos = (pos * 2) + 2; + } +} + +/* Floyd's approach to heapification that is O(nr). */ +static __always_inline +void min_heapify_all(struct min_heap *heap, + const struct min_heap_callbacks *func) +{ + int i; + + for (i = heap->nr / 2; i >= 0; i--) + min_heapify(heap, i, func); +} + +/* Remove minimum element from the heap, O(log2(nr)). */ +static __always_inline +void min_heap_pop(struct min_heap *heap, + const struct min_heap_callbacks *func) +{ + void *data = heap->data; + + if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) + return; + + /* Place last element at the root (position 0) and then sift down. */ + heap->nr--; + memcpy(data, data + (heap->nr * func->elem_size), func->elem_size); + min_heapify(heap, 0, func); +} + +/* + * Remove the minimum element and then push the given element. The + * implementation performs 1 sift (O(log2(nr))) and is therefore more + * efficient than a pop followed by a push that does 2. + */ +static __always_inline +void min_heap_pop_push(struct min_heap *heap, + const void *element, + const struct min_heap_callbacks *func) +{ + memcpy(heap->data, element, func->elem_size); + min_heapify(heap, 0, func); +} + +/* Push an element on to the heap, O(log2(nr)). */ +static __always_inline +void min_heap_push(struct min_heap *heap, const void *element, + const struct min_heap_callbacks *func) +{ + void *data = heap->data; + void *child, *parent; + int pos; + + if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) + return; + + /* Place at the end of data. */ + pos = heap->nr; + memcpy(data + (pos * func->elem_size), element, func->elem_size); + heap->nr++; + + /* Sift child at pos up. */ + for (; pos > 0; pos = (pos - 1) / 2) { + child = data + (pos * func->elem_size); + parent = data + ((pos - 1) / 2) * func->elem_size; + if (func->less(parent, child)) + break; + func->swp(parent, child); + } +} + +#endif /* _LINUX_MIN_HEAP_H */ -- cgit v1.2.3 From 836196beb377e59e54ec9e04f7402076ef7a8bd8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Feb 2020 23:51:31 -0800 Subject: perf/core: Add per perf_cpu_context min_heap storage The storage required for visit_groups_merge's min heap needs to vary in order to support more iterators, such as when multiple nested cgroups' events are being visited. This change allows for 2 iterators and doesn't support growth. Based-on-work-by: Peter Zijlstra (Intel) Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200214075133.181299-5-irogers@google.com --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 68e21e828893..8768a39b5258 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -862,6 +862,13 @@ struct perf_cpu_context { int sched_cb_usage; int online; + /* + * Per-CPU storage for iterators used in visit_groups_merge. The default + * storage is of size 2 to hold the CPU and any CPU event iterators. + */ + int heap_size; + struct perf_event **heap; + struct perf_event *heap_default[2]; }; struct perf_output_handle { -- cgit v1.2.3 From 36a0df85d2e85e1929e8cd607e19243e5a2754e7 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Fri, 21 Feb 2020 19:52:06 -0500 Subject: sched/topology: Add callback to read per CPU thermal pressure Introduce the arch_scale_thermal_pressure() callback to retrieve per CPU thermal pressure. Signed-off-by: Thara Gopinath Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200222005213.3873-3-thara.gopinath@linaro.org --- include/linux/sched/topology.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index f341163fedc9..af9319e4cfb9 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -225,6 +225,14 @@ unsigned long arch_scale_cpu_capacity(int cpu) } #endif +#ifndef arch_scale_thermal_pressure +static __always_inline +unsigned long arch_scale_thermal_pressure(int cpu) +{ + return 0; +} +#endif + static inline int task_node(const struct task_struct *p) { return cpu_to_node(task_cpu(p)); -- cgit v1.2.3 From ad58cc5cc50ca8423cf630778594bd38252a0a58 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Fri, 21 Feb 2020 19:52:07 -0500 Subject: drivers/base/arch_topology: Add infrastructure to store and update instantaneous thermal pressure Add architecture specific APIs to update and track thermal pressure on a per CPU basis. A per CPU variable thermal_pressure is introduced to keep track of instantaneous per CPU thermal pressure. Thermal pressure is the delta between maximum capacity and capped capacity due to a thermal event. topology_get_thermal_pressure can be hooked into the scheduler specified arch_scale_thermal_pressure to retrieve instantaneous thermal pressure of a CPU. arch_set_thermal_pressure can be used to update the thermal pressure. Considering topology_get_thermal_pressure reads thermal_pressure and arch_set_thermal_pressure writes into thermal_pressure, one can argue for some sort of locking mechanism to avoid a stale value. But considering topology_get_thermal_pressure can be called from a system critical path like scheduler tick function, a locking mechanism is not ideal. This means that it is possible the thermal_pressure value used to calculate average thermal pressure for a CPU can be stale for up to 1 tick period. Signed-off-by: Thara Gopinath Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200222005213.3873-4-thara.gopinath@linaro.org --- include/linux/arch_topology.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 3015ecbb90b1..88a115e81f27 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -33,6 +33,16 @@ unsigned long topology_get_freq_scale(int cpu) return per_cpu(freq_scale, cpu); } +DECLARE_PER_CPU(unsigned long, thermal_pressure); + +static inline unsigned long topology_get_thermal_pressure(int cpu) +{ + return per_cpu(thermal_pressure, cpu); +} + +void arch_set_thermal_pressure(struct cpumask *cpus, + unsigned long th_pressure); + struct cpu_topology { int thread_id; int core_id; -- cgit v1.2.3 From bbce8eaa603236bf958b0d24e6377b3f3b623991 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 5 Mar 2020 09:06:25 +0000 Subject: cpufreq: add function to get the hardware max frequency Add weak function to return the hardware maximum frequency of a CPU, with the default implementation returning cpuinfo.max_freq, which is the best information we can generically get from the cpufreq framework. The default can be overwritten by a strong function in platforms that want to provide an alternative implementation, with more accurate information, obtained either from hardware or firmware. Signed-off-by: Ionela Voinescu Reviewed-by: Valentin Schneider Acked-by: Viresh Kumar Cc: Viresh Kumar Cc: Rafael J. Wysocki Signed-off-by: Catalin Marinas --- include/linux/cpufreq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 0fb561d1b524..f7240251a949 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -205,6 +205,7 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); +unsigned int cpufreq_get_hw_max_freq(unsigned int cpu); void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); @@ -232,6 +233,10 @@ static inline unsigned int cpufreq_quick_get_max(unsigned int cpu) { return 0; } +static inline unsigned int cpufreq_get_hw_max_freq(unsigned int cpu) +{ + return 0; +} static inline void disable_cpufreq(void) { } #endif -- cgit v1.2.3 From cd0ed03a8903a0b0c6fc36e32d133d1ddfe70cd6 Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Thu, 5 Mar 2020 09:06:26 +0000 Subject: arm64: use activity monitors for frequency invariance The Frequency Invariance Engine (FIE) is providing a frequency scaling correction factor that helps achieve more accurate load-tracking. So far, for arm and arm64 platforms, this scale factor has been obtained based on the ratio between the current frequency and the maximum supported frequency recorded by the cpufreq policy. The setting of this scale factor is triggered from cpufreq drivers by calling arch_set_freq_scale. The current frequency used in computation is the frequency requested by a governor, but it may not be the frequency that was implemented by the platform. This correction factor can also be obtained using a core counter and a constant counter to get information on the performance (frequency based only) obtained in a period of time. This will more accurately reflect the actual current frequency of the CPU, compared with the alternative implementation that reflects the request of a performance level from the OS. Therefore, implement arch_scale_freq_tick to use activity monitors, if present, for the computation of the frequency scale factor. The use of AMU counters depends on: - CONFIG_ARM64_AMU_EXTN - depents on the AMU extension being present - CONFIG_CPU_FREQ - the current frequency obtained using counter information is divided by the maximum frequency obtained from the cpufreq policy. While it is possible to have a combination of CPUs in the system with and without support for activity monitors, the use of counters for frequency invariance is only enabled for a CPU if all related CPUs (CPUs in the same frequency domain) support and have enabled the core and constant activity monitor counters. In this way, there is a clear separation between the policies for which arch_set_freq_scale (cpufreq based FIE) is used, and the policies for which arch_scale_freq_tick (counter based FIE) is used to set the frequency scale factor. For this purpose, a late_initcall_sync is registered to trigger validation work for policies that will enable or disable the use of AMU counters for frequency invariance. If CONFIG_CPU_FREQ is not defined, the use of counters is enabled on all CPUs only if all possible CPUs correctly support the necessary counters. Signed-off-by: Ionela Voinescu Reviewed-by: Lukasz Luba Acked-by: Sudeep Holla Cc: Sudeep Holla Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Catalin Marinas --- include/linux/arch_topology.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 3015ecbb90b1..1ccdddb541a7 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -33,6 +33,8 @@ unsigned long topology_get_freq_scale(int cpu) return per_cpu(freq_scale, cpu); } +bool arch_freq_counters_available(struct cpumask *cpus); + struct cpu_topology { int thread_id; int core_id; -- cgit v1.2.3 From 241eaabc3c315cdfea505725a43de848f498527f Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 6 Mar 2020 10:34:10 +0800 Subject: power: supply: Allow charger manager can be built as a module Allow charger manager can be built as a module like other charger drivers. Signed-off-by: Baolin Wang Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index ad19e68e1fc3..ae94dcebd936 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -248,7 +248,7 @@ struct charger_manager { u64 charging_end_time; }; -#ifdef CONFIG_CHARGER_MANAGER +#if IS_ENABLED(CONFIG_CHARGER_MANAGER) extern void cm_notify_event(struct power_supply *psy, enum cm_event_types type, char *msg); #else -- cgit v1.2.3 From 058bc104f7ca5c83d81695ee96f03dbd93bae518 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Feb 2020 14:31:06 +0100 Subject: serial: 8250: Generalize rs485 software emulation Commit e490c9144cfa ("tty: Add software emulated RS485 support for 8250") introduced support to use RTS as an rs485 Transmit Enable signal. So far the only drivers taking advantage of it are 8250_omap.c and 8250_of.c. We're about to make use of the feature in 8250_bcm2835aux.c as well. The bcm2835aux differs from omap chips by inverting the meaning of RTS in the MCR register. Moreover, omap achieves half-duplex mode by disabling the RX interrupt and clearing the RX FIFO when TX stops. The bcm2835aux requires disabling the receiver instead. Support these behavioral differences by generalizing the rs485 emulation: Introduce ->rs485_start_tx() and ->rs485_stop_tx() callbacks in struct uart_8250_port, provide generic implementations containing the existing code and use them as callbacks in 8250_omap.c and 8250_of.c. start_tx_rs485() is idempotent in that it recognizes whether RTS is already asserted. Achieve the same by introducing a tx_stopped flag in struct uart_8250_em485. This may even perform a little better on arches where memory access is faster than mmio access. Signed-off-by: Lukas Wunner Cc: Matwey V. Kornilov Link: https://lore.kernel.org/r/5ac0464ae4414708e723a1e0d52b0c1b2bd41b9b.1582895077.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 6a8e8c48c882..0901c2aa366c 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -81,6 +81,7 @@ struct uart_8250_em485 { struct hrtimer stop_tx_timer; /* "rs485 stop tx" timer */ struct hrtimer *active_timer; /* pointer to active timer */ struct uart_8250_port *port; /* for hrtimer callbacks */ + unsigned int tx_stopped:1; /* tx is currently stopped */ }; /* @@ -132,6 +133,8 @@ struct uart_8250_port { void (*dl_write)(struct uart_8250_port *, int); struct uart_8250_em485 *em485; + void (*rs485_start_tx)(struct uart_8250_port *); + void (*rs485_stop_tx)(struct uart_8250_port *); /* Serial port overrun backoff */ struct delayed_work overrun_backoff; -- cgit v1.2.3 From eaee41727e6d8a7d2b94421c25e82b00cb2fded5 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 2 Mar 2020 17:51:34 +0000 Subject: sysctl/sysrq: Remove __sysrq_enabled copy Many embedded boards have a disconnected TTL level serial which can generate some garbage that can lead to spurious false sysrq detects. Currently, sysrq can be either completely disabled for serial console or always disabled (with CONFIG_MAGIC_SYSRQ_SERIAL), since commit 732dbf3a6104 ("serial: do not accept sysrq characters via serial port") At Arista, we have such boards that can generate BREAK and random garbage. While disabling sysrq for serial console would solve the problem with spurious false sysrq triggers, it's also desirable to have a way to enable sysrq back. Having the way to enable sysrq was beneficial to debug lockups with a manual investigation in field and on the other side preventing false sysrq detections. As a preparation to add sysrq_toggle_support() call into uart, remove a private copy of sysrq_enabled from sysctl - it should reflect the actual status of sysrq. Furthermore, the private copy isn't correct already in case sysrq_always_enabled is true. So, remove __sysrq_enabled and use a getter-helper sysrq_mask() to check sysrq_key_op enabled status. Cc: Iurii Zaikin Cc: Jiri Slaby Cc: Luis Chamberlain Cc: Kees Cook Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20200302175135.269397-2-dima@arista.com Signed-off-by: Greg Kroah-Hartman --- include/linux/sysrq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 8c71874e8485..8e159e16850f 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -50,6 +50,7 @@ int unregister_sysrq_key(int key, struct sysrq_key_op *op); struct sysrq_key_op *__sysrq_get_key_op(int key); int sysrq_toggle_support(int enable_mask); +int sysrq_mask(void); #else @@ -71,6 +72,12 @@ static inline int unregister_sysrq_key(int key, struct sysrq_key_op *op) return -EINVAL; } +static inline int sysrq_mask(void) +{ + /* Magic SysRq disabled mask */ + return 0; +} + #endif #endif /* _LINUX_SYSRQ_H */ -- cgit v1.2.3 From 68af43173d3fcece70bef49cb992c64c4c68ff23 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 2 Mar 2020 17:51:35 +0000 Subject: serial/sysrq: Add MAGIC_SYSRQ_SERIAL_SEQUENCE Many embedded boards have a disconnected TTL level serial which can generate some garbage that can lead to spurious false sysrq detects. Currently, sysrq can be either completely disabled for serial console or always disabled (with CONFIG_MAGIC_SYSRQ_SERIAL), since commit 732dbf3a6104 ("serial: do not accept sysrq characters via serial port") At Arista, we have such boards that can generate BREAK and random garbage. While disabling sysrq for serial console would solve the problem with spurious false sysrq triggers, it's also desirable to have a way to enable sysrq back. As a measure of balance between on and off options, add MAGIC_SYSRQ_SERIAL_SEQUENCE which is a string sequence that can enable sysrq if it follows BREAK on a serial line. The longer the string - the less likely it may be in the garbage. Having the way to enable sysrq was beneficial to debug lockups with a manual investigation in field and on the other side preventing false sysrq detections. Based-on-patch-by: Vasiliy Khoruzhick Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20200302175135.269397-3-dima@arista.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 52404ef1694e..1f4443db5474 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -243,6 +243,7 @@ struct uart_port { unsigned long sysrq; /* sysrq timeout */ unsigned int sysrq_ch; /* char for sysrq */ unsigned char has_sysrq; + unsigned char sysrq_seq; /* index in sysrq_toggle_seq */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; -- cgit v1.2.3 From 86f5d0f3d4995b8b8909da7eea235e5bc1ceefbe Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 2 Mar 2020 16:15:19 -0800 Subject: net/mlx5: Introduce egress acl forward-to-vport capability Add HCA_CAP.egress_acl_forward_to_vport field to check whether HW supports e-switch vport's egress acl to forward packets to other e-switch vport or not. By default E-Switch egress ACL forwards eswitch vports egress packets to their corresponding NIC/VF vports. With this cap enabled, the driver is allowed to alter this behavior and forward packets to arbitrary NIC/VF vports with the following limitations: a. Multiple processing paths are supported if all of the following conditions are met: - HCA_CAP.egress_acl_forward_to_vport is set ==1. - A destination of type Flow Table only appears once, as the last destination in the list. - Vport destination is supported if HCA_CAP.egress_acl_forward_to_vport==1. Vport must not be the Uplink. b. Flow_tag not supported. c. This table is only applicable after an FDB table is created. d. Push VLAN action is not supported. e. Pop VLAN action cannot be added concurrently to this table and FDB table. This feature will be used during port failover in bonding scenario where two VFs representors are bonded to handle failover egress traffic (VM's ingress/receive traffic). Signed-off-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7d89ab64b372..07be46e713fc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -738,7 +738,7 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 flow_source[0x1]; u8 reserved_at_18[0x2]; u8 multi_fdb_encap[0x1]; - u8 reserved_at_1b[0x1]; + u8 egress_acl_forward_to_vport[0x1]; u8 fdb_multi_path_to_table[0x1]; u8 reserved_at_1d[0x3]; -- cgit v1.2.3 From bd673da6d933e3c8f652b011afba6cee0f8bbe45 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 2 Mar 2020 16:15:20 -0800 Subject: net/mlx5: Introduce TLS and IPSec objects enums Expose the TLS encryption key general object type enum correctly, and add the IPSec encryption key general object type enum. Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 07be46e713fc..2e98bba12356 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10489,7 +10489,8 @@ enum { }; enum { - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK = 0x1, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS = 0x1, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC = 0x2, }; struct mlx5_ifc_tls_static_params_bits { -- cgit v1.2.3 From dc392fc56f39a00a46d6db2d150571ccafe99734 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 2 Mar 2020 16:15:21 -0800 Subject: net/mlx5: Expose link speed directly Expose port rate as part of the port speed register fields. Signed-off-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2e98bba12356..d0a678c82ccd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8423,7 +8423,8 @@ struct mlx5_ifc_ptys_reg_bits { u8 proto_mask[0x3]; u8 an_status[0x4]; - u8 reserved_at_24[0x1c]; + u8 reserved_at_24[0xc]; + u8 data_rate_oper[0x10]; u8 ext_eth_proto_capability[0x20]; -- cgit v1.2.3 From e0ebd8eb36ed850a22a9a0ca83edc4a40ad67c16 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 2 Mar 2020 16:15:22 -0800 Subject: net/mlx5: HW bit for goto chain offload support Add the HW bit definition indecating goto chain offload support. Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d0a678c82ccd..9b8ff4e57002 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -414,7 +414,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_16[0x1]; u8 table_miss_action_domain[0x1]; u8 termination_table[0x1]; - u8 reserved_at_19[0x7]; + u8 reformat_and_fwd_to_table[0x1]; + u8 reserved_at_1a[0x6]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; -- cgit v1.2.3 From c16816acd08697b02a53f56f8936497a9f6f6e7a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 6 Mar 2020 14:03:43 +0100 Subject: genirq: Add protection against unsafe usage of generic_handle_irq() In general calling generic_handle_irq() with interrupts disabled from non interrupt context is harmless. For some interrupt controllers like the x86 trainwrecks this is outright dangerous as it might corrupt state if an interrupt affinity change is pending. Add infrastructure which allows to mark interrupts as unsafe and catch such usage in generic_handle_irq(). Reported-by: sathyanarayanan.kuppuswamy@linux.intel.com Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lkml.kernel.org/r/20200306130623.590923677@linutronix.de --- include/linux/irq.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 3ed5a055b5f4..9315fbb87db3 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -211,6 +211,8 @@ struct irq_data { * IRQD_CAN_RESERVE - Can use reservation mode * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change * required + * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked + * from actual interrupt context. */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -234,6 +236,7 @@ enum { IRQD_DEFAULT_TRIGGER_SET = (1 << 25), IRQD_CAN_RESERVE = (1 << 26), IRQD_MSI_NOMASK_QUIRK = (1 << 27), + IRQD_HANDLE_ENFORCE_IRQCTX = (1 << 28), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -303,6 +306,16 @@ static inline bool irqd_is_single_target(struct irq_data *d) return __irqd_to_state(d) & IRQD_SINGLE_TARGET; } +static inline void irqd_set_handle_enforce_irqctx(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_HANDLE_ENFORCE_IRQCTX; +} + +static inline bool irqd_is_handle_enforce_irqctx(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_HANDLE_ENFORCE_IRQCTX; +} + static inline bool irqd_is_wakeup_set(struct irq_data *d) { return __irqd_to_state(d) & IRQD_WAKEUP_STATE; -- cgit v1.2.3 From acd26bcf362708594ea081ef55140e37d0854ed2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 6 Mar 2020 14:03:47 +0100 Subject: genirq: Provide interrupt injection mechanism Error injection mechanisms need a half ways safe way to inject interrupts as invoking generic_handle_irq() or the actual device interrupt handler directly from e.g. a debugfs write is not guaranteed to be safe. On x86 generic_handle_irq() is unsafe due to the hardware trainwreck which is the base of x86 interrupt delivery and affinity management. Move the irq debugfs injection code into a separate function which can be used by error injection code as well. The implementation prevents at least that state is corrupted, but it cannot close a very tiny race window on x86 which might result in a stale and not serviced device interrupt under very unlikely circumstances. This is explicitly for debugging and testing and not for production use or abuse in random driver code. Signed-off-by: Thomas Gleixner Tested-by: Kuppuswamy Sathyanarayanan Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Marc Zyngier Link: https://lkml.kernel.org/r/20200306130623.990928309@linutronix.de --- include/linux/interrupt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c5fe60ec6b84..80f637c3a6f3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -248,6 +248,8 @@ extern void enable_percpu_nmi(unsigned int irq, unsigned int type); extern int prepare_percpu_nmi(unsigned int irq); extern void teardown_percpu_nmi(unsigned int irq); +extern int irq_inject_interrupt(unsigned int irq); + /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); -- cgit v1.2.3 From 3f17ada8f38c57b6c81d41db6d911932b280d5b5 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 10 Feb 2020 15:26:01 +0200 Subject: iio: imu: adis: add unlocked __adis_initial_startup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change splits the __adis_initial_startup() away from adis_initial_startup(). The unlocked version can be used in certain calls during probe, where races won't happen since the ADIS driver may not be registered yet with IIO. Signed-off-by: Nuno Sá Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index d2fcf45b4cef..15e75670f923 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -297,6 +297,7 @@ static inline int adis_read_reg_32(struct adis *adis, unsigned int reg, int adis_enable_irq(struct adis *adis, bool enable); int __adis_check_status(struct adis *adis); +int __adis_initial_startup(struct adis *adis); static inline int adis_check_status(struct adis *adis) { @@ -309,7 +310,17 @@ static inline int adis_check_status(struct adis *adis) return ret; } -int adis_initial_startup(struct adis *adis); +/* locked version of __adis_initial_startup() */ +static inline int adis_initial_startup(struct adis *adis) +{ + int ret; + + mutex_lock(&adis->state_lock); + ret = __adis_initial_startup(adis); + mutex_unlock(&adis->state_lock); + + return ret; +} int adis_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, unsigned int error_mask, -- cgit v1.2.3 From fdcf6bbb4ed388844e74810e117ac87db1347f3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Mon, 10 Feb 2020 15:26:02 +0200 Subject: iio: imu: adis: Add self_test_reg variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a dedicated self_test_reg variable. This is also a step to let new drivers make use of `adis_initial_startup()`. Some devices use MSG_CTRL reg to request a self_test command while others use the GLOB_CMD register. Signed-off-by: Nuno Sá Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 15e75670f923..b7feca4e5f26 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -41,6 +41,7 @@ struct adis_timeout { * @glob_cmd_reg: Register address of the GLOB_CMD register * @msc_ctrl_reg: Register address of the MSC_CTRL register * @diag_stat_reg: Register address of the DIAG_STAT register + * @self_test_reg: Register address to request self test command * @status_error_msgs: Array of error messgaes * @status_error_mask: * @timeouts: Chip specific delays @@ -55,6 +56,7 @@ struct adis_data { unsigned int diag_stat_reg; unsigned int self_test_mask; + unsigned int self_test_reg; bool self_test_no_autoclear; const struct adis_timeout *timeouts; -- cgit v1.2.3 From 1fd4567026926d01a94a74d0543e324fc794aa73 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 10 Feb 2020 15:26:04 +0200 Subject: iio: imu: adis: add support product ID check in adis_initial_startup Each driver/chip that wants to validate it's product id, can now specify a 'prod_id_reg' and an expected 'prod_id' value. The 'prod_id' value is intentionally left 0 (uninitialized). There aren't (yet) any product IDs with value 0; this enforces that both 'prod_id_reg' and 'prod_id' are specified. At the very least, this routine validates that the SPI connection to the ADIS chip[s] works well. Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index b7feca4e5f26..ac7cfd073804 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -41,6 +41,8 @@ struct adis_timeout { * @glob_cmd_reg: Register address of the GLOB_CMD register * @msc_ctrl_reg: Register address of the MSC_CTRL register * @diag_stat_reg: Register address of the DIAG_STAT register + * @prod_id_reg: Register address of the PROD_ID register + * @prod_id: Product ID code that should be expected when reading @prod_id_reg * @self_test_reg: Register address to request self test command * @status_error_msgs: Array of error messgaes * @status_error_mask: @@ -54,6 +56,9 @@ struct adis_data { unsigned int glob_cmd_reg; unsigned int msc_ctrl_reg; unsigned int diag_stat_reg; + unsigned int prod_id_reg; + + unsigned int prod_id; unsigned int self_test_mask; unsigned int self_test_reg; -- cgit v1.2.3 From 3543b1998dd396c66c7a9bce1a7962c10648de6e Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 25 Feb 2020 12:33:17 +0200 Subject: iio: imu: adis: add doc-string for 'adis' struct This change adds a doc-string for the 'adis' struct. It details the fields and their roles. Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index ac7cfd073804..63e456aa5c8a 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -73,6 +73,20 @@ struct adis_data { bool has_paging; }; +/** + * struct adis - ADIS device instance data + * @spi: Reference to SPI device which owns this ADIS IIO device + * @trig: IIO trigger object data + * @data: ADIS chip variant specific data + * @burst: ADIS burst transfer information + * @state_lock: Lock used by the device to protect state + * @msg: SPI message object + * @xfer: SPI transfer objects to be used for a @msg + * @current_page: Some ADIS devices have registers, this selects current page + * @buffer: Data buffer for information read from the device + * @tx: DMA safe TX buffer for SPI transfers + * @rx: DMA safe RX buffer for SPI transfers + */ struct adis { struct spi_device *spi; struct iio_trigger *trig; -- cgit v1.2.3 From 2dd86ba821330840661e7102e3304309299aa635 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 25 Feb 2020 12:33:18 +0200 Subject: iio: imu: adis: update 'adis_data' struct doc-string The doc-string has been neglected over time. This change updates it with all the missing info. Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 63e456aa5c8a..20339025c75f 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -43,10 +43,14 @@ struct adis_timeout { * @diag_stat_reg: Register address of the DIAG_STAT register * @prod_id_reg: Register address of the PROD_ID register * @prod_id: Product ID code that should be expected when reading @prod_id_reg + * @self_test_mask: Bitmask of supported self-test operations * @self_test_reg: Register address to request self test command + * @self_test_no_autoclear: True if device's self-test needs clear of ctrl reg * @status_error_msgs: Array of error messgaes - * @status_error_mask: + * @status_error_mask: Bitmask of errors supported by the device * @timeouts: Chip specific delays + * @enable_irq: Hook for ADIS devices that have a special IRQ enable/disable + * @has_paging: True if ADIS device has paged registers */ struct adis_data { unsigned int read_delay; -- cgit v1.2.3 From 2303248b291f537f0414a644a53483c71023136f Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 25 Feb 2020 12:33:19 +0200 Subject: iio: imu: adis: add a note better explaining state_lock The 'state_lock' mutex was renamed from 'txrx_lock' in a previous patch and is intended to be used by ADIS drivers to protect the state of devices during consecutive R/W ops. The initial patch that introduced this change did not do a good [well, any] job at explaining this. This patch adds a comment to the 'state_lock' better explaining it's use. Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 20339025c75f..dd8219138c2e 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -98,6 +98,17 @@ struct adis { const struct adis_data *data; struct adis_burst *burst; + /** + * The state_lock is meant to be used during operations that require + * a sequence of SPI R/W in order to protect the SPI transfer + * information (fields 'xfer', 'msg' & 'current_page') between + * potential concurrent accesses. + * This lock is used by all "adis_{functions}" that have to read/write + * registers. These functions also have unlocked variants + * (see "__adis_{functions}"), which don't hold this lock. + * This allows users of the ADIS library to group SPI R/W into + * the drivers, but they also must manage this lock themselves. + */ struct mutex state_lock; struct spi_message msg; struct spi_transfer *xfer; -- cgit v1.2.3 From 2ddc982a6c172c141183ac954d44f01bc4f2fe72 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Fri, 21 Feb 2020 14:06:55 +0200 Subject: iio: industrialio-core: Fix debugfs read Currently iio_debugfs_read_reg calls debugfs_reg_access every time it is ran. Reading the same hardware register multiple times during the same reading of a debugfs file can cause unintended effects. For example for each: cat iio:device0/direct_reg_access the file_operations.read function will be called at least twice. First will return the full length of the string in bytes and the second will return 0. This patch makes iio_debugfs_read_reg to call debugfs_reg_access only when the user's buffer position (*ppos) is 0. (meaning it is the beginning of a new reading of the debugfs file). Fixes: e553f182d55b ("staging: iio: core: Introduce debugfs support, add support for direct register access") Signed-off-by: Alexandru Ardelean Signed-off-by: Alexandru Tachici Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 862ce0019eba..eed58ed2f368 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -568,6 +568,8 @@ struct iio_dev { #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_dentry; unsigned cached_reg_addr; + char read_buf[20]; + unsigned int read_buf_len; #endif }; -- cgit v1.2.3 From d7f5f3c89c1a2344e88842ba0de327cc0098583e Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 5 Mar 2020 22:28:15 -0600 Subject: remoteproc: add IPA notification to q6v5 driver Set up a subdev in the q6v5 modem remoteproc driver that generates event notifications for the IPA driver to use for initialization and recovery following a modem shutdown or crash. A pair of new functions provides a way for the IPA driver to register and deregister a notification callback function that will be called whenever modem events (about to boot, running, about to shut down, etc.) occur. A void pointer value (provided by the IPA driver at registration time) and an event type are supplied to the callback function. One event, MODEM_REMOVING, is signaled whenever the q6v5 driver is about to remove the notification subdevice. It requires the IPA driver de-register its callback. This sub-device is only used by the modem subsystem (MSS) driver, so the code that adds the new subdev and allows registration and deregistration of the notifier is found in "qcom_q6v6_mss.c". Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- include/linux/remoteproc/qcom_q6v5_ipa_notify.h | 82 +++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 include/linux/remoteproc/qcom_q6v5_ipa_notify.h (limited to 'include/linux') diff --git a/include/linux/remoteproc/qcom_q6v5_ipa_notify.h b/include/linux/remoteproc/qcom_q6v5_ipa_notify.h new file mode 100644 index 000000000000..0820edc0ab7d --- /dev/null +++ b/include/linux/remoteproc/qcom_q6v5_ipa_notify.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Copyright (C) 2019 Linaro Ltd. */ + +#ifndef __QCOM_Q6V5_IPA_NOTIFY_H__ +#define __QCOM_Q6V5_IPA_NOTIFY_H__ + +#if IS_ENABLED(CONFIG_QCOM_Q6V5_IPA_NOTIFY) + +#include + +enum qcom_rproc_event { + MODEM_STARTING = 0, /* Modem is about to be started */ + MODEM_RUNNING = 1, /* Startup complete; modem is operational */ + MODEM_STOPPING = 2, /* Modem is about to shut down */ + MODEM_CRASHED = 3, /* Modem has crashed (implies stopping) */ + MODEM_OFFLINE = 4, /* Modem is now offline */ + MODEM_REMOVING = 5, /* Modem is about to be removed */ +}; + +typedef void (*qcom_ipa_notify_t)(void *data, enum qcom_rproc_event event); + +struct qcom_rproc_ipa_notify { + struct rproc_subdev subdev; + + qcom_ipa_notify_t notify; + void *data; +}; + +/** + * qcom_add_ipa_notify_subdev() - Register IPA notification subdevice + * @rproc: rproc handle + * @ipa_notify: IPA notification subdevice handle + * + * Register the @ipa_notify subdevice with the @rproc so modem events + * can be sent to IPA when they occur. + * + * This is defined in "qcom_q6v5_ipa_notify.c". + */ +void qcom_add_ipa_notify_subdev(struct rproc *rproc, + struct qcom_rproc_ipa_notify *ipa_notify); + +/** + * qcom_remove_ipa_notify_subdev() - Remove IPA SSR subdevice + * @rproc: rproc handle + * @ipa_notify: IPA notification subdevice handle + * + * This is defined in "qcom_q6v5_ipa_notify.c". + */ +void qcom_remove_ipa_notify_subdev(struct rproc *rproc, + struct qcom_rproc_ipa_notify *ipa_notify); + +/** + * qcom_register_ipa_notify() - Register IPA notification function + * @rproc: Remote processor handle + * @notify: Non-null IPA notification callback function pointer + * @data: Data supplied to IPA notification callback function + * + * @Return: 0 if successful, or a negative error code otherwise + * + * This is defined in "qcom_q6v5_mss.c". + */ +int qcom_register_ipa_notify(struct rproc *rproc, qcom_ipa_notify_t notify, + void *data); +/** + * qcom_deregister_ipa_notify() - Deregister IPA notification function + * @rproc: Remote processor handle + * + * This is defined in "qcom_q6v5_mss.c". + */ +void qcom_deregister_ipa_notify(struct rproc *rproc); + +#else /* !IS_ENABLED(CONFIG_QCOM_Q6V5_IPA_NOTIFY) */ + +struct qcom_rproc_ipa_notify { /* empty */ }; + +#define qcom_add_ipa_notify_subdev(rproc, ipa_notify) /* no-op */ +#define qcom_remove_ipa_notify_subdev(rproc, ipa_notify) /* no-op */ + +#endif /* !IS_ENABLED(CONFIG_QCOM_Q6V5_IPA_NOTIFY) */ + +#endif /* !__QCOM_Q6V5_IPA_NOTIFY_H__ */ -- cgit v1.2.3 From 0cfb07c5f221be5a1219f590f576c48de4efedf3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 12 Jan 2020 17:49:45 +0100 Subject: zorro: Make zorro_match_device() static Unlike its PCI counterpart, zorro_match_device() was never used outside the Zorro bus code. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200112164949.20196-2-geert@linux-m68k.org --- include/linux/zorro.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/zorro.h b/include/linux/zorro.h index 63fbba0740c2..cb72515b0ac1 100644 --- a/include/linux/zorro.h +++ b/include/linux/zorro.h @@ -70,7 +70,6 @@ struct zorro_driver { /* New-style probing */ extern int zorro_register_driver(struct zorro_driver *); extern void zorro_unregister_driver(struct zorro_driver *); -extern const struct zorro_device_id *zorro_match_device(const struct zorro_device_id *ids, const struct zorro_dev *z); static inline struct zorro_driver *zorro_dev_driver(const struct zorro_dev *z) { return z->driver; -- cgit v1.2.3 From 1c5b915a13068313a0809404964737bf5aedace7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 12 Jan 2020 17:49:48 +0100 Subject: zorro: Remove unused zorro_dev_driver() This function was never used. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200112164949.20196-5-geert@linux-m68k.org --- include/linux/zorro.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zorro.h b/include/linux/zorro.h index cb72515b0ac1..22f3f80fbcb5 100644 --- a/include/linux/zorro.h +++ b/include/linux/zorro.h @@ -70,10 +70,6 @@ struct zorro_driver { /* New-style probing */ extern int zorro_register_driver(struct zorro_driver *); extern void zorro_unregister_driver(struct zorro_driver *); -static inline struct zorro_driver *zorro_dev_driver(const struct zorro_dev *z) -{ - return z->driver; -} extern unsigned int zorro_num_autocon; /* # of autoconfig devices found */ -- cgit v1.2.3 From 7332bc4dc89c87a64a19dab96a41525d5630e9cf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 12 Jan 2020 17:49:49 +0100 Subject: zorro: Move zorro_bus_type to bus-private header file zorro_bus_type was never used outside the Zorro bus code. Hence move it from the public to the bus-private header file. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200112164949.20196-6-geert@linux-m68k.org --- include/linux/zorro.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zorro.h b/include/linux/zorro.h index 22f3f80fbcb5..e2e4de188d84 100644 --- a/include/linux/zorro.h +++ b/include/linux/zorro.h @@ -40,13 +40,6 @@ struct zorro_dev { #define to_zorro_dev(n) container_of(n, struct zorro_dev, dev) - /* - * Zorro bus - */ - -extern struct bus_type zorro_bus_type; - - /* * Zorro device drivers */ -- cgit v1.2.3 From b3875759332e19d3b88da1e7258cdd37ac474d32 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 12 Jan 2020 17:56:11 +0100 Subject: dio: Make dio_match_device() static Unlike its PCI counterpart, dio_match_device() was never used outside the DIO bus code. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200112165613.20960-2-geert@linux-m68k.org --- include/linux/dio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dio.h b/include/linux/dio.h index 1470d1d943b4..ca07243690ed 100644 --- a/include/linux/dio.h +++ b/include/linux/dio.h @@ -247,7 +247,6 @@ extern int dio_create_sysfs_dev_files(struct dio_dev *); /* New-style probing */ extern int dio_register_driver(struct dio_driver *); extern void dio_unregister_driver(struct dio_driver *); -extern const struct dio_device_id *dio_match_device(const struct dio_device_id *ids, const struct dio_dev *z); static inline struct dio_driver *dio_dev_driver(const struct dio_dev *d) { return d->driver; -- cgit v1.2.3 From 435cb3eaf34e821072134afc2a61329ca2fd7549 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 12 Jan 2020 17:56:13 +0100 Subject: dio: Remove unused dio_dev_driver() This function was never used. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200112165613.20960-4-geert@linux-m68k.org --- include/linux/dio.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dio.h b/include/linux/dio.h index ca07243690ed..5abd07361eb5 100644 --- a/include/linux/dio.h +++ b/include/linux/dio.h @@ -247,10 +247,6 @@ extern int dio_create_sysfs_dev_files(struct dio_dev *); /* New-style probing */ extern int dio_register_driver(struct dio_driver *); extern void dio_unregister_driver(struct dio_driver *); -static inline struct dio_driver *dio_dev_driver(const struct dio_dev *d) -{ - return d->driver; -} #define dio_resource_start(d) ((d)->resource.start) #define dio_resource_end(d) ((d)->resource.end) -- cgit v1.2.3 From f1541773af49ecd1edae29c8ac0775253a0b0760 Mon Sep 17 00:00:00 2001 From: Chuanhong Guo Date: Sat, 8 Feb 2020 15:43:50 +0800 Subject: mtd: spinand: rework detect procedure for different READ_ID operation Currently there are 3 different variants of read_id implementation: 1. opcode only. Found in GD5FxGQ4xF. 2. opcode + 1 addr byte. Found in GD5GxGQ4xA/E 3. opcode + 1 dummy byte. Found in other currently supported chips. Original implementation was for variant 1 and let detect function of chips with variant 2 and 3 to ignore the first byte. This isn't robust: 1. For chips of variant 2, if SPI master doesn't keep MOSI low during read, chip will get a random id offset, and the entire id buffer will shift by that offset, causing detect failure. 2. For chips of variant 1, if it happens to get a devid that equals to manufacture id of variant 2 or 3 chips, it'll get incorrectly detected. This patch reworks detect procedure to address problems above. New logic do detection for all variants separatedly, in 1-2-3 order. Since all current detect methods do exactly the same id matching procedure, unify them into core.c and remove detect method from manufacture_ops. Tested on GD5F1GQ4UAYIG and W25N01GVZEIG. Signed-off-by: Chuanhong Guo Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200208074439.146296-1-gch981213@gmail.com --- include/linux/mtd/spinand.h | 66 ++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 4ea558bd3c46..f4c4ae87181b 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -32,9 +32,9 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_READID_OP(ndummy, buf, len) \ +#define SPINAND_READID_OP(naddr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1), \ - SPI_MEM_OP_NO_ADDR, \ + SPI_MEM_OP_ADDR(naddr, 0, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) @@ -176,37 +176,46 @@ struct spinand_device; * @data: buffer containing the id bytes. Currently 4 bytes large, but can * be extended if required * @len: ID length - * - * struct_spinand_id->data contains all bytes returned after a READ_ID command, - * including dummy bytes if the chip does not emit ID bytes right after the - * READ_ID command. The responsibility to extract real ID bytes is left to - * struct_manufacurer_ops->detect(). */ struct spinand_id { u8 data[SPINAND_MAX_ID_LEN]; int len; }; +enum spinand_readid_method { + SPINAND_READID_METHOD_OPCODE, + SPINAND_READID_METHOD_OPCODE_ADDR, + SPINAND_READID_METHOD_OPCODE_DUMMY, +}; + +/** + * struct spinand_devid - SPI NAND device id structure + * @id: device id of current chip + * @len: number of bytes in device id + * @method: method to read chip id + * There are 3 possible variants: + * SPINAND_READID_METHOD_OPCODE: chip id is returned immediately + * after read_id opcode. + * SPINAND_READID_METHOD_OPCODE_ADDR: chip id is returned after + * read_id opcode + 1-byte address. + * SPINAND_READID_METHOD_OPCODE_DUMMY: chip id is returned after + * read_id opcode + 1 dummy byte. + */ +struct spinand_devid { + const u8 *id; + const u8 len; + const enum spinand_readid_method method; +}; + /** * struct manufacurer_ops - SPI NAND manufacturer specific operations - * @detect: detect a SPI NAND device. Every time a SPI NAND device is probed - * the core calls the struct_manufacurer_ops->detect() hook of each - * registered manufacturer until one of them return 1. Note that - * the first thing to check in this hook is that the manufacturer ID - * in struct_spinand_device->id matches the manufacturer whose - * ->detect() hook has been called. Should return 1 if there's a - * match, 0 if the manufacturer ID does not match and a negative - * error code otherwise. When true is returned, the core assumes - * that properties of the NAND chip (spinand->base.memorg and - * spinand->base.eccreq) have been filled * @init: initialize a SPI NAND device * @cleanup: cleanup a SPI NAND device * * Each SPI NAND manufacturer driver should implement this interface so that - * NAND chips coming from this vendor can be detected and initialized properly. + * NAND chips coming from this vendor can be initialized properly. */ struct spinand_manufacturer_ops { - int (*detect)(struct spinand_device *spinand); int (*init)(struct spinand_device *spinand); void (*cleanup)(struct spinand_device *spinand); }; @@ -215,11 +224,16 @@ struct spinand_manufacturer_ops { * struct spinand_manufacturer - SPI NAND manufacturer instance * @id: manufacturer ID * @name: manufacturer name + * @devid_len: number of bytes in device ID + * @chips: supported SPI NANDs under current manufacturer + * @nchips: number of SPI NANDs available in chips array * @ops: manufacturer operations */ struct spinand_manufacturer { u8 id; char *name; + const struct spinand_info *chips; + const size_t nchips; const struct spinand_manufacturer_ops *ops; }; @@ -291,7 +305,7 @@ struct spinand_ecc_info { */ struct spinand_info { const char *model; - u16 devid; + struct spinand_devid devid; u32 flags; struct nand_memory_organization memorg; struct nand_ecc_req eccreq; @@ -305,6 +319,13 @@ struct spinand_info { unsigned int target); }; +#define SPINAND_ID(__method, ...) \ + { \ + .id = (const u8[]){ __VA_ARGS__ }, \ + .len = sizeof((u8[]){ __VA_ARGS__ }), \ + .method = __method, \ + } + #define SPINAND_INFO_OP_VARIANTS(__read, __write, __update) \ { \ .read_cache = __read, \ @@ -451,9 +472,10 @@ static inline void spinand_set_of_node(struct spinand_device *spinand, nanddev_set_of_node(&spinand->base, np); } -int spinand_match_and_init(struct spinand_device *dev, +int spinand_match_and_init(struct spinand_device *spinand, const struct spinand_info *table, - unsigned int table_size, u16 devid); + unsigned int table_size, + enum spinand_readid_method rdid_method); int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); int spinand_select_target(struct spinand_device *spinand, unsigned int target); -- cgit v1.2.3 From dceeb0f0e61071b1d990459dbd6a53f590cdaf77 Mon Sep 17 00:00:00 2001 From: Tejas Patel Date: Thu, 9 Jan 2020 11:06:03 -0800 Subject: include: linux: firmware: Correct config dependency of zynqmp_eemi_ops zynqmp_eemi_ops will be compiled only when CONFIG_ZYNQMP_FIRMWARE is enabled. So check for CONFIG_ZYNQMP_FIRMWARE instead of checking for CONFIG_ARCH_ZYNQMP. Signed-off-by: Tejas Patel Signed-off-by: Jolly Shah Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 2cd12ebd6826..ed1aace0cbbc 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -320,7 +320,7 @@ struct zynqmp_eemi_ops { int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 *ret_payload); -#if IS_REACHABLE(CONFIG_ARCH_ZYNQMP) +#if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) const struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void); #else static inline struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void) -- cgit v1.2.3 From 7b70973d7edb2f005511102d5a2e0116464a46a1 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:32 +0000 Subject: bpf: sockmap: Only check ULP for TCP sockets The sock map code checks that a socket does not have an active upper layer protocol before inserting it into the map. This requires casting via inet_csk, which isn't valid for UDP sockets. Guard checks for ULP by checking inet_sk(sk)->is_icsk first. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-2-lmb@cloudflare.com --- include/linux/skmsg.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 112765bd146d..4d3d75d63066 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -360,7 +360,13 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + if (inet_csk_has_ulp(sk)) { + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); + } else { + sk->sk_write_space = psock->saved_write_space; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, psock->sk_proto); + } } static inline void sk_psock_set_state(struct sk_psock *psock, -- cgit v1.2.3 From 1a2e20132db7bb76dd4f97b8364bd167227dd15f Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:33 +0000 Subject: skmsg: Update saved hooks only once Only update psock->saved_* if psock->sk_proto has not been initialized yet. This allows us to get rid of tcp_bpf_reinit_sk_prot. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-3-lmb@cloudflare.com --- include/linux/skmsg.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 4d3d75d63066..2be51b7a5800 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -347,11 +347,23 @@ static inline void sk_psock_update_proto(struct sock *sk, struct sk_psock *psock, struct proto *ops) { - psock->saved_unhash = sk->sk_prot->unhash; - psock->saved_close = sk->sk_prot->close; - psock->saved_write_space = sk->sk_write_space; + /* Initialize saved callbacks and original proto only once, since this + * function may be called multiple times for a psock, e.g. when + * psock->progs.msg_parser is updated. + * + * Since we've not installed the new proto, psock is not yet in use and + * we can initialize it without synchronization. + */ + if (!psock->sk_proto) { + struct proto *orig = READ_ONCE(sk->sk_prot); + + psock->saved_unhash = orig->unhash; + psock->saved_close = orig->close; + psock->saved_write_space = sk->sk_write_space; + + psock->sk_proto = orig; + } - psock->sk_proto = sk->sk_prot; /* Pairs with lockless read in sk_clone_lock() */ WRITE_ONCE(sk->sk_prot, ops); } -- cgit v1.2.3 From f747632b608f90217a4e9ebb1deba8a37612aa32 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 9 Mar 2020 11:12:36 +0000 Subject: bpf: sockmap: Move generic sockmap hooks from BPF TCP The init, close and unhash handlers from TCP sockmap are generic, and can be reused by UDP sockmap. Move the helpers into the sockmap code base and expose them. This requires tcp_bpf_get_proto and tcp_bpf_clone to be conditional on BPF_STREAM_PARSER. The moved functions are unmodified, except that sk_psock_unlink is renamed to sock_map_unlink to better match its behaviour. Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309111243.6982-6-lmb@cloudflare.com --- include/linux/bpf.h | 4 +++- include/linux/skmsg.h | 28 ---------------------------- 2 files changed, 3 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 40c53924571d..94a329b9da81 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1419,6 +1419,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #if defined(CONFIG_BPF_STREAM_PARSER) int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +void sock_map_unhash(struct sock *sk); +void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which) @@ -1431,7 +1433,7 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } -#endif +#endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 2be51b7a5800..8a709f63c5e5 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -323,14 +323,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link) } struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); -#if defined(CONFIG_BPF_STREAM_PARSER) -void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link); -#else -static inline void sk_psock_unlink(struct sock *sk, - struct sk_psock_link *link) -{ -} -#endif void __sk_psock_purge_ingress_msg(struct sk_psock *psock); @@ -399,26 +391,6 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, return test_bit(bit, &psock->state); } -static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) -{ - struct sk_psock *psock; - - rcu_read_lock(); - psock = sk_psock(sk); - if (psock) { - if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { - psock = ERR_PTR(-EBUSY); - goto out; - } - - if (!refcount_inc_not_zero(&psock->refcnt)) - psock = ERR_PTR(-EBUSY); - } -out: - rcu_read_unlock(); - return psock; -} - static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; -- cgit v1.2.3 From fcb26bd2b6cab573f06e5855638368cf88e99c2b Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 9 Mar 2020 09:36:26 +0100 Subject: net: phy: Add Synopsys DesignWare XPCS MDIO module Synopsys DesignWare XPCS is an MMD that can manage link status, auto-negotiation, link training, ... In this commit we add basic support for XPCS using USXGMII interface and Clause 73 Auto-negotiation. This is highly tied with PHYLINK and can't be used without it. A given ethernet driver can use the provided callbacks to add the support for XPCS. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- include/linux/mdio-xpcs.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/mdio-xpcs.h (limited to 'include/linux') diff --git a/include/linux/mdio-xpcs.h b/include/linux/mdio-xpcs.h new file mode 100644 index 000000000000..9a841aa5982d --- /dev/null +++ b/include/linux/mdio-xpcs.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare XPCS helpers + */ + +#ifndef __LINUX_MDIO_XPCS_H +#define __LINUX_MDIO_XPCS_H + +#include +#include + +struct mdio_xpcs_args { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + struct mii_bus *bus; + int addr; +}; + +struct mdio_xpcs_ops { + int (*validate)(struct mdio_xpcs_args *xpcs, + unsigned long *supported, + struct phylink_link_state *state); + int (*config)(struct mdio_xpcs_args *xpcs, + const struct phylink_link_state *state); + int (*get_state)(struct mdio_xpcs_args *xpcs, + struct phylink_link_state *state); + int (*link_up)(struct mdio_xpcs_args *xpcs, int speed, + phy_interface_t interface); + int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface); +}; + +#if IS_ENABLED(CONFIG_MDIO_XPCS) +struct mdio_xpcs_ops *mdio_xpcs_get_ops(void); +#else +static inline struct mdio_xpcs_ops *mdio_xpcs_get_ops(void) +{ + return NULL; +} +#endif + +#endif /* __LINUX_MDIO_XPCS_H */ -- cgit v1.2.3 From f213bbe8a9d6ba1d0adf424787c02f361ea78c38 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 9 Mar 2020 09:36:27 +0100 Subject: net: stmmac: Integrate it with DesignWare XPCS Adds all the necessary logic so that stmmac can be used with Synopsys DesignWare XPCS. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 19190c609282..fbafb353e9be 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -80,6 +80,7 @@ struct stmmac_mdio_bus_data { unsigned int phy_mask; + unsigned int has_xpcs; int *irqs; int probed_phy_irq; bool needs_reset; -- cgit v1.2.3 From ed680522268da2f6f2a67505dd144e718d726712 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 28 Feb 2020 18:12:20 +0100 Subject: i2c: convert SMBus alert setup function to return an ERRPTR Only few drivers use this call, so drivers and I2C core are converted at once with this patch. By simply using i2c_new_client_device() instead of i2c_new_device(), we easily can return an ERRPTR for this function as well. To make out of tree users aware that something changed, the function is renamed to i2c_new_smbus_alert_device(). Signed-off-by: Wolfram Sang Reviewed-by: Luca Ceresoli Signed-off-by: Wolfram Sang --- include/linux/i2c-smbus.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 585ad6fc3847..802aac0d2010 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -31,8 +31,8 @@ struct i2c_smbus_alert_setup { int irq; }; -struct i2c_client *i2c_setup_smbus_alert(struct i2c_adapter *adapter, - struct i2c_smbus_alert_setup *setup); +struct i2c_client *i2c_new_smbus_alert_device(struct i2c_adapter *adapter, + struct i2c_smbus_alert_setup *setup); int i2c_handle_smbus_alert(struct i2c_client *ara); #if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_OF) -- cgit v1.2.3 From a47070aac935b9c0e5d0f99843e0c8784f455ea7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 28 Feb 2020 18:12:21 +0100 Subject: i2c: smbus: remove outdated references to irq level triggers IRQ levels are now handled within the IRQ core. Remove the forgotten references from the documentation. Fixes: 9b9f2b8bc2ac ("i2c: i2c-smbus: Use threaded irq for smbalert") Signed-off-by: Wolfram Sang Reviewed-by: Luca Ceresoli Signed-off-by: Wolfram Sang --- include/linux/i2c-smbus.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 802aac0d2010..8c5459034f92 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -15,17 +15,12 @@ /** * i2c_smbus_alert_setup - platform data for the smbus_alert i2c client - * @alert_edge_triggered: whether the alert interrupt is edge (1) or level (0) - * triggered * @irq: IRQ number, if the smbus_alert driver should take care of interrupt * handling * * If irq is not specified, the smbus_alert driver doesn't take care of * interrupt handling. In that case it is up to the I2C bus driver to either * handle the interrupts or to poll for alerts. - * - * If irq is specified then it it crucial that alert_edge_triggered is - * properly set. */ struct i2c_smbus_alert_setup { int irq; -- cgit v1.2.3 From 2dd209f00fc5a1caafa493066c7cd692fd2fd57c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 9 Mar 2020 21:26:16 -0700 Subject: blk-mq: Fix a comment in include/linux/blk-mq.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'hctx_list' member of struct blk_mq_hw_ctx is not a list head but instead an entry in q->unused_hctx_list. Fix the comment above this struct member. Fixes: d386732bc142 ("blk-mq: fill header with kernel-doc") Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Cc: André Almeida Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 11cfd6470b1a..31344d5f83e2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -162,7 +162,10 @@ struct blk_mq_hw_ctx { struct dentry *sched_debugfs_dir; #endif - /** @hctx_list: List of all hardware queues. */ + /** + * @hctx_list: if this hctx is not in use, this is an entry in + * q->unused_hctx_list. + */ struct list_head hctx_list; /** -- cgit v1.2.3 From 7a86a419ff62c69f91544ea95f4ae6ef83880dcc Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Tue, 10 Mar 2020 01:16:19 +0800 Subject: spi: update the structure documentation some members were not described in documentation. Signed-off-by: Qiujun Huang Link: https://lore.kernel.org/r/1583774179-30736-1-git-send-email-hqjagain@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 87105272879b..38286de779e3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -135,6 +135,8 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging + * @driver_override: If the name of a driver is written to this attribute, then + * the device will bind to the named driver and only the named driver. * @cs_gpio: LEGACY: gpio number of the chipselect line (optional, -ENOENT when * not using a GPIO line) use cs_gpiod in new drivers by opting in on * the spi_master. @@ -443,6 +445,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @spi_transfer->ptp_sts_word_post were transmitted. * If the driver does not set this, the SPI core takes the snapshot as * close to the driver hand-over as possible. + * @irq_flags: Interrupt enable state during PTP system timestamping * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals -- cgit v1.2.3 From 0a384abfae66651b28e4bbe16883b1ff046ba3b3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 27 Feb 2020 08:11:20 -0700 Subject: net: abstract out normal and compat msghdr import This splits it into two parts, one that imports the message, and one that imports the iovec. This allows a caller to only do the first part, and import the iovec manually afterwards. No functional changes in this patch. Acked-by: David Miller Signed-off-by: Jens Axboe --- include/linux/socket.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 2d2313403101..fc59ac825561 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -391,6 +391,10 @@ extern int recvmsg_copy_msghdr(struct msghdr *msg, struct user_msghdr __user *umsg, unsigned flags, struct sockaddr __user **uaddr, struct iovec **iov); +extern int __copy_msghdr_from_user(struct msghdr *kmsg, + struct user_msghdr __user *umsg, + struct sockaddr __user **save_addr, + struct iovec __user **uiov, size_t *nsegs); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, -- cgit v1.2.3 From 41e684ef3f37ce6e5eac3fb5b9c7c1853f4b0447 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 5 Mar 2020 14:38:41 +0200 Subject: IB/mlx5: Replace tunnel mpls capability bits for tunnel_offloads Until now the flex parser capability was used in ib_query_device() to indicate tunnel_offloads_caps support for mpls_over_gre/mpls_over_udp. Newer devices and firmware will have configurations with the flexparser but without mpls support. Testing for the flex parser capability was a mistake, the tunnel_stateless capability was intended for detecting mpls and was introduced at the same time as the flex parser capability. Otherwise userspace will be incorrectly informed that a future device supports MPLS when it does not. Link: https://lore.kernel.org/r/20200305123841.196086-1-leon@kernel.org Cc: # 4.17 Fixes: e818e255a58d ("IB/mlx5: Expose MPLS related tunneling offloads") Signed-off-by: Alex Vesker Reviewed-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/mlx5_ifc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f9bcbe653fda..f3a7189f9d6d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -877,7 +877,11 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 swp_csum[0x1]; u8 swp_lso[0x1]; u8 cqe_checksum_full[0x1]; - u8 reserved_at_24[0x5]; + u8 tunnel_stateless_geneve_tx[0x1]; + u8 tunnel_stateless_mpls_over_udp[0x1]; + u8 tunnel_stateless_mpls_over_gre[0x1]; + u8 tunnel_stateless_vxlan_gpe[0x1]; + u8 tunnel_stateless_ipv4_over_vxlan[0x1]; u8 tunnel_stateless_ip_over_ip[0x1]; u8 reserved_at_2a[0x6]; u8 max_vxlan_udp_ports[0x8]; -- cgit v1.2.3 From e56faff57f0b39661093c00e0262d4ab9088830e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 28 Feb 2020 15:02:03 -0600 Subject: PCI: Add pci_speed_string() Add pci_speed_string() to return a text description of the supplied bus or link speed. The slot code previously used the private pci_bus_speed_strings[] array for this purpose, but adding this interface will enable us to consolidate similar code elsewhere. Export pcie_link_speed[] and pci_speed_string() so they can be used by modules. Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 3840a541a9de..76f4806a154c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -236,7 +236,7 @@ enum pcie_link_width { PCIE_LNK_WIDTH_UNKNOWN = 0xff, }; -/* Based on the PCI Hotplug Spec, but some values are made up by us */ +/* See matching string table in pci_speed_string() */ enum pci_bus_speed { PCI_SPEED_33MHz = 0x00, PCI_SPEED_66MHz = 0x01, -- cgit v1.2.3 From 812df69beb86b0e6decbb109ee3fa408dcb7fa5d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Mar 2020 19:15:06 -0700 Subject: net: liquidio: reject unsupported coalescing params Set ethtool_ops->supported_coalesce_params to let the core reject unsupported coalescing parameters. This driver did not previously reject unsupported parameters. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e464c946bca4..9efeebde3514 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -211,6 +211,11 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ) #define ETHTOOL_COALESCE_USE_ADAPTIVE \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_ADAPTIVE_TX) +#define ETHTOOL_COALESCE_PKT_RATE_RX_USECS \ + (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | \ + ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_RX_USECS_HIGH | \ + ETHTOOL_COALESCE_PKT_RATE_LOW | ETHTOOL_COALESCE_PKT_RATE_HIGH | \ + ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL) /** * struct ethtool_ops - optional netdev operations -- cgit v1.2.3 From e937cc1dd7966df33a478943817302502a164e25 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 6 Mar 2020 16:28:37 +0200 Subject: dmaengine: Add basic debugfs support Via the /sys/kernel/debug/dmaengine/summary users can get information about the DMA devices and the used channels. Example output on am654-evm with audio using two channels and after running dmatest on 4 channels: dma0 (285c0000.dma-controller): number of channels: 96 dma1 (31150000.dma-controller): number of channels: 267 dma1chan0 | 2b00000.mcasp:tx dma1chan1 | 2b00000.mcasp:rx dma1chan2 | in-use dma1chan3 | in-use dma1chan4 | in-use dma1chan5 | in-use For slave channels we can show the device and the channel name a given channel is requested. For non slave devices the only information we know is that the channel is in use. DMA drivers can implement the optional dbg_summary_show callback to provide controller specific information instead of the generic one. It is easy to extend the generic dmaengine_summary_show() to print additional information about the used channels. I have taken the idea from gpiolib and clk subsystems. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200306142839.17910-2-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d3672f065a64..72920b5cf2d7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -300,6 +300,8 @@ struct dma_router { * @chan_id: channel ID for sysfs * @dev: class device for sysfs * @name: backlink name for sysfs + * @dbg_client_name: slave name for debugfs in format: + * dev_name(requester's dev):channel name, for example: "2b00000.mcasp:tx" * @device_node: used to add this to the device chan list * @local: per-cpu pointer to a struct dma_chan_percpu * @client_count: how many clients are using this channel @@ -318,6 +320,9 @@ struct dma_chan { int chan_id; struct dma_chan_dev *dev; const char *name; +#ifdef CONFIG_DEBUG_FS + char *dbg_client_name; +#endif struct list_head device_node; struct dma_chan_percpu __percpu *local; @@ -806,7 +811,9 @@ struct dma_filter { * called and there are no further references to this structure. This * must be implemented to free resources however many existing drivers * do not and are therefore not safe to unbind while in use. - * + * @dbg_summary_show: optional routine to show contents in debugfs; default code + * will be used when this is omitted, but custom code can show extra, + * controller specific information. */ struct dma_device { struct kref ref; @@ -892,6 +899,10 @@ struct dma_device { struct dma_tx_state *txstate); void (*device_issue_pending)(struct dma_chan *chan); void (*device_release)(struct dma_device *dev); + /* debugfs support */ +#ifdef CONFIG_DEBUG_FS + void (*dbg_summary_show)(struct seq_file *s, struct dma_device *dev); +#endif }; static inline int dmaengine_slave_config(struct dma_chan *chan, -- cgit v1.2.3 From 26cf132de6f79c06025706ddc61e045d591d404d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 6 Mar 2020 16:28:39 +0200 Subject: dmaengine: Create debug directories for DMA devices Create a placeholder directory for each registered DMA device. DMA drivers can use the dmaengine_get_debugfs_root() call to get their debugfs root and can populate with custom files to aim debugging. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200306142839.17910-4-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 72920b5cf2d7..21065c04c4ac 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -902,6 +902,7 @@ struct dma_device { /* debugfs support */ #ifdef CONFIG_DEBUG_FS void (*dbg_summary_show)(struct seq_file *s, struct dma_device *dev); + struct dentry *dbg_dev_root; #endif }; -- cgit v1.2.3 From babf3164095b0670435910340c2a1eec37757b57 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 9 Mar 2020 16:10:51 -0700 Subject: bpf: Add bpf_link_new_file that doesn't install FD Add bpf_link_new_file() API for cases when we need to ensure anon_inode is successfully created before we proceed with expensive BPF program attachment procedure, which will require equally (if not more so) expensive and potentially failing compensation detachment procedure just because anon_inode creation failed. This API allows to simplify code by ensuring first that anon_inode is created and after BPF program is attached proceed with fd_install() that can't fail. After anon_inode file is created, link can't be just kfree()'d anymore, because its destruction will be performed by deferred file_operations->release call. For this, bpf_link API required specifying two separate operations: release() and dealloc(), former performing detachment only, while the latter frees memory used by bpf_link itself. dealloc() needs to be specified, because struct bpf_link is frequently embedded into link type-specific container struct (e.g., struct bpf_raw_tp_link), so bpf_link itself doesn't know how to properly free the memory. In case when anon_inode file was successfully created, but subsequent BPF attachment failed, bpf_link needs to be marked as "defunct", so that file's release() callback will perform only memory deallocation, but no detachment. Convert raw tracepoint and tracing attachment to new API and eliminate detachment from error handling path. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200309231051.1270337-1-andriin@fb.com --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 94a329b9da81..4fd91b7c95ea 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1070,13 +1070,16 @@ struct bpf_link; struct bpf_link_ops { void (*release)(struct bpf_link *link); + void (*dealloc)(struct bpf_link *link); }; void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, struct bpf_prog *prog); +void bpf_link_defunct(struct bpf_link *link); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); +struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); -- cgit v1.2.3 From 46b5889cc2c54bac7d7e727a44d28a298df23cef Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 14 Jan 2020 10:09:52 +0100 Subject: mtd: implement proper partition handling Instead of collecting partitions in a flat list, create a hierarchy within the mtd_info structure: use a partitions list to keep track of the partitions of an MTD device (which might be itself a partition of another MTD device), a pointer to the parent device (NULL when the MTD device is the root one, not a partition). By also saving directly in mtd_info the offset of the partition, we can get rid of the mtd_part structure. While at it, be consistent in the naming of the mtd_info structures to ease the understanding of the new hierarchy: these structures are usually called 'mtd', unless there are multiple instances of the same structure. In this case, there is usually a parent/child bound so we will call them 'parent' and 'child'. Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200114090952.11232-1-miquel.raynal@bootlin.com --- include/linux/mtd/mtd.h | 125 +++++++++++++++++++++++++++++++++++++---- include/linux/mtd/partitions.h | 1 - 2 files changed, 115 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 249e8d9bfbcd..2d1f4a61f4ac 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -194,10 +195,43 @@ struct mtd_debug_info { const char *partid; }; +/** + * struct mtd_part - MTD partition specific fields + * + * @node: list node used to add an MTD partition to the parent partition list + * @offset: offset of the partition relatively to the parent offset + * @flags: original flags (before the mtdpart logic decided to tweak them based + * on flash constraints, like eraseblock/pagesize alignment) + * + * This struct is embedded in mtd_info and contains partition-specific + * properties/fields. + */ +struct mtd_part { + struct list_head node; + u64 offset; + u32 flags; +}; + +/** + * struct mtd_master - MTD master specific fields + * + * @partitions_lock: lock protecting accesses to the partition list. Protects + * not only the master partition list, but also all + * sub-partitions. + * @suspended: et to 1 when the device is suspended, 0 otherwise + * + * This struct is embedded in mtd_info and contains master-specific + * properties/fields. The master is the root MTD device from the MTD partition + * point of view. + */ +struct mtd_master { + struct mutex partitions_lock; + unsigned int suspended : 1; +}; + struct mtd_info { u_char type; uint32_t flags; - uint32_t orig_flags; /* Flags as before running mtd checks */ uint64_t size; // Total size of the MTD /* "Major" erase size for the device. Naïve users may take this @@ -339,8 +373,52 @@ struct mtd_info { int usecount; struct mtd_debug_info dbg; struct nvmem_device *nvmem; + + /* + * Parent device from the MTD partition point of view. + * + * MTD masters do not have any parent, MTD partitions do. The parent + * MTD device can itself be a partition. + */ + struct mtd_info *parent; + + /* List of partitions attached to this MTD device */ + struct list_head partitions; + + union { + struct mtd_part part; + struct mtd_master master; + }; }; +static inline struct mtd_info *mtd_get_master(struct mtd_info *mtd) +{ + while (mtd->parent) + mtd = mtd->parent; + + return mtd; +} + +static inline u64 mtd_get_master_ofs(struct mtd_info *mtd, u64 ofs) +{ + while (mtd->parent) { + ofs += mtd->part.offset; + mtd = mtd->parent; + } + + return ofs; +} + +static inline bool mtd_is_partition(const struct mtd_info *mtd) +{ + return mtd->parent; +} + +static inline bool mtd_has_partitions(const struct mtd_info *mtd) +{ + return !list_empty(&mtd->partitions); +} + int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, struct mtd_oob_region *oobecc); int mtd_ooblayout_find_eccregion(struct mtd_info *mtd, int eccbyte, @@ -392,13 +470,16 @@ static inline u32 mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) static inline int mtd_max_bad_blocks(struct mtd_info *mtd, loff_t ofs, size_t len) { - if (!mtd->_max_bad_blocks) + struct mtd_info *master = mtd_get_master(mtd); + + if (!master->_max_bad_blocks) return -ENOTSUPP; if (mtd->size < (len + ofs) || ofs < 0) return -EINVAL; - return mtd->_max_bad_blocks(mtd, ofs, len); + return master->_max_bad_blocks(master, mtd_get_master_ofs(mtd, ofs), + len); } int mtd_wunit_to_pairing_info(struct mtd_info *mtd, int wunit, @@ -439,8 +520,10 @@ int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, static inline void mtd_sync(struct mtd_info *mtd) { - if (mtd->_sync) - mtd->_sync(mtd); + struct mtd_info *master = mtd_get_master(mtd); + + if (master->_sync) + master->_sync(master); } int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); @@ -452,13 +535,31 @@ int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs); static inline int mtd_suspend(struct mtd_info *mtd) { - return mtd->_suspend ? mtd->_suspend(mtd) : 0; + struct mtd_info *master = mtd_get_master(mtd); + int ret; + + if (master->master.suspended) + return 0; + + ret = master->_suspend ? master->_suspend(master) : 0; + if (ret) + return ret; + + master->master.suspended = 1; + return 0; } static inline void mtd_resume(struct mtd_info *mtd) { - if (mtd->_resume) - mtd->_resume(mtd); + struct mtd_info *master = mtd_get_master(mtd); + + if (!master->master.suspended) + return; + + if (master->_resume) + master->_resume(master); + + master->master.suspended = 0; } static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) @@ -538,7 +639,9 @@ static inline loff_t mtd_wunit_to_offset(struct mtd_info *mtd, loff_t base, static inline int mtd_has_oob(const struct mtd_info *mtd) { - return mtd->_read_oob && mtd->_write_oob; + struct mtd_info *master = mtd_get_master((struct mtd_info *)mtd); + + return master->_read_oob && master->_write_oob; } static inline int mtd_type_is_nand(const struct mtd_info *mtd) @@ -548,7 +651,9 @@ static inline int mtd_type_is_nand(const struct mtd_info *mtd) static inline int mtd_can_have_bb(const struct mtd_info *mtd) { - return !!mtd->_block_isbad; + struct mtd_info *master = mtd_get_master((struct mtd_info *)mtd); + + return !!master->_block_isbad; } /* Kernel-side ioctl definitions */ diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 11cb0c50cd84..e545c050d3e8 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -105,7 +105,6 @@ extern void deregister_mtd_parser(struct mtd_part_parser *parser); module_driver(__mtd_part_parser, register_mtd_parser, \ deregister_mtd_parser) -int mtd_is_partition(const struct mtd_info *mtd); int mtd_add_partition(struct mtd_info *master, const char *name, long long offset, long long length); int mtd_del_partition(struct mtd_info *master, int partno); -- cgit v1.2.3 From c6fbcb70132ffc66696a94dd3d8e6215c750254f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sun, 23 Feb 2020 19:06:33 +0100 Subject: mtd: rawnand: Fix a typo ("manufecturer") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Neuschäfer Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200223180634.8736-1-j.neuschaefer@gmx.net --- include/linux/mtd/rawnand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 4ab9bccfcde0..3c7c15aadcee 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1215,7 +1215,7 @@ static inline struct device_node *nand_get_flash_node(struct nand_chip *chip) * struct nand_flash_dev - NAND Flash Device ID Structure * @name: a human-readable name of the NAND chip * @dev_id: the device ID (the second byte of the full chip ID array) - * @mfr_id: manufecturer ID part of the full chip ID array (refers the same + * @mfr_id: manufacturer ID part of the full chip ID array (refers the same * memory address as ``id[0]``) * @dev_id: device ID part of the full chip ID array (refers the same memory * address as ``id[1]``) -- cgit v1.2.3 From 92270086b7e5ada7ab381c06cc3da2e95ed17088 Mon Sep 17 00:00:00 2001 From: Mason Yang Date: Tue, 3 Mar 2020 15:21:21 +0800 Subject: mtd: rawnand: Add support for manufacturer specific lock/unlock operation Add nand_lock() & nand_unlock() for manufacturer specific lock & unlock operation while the device supports Block Portection function. Signed-off-by: Mason Yang Reviewed-by: Miquel Raynal Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/1583220084-10890-2-git-send-email-masonccyang@mxic.com.tw --- include/linux/mtd/rawnand.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 3c7c15aadcee..49ed50fb44ab 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1077,6 +1077,8 @@ struct nand_legacy { * @manufacturer: [INTERN] Contains manufacturer information * @manufacturer.desc: [INTERN] Contains manufacturer's description * @manufacturer.priv: [INTERN] Contains manufacturer private information + * @lock_area: [REPLACEABLE] specific NAND chip lock operation + * @unlock_area: [REPLACEABLE] specific NAND chip unlock operation */ struct nand_chip { @@ -1136,6 +1138,9 @@ struct nand_chip { const struct nand_manufacturer *desc; void *priv; } manufacturer; + + int (*lock_area)(struct nand_chip *chip, loff_t ofs, uint64_t len); + int (*unlock_area)(struct nand_chip *chip, loff_t ofs, uint64_t len); }; extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; -- cgit v1.2.3 From 7d3d3254adaa61cba896f71497f56901deb618e5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 11 Mar 2020 12:51:17 +0100 Subject: docs: fix pointers to io-mapping.rst and io_ordering.rst files Those files got moved, but cross-references still point to the wrong places. Fixes: fcd680727157 ("Documentation: Add io-mapping.rst to driver-api manual") Fixes: d1ce350015d8 ("Documentation: Add io_ordering.rst to driver-api manual") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Alex Shi Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/c0205119db4fef536272cb0a183b6c14c2c8bf4c.1583927470.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/io-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 837058bc1c9f..b336622612f3 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -16,7 +16,7 @@ * The io_mapping mechanism provides an abstraction for mapping * individual pages from an io device to the CPU in an efficient fashion. * - * See Documentation/io-mapping.txt + * See Documentation/driver-api/io-mapping.rst */ struct io_mapping { -- cgit v1.2.3 From 0fa81b304a7973a499f844176ca031109487dd31 Mon Sep 17 00:00:00 2001 From: Alexander Bersenev Date: Fri, 6 Mar 2020 01:33:16 +0500 Subject: cdc_ncm: Implement the 32-bit version of NCM Transfer Block The NCM specification defines two formats of transfer blocks: with 16-bit fields (NTB-16) and with 32-bit fields (NTB-32). Currently only NTB-16 is implemented. This patch adds the support of NTB-32. The motivation behind this is that some devices such as E5785 or E5885 from the current generation of Huawei LTE routers do not support NTB-16. The previous generations of Huawei devices are also use NTB-32 by default. Also this patch enables NTB-32 by default for Huawei devices. During the 2019 ValdikSS made five attempts to contact Huawei to add the NTB-16 support to their router firmware, but they were unsuccessful. Signed-off-by: Alexander Bersenev Signed-off-by: David S. Miller --- include/linux/usb/cdc_ncm.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 1646c06989df..0ce4377545f8 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -46,9 +46,12 @@ #define CDC_NCM_DATA_ALTSETTING_NCM 1 #define CDC_NCM_DATA_ALTSETTING_MBIM 2 -/* CDC NCM subclass 3.2.1 */ +/* CDC NCM subclass 3.3.1 */ #define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10 +/* CDC NCM subclass 3.3.2 */ +#define USB_CDC_NCM_NDP32_LENGTH_MIN 0x20 + /* Maximum NTB length */ #define CDC_NCM_NTB_MAX_SIZE_TX 32768 /* bytes */ #define CDC_NCM_NTB_MAX_SIZE_RX 32768 /* bytes */ @@ -84,7 +87,7 @@ /* Driver flags */ #define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ #define CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE 0x04 /* Avoid altsetting toggle during init */ -#define CDC_NCM_FLAG_RESET_NTB16 0x08 /* set NDP16 one more time after altsetting switch */ +#define CDC_NCM_FLAG_PREFER_NTB32 0x08 /* prefer NDP32 over NDP16 */ #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) @@ -113,7 +116,11 @@ struct cdc_ncm_ctx { u32 timer_interval; u32 max_ndp_size; - struct usb_cdc_ncm_ndp16 *delayed_ndp16; + u8 is_ndp16; + union { + struct usb_cdc_ncm_ndp16 *delayed_ndp16; + struct usb_cdc_ncm_ndp32 *delayed_ndp32; + }; u32 tx_timer_pending; u32 tx_curr_frame_num; @@ -150,6 +157,8 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); +int cdc_ncm_rx_verify_nth32(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); +int cdc_ncm_rx_verify_ndp32(struct sk_buff *skb_in, int ndpoffset); struct sk_buff * cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in); -- cgit v1.2.3 From 9a8da6082ddb4689baf34dcdf2a53985e25753f3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 11 Mar 2020 09:31:31 +0100 Subject: tty: serial: ifx6x60: Convert to GPIO descriptors This driver for the Intel MID never seems to have been properly integrated upstream: the platform data in is not used anywhere in the kernel and haven't been since it was merged into the kernel in 2010. There might be out-of-tree users, so I don't want to delete the driver, but I will refactor it to use GPIO descriptors, which means that out-of-tree users will need to adapt. There are several examples in the kernel of how to provide the resources necessary for using GPIO descriptors to pass in the GPIO lines, for the MID platform in particular, it will suffice to inspect the code in files like: arch/x86/platform/intel-mid/device_libs/platform_bt.c This refactoring transfers all GPIOs in the driver, including a hard-coded "PMU reset" in the driver to use GPIO descriptors instead. The following named GPIO descriptors need to be supplied: - reset - power - mrdy - srdy - rst_out - pmu_reset Cc: Russ Gorby Signed-off-by: Linus Walleij Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200311083131.693908-2-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/spi/ifx_modem.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/ifx_modem.h b/include/linux/spi/ifx_modem.h index 694268c78d5d..6d19b09139d0 100644 --- a/include/linux/spi/ifx_modem.h +++ b/include/linux/spi/ifx_modem.h @@ -3,12 +3,7 @@ #define LINUX_IFX_MODEM_H struct ifx_modem_platform_data { - unsigned short rst_out; /* modem reset out */ - unsigned short pwr_on; /* power on */ - unsigned short rst_pmu; /* reset modem */ unsigned short tx_pwr; /* modem power threshold */ - unsigned short srdy; /* SRDY */ - unsigned short mrdy; /* MRDY */ unsigned char modem_type; /* Modem type */ unsigned long max_hz; /* max SPI frequency */ unsigned short use_dma:1; /* spi protocol driver supplies -- cgit v1.2.3 From 34471abfc8fedcebf7c45a5e37ef383b8bb16de3 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Sat, 22 Feb 2020 08:08:49 +0800 Subject: thermal: of-thermal: add API for getting sensor ID from DT This patch adds new API thermal_zone_of_get_sensor_id() to provide the feature of getting sensor ID from DT thermal zone's node. It's useful for thermal driver to register the specific thermal zone devices from DT in a common way. Signed-off-by: Anson Huang Reviewed-by: Dong Aisheng Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1582330132-13461-2-git-send-email-Anson.Huang@nxp.com --- include/linux/thermal.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 126913c6a53b..53e6f677761f 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -364,6 +364,9 @@ struct thermal_trip { /* Function declarations */ #ifdef CONFIG_THERMAL_OF +int thermal_zone_of_get_sensor_id(struct device_node *tz_np, + struct device_node *sensor_np, + u32 *id); struct thermal_zone_device * thermal_zone_of_sensor_register(struct device *dev, int id, void *data, const struct thermal_zone_of_device_ops *ops); @@ -375,6 +378,13 @@ struct thermal_zone_device *devm_thermal_zone_of_sensor_register( void devm_thermal_zone_of_sensor_unregister(struct device *dev, struct thermal_zone_device *tz); #else + +static int thermal_zone_of_get_sensor_id(struct device_node *tz_np, + struct device_node *sensor_np, + u32 *id) +{ + return -ENOENT; +} static inline struct thermal_zone_device * thermal_zone_of_sensor_register(struct device *dev, int id, void *data, const struct thermal_zone_of_device_ops *ops) -- cgit v1.2.3 From 15a26319c41962b7cf87603bc51d667f43d2ca67 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 3 Mar 2020 16:04:43 +0800 Subject: thermal: Fix build warning of !defined(CONFIG_THERMAL_OF) Add "inline" to thermal_zone_of_get_sensor_id() function to avoid below build warning of !defined(CONFIG_THERMAL_OF). In file included from drivers/hwmon/hwmon.c:22: include/linux/thermal.h:382:12: warning: 'thermal_zone_of_get_sensor_id' defined but not used [-Wunused-function] 382 | static int thermal_zone_of_get_sensor_id(struct device_node *tz_np, Signed-off-by: Anson Huang Reported-by: Stephen Rothwell Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1583222684-10229-1-git-send-email-Anson.Huang@nxp.com --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 53e6f677761f..c91b1e344d56 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -379,7 +379,7 @@ void devm_thermal_zone_of_sensor_unregister(struct device *dev, struct thermal_zone_device *tz); #else -static int thermal_zone_of_get_sensor_id(struct device_node *tz_np, +static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np, struct device_node *sensor_np, u32 *id) { -- cgit v1.2.3 From 9e2b4be377f0d715d9d910507890f9620cc22a9d Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Sun, 8 Mar 2020 20:57:51 -0400 Subject: ima: add a new CONFIG for loading arch-specific policies Every time a new architecture defines the IMA architecture specific functions - arch_ima_get_secureboot() and arch_ima_get_policy(), the IMA include file needs to be updated. To avoid this "noise", this patch defines a new IMA Kconfig IMA_SECURE_AND_OR_TRUSTED_BOOT option, allowing the different architectures to select it. Suggested-by: Linus Torvalds Signed-off-by: Nayna Jain Acked-by: Ard Biesheuvel Acked-by: Philipp Rudo (s390) Acked-by: Michael Ellerman (powerpc) Signed-off-by: Mimi Zohar --- include/linux/ima.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 1659217e9b60..aefe758f4466 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -30,8 +30,7 @@ extern void ima_kexec_cmdline(const void *buf, int size); extern void ima_add_kexec_buffer(struct kimage *image); #endif -#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390) \ - || defined(CONFIG_PPC_SECURE_BOOT) +#ifdef CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT extern bool arch_ima_get_secureboot(void); extern const char * const *arch_get_ima_policy(void); #else -- cgit v1.2.3 From 0bc68af9137dc3f30b161de4ce546c7799f88d1e Mon Sep 17 00:00:00 2001 From: Shivamurthy Shastri Date: Wed, 11 Mar 2020 18:57:33 +0100 Subject: mtd: spinand: micron: identify SPI NAND device with Continuous Read mode Add SPINAND_HAS_CR_FEAT_BIT flag to identify the SPI NAND device with the Continuous Read mode. Some of the Micron SPI NAND devices have the "Continuous Read" feature enabled by default, which does not fit the subsystem needs. In this mode, the READ CACHE command doesn't require the starting column address. The device always output the data starting from the first column of the cache register, and once the end of the cache register reached, the data output continues through the next page. With the continuous read mode, it is possible to read out the entire block using a single READ command, and once the end of the block reached, the output pins become High-Z state. However, during this mode the read command doesn't output the OOB area. Hence, we disable the feature at probe time. Signed-off-by: Shivamurthy Shastri Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200311175735.2007-5-sshivamurthy@micron.com --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index f4c4ae87181b..1077c45721ff 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -284,6 +284,7 @@ struct spinand_ecc_info { }; #define SPINAND_HAS_QE_BIT BIT(0) +#define SPINAND_HAS_CR_FEAT_BIT BIT(1) /** * struct spinand_info - Structure used to describe SPI NAND chips -- cgit v1.2.3 From 30a2da7b7e225ef6c87a660419ea04d3cef3f6a7 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 11 Mar 2020 16:07:50 +0530 Subject: block: Fix use-after-free issue accessing struct io_cq There is a potential race between ioc_release_fn() and ioc_clear_queue() as shown below, due to which below kernel crash is observed. It also can result into use-after-free issue. context#1: context#2: ioc_release_fn() __ioc_clear_queue() gets the same icq ->spin_lock(&ioc->lock); ->spin_lock(&ioc->lock); ->ioc_destroy_icq(icq); ->list_del_init(&icq->q_node); ->call_rcu(&icq->__rcu_head, icq_free_icq_rcu); ->spin_unlock(&ioc->lock); ->ioc_destroy_icq(icq); ->hlist_del_init(&icq->ioc_node); This results into below crash as this memory is now used by icq->__rcu_head in context#1. There is a chance that icq could be free'd as well. 22150.386550: <6> Unable to handle kernel write to read-only memory at virtual address ffffffaa8d31ca50 ... Call trace: 22150.607350: <2> ioc_destroy_icq+0x44/0x110 22150.611202: <2> ioc_clear_queue+0xac/0x148 22150.615056: <2> blk_cleanup_queue+0x11c/0x1a0 22150.619174: <2> __scsi_remove_device+0xdc/0x128 22150.623465: <2> scsi_forget_host+0x2c/0x78 22150.627315: <2> scsi_remove_host+0x7c/0x2a0 22150.631257: <2> usb_stor_disconnect+0x74/0xc8 22150.635371: <2> usb_unbind_interface+0xc8/0x278 22150.639665: <2> device_release_driver_internal+0x198/0x250 22150.644897: <2> device_release_driver+0x24/0x30 22150.649176: <2> bus_remove_device+0xec/0x140 22150.653204: <2> device_del+0x270/0x460 22150.656712: <2> usb_disable_device+0x120/0x390 22150.660918: <2> usb_disconnect+0xf4/0x2e0 22150.664684: <2> hub_event+0xd70/0x17e8 22150.668197: <2> process_one_work+0x210/0x480 22150.672222: <2> worker_thread+0x32c/0x4c8 Fix this by adding a new ICQ_DESTROYED flag in ioc_destroy_icq() to indicate this icq is once marked as destroyed. Also, ensure __ioc_clear_queue() is accessing icq within rcu_read_lock/unlock so that icq doesn't get free'd up while it is still using it. Signed-off-by: Sahitya Tummala Co-developed-by: Pradeep P V K Signed-off-by: Pradeep P V K Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index dba15ca8e60b..1dcd9198beb7 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -8,6 +8,7 @@ enum { ICQ_EXITED = 1 << 2, + ICQ_DESTROYED = 1 << 3, }; /* -- cgit v1.2.3 From 9acb9fe18d863aacc99948963f8d5d447dc311be Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 10 Mar 2020 20:50:07 +0800 Subject: PCI: Add Loongson vendor ID Add the Loongson vendor ID to pci_ids.h to be used by the controller driver in the future. The Loongson vendor ID can be found at the following link: https://git.kernel.org/pub/scm/utils/pciutils/pciutils.git/tree/pci.ids Signed-off-by: Tiezhu Yang Signed-off-by: Jens Axboe --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 352c0d708720..977e66875a96 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -148,6 +148,8 @@ /* Vendors and devices. Sort key: vendor first, device next. */ +#define PCI_VENDOR_ID_LOONGSON 0x0014 + #define PCI_VENDOR_ID_TTTECH 0x0357 #define PCI_DEVICE_ID_TTTECH_MC322 0x000a -- cgit v1.2.3 From 9243c6f3e012a92dd900d97ef45efaf8a8edc448 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Sat, 7 Mar 2020 15:56:59 +0100 Subject: block: Document genhd capability flags The kernel documentation includes a brief section about genhd capabilities, but it turns out that the only documented capability (GENHD_FL_MEDIA_CHANGE_NOTIFY) isn't used any more. This patch removes that flag, and documents the rest, based on my understanding of the current uses of these flags in the kernel. The documentation is kept in the header file, alongside the declarations, in the hope that it will be kept up-to-date in future; the kernel documentation is changed to include the documentation generated from the header file. Because the ultimate goal is to provide some end-user documentation (or end-administrator documentation), the comments are perhaps more user-oriented than might be expected. Since the values are shown to users in hexadecimal, the documentation lists them in hexadecimal, and the constant declarations are adjusted to match. Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Stephen Kitt Signed-off-by: Jens Axboe --- include/linux/genhd.h | 69 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fbe58538ad6..e7244fb6b6ad 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -133,17 +133,64 @@ struct hd_struct { struct rcu_work rcu_work; }; -#define GENHD_FL_REMOVABLE 1 -/* 2 is unused */ -#define GENHD_FL_MEDIA_CHANGE_NOTIFY 4 -#define GENHD_FL_CD 8 -#define GENHD_FL_UP 16 -#define GENHD_FL_SUPPRESS_PARTITION_INFO 32 -#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ -#define GENHD_FL_NATIVE_CAPACITY 128 -#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256 -#define GENHD_FL_NO_PART_SCAN 512 -#define GENHD_FL_HIDDEN 1024 +/** + * DOC: genhd capability flags + * + * ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device + * gives access to removable media. + * When set, the device remains present even when media is not + * inserted. + * Must not be set for devices which are removed entirely when the + * media is removed. + * + * ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style + * device. + * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl. + * + * ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up", + * with a similar meaning to network interfaces. + * + * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include + * partition information in ``/proc/partitions`` or in the output of + * printk_all_partitions(). + * Used for the null block device and some MMC devices. + * + * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended + * dynamic ``dev_t``, i.e. it wants extended device numbers + * (``BLOCK_EXT_MAJOR``). + * This affects the maximum number of partitions. + * + * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the + * partition table, the device's capacity has been extended to its + * native capacity; i.e. the device has hidden capacity used by one + * of the partitions (this is a flag used so that native capacity is + * only ever unlocked once). + * + * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is + * blocked whenever a writer holds an exclusive lock. + * + * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled. + * Used for loop devices in their default settings and some MMC + * devices. + * + * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it + * doesn't produce events, doesn't appear in sysfs, and doesn't have + * an associated ``bdev``. + * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and + * ``GENHD_FL_NO_PART_SCAN``. + * Used for multipath devices. + */ +#define GENHD_FL_REMOVABLE 0x0001 +/* 2 is unused (used to be GENHD_FL_DRIVERFS) */ +/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ +#define GENHD_FL_CD 0x0008 +#define GENHD_FL_UP 0x0010 +#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 +#define GENHD_FL_EXT_DEVT 0x0040 +#define GENHD_FL_NATIVE_CAPACITY 0x0080 +#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100 +#define GENHD_FL_NO_PART_SCAN 0x0200 +#define GENHD_FL_HIDDEN 0x0400 enum { DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ -- cgit v1.2.3 From 58def851063d2178c10622e436897e6285850077 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 3 Mar 2020 10:37:34 +0100 Subject: libata: drop BPRINTK() No users, drop it. Signed-off-by: Hannes Reinecke Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 2ca9b7056a82..710e09dae910 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -57,8 +57,6 @@ #define VPRINTK(fmt, args...) #endif /* ATA_DEBUG */ -#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __func__, ## args) - #define ata_print_version_once(dev, version) \ ({ \ static bool __print_once; \ -- cgit v1.2.3 From da9a5aa3402db0ff3b57216d8dbf2478e1046cae Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 10 Mar 2020 19:43:37 +0200 Subject: serial: core: Refactor uart_unlock_and_check_sysrq() Refactor uart_unlock_and_check_sysrq() to: - explicitly show that we release a port lock which makes static analyzers happy: CHECK drivers/tty/serial/serial_core.c .../serial_core.c:3290:17: warning: context imbalance in 'uart_unlock_and_check_sysrq' - unexpected unlock - use flags instead of irqflags to avoid confusion with IRQ flags - provide one return point - be more compact Signed-off-by: Andy Shevchenko Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20200310174337.74109-4-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 1f4443db5474..92f5eba86052 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -462,8 +462,7 @@ extern void uart_insert_char(struct uart_port *port, unsigned int status, extern int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch); extern int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch); -extern void uart_unlock_and_check_sysrq(struct uart_port *port, - unsigned long irqflags); +extern void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long flags); extern int uart_handle_break(struct uart_port *port); /* -- cgit v1.2.3 From 8213f6c9a275da084dc9363f36f93138547f46f1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Mar 2020 15:32:48 -0700 Subject: net: be2net: reject unsupported coalescing params Set ethtool_ops->supported_coalesce_params to let the core reject unsupported coalescing parameters. This driver did not previously reject unsupported parameters. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9efeebde3514..acfce915a02b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -211,6 +211,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ) #define ETHTOOL_COALESCE_USE_ADAPTIVE \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_ADAPTIVE_TX) +#define ETHTOOL_COALESCE_USECS_LOW_HIGH \ + (ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_TX_USECS_LOW | \ + ETHTOOL_COALESCE_RX_USECS_HIGH | ETHTOOL_COALESCE_TX_USECS_HIGH) #define ETHTOOL_COALESCE_PKT_RATE_RX_USECS \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | \ ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_RX_USECS_HIGH | \ -- cgit v1.2.3 From 4f9546d24a12e9041eae20574ab244f5410e02c4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Mar 2020 15:32:52 -0700 Subject: net: hns: reject unsupported coalescing params Set ethtool_ops->supported_coalesce_params to let the core reject unsupported coalescing parameters. This driver did not previously reject unsupported parameters. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index acfce915a02b..be355f37337d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -214,6 +214,11 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_USECS_LOW_HIGH \ (ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_TX_USECS_LOW | \ ETHTOOL_COALESCE_RX_USECS_HIGH | ETHTOOL_COALESCE_TX_USECS_HIGH) +#define ETHTOOL_COALESCE_MAX_FRAMES_LOW_HIGH \ + (ETHTOOL_COALESCE_RX_MAX_FRAMES_LOW | \ + ETHTOOL_COALESCE_TX_MAX_FRAMES_LOW | \ + ETHTOOL_COALESCE_RX_MAX_FRAMES_HIGH | \ + ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH) #define ETHTOOL_COALESCE_PKT_RATE_RX_USECS \ (ETHTOOL_COALESCE_USE_ADAPTIVE_RX | \ ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_RX_USECS_HIGH | \ -- cgit v1.2.3 From 7e934cf5ace1dceeb804f7493fa28bb697ed3c52 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Mar 2020 17:29:11 -0400 Subject: xarray: Fix early termination of xas_for_each_marked xas_for_each_marked() is using entry == NULL as a termination condition of the iteration. When xas_for_each_marked() is used protected only by RCU, this can however race with xas_store(xas, NULL) in the following way: TASK1 TASK2 page_cache_delete() find_get_pages_range_tag() xas_for_each_marked() xas_find_marked() off = xas_find_chunk() xas_store(&xas, NULL) xas_init_marks(&xas); ... rcu_assign_pointer(*slot, NULL); entry = xa_entry(off); And thus xas_for_each_marked() terminates prematurely possibly leading to missed entries in the iteration (translating to missing writeback of some pages or a similar problem). If we find a NULL entry that has been marked, skip it (unless we're trying to allocate an entry). Reported-by: Jan Kara CC: stable@vger.kernel.org Fixes: ef8e5717db01 ("page cache: Convert delete_batch to XArray") Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index a491653d8882..14c893433139 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1648,6 +1648,7 @@ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) { struct xa_node *node = xas->xa_node; + void *entry; unsigned int offset; if (unlikely(xas_not_node(node) || node->shift)) @@ -1659,7 +1660,10 @@ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max, return NULL; if (offset == XA_CHUNK_SIZE) return xas_find_marked(xas, max, mark); - return xa_entry(xas->xa, node, offset); + entry = xa_entry(xas->xa, node, offset); + if (!entry) + return xas_find_marked(xas, max, mark); + return entry; } /* -- cgit v1.2.3 From 5b7cb7451585f83d414512a70b79b2086b8c6ed1 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 12 Mar 2020 12:23:03 +0200 Subject: net/mlx5: E-Switch, Enable reg c1 loopback when possible Enable reg c1 loopback if firmware reports it's supported, as this is needed for restoring packet metadata (e.g chain). Also define helper to query if it is enabled. Signed-off-by: Paul Blakey Signed-off-by: David S. Miller --- include/linux/mlx5/eswitch.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 61705e74a5bb..c16827eeba9c 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -70,6 +70,7 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); enum devlink_eswitch_encap_mode mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); +bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw); bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw); /* Reg C0 usage: @@ -108,6 +109,12 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) return DEVLINK_ESWITCH_ENCAP_MODE_NONE; } +static inline bool +mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw) +{ + return false; +}; + static inline bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) { -- cgit v1.2.3 From e31a50162feb352147d3fc87b9e036703c8f2636 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 12 Mar 2020 11:44:27 -0500 Subject: bitfield.h: add FIELD_MAX() and field_max() Define FIELD_MAX(), which supplies the maximum value that can be represented by a field value. Define field_max() as well, to go along with the lower-case forms of the field mask functions. Signed-off-by: Alex Elder Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/bitfield.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 4bbb5f1c8b5b..48ea093ff04c 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -55,6 +55,19 @@ (1ULL << __bf_shf(_mask))); \ }) +/** + * FIELD_MAX() - produce the maximum value representable by a field + * @_mask: shifted mask defining the field's length and position + * + * FIELD_MAX() returns the maximum value that can be held in the field + * specified by @_mask. + */ +#define FIELD_MAX(_mask) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \ + (typeof(_mask))((_mask) >> __bf_shf(_mask)); \ + }) + /** * FIELD_FIT() - check if value fits in the field * @_mask: shifted mask defining the field's length and position @@ -110,6 +123,7 @@ static __always_inline u64 field_mask(u64 field) { return field / field_multiplier(field); } +#define field_max(field) ((typeof(field))field_mask(field)) #define ____MAKE_OP(type,base,to,from) \ static __always_inline __##type type##_encode_bits(base v, base field) \ { \ -- cgit v1.2.3 From 1e2328e762548c7d17b7ba8ded9f409d05710dd1 Mon Sep 17 00:00:00 2001 From: Carlos Neira Date: Wed, 4 Mar 2020 17:41:55 -0300 Subject: fs/nsfs.c: Added ns_match ns_match returns true if the namespace inode and dev_t matches the ones provided by the caller. Signed-off-by: Carlos Neira Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200304204157.58695-2-cneirabustos@gmail.com --- include/linux/proc_ns.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 4626b1ac3b6c..adff08bfecf9 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -85,6 +85,8 @@ typedef struct ns_common *ns_get_path_helper_t(void *); extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, void *private_data); +extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); + extern int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops); extern void nsfs_init(void); -- cgit v1.2.3 From b4490c5c4e023f09b7d27c9a9d3e7ad7d09ea6bf Mon Sep 17 00:00:00 2001 From: Carlos Neira Date: Wed, 4 Mar 2020 17:41:56 -0300 Subject: bpf: Added new helper bpf_get_ns_current_pid_tgid New bpf helper bpf_get_ns_current_pid_tgid, This helper will return pid and tgid from current task which namespace matches dev_t and inode number provided, this will allows us to instrument a process inside a container. Signed-off-by: Carlos Neira Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200304204157.58695-3-cneirabustos@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fd91b7c95ea..4ec835334a1f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1497,6 +1497,7 @@ extern const struct bpf_func_proto bpf_strtol_proto; extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); -- cgit v1.2.3 From fc6a9f86f08acd3665f788619afae0d2b2d5a480 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 10 Mar 2020 10:22:28 +0200 Subject: {IB,net}/mlx5: Assign mkey variant in mlx5_ib only mkey variant is not required for mlx5_core use, move the mkey variant counter to mlx5_ib. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f2b4225ed650..e044703c056b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -575,10 +575,6 @@ struct mlx5_priv { /* end: alloc staff */ struct dentry *dbg_root; - /* protect mkey key part */ - spinlock_t mkey_lock; - u8 mkey_key; - struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; -- cgit v1.2.3 From a3cfdd3928113012d0f2c5353277f4e27878a663 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Tue, 10 Mar 2020 10:22:30 +0200 Subject: {IB,net}/mlx5: Move asynchronous mkey creation to mlx5_ib As mlx5_ib is the only user of the mlx5_core_create_mkey_cb, move the logic inside mlx5_ib and cleanup the code in mlx5_core. Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e044703c056b..1de78f001d26 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -943,12 +943,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); -- cgit v1.2.3 From 98868668367b24487c0b0b3298d7ca98409baf07 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 12 Mar 2020 17:21:28 -0700 Subject: bpf: Abstract away entire bpf_link clean up procedure Instead of requiring users to do three steps for cleaning up bpf_link, its anon_inode file, and unused fd, abstract that away into bpf_link_cleanup() helper. bpf_link_defunct() is removed, as it shouldn't be needed as an individual operation anymore. v1->v2: - keep bpf_link_cleanup() static for now (Daniel). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200313002128.2028680-1-andriin@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4ec835334a1f..c2f815e9f7d0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1075,7 +1075,6 @@ struct bpf_link_ops { void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, struct bpf_prog *prog); -void bpf_link_defunct(struct bpf_link *link); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -- cgit v1.2.3 From 6a64037d4bf252bb8cf13917320c8e001da8997a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 12 Mar 2020 20:55:57 +0100 Subject: bpf: Add bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding bpf_trampoline_ name prefix for DECLARE_BPF_DISPATCHER, so all the dispatchers have the common name prefix. And also a small '_' cleanup for bpf_dispatcher_nopfunc function name. Signed-off-by: Björn Töpel Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200312195610.346362-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 21 +++++++++++---------- include/linux/filter.h | 9 ++++----- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c2f815e9f7d0..fe1f8b075378 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -522,7 +522,7 @@ struct bpf_dispatcher { u32 image_off; }; -static __always_inline unsigned int bpf_dispatcher_nopfunc( +static __always_inline unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, @@ -537,7 +537,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); #define BPF_DISPATCHER_INIT(name) { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ - .func = &name##func, \ + .func = &name##_func, \ .progs = {}, \ .num_progs = 0, \ .image = NULL, \ @@ -545,7 +545,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); } #define DEFINE_BPF_DISPATCHER(name) \ - noinline unsigned int name##func( \ + noinline unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ @@ -553,17 +553,18 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); { \ return bpf_func(ctx, insnsi); \ } \ - EXPORT_SYMBOL(name##func); \ - struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name); + EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ + struct bpf_dispatcher bpf_dispatcher_##name = \ + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ - unsigned int name##func( \ + unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ unsigned int (*bpf_func)(const void *, \ const struct bpf_insn *)); \ - extern struct bpf_dispatcher name; -#define BPF_DISPATCHER_FUNC(name) name##func -#define BPF_DISPATCHER_PTR(name) (&name) + extern struct bpf_dispatcher bpf_dispatcher_##name; +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func +#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); struct bpf_image { @@ -589,7 +590,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) #define DECLARE_BPF_DISPATCHER(name) -#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc +#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func #define BPF_DISPATCHER_PTR(name) NULL static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, diff --git a/include/linux/filter.h b/include/linux/filter.h index 43b5e455d2f5..6249679275b3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -577,7 +577,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); ret; }) #define BPF_PROG_RUN(prog, ctx) \ - __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc) + __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) /* * Use in preemptible and therefore migratable context to make sure that @@ -596,7 +596,7 @@ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, u32 ret; migrate_disable(); - ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc); + ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func); migrate_enable(); return ret; } @@ -722,7 +722,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return res; } -DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp) +DECLARE_BPF_DISPATCHER(xdp) static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) @@ -733,8 +733,7 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * already takes rcu_read_lock() when fetching the program, so * it's not necessary here anymore. */ - return __BPF_PROG_RUN(prog, xdp, - BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp)); + return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); -- cgit v1.2.3 From 535911c80ad4f5801700e9d827a1985bbff41519 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:55:58 +0100 Subject: bpf: Add struct bpf_ksym Adding 'struct bpf_ksym' object that will carry the kallsym information for bpf symbol. Adding the start and end address to begin with. It will be used by bpf_prog, bpf_trampoline, bpf_dispatcher objects. The symbol_start/symbol_end values were originally used to sort bpf_prog objects. For the address displayed in /proc/kallsyms we are using prog->bpf_func value. I'm using the bpf_func value for program symbol start instead of the symbol_start, because it makes no difference for sorting bpf_prog objects and we can use it directly as an address to display it in /proc/kallsyms. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200312195610.346362-4-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fe1f8b075378..6ca3d5c8ccf3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -471,6 +471,11 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, u64 notrace __bpf_prog_enter(void); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); +struct bpf_ksym { + unsigned long start; + unsigned long end; +}; + enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, @@ -653,6 +658,7 @@ struct bpf_prog_aux { u32 size_poke_tab; struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; + struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; -- cgit v1.2.3 From bfea9a8574f34597581f74f792d044d38497b775 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:55:59 +0100 Subject: bpf: Add name to struct bpf_ksym Adding name to 'struct bpf_ksym' object to carry the name of the symbol for bpf_prog, bpf_trampoline, bpf_dispatcher objects. The current benefit is that name is now generated only when the symbol is added to the list, so we don't need to generate it every time it's accessed. The future benefit is that we will have all the bpf objects symbols represented by struct bpf_ksym. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200312195610.346362-5-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ include/linux/filter.h | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6ca3d5c8ccf3..047b44deb3c5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -18,6 +18,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -474,6 +475,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); struct bpf_ksym { unsigned long start; unsigned long end; + char name[KSYM_NAME_LEN]; }; enum bpf_tramp_prog_type { diff --git a/include/linux/filter.h b/include/linux/filter.h index 6249679275b3..9b5aa5c483cc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1083,7 +1083,6 @@ bpf_address_lookup(unsigned long addr, unsigned long *size, void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); -void bpf_get_prog_name(const struct bpf_prog *prog, char *sym); #else /* CONFIG_BPF_JIT */ @@ -1152,11 +1151,6 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } -static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) -{ - sym[0] = '\0'; -} - #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); -- cgit v1.2.3 From ecb60d1c670e9b205197d8e4381b19e77bc2d834 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:00 +0100 Subject: bpf: Move lnode list node to struct bpf_ksym Adding lnode list node to 'struct bpf_ksym' object, so the struct bpf_ksym itself can be chained and used in other objects like bpf_trampoline and bpf_dispatcher. Changing iterator to bpf_ksym in bpf_get_kallsym function. The ksym->start is holding the prog->bpf_func value, so it's ok to use it as value in bpf_get_kallsym. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200312195610.346362-6-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 047b44deb3c5..4fad2fa4135c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -476,6 +476,7 @@ struct bpf_ksym { unsigned long start; unsigned long end; char name[KSYM_NAME_LEN]; + struct list_head lnode; }; enum bpf_tramp_prog_type { @@ -659,7 +660,6 @@ struct bpf_prog_aux { struct bpf_jit_poke_descriptor *poke_tab; u32 size_poke_tab; struct latch_tree_node ksym_tnode; - struct list_head ksym_lnode; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; -- cgit v1.2.3 From ca4424c920f574b7246ff1b6d83cfdfd709e42c8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:01 +0100 Subject: bpf: Move ksym_tnode to bpf_ksym Moving ksym_tnode list node to 'struct bpf_ksym' object, so the symbol itself can be chained and used in other objects like bpf_trampoline and bpf_dispatcher. We need bpf_ksym object to be linked both in bpf_kallsyms via lnode for /proc/kallsyms and in bpf_tree via tnode for bpf address lookup functions like __bpf_address_lookup or bpf_prog_kallsyms_find. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-7-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fad2fa4135c..68d66b0078df 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -477,6 +477,7 @@ struct bpf_ksym { unsigned long end; char name[KSYM_NAME_LEN]; struct list_head lnode; + struct latch_tree_node tnode; }; enum bpf_tramp_prog_type { @@ -659,7 +660,6 @@ struct bpf_prog_aux { void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; u32 size_poke_tab; - struct latch_tree_node ksym_tnode; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; struct bpf_map **used_maps; -- cgit v1.2.3 From cbd76f8d5ac9c4e99c4ffe5e39a1e907cdf5a76f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:03 +0100 Subject: bpf: Add prog flag to struct bpf_ksym object Adding 'prog' bool flag to 'struct bpf_ksym' to mark that this object belongs to bpf_prog object. This change allows having bpf_prog objects together with other types (trampolines and dispatchers) in the single bpf_tree. It's used when searching for bpf_prog exception tables by the bpf_prog_ksym_find function, where we need to get the bpf_prog pointer. >From now we can safely add bpf_ksym support for trampoline or dispatcher objects, because we can differentiate them from bpf_prog objects. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-9-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 68d66b0078df..a0cef664c1a9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -478,6 +478,7 @@ struct bpf_ksym { char name[KSYM_NAME_LEN]; struct list_head lnode; struct latch_tree_node tnode; + bool prog; }; enum bpf_tramp_prog_type { -- cgit v1.2.3 From dba122fb5e122e8e07e2f11cdebc10ba4f425cf7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:04 +0100 Subject: bpf: Add bpf_ksym_add/del functions Separating /proc/kallsyms add/del code and adding bpf_ksym_add/del functions for that. Moving bpf_prog_ksym_node_add/del functions to __bpf_ksym_add/del and changing their argument to 'struct bpf_ksym' object. This way we can call them for other bpf objects types like trampoline and dispatcher. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-10-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a0cef664c1a9..ec1de88b8487 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -584,6 +584,9 @@ struct bpf_image { #define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image)) bool is_bpf_image_address(unsigned long address); void *bpf_image_alloc(void); +/* Called only from JIT-enabled code, so there's no need for stubs. */ +void bpf_ksym_add(struct bpf_ksym *ksym); +void bpf_ksym_del(struct bpf_ksym *ksym); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { -- cgit v1.2.3 From a108f7dcfa010e3da825af90d77ac0a6a0240992 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:05 +0100 Subject: bpf: Add trampolines to kallsyms Adding trampolines to kallsyms. It's displayed as bpf_trampoline_ [bpf] where ID is the BTF id of the trampoline function. Adding bpf_image_ksym_add/del functions that setup the start/end values and call KSYMBOL perf events handlers. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-11-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ec1de88b8487..083860be1944 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -513,6 +513,7 @@ struct bpf_trampoline { /* Executable image of trampoline */ void *image; u64 selector; + struct bpf_ksym ksym; }; #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */ @@ -585,6 +586,8 @@ struct bpf_image { bool is_bpf_image_address(unsigned long address); void *bpf_image_alloc(void); /* Called only from JIT-enabled code, so there's no need for stubs. */ +void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); +void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); #else -- cgit v1.2.3 From 517b75e44c7be9c776aa5f7beaa85baff3868f80 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:06 +0100 Subject: bpf: Add dispatchers to kallsyms Adding dispatchers to kallsyms. It's displayed as bpf_dispatcher_ where NAME is the name of dispatcher. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-12-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 083860be1944..86cacb54ba23 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -531,6 +531,7 @@ struct bpf_dispatcher { int num_progs; void *image; u32 image_off; + struct bpf_ksym ksym; }; static __always_inline unsigned int bpf_dispatcher_nop_func( @@ -546,13 +547,17 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); -#define BPF_DISPATCHER_INIT(name) { \ - .mutex = __MUTEX_INITIALIZER(name.mutex), \ - .func = &name##_func, \ - .progs = {}, \ - .num_progs = 0, \ - .image = NULL, \ - .image_off = 0 \ +#define BPF_DISPATCHER_INIT(_name) { \ + .mutex = __MUTEX_INITIALIZER(_name.mutex), \ + .func = &_name##_func, \ + .progs = {}, \ + .num_progs = 0, \ + .image = NULL, \ + .image_off = 0, \ + .ksym = { \ + .name = #_name, \ + .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ + }, \ } #define DEFINE_BPF_DISPATCHER(name) \ -- cgit v1.2.3 From 7ac88eba185b4d0e06a71678e54bc092edcd3af3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Mar 2020 20:56:07 +0100 Subject: bpf: Remove bpf_image tree Now that we have all the objects (bpf_prog, bpf_trampoline, bpf_dispatcher) linked in bpf_tree, there's no need to have separate bpf_image tree for images. Reverting the bpf_image tree together with struct bpf_image, because it's no longer needed. Also removing bpf_image_alloc function and adding the original bpf_jit_alloc_exec_page interface instead. The kernel_text_address function can now rely only on is_bpf_text_address, because it checks the bpf_tree that contains all the objects. Keeping bpf_image_ksym_add and bpf_image_ksym_del because they are useful wrappers with perf's ksymbol interface calls. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200312195610.346362-13-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 86cacb54ba23..bdb981c204fa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -583,14 +583,8 @@ void bpf_trampoline_put(struct bpf_trampoline *tr); #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); -struct bpf_image { - struct latch_tree_node tnode; - unsigned char data[]; -}; -#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image)) -bool is_bpf_image_address(unsigned long address); -void *bpf_image_alloc(void); /* Called only from JIT-enabled code, so there's no need for stubs. */ +void *bpf_jit_alloc_exec_page(void); void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); -- cgit v1.2.3 From 172e7890406d6183b9b39271ffb434ff0a97ce72 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 17 Feb 2020 14:44:05 +0100 Subject: tty/serial: cleanup after ioc*_serial driver removal Commit 9c860e4cf708 ("tty/serial: remove the ioc3_serial driver") and commit a017ef17cfd8 ("tty/serial: remove the ioc4_serial driver") removed the ioc{3,4}_serial driver, but missed some files. Fortunately, ./scripts/get_maintainer.pl --self-test complains: warning: no file matches F: drivers/tty/serial/ioc?_serial.c The driver is gone, so remove the header and maintainer entry as well. The serial.rst Documentation might be useful, so we keep it and update the maintainer entry to the document's actual maintainers. Signed-off-by: Lukas Bulwahn Reviewed-by: Christoph Hellwig Signed-off-by: Tony Luck --- include/linux/ioc3.h | 93 ---------------------------------------------------- 1 file changed, 93 deletions(-) delete mode 100644 include/linux/ioc3.h (limited to 'include/linux') diff --git a/include/linux/ioc3.h b/include/linux/ioc3.h deleted file mode 100644 index 38b286e9a46c..000000000000 --- a/include/linux/ioc3.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2005 Stanislaw Skowronek - */ - -#ifndef _LINUX_IOC3_H -#define _LINUX_IOC3_H - -#include - -#define IOC3_MAX_SUBMODULES 32 - -#define IOC3_CLASS_NONE 0 -#define IOC3_CLASS_BASE_IP27 1 -#define IOC3_CLASS_BASE_IP30 2 -#define IOC3_CLASS_MENET_123 3 -#define IOC3_CLASS_MENET_4 4 -#define IOC3_CLASS_CADDUO 5 -#define IOC3_CLASS_SERIAL 6 - -/* One of these per IOC3 */ -struct ioc3_driver_data { - struct list_head list; - int id; /* IOC3 sequence number */ - /* PCI mapping */ - unsigned long pma; /* physical address */ - struct ioc3 __iomem *vma; /* pointer to registers */ - struct pci_dev *pdev; /* PCI device */ - /* IRQ stuff */ - int dual_irq; /* set if separate IRQs are used */ - int irq_io, irq_eth; /* IRQ numbers */ - /* GPIO magic */ - spinlock_t gpio_lock; - unsigned int gpdr_shadow; - /* NIC identifiers */ - char nic_part[32]; - char nic_serial[16]; - char nic_mac[6]; - /* submodule set */ - int class; - void *data[IOC3_MAX_SUBMODULES]; /* for submodule use */ - int active[IOC3_MAX_SUBMODULES]; /* set if probe succeeds */ - /* is_ir_lock must be held while - * modifying sio_ie values, so - * we can be sure that sio_ie is - * not changing when we read it - * along with sio_ir. - */ - spinlock_t ir_lock; /* SIO_IE[SC] mod lock */ -}; - -/* One per submodule */ -struct ioc3_submodule { - char *name; /* descriptive submodule name */ - struct module *owner; /* owning kernel module */ - int ethernet; /* set for ethernet drivers */ - int (*probe) (struct ioc3_submodule *, struct ioc3_driver_data *); - int (*remove) (struct ioc3_submodule *, struct ioc3_driver_data *); - int id; /* assigned by IOC3, index for the "data" array */ - /* IRQ stuff */ - unsigned int irq_mask; /* IOC3 IRQ mask, leave clear for Ethernet */ - int reset_mask; /* non-zero if you want the ioc3.c module to reset interrupts */ - int (*intr) (struct ioc3_submodule *, struct ioc3_driver_data *, unsigned int); - /* private submodule data */ - void *data; /* assigned by submodule */ -}; - -/********************************** - * Functions needed by submodules * - **********************************/ - -#define IOC3_W_IES 0 -#define IOC3_W_IEC 1 - -/* registers a submodule for all existing and future IOC3 chips */ -extern int ioc3_register_submodule(struct ioc3_submodule *); -/* unregisters a submodule */ -extern void ioc3_unregister_submodule(struct ioc3_submodule *); -/* enables IRQs indicated by irq_mask for a specified IOC3 chip */ -extern void ioc3_enable(struct ioc3_submodule *, struct ioc3_driver_data *, unsigned int); -/* ackowledges specified IRQs */ -extern void ioc3_ack(struct ioc3_submodule *, struct ioc3_driver_data *, unsigned int); -/* disables IRQs indicated by irq_mask for a specified IOC3 chip */ -extern void ioc3_disable(struct ioc3_submodule *, struct ioc3_driver_data *, unsigned int); -/* atomically sets GPCR bits */ -extern void ioc3_gpcr_set(struct ioc3_driver_data *, unsigned int); -/* general ireg writer */ -extern void ioc3_write_ireg(struct ioc3_driver_data *idd, uint32_t value, int reg); - -#endif -- cgit v1.2.3 From 161aff1d93abf0e5b5e9dbca88928998c155f677 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 11 Jan 2020 22:52:26 -0500 Subject: LOOKUP_MOUNTPOINT: fold path_mountpointat() into path_lookupat() New LOOKUP flag, telling path_lookupat() to act as path_mountpointat(). IOW, traverse mounts at the final point and skip revalidation of the location where it ends up. Signed-off-by: Al Viro --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 0dd980d7318f..d9576a051808 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -23,6 +23,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */ #define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */ #define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */ +#define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */ #define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */ #define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */ @@ -64,7 +65,6 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); -extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); -- cgit v1.2.3 From b4c0353693d22f9dcfa4757262dab0aab8376d19 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Jan 2020 11:44:51 -0500 Subject: sanitize handling of nd->last_type, kill LAST_BIND ->last_type values are set in 3 places: path_init() (sets to LAST_ROOT), link_path_walk (LAST_NORM/DOT/DOTDOT) and pick_link (LAST_BIND). The are checked in walk_component(), lookup_last() and do_last(). They also get copied to the caller by filename_parentat(). In the last 3 cases the value is what we had at the return from link_path_walk(). In case of walk_component() it's either directly downstream from assignment in link_path_walk() or, when called by lookup_last(), the value we have at the return from link_path_walk(). The value at the entry into link_path_walk() can survive to return only if the pathname contains nothing but slashes. Note that pick_link() never returns such - pure jumps are handled directly. So for the calls of link_path_walk() for trailing symlinks it does not matter what value had been there at the entry; the value at the return won't depend upon it. There are 3 call chains that might have pick_link() storing LAST_BIND: 1) pick_link() from step_into() from walk_component() from link_path_walk(). In that case we will either be parsing the next component immediately after return into link_path_walk(), which will overwrite the ->last_type before anyone has a chance to look at it, or we'll fail, in which case nobody will be looking at ->last_type at all. 2) pick_link() from step_into() from walk_component() from lookup_last(). The value is never looked at due to the above; it won't affect the value seen at return from any link_path_walk(). 3) pick_link() from step_into() from do_last(). Ditto. In other words, assignemnt in pick_link() is pointless, and so is LAST_BIND itself; nothing ever looks at that value. Kill it off. And make link_path_walk() _always_ assign ->last_type - in the only case when the value at the entry might survive to the return that value is always LAST_ROOT, inherited from path_init(). Move that assignment from path_init() into the beginning of link_path_walk(), to consolidate the things. Historical note: LAST_BIND used to be used for the kludge with trailing pure jump symlinks (extra iteration through the top-level loop). No point keeping it anymore... Signed-off-by: Al Viro --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index d9576a051808..a4bb992623c4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -15,7 +15,7 @@ enum { MAX_NESTED_LINKS = 8 }; /* * Type of the last component on LOOKUP_PARENT */ -enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; +enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; /* pathwalk mode */ #define LOOKUP_FOLLOW 0x0001 /* follow links at the end */ -- cgit v1.2.3 From 58b05e58d155fd5a9a181d51b4c9c8a69a0816d3 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Thu, 12 Mar 2020 18:10:09 +0100 Subject: net: phy: Add XLGMII interface define Add a define for XLGMII interface. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7a08023bdbc5..6b872aed8ba6 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -94,6 +94,7 @@ typedef enum { PHY_INTERFACE_MODE_RTBI, PHY_INTERFACE_MODE_SMII, PHY_INTERFACE_MODE_XGMII, + PHY_INTERFACE_MODE_XLGMII, PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_TRGMII, @@ -165,6 +166,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "smii"; case PHY_INTERFACE_MODE_XGMII: return "xgmii"; + case PHY_INTERFACE_MODE_XLGMII: + return "xlgmii"; case PHY_INTERFACE_MODE_MOCA: return "moca"; case PHY_INTERFACE_MODE_QSGMII: -- cgit v1.2.3 From fbe639b44a82755d639df1c5d147c93f02ac5a0f Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Thu, 12 Mar 2020 17:38:40 +0530 Subject: soc: qcom: Introduce Protection Domain Restart helpers Qualcomm SoCs (starting with MSM8998) allow for multiple protection domains to run on the same Q6 sub-system. This allows for services like ATH10K WLAN FW to have their own separate address space and crash/recover without disrupting the modem and other PDs running on the same sub-system. The PDR helpers introduces an abstraction that allows for tracking/controlling the life cycle of protection domains running on various Q6 sub-systems. Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/20200312120842.21991-2-sibis@codeaurora.org Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/pdr.h | 29 +++++++++++++++++++++++++++++ include/linux/soc/qcom/qmi.h | 1 + 2 files changed, 30 insertions(+) create mode 100644 include/linux/soc/qcom/pdr.h (limited to 'include/linux') diff --git a/include/linux/soc/qcom/pdr.h b/include/linux/soc/qcom/pdr.h new file mode 100644 index 000000000000..83a8ea612e69 --- /dev/null +++ b/include/linux/soc/qcom/pdr.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __QCOM_PDR_HELPER__ +#define __QCOM_PDR_HELPER__ + +#include + +#define SERVREG_NAME_LENGTH 64 + +struct pdr_service; +struct pdr_handle; + +enum servreg_service_state { + SERVREG_LOCATOR_ERR = 0x1, + SERVREG_SERVICE_STATE_DOWN = 0x0FFFFFFF, + SERVREG_SERVICE_STATE_UP = 0x1FFFFFFF, + SERVREG_SERVICE_STATE_EARLY_DOWN = 0x2FFFFFFF, + SERVREG_SERVICE_STATE_UNINIT = 0x7FFFFFFF, +}; + +struct pdr_handle *pdr_handle_alloc(void (*status)(int state, + char *service_path, + void *priv), void *priv); +struct pdr_service *pdr_add_lookup(struct pdr_handle *pdr, + const char *service_name, + const char *service_path); +int pdr_restart_pd(struct pdr_handle *pdr, struct pdr_service *pds); +void pdr_handle_release(struct pdr_handle *pdr); + +#endif diff --git a/include/linux/soc/qcom/qmi.h b/include/linux/soc/qcom/qmi.h index 5efa2b67fa55..e712f94b89fc 100644 --- a/include/linux/soc/qcom/qmi.h +++ b/include/linux/soc/qcom/qmi.h @@ -88,6 +88,7 @@ struct qmi_elem_info { #define QMI_ERR_CLIENT_IDS_EXHAUSTED_V01 5 #define QMI_ERR_INVALID_ID_V01 41 #define QMI_ERR_ENCODING_V01 58 +#define QMI_ERR_DISABLED_V01 69 #define QMI_ERR_INCOMPATIBLE_STATE_V01 90 #define QMI_ERR_NOT_SUPPORTED_V01 94 -- cgit v1.2.3 From 83473566260288c560e5443ea4cc40a458aa9e6a Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Thu, 12 Mar 2020 17:38:42 +0530 Subject: soc: qcom: apr: Add avs/audio tracking functionality Use PDR helper functions to track the protection domains that the apr services are dependent upon on SDM845 SoC, specifically the "avs/audio" service running on ADSP Q6. Reviewed-by: Bjorn Andersson Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/20200312120842.21991-4-sibis@codeaurora.org Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/apr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index c5d52e2cb275..7f0bc3cf4d61 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -85,6 +85,7 @@ struct apr_device { uint16_t domain_id; uint32_t version; char name[APR_NAME_SIZE]; + const char *service_path; spinlock_t lock; struct list_head node; }; -- cgit v1.2.3 From 6daf14140129d30207ed6a0a69851fa6a3636bda Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 20 Feb 2020 07:59:14 -0600 Subject: netfilter: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] Lastly, fix checkpatch.pl warning WARNING: __aligned(size) is preferred over __attribute__((aligned(size))) in net/bridge/netfilter/ebtables.c This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 2 +- include/linux/netfilter/x_tables.h | 8 ++++---- include/linux/netfilter_arp/arp_tables.h | 2 +- include/linux/netfilter_bridge/ebtables.h | 2 +- include/linux/netfilter_ipv4/ip_tables.h | 2 +- include/linux/netfilter_ipv6/ip6_tables.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5448c8b443db..ab192720e2d6 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -98,7 +98,7 @@ struct ip_set_counter { struct ip_set_comment_rcu { struct rcu_head rcu; - char str[0]; + char str[]; }; struct ip_set_comment { diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1b261c51b3a3..5da88451853b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -264,7 +264,7 @@ struct xt_table_info { unsigned int stacksize; void ***jumpstack; - unsigned char entries[0] __aligned(8); + unsigned char entries[] __aligned(8); }; int xt_register_target(struct xt_target *target); @@ -464,7 +464,7 @@ struct compat_xt_entry_match { } kernel; u_int16_t match_size; } u; - unsigned char data[0]; + unsigned char data[]; }; struct compat_xt_entry_target { @@ -480,7 +480,7 @@ struct compat_xt_entry_target { } kernel; u_int16_t target_size; } u; - unsigned char data[0]; + unsigned char data[]; }; /* FIXME: this works only on 32 bit tasks @@ -494,7 +494,7 @@ struct compat_xt_counters { struct compat_xt_counters_info { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t num_counters; - struct compat_xt_counters counters[0]; + struct compat_xt_counters counters[]; }; struct _compat_xt_align { diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index e98028f00e47..7d3537c40ec9 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -67,7 +67,7 @@ struct compat_arpt_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; static inline struct xt_entry_target * diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 162f59d0d17a..2f5c4e6ecd8a 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -85,7 +85,7 @@ struct ebt_table_info { /* room to maintain the stack used for jumping from and into udc */ struct ebt_chainstack **chainstack; char *entries; - struct ebt_counter counters[0] ____cacheline_aligned; + struct ebt_counter counters[] ____cacheline_aligned; }; struct ebt_table { diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index e9e1ed74cdf1..b394bd4f68a3 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -76,7 +76,7 @@ struct compat_ipt_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; /* Helper functions */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 78ab959c4575..8225f7821a29 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -43,7 +43,7 @@ struct compat_ip6t_entry { __u16 next_offset; compat_uint_t comefrom; struct compat_xt_counters counters; - unsigned char elems[0]; + unsigned char elems[]; }; static inline struct xt_entry_target * -- cgit v1.2.3 From f6554187855a4ddce235d22cc1486c118846b592 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 14 Mar 2020 10:09:53 +0000 Subject: net: mii: convert mii_lpa_to_ethtool_lpa_x() to linkmode variant Add a LPA to linkmode decoder for 1000BASE-X protocols; this decoder only provides the modify semantics similar to other such decoders. This replaces the unused mii_lpa_to_ethtool_lpa_x() helper. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mii.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 18c6208f56fc..309de4a3e6e7 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -354,24 +354,6 @@ static inline u32 mii_adv_to_ethtool_adv_x(u32 adv) return result; } -/** - * mii_lpa_to_ethtool_lpa_x - * @adv: value of the MII_LPA register - * - * A small helper function that translates MII_LPA - * bits, when in 1000Base-X mode, to ethtool - * LP advertisement settings. - */ -static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa) -{ - u32 result = 0; - - if (lpa & LPA_LPACK) - result |= ADVERTISED_Autoneg; - - return result | mii_adv_to_ethtool_adv_x(lpa); -} - /** * mii_lpa_mod_linkmode_adv_sgmii * @lp_advertising: pointer to destination link mode. @@ -535,6 +517,25 @@ static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) return lcl_adv; } +/** + * mii_lpa_mod_linkmode_x - decode the link partner's config_reg to linkmodes + * @linkmodes: link modes array + * @lpa: config_reg word from link partner + * @fd_bit: link mode for 1000XFULL bit + */ +static inline void mii_lpa_mod_linkmode_x(unsigned long *linkmodes, u16 lpa, + int fd_bit) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, linkmodes, + lpa & LPA_LPACK); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, linkmodes, + lpa & LPA_1000XPAUSE); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, linkmodes, + lpa & LPA_1000XPAUSE_ASYM); + linkmode_mod_bit(fd_bit, linkmodes, + lpa & LPA_1000XFULL); +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From a9f28eba6eaab651e4cec6d936a107b03d61754a Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 14 Mar 2020 10:09:58 +0000 Subject: net: mii: add linkmode_adv_to_mii_adv_x() Add a helper to convert a linkmode advertisement to a clause 37 advertisement value for 1000base-x and 2500base-x. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mii.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 309de4a3e6e7..219b93cad1dd 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -536,6 +536,26 @@ static inline void mii_lpa_mod_linkmode_x(unsigned long *linkmodes, u16 lpa, lpa & LPA_1000XFULL); } +/** + * linkmode_adv_to_mii_adv_x - encode a linkmode to config_reg + * @linkmodes: linkmodes + * @fd_bit: full duplex bit + */ +static inline u16 linkmode_adv_to_mii_adv_x(const unsigned long *linkmodes, + int fd_bit) +{ + u16 adv = 0; + + if (linkmode_test_bit(fd_bit, linkmodes)) + adv |= ADVERTISE_1000XFULL; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, linkmodes)) + adv |= ADVERTISE_1000XPAUSE; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, linkmodes)) + adv |= ADVERTISE_1000XPSE_ASYM; + + return adv; +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From 4f8232bbf887123f78bcdca3dfd2b3dfa52a0112 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Feb 2020 12:24:02 -0800 Subject: dma-direct: remove the cached_kernel_address hook dma-direct now finds the kernel address for coherent allocations based on the dma address, so the cached_kernel_address hooks is unused and can be removed entirely. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-noncoherent.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index ca9b5770caee..b6b72e19b0cd 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -109,6 +109,5 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size) #endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ void *uncached_kernel_address(void *addr); -void *cached_kernel_address(void *addr); #endif /* _LINUX_DMA_NONCOHERENT_H */ -- cgit v1.2.3 From fa7e2247c5729f990c7456fe09f3af99c8f2571b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Feb 2020 15:55:43 -0800 Subject: dma-direct: make uncached_kernel_address more general Rename the symbol to arch_dma_set_uncached, and pass a size to it as well as allow an error return. That will allow reusing this hook for in-place pagetable remapping. As the in-place remap doesn't always require an explicit cache flush, also detangle ARCH_HAS_DMA_PREP_COHERENT from ARCH_HAS_DMA_SET_UNCACHED. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-noncoherent.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index b6b72e19b0cd..1a4039506673 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -108,6 +108,6 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size) } #endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ -void *uncached_kernel_address(void *addr); +void *arch_dma_set_uncached(void *addr, size_t size); #endif /* _LINUX_DMA_NONCOHERENT_H */ -- cgit v1.2.3 From 999a5d1203baa7cff00586361feae263ee3f23a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Feb 2020 12:35:05 -0800 Subject: dma-direct: provide a arch_dma_clear_uncached hook This allows the arch code to reset the page tables to cached access when freeing a dma coherent allocation that was set to uncached using arch_dma_set_uncached. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-noncoherent.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 1a4039506673..b59f1b6be3e9 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -109,5 +109,6 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size) #endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ void *arch_dma_set_uncached(void *addr, size_t size); +void arch_dma_clear_uncached(void *addr, size_t size); #endif /* _LINUX_DMA_NONCOHERENT_H */ -- cgit v1.2.3 From 92fd86864ec4ac089de47f0f1f4c47418b343448 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 5 Mar 2020 13:57:14 +0530 Subject: clocksource/drivers/timer-ti-dm: Add support to get pwm current status omap_dm_timer_ops provide support to configure the pwm but there is no support to get the current status. For configuring pwm it is advised to check the current hw status instead of relying on pwm framework. So implement a new timer ops to get the current status of pwm. Signed-off-by: Lokesh Vutla Acked-by: Tony Lindgen Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200305082715.15861-6-lokeshvutla@ti.com --- include/linux/platform_data/dmtimer-omap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dmtimer-omap.h b/include/linux/platform_data/dmtimer-omap.h index bdaaf537604a..3173b7b6ff6f 100644 --- a/include/linux/platform_data/dmtimer-omap.h +++ b/include/linux/platform_data/dmtimer-omap.h @@ -36,6 +36,7 @@ struct omap_dm_timer_ops { unsigned int match); int (*set_pwm)(struct omap_dm_timer *timer, int def_on, int toggle, int trigger); + int (*get_pwm_status)(struct omap_dm_timer *timer); int (*set_prescaler)(struct omap_dm_timer *timer, int prescaler); unsigned int (*read_counter)(struct omap_dm_timer *timer); -- cgit v1.2.3 From 02e6d546e3bdc1a8a764343cd1ba354da07e8623 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 5 Mar 2020 13:57:15 +0530 Subject: clocksource/drivers/timer-ti-dm: Enable autoreload in set_pwm dm timer ops set_load() api allows to configure the load value and to set the auto reload feature. But auto reload feature is independent of load value and should be part of configuring pwm. This way pwm can be disabled by disabling auto reload feature using set_pwm() so that the current pwm cycle will be completed. Else pwm disabling causes the cycle to be stopped abruptly. Signed-off-by: Lokesh Vutla Acked-by: Tony Lindgren Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200305082715.15861-7-lokeshvutla@ti.com --- include/linux/platform_data/dmtimer-omap.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/dmtimer-omap.h b/include/linux/platform_data/dmtimer-omap.h index 3173b7b6ff6f..95d852aef130 100644 --- a/include/linux/platform_data/dmtimer-omap.h +++ b/include/linux/platform_data/dmtimer-omap.h @@ -30,12 +30,11 @@ struct omap_dm_timer_ops { int (*stop)(struct omap_dm_timer *timer); int (*set_source)(struct omap_dm_timer *timer, int source); - int (*set_load)(struct omap_dm_timer *timer, int autoreload, - unsigned int value); + int (*set_load)(struct omap_dm_timer *timer, unsigned int value); int (*set_match)(struct omap_dm_timer *timer, int enable, unsigned int match); int (*set_pwm)(struct omap_dm_timer *timer, int def_on, - int toggle, int trigger); + int toggle, int trigger, int autoreload); int (*get_pwm_status)(struct omap_dm_timer *timer); int (*set_prescaler)(struct omap_dm_timer *timer, int prescaler); -- cgit v1.2.3 From 7eac52648a4c24ad23a05f62db97867c92a5747b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 7 Feb 2020 19:11:12 -0500 Subject: SUNRPC: Add a flag to avoid reference counts on credentials Add a flag to signal to the RPC layer that the credential is already pinned for the duration of the RPC call. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index a6ef35184ef1..df696efdd675 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -132,6 +132,7 @@ struct rpc_task_setup { #define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ #define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ #define RPC_TASK_NO_RETRANS_TIMEOUT 0x4000 /* wait forever for a reply */ +#define RPC_TASK_CRED_NOREF 0x8000 /* No refcount on the credential */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) -- cgit v1.2.3 From 8d6bda7f23a9b3ef1d7e386f01924c37f18fe771 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Mar 2020 11:21:12 -0400 Subject: SUNRPC: Remove xdr_buf_read_mic() Clean up: this function is no longer used. Signed-off-by: Chuck Lever Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b41f34977995..8a6dd5bd6748 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -184,7 +184,6 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); -extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -- cgit v1.2.3 From ccbe80bad571c2f967ad42b25bbb3ef7a4a24705 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 2 Mar 2020 15:11:45 -0800 Subject: irqchip/sifive-plic: Enable/Disable external interrupts upon cpu online/offline Currently, PLIC threshold is only initialized once in the beginning. However, threshold can be set to disabled if a CPU is marked offline with CPU hotplug feature. This will not allow to change the irq affinity to a CPU that just came online. Add PLIC specific CPU hotplug callbacks and enable the threshold when a CPU comes online. Take this opportunity to move the external interrupt enable code from trap init to PLIC driver as well. On cpu offline path, the driver performs the exact opposite operations i.e. disable the interrupt and the threshold. Signed-off-by: Atish Patra Signed-off-by: Marc Zyngier Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20200302231146.15530-2-atish.patra@wdc.com --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d37c17e68268..77d70b633531 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -102,6 +102,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, + CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_MICROCODE_LOADER, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, -- cgit v1.2.3 From 469aef23aa4e49d5191050410a1422117db03e11 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 Feb 2020 07:23:23 -0600 Subject: sunrpc: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1afe38eb33f7..7f0a83451bc0 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -380,7 +380,7 @@ struct svc_deferred_req { struct cache_deferred_req handle; size_t xprt_hlen; int argslen; - __be32 args[0]; + __be32 args[]; }; struct svc_process_info { -- cgit v1.2.3 From 412055398b9e67e07347a936fc4a6adddabe9cf4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Mar 2020 14:45:53 -0500 Subject: nfsd: Fix NFSv4 READ on RDMA when using readv svcrdma expects that the payload falls precisely into the xdr_buf page vector. This does not seem to be the case for nfsd4_encode_readv(). This code is called only when fops->splice_read is missing or when RQ_SPLICE_OK is clear, so it's not a noticeable problem in many common cases. Add new transport method: ->xpo_read_payload so that when a READ payload does not fit exactly in rq_res's page vector, the XDR encoder can inform the RPC transport exactly where that payload is, without the payload's XDR pad. That way, when a Write chunk is present, the transport knows what byte range in the Reply message is supposed to be matched with the chunk. Note that the Linux NFS server implementation of NFS/RDMA can currently handle only one Write chunk per RPC-over-RDMA message. This simplifies the implementation of this fix. Fixes: b04209806384 ("nfsd4: allow exotic read compounds") Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=198053 Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 3 +++ include/linux/sunrpc/svc_rdma.h | 8 +++++++- include/linux/sunrpc/svc_xprt.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7f0a83451bc0..fd390894a584 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -517,6 +517,9 @@ void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); +int svc_encode_read_payload(struct svc_rqst *rqstp, + unsigned int offset, + unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, struct kvec *first, size_t total); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 40f65888dd38..04e4a34d1c6a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -137,6 +137,8 @@ struct svc_rdma_recv_ctxt { unsigned int rc_page_count; unsigned int rc_hdr_count; u32 rc_inv_rkey; + unsigned int rc_read_payload_offset; + unsigned int rc_read_payload_length; struct page *rc_pages[RPCSVC_MAXPAGES]; }; @@ -170,7 +172,9 @@ extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - __be32 *wr_ch, struct xdr_buf *xdr); + __be32 *wr_ch, struct xdr_buf *xdr, + unsigned int offset, + unsigned long length); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch, bool writelist, struct xdr_buf *xdr); @@ -189,6 +193,8 @@ extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt, struct xdr_buf *xdr, __be32 *wr_lst); extern int svc_rdma_sendto(struct svc_rqst *); +extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length); /* svc_rdma_transport.c */ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ea6f46be9cb7..9e1e046de176 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -21,6 +21,8 @@ struct svc_xprt_ops { int (*xpo_has_wspace)(struct svc_xprt *); int (*xpo_recvfrom)(struct svc_rqst *); int (*xpo_sendto)(struct svc_rqst *); + int (*xpo_read_payload)(struct svc_rqst *, unsigned int, + unsigned int); void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); -- cgit v1.2.3 From 96f194b715b61b11f0184c776a1283df8e152033 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Mar 2020 15:01:08 -0500 Subject: SUNRPC: Add xdr_pad_size() helper Introduce a helper function to compute the XDR pad size of a variable-length XDR object. Clean up: Replace open-coded calculation of XDR pad sizes. I'm sure I haven't found every instance of this calculation. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b41f34977995..83cd9f15c526 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -300,6 +300,21 @@ xdr_align_size(size_t n) return (n + mask) & ~mask; } +/** + * xdr_pad_size - Calculate size of an object's pad + * @n: Size of an object being XDR encoded (in bytes) + * + * This implementation avoids the need for conditional + * branches or modulo division. + * + * Return value: + * Size (in bytes) of the needed XDR pad + */ +static inline size_t xdr_pad_size(size_t n) +{ + return xdr_align_size(n) - n; +} + /** * xdr_stream_encode_u32 - Encode a 32-bit integer * @xdr: pointer to xdr_stream -- cgit v1.2.3 From e604aad2cac7357162f661e45f2f60e46faa7b17 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Mar 2020 15:01:08 -0500 Subject: svcrdma: Use struct xdr_stream to decode ingress transport headers The logic that checks incoming network headers has to be scrupulous. De-duplicate: replace open-coded buffer overflow checks with the use of xdr_stream helpers that are used most everywhere else XDR decoding is done. One minor change to the sanity checks: instead of checking the length of individual segments, cap the length of the whole chunk to be sure it can fit in the set of pages available in rq_pages. This should be a better test of whether the server can handle the chunks in each request. Signed-off-by: Chuck Lever --- include/linux/sunrpc/rpc_rdma.h | 3 ++- include/linux/sunrpc/svc_rdma.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 92d182fd8e3b..320c672d84de 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -58,7 +58,8 @@ enum { enum { rpcrdma_fixed_maxsz = 4, rpcrdma_segment_maxsz = 4, - rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz, + rpcrdma_readseg_maxsz = 1 + rpcrdma_segment_maxsz, + rpcrdma_readchunk_maxsz = 1 + rpcrdma_readseg_maxsz, }; /* diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 04e4a34d1c6a..c790dbb0dd90 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -132,6 +132,7 @@ struct svc_rdma_recv_ctxt { struct ib_sge rc_recv_sge; void *rc_recv_buf; struct xdr_buf rc_arg; + struct xdr_stream rc_stream; bool rc_temp; u32 rc_byte_len; unsigned int rc_page_count; -- cgit v1.2.3 From 2fe8c446338e083a1f3c0ccaaaa20e7d48e71ebc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Mar 2020 15:01:08 -0500 Subject: svcrdma: De-duplicate code that locates Write and Reply chunks Cache the locations of the Requester-provided Write list and Reply chunk so that the Send path doesn't need to parse the Call header again. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c790dbb0dd90..e714e4d90ac5 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -138,6 +138,8 @@ struct svc_rdma_recv_ctxt { unsigned int rc_page_count; unsigned int rc_hdr_count; u32 rc_inv_rkey; + __be32 *rc_write_list; + __be32 *rc_reply_chunk; unsigned int rc_read_payload_offset; unsigned int rc_read_payload_length; struct page *rc_pages[RPCSVC_MAXPAGES]; -- cgit v1.2.3 From 6fa5785e78d39f03d9fa33dea4dad2e7caf21e1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Mar 2020 15:02:19 -0500 Subject: svcrdma: Update synopsis of svc_rdma_send_reply_chunk() Preparing for subsequent patches, no behavior change expected. Pass the RPC Call's svc_rdma_recv_ctxt deeper into the sendto() path. This enables passing more information about Requester- provided Write and Reply chunks into the lower-level send functions. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index e714e4d90ac5..42b68126cc60 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -179,7 +179,7 @@ extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, unsigned int offset, unsigned long length); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, - __be32 *rp_ch, bool writelist, + const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr); /* svc_rdma_sendto.c */ -- cgit v1.2.3 From 4554755ed81bb690d709168550aba5b46447f069 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Mar 2020 15:02:19 -0500 Subject: svcrdma: Update synopsis of svc_rdma_map_reply_msg() Preparing for subsequent patches, no behavior change expected. Pass the RPC Call's svc_rdma_recv_ctxt deeper into the sendto() path. This enables passing more information about Requester- provided Write and Reply chunks into those lower-level functions. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 42b68126cc60..c506732886b3 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -193,8 +193,9 @@ extern void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt, unsigned int len); extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt, - struct xdr_buf *xdr, __be32 *wr_lst); + struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_recv_ctxt *rctxt, + struct xdr_buf *xdr); extern int svc_rdma_sendto(struct svc_rqst *); extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); -- cgit v1.2.3 From 5c266df52701635edfd49415b225fb17ceac5183 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Mar 2020 15:02:20 -0500 Subject: SUNRPC: Add encoders for list item discriminators Clean up. These are taken from the client-side RPC/RDMA transport to a more global header file so they can be used elsewhere. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 83cd9f15c526..9c8b73b509a1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -315,6 +315,44 @@ static inline size_t xdr_pad_size(size_t n) return xdr_align_size(n) - n; } +/** + * xdr_stream_encode_item_present - Encode a "present" list item + * @xdr: pointer to xdr_stream + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) +{ + const size_t len = sizeof(__be32); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = xdr_one; + return len; +} + +/** + * xdr_stream_encode_item_absent - Encode a "not present" list item + * @xdr: pointer to xdr_stream + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) +{ + const size_t len = sizeof(__be32); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = xdr_zero; + return len; +} + /** * xdr_stream_encode_u32 - Encode a 32-bit integer * @xdr: pointer to xdr_stream -- cgit v1.2.3 From 6fd5034db45c9c0ca57c98f3d5b9a0ce5869eab3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Mar 2020 15:02:20 -0500 Subject: svcrdma: Refactor chunk list encoders Same idea as the receive-side changes I did a while back: use xdr_stream helpers rather than open-coding the XDR chunk list encoders. This builds the Reply transport header from beginning to end without backtracking. As additional clean-ups, fill in documenting comments for the XDR encoders and sprinkle some trace points in the new encoding functions. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c506732886b3..d001aac13c2f 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -149,6 +149,8 @@ struct svc_rdma_send_ctxt { struct list_head sc_list; struct ib_send_wr sc_send_wr; struct ib_cqe sc_cqe; + struct xdr_buf sc_hdrbuf; + struct xdr_stream sc_stream; void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; -- cgit v1.2.3 From aee4b74a3f273b54d136132fedf575ec464f4134 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Mar 2020 11:08:05 -0500 Subject: svcrdma: Fix double sync of transport header buffer Performance optimization: Avoid syncing the transport buffer twice when Reply buffer pull-up is necessary. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d001aac13c2f..a3fa5b4fa2e4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -191,9 +191,6 @@ extern struct svc_rdma_send_ctxt * extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr); -extern void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt, - unsigned int len); extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_recv_ctxt *rctxt, -- cgit v1.2.3 From 0dabe948f28274e7956a625a24f205016b810693 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Mar 2020 13:28:14 -0500 Subject: svcrdma: Avoid DMA mapping small RPC Replies On some platforms, DMA mapping part of a page is more costly than copying bytes. Indeed, not involving the I/O MMU can help the RPC/RDMA transport scale better for tiny I/Os across more RDMA devices. This is because interaction with the I/O MMU is eliminated for each of these small I/Os. Without the explicit unmapping, the NIC no longer needs to do a costly internal TLB shoot down for buffers that are just a handful of bytes. Since pull-up is now a more a frequent operation, I've introduced a trace point in the pull-up path. It can be used for debugging or user-space tools that count pull-up frequency. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index a3fa5b4fa2e4..78fe2ac6dc6c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -52,6 +52,7 @@ /* Default and maximum inline threshold sizes */ enum { + RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1, RPCRDMA_DEF_INLINE_THRESH = 4096, RPCRDMA_MAX_INLINE_THRESH = 65536 }; -- cgit v1.2.3 From 9e55eef4ab1bf1810443bb3989a07a68e1f5d084 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Mar 2020 15:19:54 -0500 Subject: SUNRPC: Refactor xs_sendpages() Re-locate xs_sendpages() so that it can be shared with server code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9c8b73b509a1..8529d6e33137 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -188,20 +188,6 @@ extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int) extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -/* - * Helper structure for copying from an sk_buff. - */ -struct xdr_skb_reader { - struct sk_buff *skb; - unsigned int offset; - size_t count; - __wsum csum; -}; - -typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); - -extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); - extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); -- cgit v1.2.3 From 65286b883c6de6b30928c837c47c167e82bde0b2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Mar 2020 18:21:42 -0500 Subject: nfsd: export upcalls must not return ESTALE when mountd is down If the rpc.mountd daemon goes down, then that should not cause all exports to start failing with ESTALE errors. Let's explicitly distinguish between the cache upcall cases that need to time out, and those that do not. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- include/linux/sunrpc/cache.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 0f64de7caa39..656882a50991 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -179,6 +179,9 @@ sunrpc_cache_update(struct cache_detail *detail, extern int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); +extern int +sunrpc_cache_pipe_upcall_timeout(struct cache_detail *detail, + struct cache_head *h); extern void cache_clean_deferred(void *owner); -- cgit v1.2.3 From 277f27e2f27752cd1a7901443d72e908ddea8a2e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Mar 2020 18:21:43 -0500 Subject: SUNRPC/cache: Allow garbage collection of invalid cache entries If the cache entry never gets initialised, we want the garbage collector to be able to evict it. Otherwise if the upcall daemon fails to initialise the entry, we end up never expiring it. Signed-off-by: Trond Myklebust [ cel: resolved a merge conflict ] Signed-off-by: Chuck Lever --- include/linux/sunrpc/cache.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 656882a50991..532cdbda43da 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -209,9 +209,6 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - if (!test_bit(CACHE_VALID, &h->flags)) - return false; - return (h->expiry_time < seconds_since_boot()) || (detail->flush_time >= h->last_refresh); } -- cgit v1.2.3 From 81924dae51941018afdaf25638da804be4807ce5 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 13 Mar 2020 19:42:35 +0000 Subject: mtd: spi-nor: Emphasise which is the generic set_4byte_addr_mode() method Rename (*set_4byte)() to (*set_4byte_addr_mode)() for a better differentiation between the 4 byte address mode and opcodes. Rename macronix_set_4byte() to spi_nor_set_4byte_addr_mode(), it will be the only 4 byte address mode method exposed to the manufacturer drivers. Here's how the manufacturers enter and exit the 4 byte address mode: - eon, gidadevice, issi, macronix, xmc use EN4B/EX4B - micron-st needs WEN. st_micron_set_4byte_addr_mode() will become a private method, as they are the only ones that need WEN before the EN4B/EX4B commands. - newer spansion have a 4BAM opcode (this translates to a new, public command). Older spansion flashes use the BRWR command (legacy in core.c -> spansion_set_4byte_addr_mode()) - winbond's method is hackish and may be reason for just a flash fixup hook -> private method Signed-off-by: Tudor Ambarus Reviewed-by: Vignesh Raghavendra --- include/linux/mtd/spi-nor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index de90724f62f1..2b9717b0cd62 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -520,7 +520,7 @@ struct spi_nor_locking_ops { * @erase_map: the erase map parsed from the SFDP Sector Map Parameter * Table. * @quad_enable: enables SPI NOR quad mode. - * @set_4byte: puts the SPI NOR in 4 byte addressing mode. + * @set_4byte_addr_mode: puts the SPI NOR in 4 byte addressing mode. * @convert_addr: converts an absolute address into something the flash * will understand. Particularly useful when pagesize is * not a power-of-2. @@ -541,7 +541,7 @@ struct spi_nor_flash_parameter { struct spi_nor_erase_map erase_map; int (*quad_enable)(struct spi_nor *nor); - int (*set_4byte)(struct spi_nor *nor, bool enable); + int (*set_4byte_addr_mode)(struct spi_nor *nor, bool enable); u32 (*convert_addr)(struct spi_nor *nor, u32 addr); int (*setup)(struct spi_nor *nor, const struct spi_nor_hwcaps *hwcaps); -- cgit v1.2.3 From 414de7abbf809f046511269797d9f2310b88e036 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:20 -0800 Subject: KVM: Drop kvm_arch_create_memslot() Remove kvm_arch_create_memslot() now that all arch implementations are effectively nops. Removing kvm_arch_create_memslot() eliminates the possibility for arch specific code to allocate memory prior to setting a memslot, which sets the stage for simplifying kvm_free_memslot(). Cc: Janosch Frank Acked-by: Christian Borntraeger Reviewed-by: Peter Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7944ad6ac10b..8f47f6b48444 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -671,8 +671,6 @@ int __kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, - unsigned long npages); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, -- cgit v1.2.3 From 9d4c197c0e94c372ceffd2ffc53a23518f301ed9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:24 -0800 Subject: KVM: Drop "const" attribute from old memslot in commit_memory_region() Drop the "const" attribute from @old in kvm_arch_commit_memory_region() to allow arch specific code to free arch specific resources in the old memslot without having to cast away the attribute. Freeing resources in kvm_arch_commit_memory_region() paves the way for simplifying kvm_free_memslot() by eliminating the last usage of its @dont param. Reviewed-by: Peter Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8f47f6b48444..7827156ec1c9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -678,7 +678,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, + struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change); bool kvm_largepages_enabled(void); -- cgit v1.2.3 From e96c81ee89d80e1a0fe50a0e9be40c1b77e14aaa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:27 -0800 Subject: KVM: Simplify kvm_free_memslot() and all its descendents Now that all callers of kvm_free_memslot() pass NULL for @dont, remove the param from the top-level routine and all arch's implementations. No functional change intended. Tested-by: Christoffer Dall Reviewed-by: Peter Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7827156ec1c9..5404ef8be291 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -669,8 +669,7 @@ int kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont); +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, -- cgit v1.2.3 From 0dff084607bd555d6f74db2af8406a9da9f0fc3a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:29 -0800 Subject: KVM: Provide common implementation for generic dirty log functions Move the implementations of KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG for CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT into common KVM code. The arch specific implemenations are extremely similar, differing only in whether the dirty log needs to be sync'd from hardware (x86) and how the TLBs are flushed. Add new arch hooks to handle sync and TLB flush; the sync will also be used for non-generic dirty log support in a future patch (s390). The ulterior motive for providing a common implementation is to eliminate the dependency between arch and common code with respect to the memslot referenced by the dirty log, i.e. to make it obvious in the code that the validity of the memslot is guaranteed, as a future patch will rework memslot handling such that id_to_memslot() can return NULL. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5404ef8be291..35e6975d0a82 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -816,23 +816,20 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); -int kvm_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log, int *is_dirty); - -int kvm_get_dirty_log_protect(struct kvm *kvm, - struct kvm_dirty_log *log, bool *flush); -int kvm_clear_dirty_log_protect(struct kvm *kvm, - struct kvm_clear_dirty_log *log, bool *flush); - void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); - -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log); -int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, - struct kvm_clear_dirty_log *log); +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); + +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot); +#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); +int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, + int *is_dirty); +#endif int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status); -- cgit v1.2.3 From 2a49f61dfcdc25ec06b41f7466ccb94a7a9d2624 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:30 -0800 Subject: KVM: Ensure validity of memslot with respect to kvm_get_dirty_log() Rework kvm_get_dirty_log() so that it "returns" the associated memslot on success. A future patch will rework memslot handling such that id_to_memslot() can return NULL, returning the memslot makes it more obvious that the validity of the memslot has been verified, i.e. precludes the need to add validity checks in the arch code that are technically unnecessary. To maintain ordering in s390, move the call to kvm_arch_sync_dirty_log() from s390's kvm_vm_ioctl_get_dirty_log() to the new kvm_get_dirty_log(). This is a nop for PPC, the only other arch that doesn't select KVM_GENERIC_DIRTYLOG_READ_PROTECT, as its sync_dirty_log() is empty. Ideally, moving the sync_dirty_log() call would be done in a separate patch, but it can't be done in a follow-on patch because that would temporarily break s390's ordering. Making the move in a preparatory patch would be functionally correct, but would create an odd scenario where the moved sync_dirty_log() would operate on a "different" memslot due to consuming the result of a different id_to_memslot(). The memslot couldn't actually be different as slots_lock is held, but the code is confusing enough as it is, i.e. moving sync_dirty_log() in this patch is the lesser of all evils. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 35e6975d0a82..63ce6b21b107 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -828,7 +828,7 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, #else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, - int *is_dirty); + int *is_dirty, struct kvm_memory_slot **memslot); #endif int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, -- cgit v1.2.3 From 0577d1abe704c315bb5cdfc71f4ca7b9b5358f59 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:31 -0800 Subject: KVM: Terminate memslot walks via used_slots Refactor memslot handling to treat the number of used slots as the de facto size of the memslot array, e.g. return NULL from id_to_memslot() when an invalid index is provided instead of relying on npages==0 to detect an invalid memslot. Rework the sorting and walking of memslots in advance of dynamically sizing memslots to aid bisection and debug, e.g. with luck, a bug in the refactoring will bisect here and/or hit a WARN instead of randomly corrupting memory. Alternatively, a global null/invalid memslot could be returned, i.e. so callers of id_to_memslot() don't have to explicitly check for a NULL memslot, but that approach runs the risk of introducing difficult-to- debug issues, e.g. if the global null slot is modified. Constifying the return from id_to_memslot() to combat such issues is possible, but would require a massive refactoring of arch specific code and would still be susceptible to casting shenanigans. Add function comments to update_memslots() and search_memslots() to explicitly (and loudly) state how memslots are sorted. Opportunistically stuff @hva with a non-canonical value when deleting a private memslot on x86 to detect bogus usage of the freed slot. No functional change intended. Tested-by: Christoffer Dall Tested-by: Marc Zyngier Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 63ce6b21b107..20763598b13b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -572,10 +572,11 @@ static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) return vcpu->vcpu_idx; } -#define kvm_for_each_memslot(memslot, slots) \ - for (memslot = &slots->memslots[0]; \ - memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ - memslot++) +#define kvm_for_each_memslot(memslot, slots) \ + for (memslot = &slots->memslots[0]; \ + memslot < slots->memslots + slots->used_slots; memslot++) \ + if (WARN_ON_ONCE(!memslot->npages)) { \ + } else void kvm_vcpu_destroy(struct kvm_vcpu *vcpu); @@ -635,12 +636,15 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) return __kvm_memslots(vcpu->kvm, as_id); } -static inline struct kvm_memory_slot * -id_to_memslot(struct kvm_memslots *slots, int id) +static inline +struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id) { int index = slots->id_to_index[id]; struct kvm_memory_slot *slot; + if (index < 0) + return NULL; + slot = &slots->memslots[index]; WARN_ON(slot->id != id); @@ -1012,6 +1016,8 @@ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c. * gfn_to_memslot() itself isn't here as an inline because that would * bloat other code too much. + * + * IMPORTANT: Slots are sorted from highest GFN to lowest GFN! */ static inline struct kvm_memory_slot * search_memslots(struct kvm_memslots *slots, gfn_t gfn) -- cgit v1.2.3 From 36947254e5f981aeeedab1c7dfa35fc34d330e80 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 18 Feb 2020 13:07:32 -0800 Subject: KVM: Dynamically size memslot array based on number of used slots Now that the memslot logic doesn't assume memslots are always non-NULL, dynamically size the array of memslots instead of unconditionally allocating memory for the maximum number of memslots. Note, because a to-be-deleted memslot must first be invalidated, the array size cannot be immediately reduced when deleting a memslot. However, consecutive deletions will realize the memory savings, i.e. a second deletion will trim the entry. Tested-by: Christoffer Dall Tested-by: Marc Zyngier Reviewed-by: Peter Xu Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 20763598b13b..4bd5251b4477 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -431,11 +431,11 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) */ struct kvm_memslots { u64 generation; - struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM]; /* The mapping table from slot id to the index in memslots[]. */ short id_to_index[KVM_MEM_SLOTS_NUM]; atomic_t lru_slot; int used_slots; + struct kvm_memory_slot memslots[]; }; struct kvm { -- cgit v1.2.3 From 3c9bd4006bfc2dccda1823db61b3f470ef91cfaa Mon Sep 17 00:00:00 2001 From: Jay Zhou Date: Thu, 27 Feb 2020 09:32:27 +0800 Subject: KVM: x86: enable dirty log gradually in small chunks It could take kvm->mmu_lock for an extended period of time when enabling dirty log for the first time. The main cost is to clear all the D-bits of last level SPTEs. This situation can benefit from manual dirty log protect as well, which can reduce the mmu_lock time taken. The sequence is like this: 1. Initialize all the bits of the dirty bitmap to 1 when enabling dirty log for the first time 2. Only write protect the huge pages 3. KVM_GET_DIRTY_LOG returns the dirty bitmap info 4. KVM_CLEAR_DIRTY_LOG will clear D-bit for each of the leaf level SPTEs gradually in small chunks Under the Intel(R) Xeon(R) Gold 6152 CPU @ 2.10GHz environment, I did some tests with a 128G windows VM and counted the time taken of memory_global_dirty_log_start, here is the numbers: VM Size Before After optimization 128G 460ms 10ms Signed-off-by: Jay Zhou Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4bd5251b4477..127cb086ba32 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -360,6 +360,10 @@ static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *mem return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap); } +#ifndef KVM_DIRTY_LOG_MANUAL_CAPS +#define KVM_DIRTY_LOG_MANUAL_CAPS KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE +#endif + struct kvm_s390_adapter_int { u64 ind_addr; u64 summary_addr; @@ -493,7 +497,7 @@ struct kvm { #endif long tlbs_dirty; struct list_head devices; - bool manual_dirty_log_protect; + u64 manual_dirty_log_protect; struct dentry *debugfs_dentry; struct kvm_stat_data **debugfs_stat_data; struct srcu_struct srcu; @@ -527,6 +531,11 @@ struct kvm { #define vcpu_err(vcpu, fmt, ...) \ kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) +static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm) +{ + return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET); +} + static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, -- cgit v1.2.3 From 2bde08f9f5f13ef2674674a2e3d7420abd08be33 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Mar 2020 12:51:52 -0500 Subject: KVM: Drop gfn_to_pfn_atomic() It's never used anywhere now. Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 127cb086ba32..0ed394162b68 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -714,7 +714,6 @@ void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); -kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); -- cgit v1.2.3 From 600087b6146764999949b4a12ce5f7627602c33a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 2 Mar 2020 15:57:05 -0800 Subject: KVM: Drop largepages_enabled and its accessor/mutator Drop largepages_enabled, kvm_largepages_enabled() and kvm_disable_largepages() now that all users are gone. Note, largepages_enabled was an x86-only flag that got left in common KVM code when KVM gained support for multiple architectures. No functional change intended. Reviewed-by: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0ed394162b68..35bc52e187a2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -693,8 +693,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change); -bool kvm_largepages_enabled(void); -void kvm_disable_largepages(void); /* flush all memory translations */ void kvm_arch_flush_shadow_all(struct kvm *kvm); /* flush memory translations pointing to 'slot' */ -- cgit v1.2.3 From a46a22955bae16fc5a756af7188d3ccb25c3f797 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 12 Mar 2020 13:03:15 -0700 Subject: kernfs: Add removed_size out param for simple_xattr_set This helps set up size accounting in the next commit. Without this out param, it's difficult to find out the removed xattr size without taking a lock for longer and walking the xattr linked list twice. Signed-off-by: Daniel Xu Acked-by: Chris Down Reviewed-by: Greg Kroah-Hartman Signed-off-by: Tejun Heo --- include/linux/xattr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 6dad031be3c2..4cf6e11f4a3c 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -102,7 +102,8 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, - const void *value, size_t size, int flags); + const void *value, size_t size, int flags, + ssize_t *removed_size); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); void simple_xattr_list_add(struct simple_xattrs *xattrs, -- cgit v1.2.3 From 0c47383ba3bd10877956e41149d19644fba937d1 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 12 Mar 2020 13:03:16 -0700 Subject: kernfs: Add option to enable user xattrs User extended attributes are useful as metadata storage for kernfs consumers like cgroups. Especially in the case of cgroups, it is useful to have a central metadata store that multiple processes/services can use to coordinate actions. A concrete example is for userspace out of memory killers. We want to let delegated cgroup subtree owners (running as non-root) to be able to say "please avoid killing this cgroup". This is especially important for desktop linux as delegated subtrees owners are less likely to run as root. This patch introduces a new flag, KERNFS_ROOT_SUPPORT_USER_XATTR, that lets kernfs consumers enable user xattr support. An initial limit of 128 entries or 128KB -- whichever is hit first -- is placed per cgroup because xattrs come from kernel memory and we don't want to let unprivileged users accidentally eat up too much kernel memory. Signed-off-by: Daniel Xu Acked-by: Chris Down Reviewed-by: Greg Kroah-Hartman Signed-off-by: Tejun Heo --- include/linux/kernfs.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index dded2e5a9f42..89f6a4214a70 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -37,8 +37,10 @@ enum kernfs_node_type { KERNFS_LINK = 0x0004, }; -#define KERNFS_TYPE_MASK 0x000f -#define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK +#define KERNFS_TYPE_MASK 0x000f +#define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK +#define KERNFS_MAX_USER_XATTRS 128 +#define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10) enum kernfs_node_flag { KERNFS_ACTIVATED = 0x0010, @@ -78,6 +80,11 @@ enum kernfs_root_flag { * fhandle to access nodes of the fs. */ KERNFS_ROOT_SUPPORT_EXPORTOP = 0x0004, + + /* + * Support user xattrs to be written to nodes rooted at this root. + */ + KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, }; /* type-specific structures for kernfs_node union members */ -- cgit v1.2.3 From 19f747f7370fcf4ced4988ed795ccd4a28f2b530 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 13 Mar 2020 13:30:58 -0700 Subject: scsi: linux/unaligned/byteshift.h: Remove superfluous casts The C language supports implicitly casting a void pointer into a non-void pointer. Remove explicit void pointer to non-void pointer casts because these are superfluous. Link: https://lore.kernel.org/r/20200313203102.16613-2-bvanassche@acm.org Cc: Harvey Harrison Cc: Ingo Molnar Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Andrew Morton Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/linux/unaligned/be_byteshift.h | 6 +++--- include/linux/unaligned/le_byteshift.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h index 8bdb8fa01bd4..c43ff5918c8a 100644 --- a/include/linux/unaligned/be_byteshift.h +++ b/include/linux/unaligned/be_byteshift.h @@ -40,17 +40,17 @@ static inline void __put_unaligned_be64(u64 val, u8 *p) static inline u16 get_unaligned_be16(const void *p) { - return __get_unaligned_be16((const u8 *)p); + return __get_unaligned_be16(p); } static inline u32 get_unaligned_be32(const void *p) { - return __get_unaligned_be32((const u8 *)p); + return __get_unaligned_be32(p); } static inline u64 get_unaligned_be64(const void *p) { - return __get_unaligned_be64((const u8 *)p); + return __get_unaligned_be64(p); } static inline void put_unaligned_be16(u16 val, void *p) diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h index 1628b75866f0..2248dcb0df76 100644 --- a/include/linux/unaligned/le_byteshift.h +++ b/include/linux/unaligned/le_byteshift.h @@ -40,17 +40,17 @@ static inline void __put_unaligned_le64(u64 val, u8 *p) static inline u16 get_unaligned_le16(const void *p) { - return __get_unaligned_le16((const u8 *)p); + return __get_unaligned_le16(p); } static inline u32 get_unaligned_le32(const void *p) { - return __get_unaligned_le32((const u8 *)p); + return __get_unaligned_le32(p); } static inline u64 get_unaligned_le64(const void *p) { - return __get_unaligned_le64((const u8 *)p); + return __get_unaligned_le64(p); } static inline void put_unaligned_le16(u16 val, void *p) -- cgit v1.2.3 From a7afff31d56db22647251d76d6af030cd47bd97e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 13 Mar 2020 13:31:00 -0700 Subject: scsi: treewide: Consolidate {get,put}_unaligned_[bl]e24() definitions Move the get_unaligned_be24(), get_unaligned_le24() and put_unaligned_le24() definitions from various drivers into include/linux/unaligned/generic.h. Add a put_unaligned_be24() implementation. Link: https://lore.kernel.org/r/20200313203102.16613-4-bvanassche@acm.org Cc: Keith Busch Cc: Sagi Grimberg Cc: Jens Axboe Cc: Harvey Harrison Cc: Martin K. Petersen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Andrew Morton Reviewed-by: Christoph Hellwig Reviewed-by: Andy Shevchenko Reviewed-by: Greg Kroah-Hartman # For drivers/usb Reviewed-by: Felipe Balbi # For drivers/usb/gadget Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/linux/unaligned/generic.h | 46 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unaligned/generic.h b/include/linux/unaligned/generic.h index 57d3114656e5..303289492859 100644 --- a/include/linux/unaligned/generic.h +++ b/include/linux/unaligned/generic.h @@ -2,6 +2,8 @@ #ifndef _LINUX_UNALIGNED_GENERIC_H #define _LINUX_UNALIGNED_GENERIC_H +#include + /* * Cause a link-time error if we try an unaligned access other than * 1,2,4 or 8 bytes long @@ -66,4 +68,48 @@ extern void __bad_unaligned_access_size(void); } \ (void)0; }) +static inline u32 __get_unaligned_be24(const u8 *p) +{ + return p[0] << 16 | p[1] << 8 | p[2]; +} + +static inline u32 get_unaligned_be24(const void *p) +{ + return __get_unaligned_be24(p); +} + +static inline u32 __get_unaligned_le24(const u8 *p) +{ + return p[0] | p[1] << 8 | p[2] << 16; +} + +static inline u32 get_unaligned_le24(const void *p) +{ + return __get_unaligned_le24(p); +} + +static inline void __put_unaligned_be24(const u32 val, u8 *p) +{ + *p++ = val >> 16; + *p++ = val >> 8; + *p++ = val; +} + +static inline void put_unaligned_be24(const u32 val, void *p) +{ + __put_unaligned_be24(val, p); +} + +static inline void __put_unaligned_le24(const u32 val, u8 *p) +{ + *p++ = val; + *p++ = val >> 8; + *p++ = val >> 16; +} + +static inline void put_unaligned_le24(const u32 val, void *p) +{ + __put_unaligned_le24(val, p); +} + #endif /* _LINUX_UNALIGNED_GENERIC_H */ -- cgit v1.2.3 From 9ec4bbcb2044ea1f380c9feceb10654dd5a35a95 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 13 Mar 2020 19:42:39 +0000 Subject: mtd: spi-nor: Add the concept of SPI NOR manufacturer driver Declare a spi_nor_manufacturer struct and add basic building blocks to move manufacturer specific code outside of the core. Signed-off-by: Boris Brezillon Signed-off-by: Tudor Ambarus Reviewed-by: Vignesh Raghavendra --- include/linux/mtd/spi-nor.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 2b9717b0cd62..bf37bfc68797 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -554,6 +554,12 @@ struct spi_nor_flash_parameter { */ struct flash_info; +/** + * struct spi_nor_manufacturer - Forward declaration of a structure used + * internally by the core and manufacturer drivers. + */ +struct spi_nor_manufacturer; + /** * struct spi_nor - Structure for defining a the SPI NOR layer * @mtd: point to a mtd_info structure @@ -564,6 +570,7 @@ struct flash_info; * layer is not DMA-able * @bouncebuf_size: size of the bounce buffer * @info: spi-nor part JDEC MFR id and other info + * @manufacturer: spi-nor manufacturer * @page_size: the page size of the SPI NOR * @addr_width: number of address bytes * @erase_opcode: the opcode for erasing a sector @@ -591,6 +598,7 @@ struct spi_nor { u8 *bouncebuf; size_t bouncebuf_size; const struct flash_info *info; + const struct spi_nor_manufacturer *manufacturer; u32 page_size; u8 addr_width; u8 erase_opcode; -- cgit v1.2.3 From d3c4bb31bf627ede607d7b1827e6be43f1b26be7 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 13 Mar 2020 19:42:52 +0000 Subject: mtd: spi-nor: Drop the MFR definitions Cross manufacturer code is unlikely and discouraged, get rid of the MFR definitions. Suggested-by: Vignesh Raghavendra Signed-off-by: Tudor Ambarus Reviewed-by: Boris Brezillon --- include/linux/mtd/spi-nor.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index bf37bfc68797..2f7725525460 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -11,23 +11,6 @@ #include #include -/* - * Manufacturer IDs - * - * The first byte returned from the flash after sending opcode SPINOR_OP_RDID. - * Sometimes these are the same as CFI IDs, but sometimes they aren't. - */ -#define SNOR_MFR_ATMEL CFI_MFR_ATMEL -#define SNOR_MFR_GIGADEVICE 0xc8 -#define SNOR_MFR_INTEL CFI_MFR_INTEL -#define SNOR_MFR_ST CFI_MFR_ST /* ST Micro */ -#define SNOR_MFR_MICRON CFI_MFR_MICRON /* Micron */ -#define SNOR_MFR_ISSI CFI_MFR_PMC -#define SNOR_MFR_MACRONIX CFI_MFR_MACRONIX -#define SNOR_MFR_SPANSION CFI_MFR_AMD -#define SNOR_MFR_SST CFI_MFR_SST -#define SNOR_MFR_WINBOND 0xef /* Also used by some Spansion */ - /* * Note on opcode nomenclature: some opcodes have a format like * SPINOR_OP_FUNCTION{4,}_x_y_z. The numbers x, y, and z stand for the number -- cgit v1.2.3 From 829ec6408dc58dbf27522bbd57d0a85b0a3d1a0e Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 13 Mar 2020 19:42:53 +0000 Subject: mtd: spi-nor: Trim what is exposed in spi-nor.h The SPI NOR controllers drivers must not be able to use structures that are meant just for the SPI NOR core. struct spi_nor_flash_parameter is filled at run-time with info gathered from flash_info, manufacturer and sfdp data. struct spi_nor_flash_parameter should be opaque to the SPI NOR controller drivers, make sure it is. spi_nor_option_flags, spi_nor_read_command, spi_nor_pp_command, spi_nor_read_command_index and spi_nor_pp_command_index are defined for the core use, make sure they are opaque to the SPI NOR controller drivers. Signed-off-by: Tudor Ambarus Reviewed-by: Boris Brezillon Reviewed-by: Vignesh Raghavendra --- include/linux/mtd/spi-nor.h | 260 +------------------------------------------- 1 file changed, 6 insertions(+), 254 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 2f7725525460..e656858b50a5 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -210,110 +210,6 @@ static inline u8 spi_nor_get_protocol_width(enum spi_nor_protocol proto) return spi_nor_get_protocol_data_nbits(proto); } -enum spi_nor_option_flags { - SNOR_F_USE_FSR = BIT(0), - SNOR_F_HAS_SR_TB = BIT(1), - SNOR_F_NO_OP_CHIP_ERASE = BIT(2), - SNOR_F_READY_XSR_RDY = BIT(3), - SNOR_F_USE_CLSR = BIT(4), - SNOR_F_BROKEN_RESET = BIT(5), - SNOR_F_4B_OPCODES = BIT(6), - SNOR_F_HAS_4BAIT = BIT(7), - SNOR_F_HAS_LOCK = BIT(8), - SNOR_F_HAS_16BIT_SR = BIT(9), - SNOR_F_NO_READ_CR = BIT(10), - SNOR_F_HAS_SR_TB_BIT6 = BIT(11), - -}; - -/** - * struct spi_nor_erase_type - Structure to describe a SPI NOR erase type - * @size: the size of the sector/block erased by the erase type. - * JEDEC JESD216B imposes erase sizes to be a power of 2. - * @size_shift: @size is a power of 2, the shift is stored in - * @size_shift. - * @size_mask: the size mask based on @size_shift. - * @opcode: the SPI command op code to erase the sector/block. - * @idx: Erase Type index as sorted in the Basic Flash Parameter - * Table. It will be used to synchronize the supported - * Erase Types with the ones identified in the SFDP - * optional tables. - */ -struct spi_nor_erase_type { - u32 size; - u32 size_shift; - u32 size_mask; - u8 opcode; - u8 idx; -}; - -/** - * struct spi_nor_erase_command - Used for non-uniform erases - * The structure is used to describe a list of erase commands to be executed - * once we validate that the erase can be performed. The elements in the list - * are run-length encoded. - * @list: for inclusion into the list of erase commands. - * @count: how many times the same erase command should be - * consecutively used. - * @size: the size of the sector/block erased by the command. - * @opcode: the SPI command op code to erase the sector/block. - */ -struct spi_nor_erase_command { - struct list_head list; - u32 count; - u32 size; - u8 opcode; -}; - -/** - * struct spi_nor_erase_region - Structure to describe a SPI NOR erase region - * @offset: the offset in the data array of erase region start. - * LSB bits are used as a bitmask encoding flags to - * determine if this region is overlaid, if this region is - * the last in the SPI NOR flash memory and to indicate - * all the supported erase commands inside this region. - * The erase types are sorted in ascending order with the - * smallest Erase Type size being at BIT(0). - * @size: the size of the region in bytes. - */ -struct spi_nor_erase_region { - u64 offset; - u64 size; -}; - -#define SNOR_ERASE_TYPE_MAX 4 -#define SNOR_ERASE_TYPE_MASK GENMASK_ULL(SNOR_ERASE_TYPE_MAX - 1, 0) - -#define SNOR_LAST_REGION BIT(4) -#define SNOR_OVERLAID_REGION BIT(5) - -#define SNOR_ERASE_FLAGS_MAX 6 -#define SNOR_ERASE_FLAGS_MASK GENMASK_ULL(SNOR_ERASE_FLAGS_MAX - 1, 0) - -/** - * struct spi_nor_erase_map - Structure to describe the SPI NOR erase map - * @regions: array of erase regions. The regions are consecutive in - * address space. Walking through the regions is done - * incrementally. - * @uniform_region: a pre-allocated erase region for SPI NOR with a uniform - * sector size (legacy implementation). - * @erase_type: an array of erase types shared by all the regions. - * The erase types are sorted in ascending order, with the - * smallest Erase Type size being the first member in the - * erase_type array. - * @uniform_erase_type: bitmask encoding erase types that can erase the - * entire memory. This member is completed at init by - * uniform and non-uniform SPI NOR flash memories if they - * support at least one erase type that can erase the - * entire memory. - */ -struct spi_nor_erase_map { - struct spi_nor_erase_region *regions; - struct spi_nor_erase_region uniform_region; - struct spi_nor_erase_type erase_type[SNOR_ERASE_TYPE_MAX]; - u8 uniform_erase_type; -}; - /** * struct spi_nor_hwcaps - Structure for describing the hardware capabilies * supported by the SPI controller (bus master). @@ -389,61 +285,7 @@ struct spi_nor_hwcaps { #define SNOR_HWCAPS_ALL (SNOR_HWCAPS_READ_MASK | \ SNOR_HWCAPS_PP_MASK) -struct spi_nor_read_command { - u8 num_mode_clocks; - u8 num_wait_states; - u8 opcode; - enum spi_nor_protocol proto; -}; - -struct spi_nor_pp_command { - u8 opcode; - enum spi_nor_protocol proto; -}; - -enum spi_nor_read_command_index { - SNOR_CMD_READ, - SNOR_CMD_READ_FAST, - SNOR_CMD_READ_1_1_1_DTR, - - /* Dual SPI */ - SNOR_CMD_READ_1_1_2, - SNOR_CMD_READ_1_2_2, - SNOR_CMD_READ_2_2_2, - SNOR_CMD_READ_1_2_2_DTR, - - /* Quad SPI */ - SNOR_CMD_READ_1_1_4, - SNOR_CMD_READ_1_4_4, - SNOR_CMD_READ_4_4_4, - SNOR_CMD_READ_1_4_4_DTR, - - /* Octal SPI */ - SNOR_CMD_READ_1_1_8, - SNOR_CMD_READ_1_8_8, - SNOR_CMD_READ_8_8_8, - SNOR_CMD_READ_1_8_8_DTR, - - SNOR_CMD_READ_MAX -}; - -enum spi_nor_pp_command_index { - SNOR_CMD_PP, - - /* Quad SPI */ - SNOR_CMD_PP_1_1_4, - SNOR_CMD_PP_1_4_4, - SNOR_CMD_PP_4_4_4, - - /* Octal SPI */ - SNOR_CMD_PP_1_1_8, - SNOR_CMD_PP_1_8_8, - SNOR_CMD_PP_8_8_8, - - SNOR_CMD_PP_MAX -}; - -/* Forward declaration that will be used in 'struct spi_nor_flash_parameter' */ +/* Forward declaration that is used in 'struct spi_nor_controller_ops' */ struct spi_nor; /** @@ -474,74 +316,13 @@ struct spi_nor_controller_ops { int (*erase)(struct spi_nor *nor, loff_t offs); }; -/** - * struct spi_nor_locking_ops - SPI NOR locking methods - * @lock: lock a region of the SPI NOR. - * @unlock: unlock a region of the SPI NOR. - * @is_locked: check if a region of the SPI NOR is completely locked - */ -struct spi_nor_locking_ops { - int (*lock)(struct spi_nor *nor, loff_t ofs, uint64_t len); - int (*unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len); - int (*is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len); -}; - -/** - * struct spi_nor_flash_parameter - SPI NOR flash parameters and settings. - * Includes legacy flash parameters and settings that can be overwritten - * by the spi_nor_fixups hooks, or dynamically when parsing the JESD216 - * Serial Flash Discoverable Parameters (SFDP) tables. - * - * @size: the flash memory density in bytes. - * @page_size: the page size of the SPI NOR flash memory. - * @hwcaps: describes the read and page program hardware - * capabilities. - * @reads: read capabilities ordered by priority: the higher index - * in the array, the higher priority. - * @page_programs: page program capabilities ordered by priority: the - * higher index in the array, the higher priority. - * @erase_map: the erase map parsed from the SFDP Sector Map Parameter - * Table. - * @quad_enable: enables SPI NOR quad mode. - * @set_4byte_addr_mode: puts the SPI NOR in 4 byte addressing mode. - * @convert_addr: converts an absolute address into something the flash - * will understand. Particularly useful when pagesize is - * not a power-of-2. - * @setup: configures the SPI NOR memory. Useful for SPI NOR - * flashes that have peculiarities to the SPI NOR standard - * e.g. different opcodes, specific address calculation, - * page size, etc. - * @locking_ops: SPI NOR locking methods. - */ -struct spi_nor_flash_parameter { - u64 size; - u32 page_size; - - struct spi_nor_hwcaps hwcaps; - struct spi_nor_read_command reads[SNOR_CMD_READ_MAX]; - struct spi_nor_pp_command page_programs[SNOR_CMD_PP_MAX]; - - struct spi_nor_erase_map erase_map; - - int (*quad_enable)(struct spi_nor *nor); - int (*set_4byte_addr_mode)(struct spi_nor *nor, bool enable); - u32 (*convert_addr)(struct spi_nor *nor, u32 addr); - int (*setup)(struct spi_nor *nor, const struct spi_nor_hwcaps *hwcaps); - - const struct spi_nor_locking_ops *locking_ops; -}; - -/** - * struct flash_info - Forward declaration of a structure used internally by - * spi_nor_scan() +/* + * Forward declarations that are used internally by the core and manufacturer + * drivers. */ struct flash_info; - -/** - * struct spi_nor_manufacturer - Forward declaration of a structure used - * internally by the core and manufacturer drivers. - */ struct spi_nor_manufacturer; +struct spi_nor_flash_parameter; /** * struct spi_nor - Structure for defining a the SPI NOR layer @@ -596,7 +377,7 @@ struct spi_nor { const struct spi_nor_controller_ops *controller_ops; - struct spi_nor_flash_parameter params; + struct spi_nor_flash_parameter *params; struct { struct spi_mem_dirmap_desc *rdesc; @@ -606,35 +387,6 @@ struct spi_nor { void *priv; }; -static u64 __maybe_unused -spi_nor_region_is_last(const struct spi_nor_erase_region *region) -{ - return region->offset & SNOR_LAST_REGION; -} - -static u64 __maybe_unused -spi_nor_region_end(const struct spi_nor_erase_region *region) -{ - return (region->offset & ~SNOR_ERASE_FLAGS_MASK) + region->size; -} - -static void __maybe_unused -spi_nor_region_mark_end(struct spi_nor_erase_region *region) -{ - region->offset |= SNOR_LAST_REGION; -} - -static void __maybe_unused -spi_nor_region_mark_overlay(struct spi_nor_erase_region *region) -{ - region->offset |= SNOR_OVERLAID_REGION; -} - -static bool __maybe_unused spi_nor_has_uniform_erase(const struct spi_nor *nor) -{ - return !!nor->params.erase_map.uniform_erase_type; -} - static inline void spi_nor_set_flash_node(struct spi_nor *nor, struct device_node *np) { -- cgit v1.2.3 From bedb404e91bb2908d9921fc736a518a9d89525fc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Feb 2020 13:40:15 +0200 Subject: serial: 8250_port: Don't use power management for kernel console Doing any kind of power management for kernel console is really bad idea. First of all, it runs in poll and atomic mode. This fact attaches a limitation on the functions that might be called. For example, pm_runtime_get_sync() might sleep and thus can't be used. This call needs, for example, to bring the device to powered on state on the system, where the power on sequence may require on-atomic operations, such as Intel Cherrytrail with ACPI enumerated UARTs. That said, on ACPI enabled platforms it might even call firmware for a job. On the other hand pm_runtime_get() doesn't guarantee that device will become powered on fast enough. Besides that, imagine the case when console is about to print a kernel Oops and it's powered off. In such an emergency case calling the complex functions is not the best what we can do, taking into consideration that user wants to see at least something of the last kernel word before it passes away. Here we modify the 8250 console code to prevent runtime power management. Note, there is a behaviour change for OMAP boards. It will require to detach kernel console to become idle. Link: https://lists.openwall.net/linux-kernel/2018/09/29/65 Suggested-by: Russell King Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200217114016.49856-6-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 0901c2aa366c..6545f8cfc8fa 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -179,6 +179,7 @@ void serial8250_set_defaults(struct uart_8250_port *up); void serial8250_console_write(struct uart_8250_port *up, const char *s, unsigned int count); int serial8250_console_setup(struct uart_port *port, char *options, bool probe); +int serial8250_console_exit(struct uart_port *port); extern void serial8250_set_isa_configurator(void (*v) (int port, struct uart_port *up, -- cgit v1.2.3 From c84716c401d34eed5ed45a9c3553b08b0d541b9a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Mar 2019 14:54:22 +1100 Subject: list/hashtable: minor documentation corrections. hash_for_each_safe() and hash_for_each_possible_safe() need to be passed a temp 'struct hlist_node' pointer, but do not say that in the documentation - they just say a 'struct'. Also the documentation for hlist_for_each_entry_safe() describes @n as "another" hlist_node, but in reality it is the only one. Signed-off-by: NeilBrown Reviewed-by: Mukesh Ojha Signed-off-by: Jiri Kosina --- include/linux/hashtable.h | 4 ++-- include/linux/list.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 417d2c4bc60d..78b6ea5fa8ba 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -145,7 +145,7 @@ static inline void hash_del_rcu(struct hlist_node *node) * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @tmp: a &struct used for temporary storage + * @tmp: a &struct hlist_node used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ @@ -197,7 +197,7 @@ static inline void hash_del_rcu(struct hlist_node *node) * same bucket safe against removals * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @tmp: a &struct used for temporary storage + * @tmp: a &struct hlist_node used for temporary storage * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ diff --git a/include/linux/list.h b/include/linux/list.h index 884216db3246..aff44d34f4e4 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -989,7 +989,7 @@ static inline void hlist_move_list(struct hlist_head *old, /** * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @pos: the type * to use as a loop cursor. - * @n: another &struct hlist_node to use as temporary storage + * @n: a &struct hlist_node to use as temporary storage * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ -- cgit v1.2.3 From b927ddf2dd1807e62946129cfbc7754fdcde85c4 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Fri, 6 Mar 2020 07:53:00 +0000 Subject: PNP: constify driver name struct pnp_driver has name set as char* instead of const char* like platform_driver, pci_driver, usb_driver, etc... Let's unify a bit by setting name as const char*. Furthermore, all users of this structures set name from already const data. Signed-off-by: Corentin Labbe Signed-off-by: Rafael J. Wysocki --- include/linux/pnp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 3b12fd28af78..b18dca67253d 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -379,7 +379,7 @@ struct pnp_id { }; struct pnp_driver { - char *name; + const char *name; const struct pnp_device_id *id_table; unsigned int flags; int (*probe) (struct pnp_dev *dev, const struct pnp_device_id *dev_id); -- cgit v1.2.3 From 80ebc420ec59a7c9bcf37881b7d80f68c937ed46 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Wed, 18 Sep 2019 15:28:33 +0800 Subject: genirq: fix kerneldoc comment for irq_desc commit 0c6f8a8b917a ("genirq: Remove compat code") deleted the @status member of irq_desc, but forgot to update the comment. Signed-off-by: Yunfeng Ye Signed-off-by: Jiri Kosina --- include/linux/irqdesc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index d6e2ab538ef2..8f2820c5e69e 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -24,7 +24,7 @@ struct pt_regs; * @handle_irq: highlevel irq-events handler * @preflow_handler: handler called before the flow handler (currently used by sparc) * @action: the irq action chain - * @status: status information + * @status_use_accessors: status information * @core_internal_state__do_not_mess_with_it: core internal status information * @depth: disable-depth, for nested irq_disable() calls * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers -- cgit v1.2.3 From a0e374525def2ef18a078523e1faefb5ce2b05e5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 Jan 2020 12:06:18 -0800 Subject: libnvdimm/region: Introduce NDD_LABELING The NDD_ALIASING flag is used to indicate where pmem capacity might alias with blk capacity and require labeling. It is also used to indicate whether the DIMM supports labeling. Separate this latter capability into its own flag so that the NDD_ALIASING flag is scoped to true aliased configurations. To my knowledge aliased configurations only exist in the ACPI spec, there are no known platforms that ship this support in production. This clarity allows namespace-capacity alignment constraints around interleave-ways to be relaxed. Cc: Vishal Verma Cc: Oliver O'Halloran Reviewed-by: Jeff Moyer Reviewed-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/158041477856.3889308.4212605617834097674.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 9df091bd30ba..18da4059be09 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -37,6 +37,8 @@ enum { NDD_WORK_PENDING = 4, /* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */ NDD_NOBLK = 5, + /* dimm supports namespace labels */ + NDD_LABELING = 6, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, -- cgit v1.2.3 From 1ac7072ca3d4808acc56bf0fd27bee7bdff9e92f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 24 Oct 2019 17:19:27 +0200 Subject: mfd: wm8994: Fix comment spelling Fix misspellings of "configuration". Signed-off-by: Geert Uytterhoeven Signed-off-by: Jiri Kosina --- include/linux/mfd/wm8994/pdata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index 81e7dcbd94df..6e2962ef5b81 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -33,7 +33,7 @@ struct wm8994_ldo_pdata { * DRC configurations are specified with a label and a set of register * values to write (the enable bits will be ignored). At runtime an * enumerated control will be presented for each DRC block allowing - * the user to choose the configration to use. + * the user to choose the configuration to use. * * Configurations may be generated by hand or by using the DRC control * panel provided by the WISCE - see http://www.wolfsonmicro.com/wisce/ -- cgit v1.2.3 From 8673e02e58410e6c4cefa499efa846286e45a991 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 2 Mar 2020 18:17:52 +0000 Subject: arm64: perf: Add support for ARMv8.5-PMU 64-bit counters At present ARMv8 event counters are limited to 32-bits, though by using the CHAIN event it's possible to combine adjacent counters to achieve 64-bits. The perf config1:0 bit can be set to use such a configuration. With the introduction of ARMv8.5-PMU support, all event counters can now be used as 64-bit counters. Let's enable 64-bit event counters where support exists. Unless the user sets config1:0 we will adjust the counter value such that it overflows upon 32-bit overflow. This follows the same behaviour as the cycle counter which has always been (and remains) 64-bits. Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose [Mark: fix ID field names, compare with 8.5 value] Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 71f525a35ac2..5b616dde9a4c 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -80,6 +80,7 @@ struct arm_pmu { struct pmu pmu; cpumask_t supported_cpus; char *name; + int pmuver; irqreturn_t (*handle_irq)(struct arm_pmu *pmu); void (*enable)(struct perf_event *event); void (*disable)(struct perf_event *event); -- cgit v1.2.3 From b030f194aed290705426c62e501201c0739405c5 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Mar 2020 12:59:01 +0100 Subject: netfilter: Rename ingress hook include file Prepare for addition of a netfilter egress hook by renaming to . The egress hook also necessitates a refactoring of the include file, but that is done in a separate commit to ease reviewing. No functional change intended. Signed-off-by: Lukas Wunner Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 58 --------------------------------------- include/linux/netfilter_netdev.h | 58 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 58 deletions(-) delete mode 100644 include/linux/netfilter_ingress.h create mode 100644 include/linux/netfilter_netdev.h (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h deleted file mode 100644 index a13774be2eb5..000000000000 --- a/include/linux/netfilter_ingress.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NETFILTER_INGRESS_H_ -#define _NETFILTER_INGRESS_H_ - -#include -#include - -#ifdef CONFIG_NETFILTER_INGRESS -static inline bool nf_hook_ingress_active(const struct sk_buff *skb) -{ -#ifdef CONFIG_JUMP_LABEL - if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) - return false; -#endif - return rcu_access_pointer(skb->dev->nf_hooks_ingress); -} - -/* caller must hold rcu_read_lock */ -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); - struct nf_hook_state state; - int ret; - - /* Must recheck the ingress hook head, in the event it became NULL - * after the check in nf_hook_ingress_active evaluated to true. - */ - if (unlikely(!e)) - return 0; - - nf_hook_state_init(&state, NF_NETDEV_INGRESS, - NFPROTO_NETDEV, skb->dev, NULL, NULL, - dev_net(skb->dev), NULL); - ret = nf_hook_slow(skb, &state, e, 0); - if (ret == 0) - return -1; - - return ret; -} - -static inline void nf_hook_ingress_init(struct net_device *dev) -{ - RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); -} -#else /* CONFIG_NETFILTER_INGRESS */ -static inline int nf_hook_ingress_active(struct sk_buff *skb) -{ - return 0; -} - -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - return 0; -} - -static inline void nf_hook_ingress_init(struct net_device *dev) {} -#endif /* CONFIG_NETFILTER_INGRESS */ -#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h new file mode 100644 index 000000000000..a13774be2eb5 --- /dev/null +++ b/include/linux/netfilter_netdev.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NETFILTER_INGRESS_H_ +#define _NETFILTER_INGRESS_H_ + +#include +#include + +#ifdef CONFIG_NETFILTER_INGRESS +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +{ +#ifdef CONFIG_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return rcu_access_pointer(skb->dev->nf_hooks_ingress); +} + +/* caller must hold rcu_read_lock */ +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_state state; + int ret; + + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. + */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, NF_NETDEV_INGRESS, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + ret = nf_hook_slow(skb, &state, e, 0); + if (ret == 0) + return -1; + + return ret; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) +{ + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); +} +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) {} +#endif /* CONFIG_NETFILTER_INGRESS */ +#endif /* _NETFILTER_INGRESS_H_ */ -- cgit v1.2.3 From 5418d3881e1f5d2cf9c1076eb8bd85770393a0e8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Mar 2020 12:59:02 +0100 Subject: netfilter: Generalize ingress hook Prepare for addition of a netfilter egress hook by generalizing the ingress hook introduced by commit e687ad60af09 ("netfilter: add netfilter ingress hook after handle_ing() under unique static key"). In particular, rename and refactor the ingress hook's static inlines such that they can be reused for an egress hook. No functional change intended. Signed-off-by: Lukas Wunner Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_netdev.h | 45 +++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index a13774be2eb5..49e26479642e 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -1,34 +1,37 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NETFILTER_INGRESS_H_ -#define _NETFILTER_INGRESS_H_ +#ifndef _NETFILTER_NETDEV_H_ +#define _NETFILTER_NETDEV_H_ #include #include -#ifdef CONFIG_NETFILTER_INGRESS -static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +#ifdef CONFIG_NETFILTER +static __always_inline bool nf_hook_netdev_active(enum nf_dev_hooks hooknum, + struct nf_hook_entries __rcu *hooks) { #ifdef CONFIG_JUMP_LABEL - if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][hooknum])) return false; #endif - return rcu_access_pointer(skb->dev->nf_hooks_ingress); + return rcu_access_pointer(hooks); } /* caller must hold rcu_read_lock */ -static inline int nf_hook_ingress(struct sk_buff *skb) +static __always_inline int nf_hook_netdev(struct sk_buff *skb, + enum nf_dev_hooks hooknum, + struct nf_hook_entries __rcu *hooks) { - struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_entries *e = rcu_dereference(hooks); struct nf_hook_state state; int ret; - /* Must recheck the ingress hook head, in the event it became NULL - * after the check in nf_hook_ingress_active evaluated to true. + /* Must recheck the hook head, in the event it became NULL + * after the check in nf_hook_netdev_active evaluated to true. */ if (unlikely(!e)) return 0; - nf_hook_state_init(&state, NF_NETDEV_INGRESS, + nf_hook_state_init(&state, hooknum, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); ret = nf_hook_slow(skb, &state, e, 0); @@ -37,10 +40,26 @@ static inline int nf_hook_ingress(struct sk_buff *skb) return ret; } +#endif /* CONFIG_NETFILTER */ -static inline void nf_hook_ingress_init(struct net_device *dev) +static inline void nf_hook_netdev_init(struct net_device *dev) { +#ifdef CONFIG_NETFILTER_INGRESS RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); +#endif +} + +#ifdef CONFIG_NETFILTER_INGRESS +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +{ + return nf_hook_netdev_active(NF_NETDEV_INGRESS, + skb->dev->nf_hooks_ingress); +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return nf_hook_netdev(skb, NF_NETDEV_INGRESS, + skb->dev->nf_hooks_ingress); } #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) @@ -52,7 +71,5 @@ static inline int nf_hook_ingress(struct sk_buff *skb) { return 0; } - -static inline void nf_hook_ingress_init(struct net_device *dev) {} #endif /* CONFIG_NETFILTER_INGRESS */ #endif /* _NETFILTER_INGRESS_H_ */ -- cgit v1.2.3 From 8537f78647c072bdb1a5dbe32e1c7e5b13ff1258 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 11 Mar 2020 12:59:03 +0100 Subject: netfilter: Introduce egress hook Commit e687ad60af09 ("netfilter: add netfilter ingress hook after handle_ing() under unique static key") introduced the ability to classify packets on ingress. Allow the same on egress. Position the hook immediately before a packet is handed to tc and then sent out on an interface, thereby mirroring the ingress order. This order allows marking packets in the netfilter egress hook and subsequently using the mark in tc. Another benefit of this order is consistency with a lot of existing documentation which says that egress tc is performed after netfilter hooks. Egress hooks already exist for the most common protocols, such as NF_INET_LOCAL_OUT or NF_ARP_OUT, and those are to be preferred because they are executed earlier during packet processing. However for more exotic protocols, there is currently no provision to apply netfilter on egress. A common workaround is to enslave the interface to a bridge and use ebtables, or to resort to tc. But when the ingress hook was introduced, consensus was that users should be given the choice to use netfilter or tc, whichever tool suits their needs best: https://lore.kernel.org/netdev/20150430153317.GA3230@salvia/ This hook is also useful for NAT46/NAT64, tunneling and filtering of locally generated af_packet traffic such as dhclient. There have also been occasional user requests for a netfilter egress hook in the past, e.g.: https://www.spinics.net/lists/netfilter/msg50038.html Performance measurements with pktgen surprisingly show a speedup rather than a slowdown with this commit: * Without this commit: Result: OK: 34240933(c34238375+d2558) usec, 100000000 (60byte,0frags) 2920481pps 1401Mb/sec (1401830880bps) errors: 0 * With this commit: Result: OK: 33997299(c33994193+d3106) usec, 100000000 (60byte,0frags) 2941410pps 1411Mb/sec (1411876800bps) errors: 0 * Without this commit + tc egress: Result: OK: 39022386(c39019547+d2839) usec, 100000000 (60byte,0frags) 2562631pps 1230Mb/sec (1230062880bps) errors: 0 * With this commit + tc egress: Result: OK: 37604447(c37601877+d2570) usec, 100000000 (60byte,0frags) 2659259pps 1276Mb/sec (1276444320bps) errors: 0 * With this commit + nft egress: Result: OK: 41436689(c41434088+d2600) usec, 100000000 (60byte,0frags) 2413320pps 1158Mb/sec (1158393600bps) errors: 0 Tested on a bare-metal Core i7-3615QM, each measurement was performed three times to verify that the numbers are stable. Commands to perform a measurement: modprobe pktgen echo "add_device lo@3" > /proc/net/pktgen/kpktgend_3 samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh -i 'lo@3' -n 100000000 Commands for testing tc egress: tc qdisc add dev lo clsact tc filter add dev lo egress protocol ip prio 1 u32 match ip dst 4.3.2.1/32 Commands for testing nft egress: nft add table netdev t nft add chain netdev t co \{ type filter hook egress device lo priority 0 \; \} nft add rule netdev t co ip daddr 4.3.2.1/32 drop All testing was performed on the loopback interface to avoid distorting measurements by the packet handling in the low-level Ethernet driver. Signed-off-by: Lukas Wunner Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 4 ++++ include/linux/netfilter_netdev.h | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 654808bfad83..15f1e32b430c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1751,6 +1751,7 @@ enum netdev_priv_flags { * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for * egress processing + * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) @@ -2026,6 +2027,9 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; #endif +#ifdef CONFIG_NETFILTER_EGRESS + struct nf_hook_entries __rcu *nf_hooks_egress; +#endif #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index 49e26479642e..92d3611a782e 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -47,6 +47,9 @@ static inline void nf_hook_netdev_init(struct net_device *dev) #ifdef CONFIG_NETFILTER_INGRESS RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); #endif +#ifdef CONFIG_NETFILTER_EGRESS + RCU_INIT_POINTER(dev->nf_hooks_egress, NULL); +#endif } #ifdef CONFIG_NETFILTER_INGRESS @@ -72,4 +75,28 @@ static inline int nf_hook_ingress(struct sk_buff *skb) return 0; } #endif /* CONFIG_NETFILTER_INGRESS */ + +#ifdef CONFIG_NETFILTER_EGRESS +static inline bool nf_hook_egress_active(const struct sk_buff *skb) +{ + return nf_hook_netdev_active(NF_NETDEV_EGRESS, + skb->dev->nf_hooks_egress); +} + +static inline int nf_hook_egress(struct sk_buff *skb) +{ + return nf_hook_netdev(skb, NF_NETDEV_EGRESS, + skb->dev->nf_hooks_egress); +} +#else /* CONFIG_NETFILTER_EGRESS */ +static inline int nf_hook_egress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_egress(struct sk_buff *skb) +{ + return 0; +} +#endif /* CONFIG_NETFILTER_EGRESS */ #endif /* _NETFILTER_INGRESS_H_ */ -- cgit v1.2.3 From 613f53fe09a27f928a7d05132e1a74b5136e8f04 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 24 Feb 2020 16:59:54 +0200 Subject: net/mlx5: Eswitch, enable forwarding back to uplink port Add dependencny on cap termination_table_raw_traffic to allow non encapsulated packets received from uplink to be forwarded back to the received uplink port. Refactor the conditions into a separate function. Signed-off-by: Eli Cohen Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2bd920965bd3..cc55cee3b53c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -416,7 +416,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 termination_table[0x1]; u8 reformat_and_fwd_to_table[0x1]; u8 reserved_at_1a[0x6]; - u8 reserved_at_20[0x2]; + u8 termination_table_raw_traffic[0x1]; + u8 reserved_at_21[0x1]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; u8 max_modify_header_actions[0x8]; -- cgit v1.2.3 From 9000edb71ab29d184aa33f5a77fa6e52d8812bb9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Mar 2020 13:47:12 -0700 Subject: net: ethtool: require drivers to set supported_coalesce_params Now that all in-tree drivers have been updated we can make the supported_coalesce_params mandatory. To save debugging time in case some driver was missed (or is out of tree) add a warning when netdev is registered with set_coalesce but without supported_coalesce_params. Signed-off-by: Jakub Kicinski Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index be355f37337d..c1d379bf6ee1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -458,6 +458,8 @@ struct ethtool_ops { struct ethtool_stats *, u64 *); }; +int ethtool_check_ops(const struct ethtool_ops *ops); + struct ethtool_rx_flow_rule { struct flow_rule *rule; unsigned long priv[0]; -- cgit v1.2.3 From 9010f9deb000edce823cb79345f137742ccffa19 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 16 Mar 2020 22:32:33 +0100 Subject: net: phy: improve phy_driver callback handle_interrupt did_interrupt() clears the interrupt, therefore handle_interrupt() can not check which event triggered the interrupt. To overcome this constraint and allow more flexibility for customer interrupt handlers, let's decouple handle_interrupt() from parts of the phylib interrupt handling. Custom interrupt handlers now have to implement the did_interrupt() functionality in handle_interrupt() if needed. Fortunately we have just one custom interrupt handler so far (in the mscc PHY driver), convert it to the changed API. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/phy.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b872aed8ba6..cb5a2182ba6d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -568,7 +569,7 @@ struct phy_driver { int (*did_interrupt)(struct phy_device *phydev); /* Override default interrupt handling */ - int (*handle_interrupt)(struct phy_device *phydev); + irqreturn_t (*handle_interrupt)(struct phy_device *phydev); /* Clears up any memory if needed */ void (*remove)(struct phy_device *phydev); -- cgit v1.2.3 From 6cc7cf8125b3d086cd80c96e02edb6f4ab9b20fa Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Mar 2020 14:52:31 +0000 Subject: net: mdiobus: add APIs for modifying a MDIO device register Add APIs for modifying a MDIO device register, similar to the existing phy_modify() group of functions, but at mdiobus level instead. Adapt __phy_modify_changed() to use the new mdiobus level helper. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio.h | 4 ++++ include/linux/phy.h | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index a7604248777b..917e4bb2ed71 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -316,11 +316,15 @@ static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising, int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, + u16 mask, u16 set); int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, + u16 set); int mdiobus_register_device(struct mdio_device *mdiodev); int mdiobus_unregister_device(struct mdio_device *mdiodev); diff --git a/include/linux/phy.h b/include/linux/phy.h index cb5a2182ba6d..36d9dea04016 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -754,6 +754,25 @@ static inline int __phy_write(struct phy_device *phydev, u32 regnum, u16 val) val); } +/** + * __phy_modify_changed() - Convenience function for modifying a PHY register + * @phydev: a pointer to a &struct phy_device + * @regnum: register number + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + * + * Unlocked helper function which allows a PHY register to be modified as + * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, + u16 mask, u16 set) +{ + return __mdiobus_modify_changed(phydev->mdio.bus, phydev->mdio.addr, + regnum, mask, set); +} + /** * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. -- cgit v1.2.3 From 74db1c18d80aaacdc42e6d00d7aca2bb5c97c7ea Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Mar 2020 14:52:36 +0000 Subject: net: phylink: pcs: add 802.3 clause 22 helpers Implement helpers for PCS accessed via the MII bus using 802.3 clause 22 cycles, conforming to 802.3 clause 37 and Cisco SGMII specifications for the advertisement word. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2180eb1aa254..de591c2fb37e 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -317,4 +317,10 @@ int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); void phylink_set_port_modes(unsigned long *bits); void phylink_helper_basex_speed(struct phylink_link_state *state); +void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state); +int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, + const struct phylink_link_state *state); +void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); + #endif -- cgit v1.2.3 From b8679ef8bedfe2bae90c97bc4c8a1826cfd98bba Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 17 Mar 2020 14:52:41 +0000 Subject: net: phylink: pcs: add 802.3 clause 45 helpers Implement helpers for PCS accessed via the MII bus using 802.3 clause 45 cycles for 10GBASE-R. Only link up/down is supported, 10G full duplex is assumed. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index de591c2fb37e..8fa6df3b881b 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -323,4 +323,6 @@ int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, const struct phylink_link_state *state); void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); +void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs, + struct phylink_link_state *state); #endif -- cgit v1.2.3 From 28321582334c261c13b20d7efe634e610b4c100b Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Fri, 13 Mar 2020 14:34:57 +0530 Subject: arm64: initialize ptrauth keys for kernel booting task This patch uses the existing boot_init_stack_canary arch function to initialize the ptrauth keys for the booting task in the primary core. The requirement here is that it should be always inline and the caller must never return. As pointer authentication too detects a subset of stack corruption so it makes sense to place this code here. Both pointer authentication and stack canary codes are protected by their respective config option. Suggested-by: Ard Biesheuvel Signed-off-by: Amit Daniel Kachhap Reviewed-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- include/linux/stackprotector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index 6b792d080eee..4c678c4fec58 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -6,7 +6,7 @@ #include #include -#ifdef CONFIG_STACKPROTECTOR +#if defined(CONFIG_STACKPROTECTOR) || defined(CONFIG_ARM64_PTR_AUTH) # include #else static inline void boot_init_stack_canary(void) -- cgit v1.2.3 From 3f9d51333129e16d77dcc9414bd548151d884c8a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 29 Feb 2020 12:50:46 +0100 Subject: watchdog: wm831x_wdt: Remove GPIO handling An attempt to convert the driver to using GPIO descriptors (see Link tag) was discouraged in favor of deleting the handling of the update GPIO altogehter since there are no in-tree users. This patch deletes the GPIO handling instead. Cc: Richard Fitzgerald Cc: Charles Keepax Cc: Mark Brown Link: https://lore.kernel.org/linux-watchdog/20200210102209.289379-1-linus.walleij@linaro.org/ Signed-off-by: Linus Walleij Reviewed-by: Guenter Roeck Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20200229115046.57781-1-linus.walleij@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/mfd/wm831x/pdata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h index 986986fe4e4e..75aa94dadf1c 100644 --- a/include/linux/mfd/wm831x/pdata.h +++ b/include/linux/mfd/wm831x/pdata.h @@ -89,7 +89,6 @@ enum wm831x_watchdog_action { struct wm831x_watchdog_pdata { enum wm831x_watchdog_action primary, secondary; - int update_gpio; unsigned int software:1; }; -- cgit v1.2.3 From 6ce6ae7c178b95f83ca0e15bd2ac961425a3af5c Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 11 Mar 2020 15:16:53 +0800 Subject: misc: cleanup minor number definitions in c file into miscdevice.h HWRNG_MINOR and RNG_MISCDEV_MINOR are duplicate definitions, use unified HWRNG_MINOR instead and moved into miscdevice.h ANSLCD_MINOR and LCD_MINOR are duplicate definitions, use unified LCD_MINOR instead and moved into miscdevice.h MISCDEV_MINOR is renamed to PXA3XX_GCU_MINOR and moved into miscdevice.h Other definitions are just moved without any change. Link: https://lore.kernel.org/lkml/20200120221323.GJ15860@mit.edu/t/ Suggested-by: Arnd Bergmann Build-tested-by: Willy TARREAU Build-tested-by: Miguel Ojeda Signed-off-by: Zhenzhong Duan Acked-by: Miguel Ojeda Acked-by: Arnd Bergmann Acked-by: Herbert Xu Link: https://lore.kernel.org/r/20200311071654.335-2-zhenzhong.duan@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index becde6981a95..42360fcd7342 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -31,14 +31,23 @@ #define DMAPI_MINOR 140 /* unused */ #define NVRAM_MINOR 144 #define SGI_MMTIMER 153 +#define PMU_MINOR 154 #define STORE_QUEUE_MINOR 155 /* unused */ +#define LCD_MINOR 156 +#define AC_MINOR 157 +#define BUTTON_MINOR 158 /* Major 10, Minor 158, /dev/nwbutton */ +#define ENVCTRL_MINOR 162 #define I2O_MINOR 166 +#define UCTRL_MINOR 174 #define AGPGART_MINOR 175 +#define TOSH_MINOR_DEV 181 #define HWRNG_MINOR 183 #define MICROCODE_MINOR 184 +#define KEYPAD_MINOR 185 #define IRNET_MINOR 187 #define D7S_MINOR 193 #define VFIO_MINOR 196 +#define PXA3XX_GCU_MINOR 197 #define TUN_MINOR 200 #define CUSE_MINOR 203 #define MWAVE_MINOR 219 /* ACP/Mwave Modem */ @@ -49,6 +58,7 @@ #define MISC_MCELOG_MINOR 227 #define HPET_MINOR 228 #define FUSE_MINOR 229 +#define SNAPSHOT_MINOR 231 #define KVM_MINOR 232 #define BTRFS_MINOR 234 #define AUTOFS_MINOR 235 -- cgit v1.2.3 From 2668dba6df53584fb147d656c45a600d9e723dcb Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 11 Mar 2020 15:16:54 +0800 Subject: misc: move FLASH_MINOR into miscdevice.h and fix conflicts FLASH_MINOR is used in both drivers/char/nwflash.c and drivers/sbus/char/flash.c with conflict minor numbers. Move all the definitions of FLASH_MINOR into miscdevice.h. Rename FLASH_MINOR for drivers/char/nwflash.c to NWFLASH_MINOR and FLASH_MINOR for drivers/sbus/char/flash.c to SBUS_FLASH_MINOR. Link: https://lore.kernel.org/lkml/20200120221323.GJ15860@mit.edu/t/ Suggested-by: Arnd Bergmann Signed-off-by: Zhenzhong Duan Acked-by: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Russell King Cc: "David S. Miller" Link: https://lore.kernel.org/r/20200311071654.335-3-zhenzhong.duan@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 42360fcd7342..66cc45e0624b 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -30,12 +30,14 @@ #define SUN_OPENPROM_MINOR 139 #define DMAPI_MINOR 140 /* unused */ #define NVRAM_MINOR 144 +#define SBUS_FLASH_MINOR 152 #define SGI_MMTIMER 153 #define PMU_MINOR 154 #define STORE_QUEUE_MINOR 155 /* unused */ #define LCD_MINOR 156 #define AC_MINOR 157 #define BUTTON_MINOR 158 /* Major 10, Minor 158, /dev/nwbutton */ +#define NWFLASH_MINOR 160 /* MAJOR is 10 - miscdevice */ #define ENVCTRL_MINOR 162 #define I2O_MINOR 166 #define UCTRL_MINOR 174 -- cgit v1.2.3 From 05d67ec3ca59627f2c1dd62538a345c4a9cdff44 Mon Sep 17 00:00:00 2001 From: Qiang Su Date: Fri, 6 Mar 2020 15:03:59 +0800 Subject: UIO: fix up inapposite whiteplace in uio head file Whitespace is used in the inapposite place, which makes checkpatch complain. Signed-off-by: Qiang Su Link: https://lore.kernel.org/r/20200306070359.71398-1-suqiang4@huawei.com Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 01081c4726c0..461db05819f4 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -24,10 +24,10 @@ struct uio_map; * struct uio_mem - description of a UIO memory region * @name: name of the memory region for identification * @addr: address of the device's memory rounded to page - * size (phys_addr is used since addr can be - * logical, virtual, or physical & phys_addr_t - * should always be large enough to handle any of - * the address types) + * size (phys_addr is used since addr can be + * logical, virtual, or physical & phys_addr_t + * should always be large enough to handle any of + * the address types) * @offs: offset of device memory within the page * @size: size of IO (multiple of page size) * @memtype: type of memory addr points to @@ -67,16 +67,16 @@ struct uio_port { #define MAX_UIO_PORT_REGIONS 5 struct uio_device { - struct module *owner; + struct module *owner; struct device dev; - int minor; - atomic_t event; - struct fasync_struct *async_queue; - wait_queue_head_t wait; - struct uio_info *info; + int minor; + atomic_t event; + struct fasync_struct *async_queue; + wait_queue_head_t wait; + struct uio_info *info; struct mutex info_lock; - struct kobject *map_dir; - struct kobject *portio_dir; + struct kobject *map_dir; + struct kobject *portio_dir; }; /** -- cgit v1.2.3 From 86a78b1cfc78a6378c4ff3b30f822899c066dca5 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Fri, 6 Mar 2020 18:18:52 +0200 Subject: uio: add resource managed devm_uio_register_device() function This change adds a resource managed equivalent of uio_register_device(). Not adding devm_uio_unregister_device(), since the intent is to discourage it's usage. Having such a function may allow some bad driver designs. Most users of devm_*register*() functions rarely use the unregister equivalents. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20200306161853.25368-1-alexandru.ardelean@analog.com Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 461db05819f4..54bf6b118401 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -123,6 +123,15 @@ extern int __must_check extern void uio_unregister_device(struct uio_info *info); extern void uio_event_notify(struct uio_info *info); +extern int __must_check + __devm_uio_register_device(struct module *owner, + struct device *parent, + struct uio_info *info); + +/* use a define to avoid include chaining to get THIS_MODULE */ +#define devm_uio_register_device(parent, info) \ + __devm_uio_register_device(THIS_MODULE, parent, info) + /* defines for uio_info->irq */ #define UIO_IRQ_CUSTOM -1 #define UIO_IRQ_NONE 0 -- cgit v1.2.3 From 526ee72dfdf74f4c14cbe165d68736f86bff6cf2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Mar 2020 17:36:40 +0100 Subject: debugfs: remove return value of debugfs_create_file_size() No one checks the return value of debugfs_create_file_size, as it's not needed, so make the return value void, so that no one tries to do so in the future. Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200309163640.237984-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 43efcc49f061..d672b7db0cfc 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -67,10 +67,10 @@ struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); -struct dentry *debugfs_create_file_size(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops, - loff_t file_size); +void debugfs_create_file_size(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops, + loff_t file_size); struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); @@ -181,13 +181,11 @@ static inline struct dentry *debugfs_create_file_unsafe(const char *name, return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_file_size(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops, - loff_t file_size) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_file_size(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops, + loff_t file_size) +{ } static inline struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) -- cgit v1.2.3 From 2644f912b41012c1ce5ff9be99efeec721491b86 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 10 Feb 2020 11:15:46 +0100 Subject: backlight: pwm_bl: Switch to full GPIO descriptor The PWM backlight still supports passing a enable GPIO line as platform data using the legacy API. It turns out that ever board using this mechanism except one is pass .enable_gpio = -1. So we drop all these cargo-culted -1's from all instances of this platform data in the kernel. The remaning board, Palm TC, is converted to pass a machine descriptior table with the "enable" GPIO instead, and delete the platform data entry for enable_gpio and the code handling it and things should work smoothly with the new API. Signed-off-by: Linus Walleij Acked-by: Robert Jarzmik Acked-by: Krzysztof Kozlowski Reviewed-by: Daniel Thompson Signed-off-by: Lee Jones --- include/linux/pwm_backlight.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index 8ea265a022fd..06086cb93b6f 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -16,8 +16,6 @@ struct platform_pwm_backlight_data { unsigned int *levels; unsigned int post_pwm_on_delay; unsigned int pwm_off_delay; - /* TODO remove once all users are switched to gpiod_* API */ - int enable_gpio; int (*init)(struct device *dev); int (*notify)(struct device *dev, int brightness); void (*notify_after)(struct device *dev, int brightness); -- cgit v1.2.3 From e598a72faeb543599bdf0d930df3a71906404e6f Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 13 Mar 2020 05:30:05 +0000 Subject: block/genhd: Notify udev about capacity change Allow block/genhd to notify user space (via udev) about disk size changes using a new helper set_capacity_revalidate_and_notify(), which is a wrapper on top of set_capacity(). set_capacity_revalidate_and_notify() will only notify via udev if the current capacity or the target capacity is not zero and iff the capacity changes. Suggested-by: Christoph Hellwig Signed-off-by: Someswarudu Sangaraju Signed-off-by: Balbir Singh Reviewed-by: Bob Liu Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e7244fb6b6ad..3c1a816785c8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -508,6 +508,8 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); +extern void set_capacity_revalidate_and_notify(struct gendisk *disk, + sector_t size, bool revalidate); extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); /* drivers/char/random.c */ -- cgit v1.2.3 From bd3ebed9304acd2ccddde44675fedf963dbfdc71 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2020 15:54:26 +0100 Subject: regulator: driver.h: fix regulator_map_* function names The toolchain produces a warning on this driver when building the docs: ./include/linux/regulator/driver.h:284: WARNING: Unknown target name: "regulator_regmap_x_voltage". While fixing it, we notices that there's no function names with the above pattern. It seems that some previous patch renamed it to regulator_map_* instead. So, change the function name, replacing "x" by "*", with is a more used way to add a wildcard, and escape those with ``literal`` markup, in order to avoid the toolchain to think that this is a link to some existing document chapter. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/b9f5687bcf981a88c9d1fd04d759a540fda53a99.1584456635.git.mchehab+huawei@kernel.org Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 9a911bb5fb61..29d920516e0b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -277,9 +277,9 @@ enum regulator_type { * @curr_table: Current limit mapping table (if table based mapping) * * @vsel_range_reg: Register for range selector when using pickable ranges - * and regulator_regmap_X_voltage_X_pickable functions. + * and ``regulator_map_*_voltage_*_pickable`` functions. * @vsel_range_mask: Mask for register bitfield used for range selector - * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_ + * @vsel_reg: Register for selector when using ``regulator_map_*_voltage_*`` * @vsel_mask: Mask for register bitfield used for selector * @vsel_step: Specify the resolution of selector stepping when setting * voltage. If 0, then no stepping is done (requested selector is -- cgit v1.2.3 From 357b6cc5834eabc1be7c28a9faae7da061df097d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 18 Mar 2020 10:33:22 +0100 Subject: netfilter: revert introduction of egress hook This reverts the following commits: 8537f78647c0 ("netfilter: Introduce egress hook") 5418d3881e1f ("netfilter: Generalize ingress hook") b030f194aed2 ("netfilter: Rename ingress hook include file") >From the discussion in [0], the author's main motivation to add a hook in fast path is for an out of tree kernel module, which is a red flag to begin with. Other mentioned potential use cases like NAT{64,46} is on future extensions w/o concrete code in the tree yet. Revert as suggested [1] given the weak justification to add more hooks to critical fast-path. [0] https://lore.kernel.org/netdev/cover.1583927267.git.lukas@wunner.de/ [1] https://lore.kernel.org/netdev/20200318.011152.72770718915606186.davem@davemloft.net/ Signed-off-by: Daniel Borkmann Cc: David Miller Cc: Pablo Neira Ayuso Cc: Alexei Starovoitov Nacked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 -- include/linux/netfilter_ingress.h | 58 ++++++++++++++++++++++ include/linux/netfilter_netdev.h | 102 -------------------------------------- 3 files changed, 58 insertions(+), 106 deletions(-) create mode 100644 include/linux/netfilter_ingress.h delete mode 100644 include/linux/netfilter_netdev.h (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 15f1e32b430c..654808bfad83 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1751,7 +1751,6 @@ enum netdev_priv_flags { * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for * egress processing - * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) @@ -2027,9 +2026,6 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; #endif -#ifdef CONFIG_NETFILTER_EGRESS - struct nf_hook_entries __rcu *nf_hooks_egress; -#endif #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h new file mode 100644 index 000000000000..a13774be2eb5 --- /dev/null +++ b/include/linux/netfilter_ingress.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NETFILTER_INGRESS_H_ +#define _NETFILTER_INGRESS_H_ + +#include +#include + +#ifdef CONFIG_NETFILTER_INGRESS +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) +{ +#ifdef CONFIG_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return rcu_access_pointer(skb->dev->nf_hooks_ingress); +} + +/* caller must hold rcu_read_lock */ +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_state state; + int ret; + + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. + */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, NF_NETDEV_INGRESS, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + ret = nf_hook_slow(skb, &state, e, 0); + if (ret == 0) + return -1; + + return ret; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) +{ + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); +} +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) {} +#endif /* CONFIG_NETFILTER_INGRESS */ +#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h deleted file mode 100644 index 92d3611a782e..000000000000 --- a/include/linux/netfilter_netdev.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NETFILTER_NETDEV_H_ -#define _NETFILTER_NETDEV_H_ - -#include -#include - -#ifdef CONFIG_NETFILTER -static __always_inline bool nf_hook_netdev_active(enum nf_dev_hooks hooknum, - struct nf_hook_entries __rcu *hooks) -{ -#ifdef CONFIG_JUMP_LABEL - if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][hooknum])) - return false; -#endif - return rcu_access_pointer(hooks); -} - -/* caller must hold rcu_read_lock */ -static __always_inline int nf_hook_netdev(struct sk_buff *skb, - enum nf_dev_hooks hooknum, - struct nf_hook_entries __rcu *hooks) -{ - struct nf_hook_entries *e = rcu_dereference(hooks); - struct nf_hook_state state; - int ret; - - /* Must recheck the hook head, in the event it became NULL - * after the check in nf_hook_netdev_active evaluated to true. - */ - if (unlikely(!e)) - return 0; - - nf_hook_state_init(&state, hooknum, - NFPROTO_NETDEV, skb->dev, NULL, NULL, - dev_net(skb->dev), NULL); - ret = nf_hook_slow(skb, &state, e, 0); - if (ret == 0) - return -1; - - return ret; -} -#endif /* CONFIG_NETFILTER */ - -static inline void nf_hook_netdev_init(struct net_device *dev) -{ -#ifdef CONFIG_NETFILTER_INGRESS - RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); -#endif -#ifdef CONFIG_NETFILTER_EGRESS - RCU_INIT_POINTER(dev->nf_hooks_egress, NULL); -#endif -} - -#ifdef CONFIG_NETFILTER_INGRESS -static inline bool nf_hook_ingress_active(const struct sk_buff *skb) -{ - return nf_hook_netdev_active(NF_NETDEV_INGRESS, - skb->dev->nf_hooks_ingress); -} - -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - return nf_hook_netdev(skb, NF_NETDEV_INGRESS, - skb->dev->nf_hooks_ingress); -} -#else /* CONFIG_NETFILTER_INGRESS */ -static inline int nf_hook_ingress_active(struct sk_buff *skb) -{ - return 0; -} - -static inline int nf_hook_ingress(struct sk_buff *skb) -{ - return 0; -} -#endif /* CONFIG_NETFILTER_INGRESS */ - -#ifdef CONFIG_NETFILTER_EGRESS -static inline bool nf_hook_egress_active(const struct sk_buff *skb) -{ - return nf_hook_netdev_active(NF_NETDEV_EGRESS, - skb->dev->nf_hooks_egress); -} - -static inline int nf_hook_egress(struct sk_buff *skb) -{ - return nf_hook_netdev(skb, NF_NETDEV_EGRESS, - skb->dev->nf_hooks_egress); -} -#else /* CONFIG_NETFILTER_EGRESS */ -static inline int nf_hook_egress_active(struct sk_buff *skb) -{ - return 0; -} - -static inline int nf_hook_egress(struct sk_buff *skb) -{ - return 0; -} -#endif /* CONFIG_NETFILTER_EGRESS */ -#endif /* _NETFILTER_INGRESS_H_ */ -- cgit v1.2.3 From 39f16c1c0f14e9794545dbf6a64c909d5e16a2ea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Feb 2020 18:39:17 -0500 Subject: x86: get rid of put_user_try in {ia32,x32}_setup_rt_frame() Straightforward, except for compat_save_altstack_ex() stuck in those. Replace that thing with an analogue that would use unsafe_put_user() instead of put_user_ex() (called unsafe_compat_save_altstack()) and be done with that... Signed-off-by: Al Viro --- include/linux/compat.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 11083d84eb23..224ecb4fffd4 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -483,12 +483,13 @@ extern void __user *compat_alloc_user_space(unsigned long len); int compat_restore_altstack(const compat_stack_t __user *uss); int __compat_save_altstack(compat_stack_t __user *, unsigned long); -#define compat_save_altstack_ex(uss, sp) do { \ +#define unsafe_compat_save_altstack(uss, sp, label) do { \ compat_stack_t __user *__uss = uss; \ struct task_struct *t = current; \ - put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ - put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ - put_user_ex(t->sas_ss_size, &__uss->ss_size); \ + unsafe_put_user(ptr_to_compat((void __user *)t->sas_ss_sp), \ + &__uss->ss_sp, label); \ + unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \ + unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ if (t->sas_ss_flags & SS_AUTODISARM) \ sas_ss_reset(t); \ } while (0); -- cgit v1.2.3 From b52cc1bb952f23d0d05615ef1a390a19f6b6b5fd Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 17 Feb 2020 19:47:28 +0900 Subject: extcon: Remove unneeded extern keyword from extcon-provider.h The commit tb7365587f513 ("extcon: Remove unneeded extern keyword from extcon.h") removes the unneeded extern keyword from extcon header file. But, The commit tb7365587f513 has missed that deletes 'extern' keyword from extcon-provider.h. So that it deletes extern keyword from extcon-provider.h. Signed-off-by: Chanwoo Choi Link: https://lore.kernel.org/r/20200217104728.29330-1-cw00.choi@samsung.com Signed-off-by: Greg Kroah-Hartman --- include/linux/extcon-provider.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon-provider.h b/include/linux/extcon-provider.h index 1c143d200caa..fa70945f4e6b 100644 --- a/include/linux/extcon-provider.h +++ b/include/linux/extcon-provider.h @@ -17,30 +17,30 @@ struct extcon_dev; #if IS_ENABLED(CONFIG_EXTCON) /* Following APIs register/unregister the extcon device. */ -extern int extcon_dev_register(struct extcon_dev *edev); -extern void extcon_dev_unregister(struct extcon_dev *edev); -extern int devm_extcon_dev_register(struct device *dev, +int extcon_dev_register(struct extcon_dev *edev); +void extcon_dev_unregister(struct extcon_dev *edev); +int devm_extcon_dev_register(struct device *dev, struct extcon_dev *edev); -extern void devm_extcon_dev_unregister(struct device *dev, +void devm_extcon_dev_unregister(struct device *dev, struct extcon_dev *edev); /* Following APIs allocate/free the memory of the extcon device. */ -extern struct extcon_dev *extcon_dev_allocate(const unsigned int *cable); -extern void extcon_dev_free(struct extcon_dev *edev); -extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, +struct extcon_dev *extcon_dev_allocate(const unsigned int *cable); +void extcon_dev_free(struct extcon_dev *edev); +struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, const unsigned int *cable); -extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); +void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); /* Synchronize the state and property value for each external connector. */ -extern int extcon_sync(struct extcon_dev *edev, unsigned int id); +int extcon_sync(struct extcon_dev *edev, unsigned int id); /* * Following APIs set the connected state of each external connector. * The 'id' argument indicates the defined external connector. */ -extern int extcon_set_state(struct extcon_dev *edev, unsigned int id, +int extcon_set_state(struct extcon_dev *edev, unsigned int id, bool state); -extern int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, +int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, bool state); /* @@ -52,13 +52,13 @@ extern int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, * for each external connector. They are used to set the capability of the * property of each external connector based on the id and property. */ -extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, +int extcon_set_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val); -extern int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id, +int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val); -extern int extcon_set_property_capability(struct extcon_dev *edev, +int extcon_set_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); #else /* CONFIG_EXTCON */ -- cgit v1.2.3 From 8067c0b0c6ac7bce201961f0092e2532b12fc00a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 26 Feb 2020 23:43:21 +0100 Subject: rtc/ia64: remove legacy efirtc driver There are two EFI RTC drivers, the original drivers/char/efirtc.c driver and the more modern drivers/rtc/rtc-efi.c. Both implement the same interface, but the new one does so in a more portable way. Move everything over to that one and remove the old one. Cc: linux-ia64@vger.kernel.org Cc: Fenghua Yu Cc: Tony Luck Cc: Stephane Eranian Signed-off-by: Arnd Bergmann Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200226224322.187960-1-alexandre.belloni@bootlin.com Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 66cc45e0624b..c7a93002a3c1 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -25,7 +25,7 @@ #define TEMP_MINOR 131 /* Temperature Sensor */ #define APM_MINOR_DEV 134 #define RTC_MINOR 135 -#define EFI_RTC_MINOR 136 /* EFI Time services */ +/*#define EFI_RTC_MINOR 136 was EFI Time services */ #define VHCI_MINOR 137 #define SUN_OPENPROM_MINOR 139 #define DMAPI_MINOR 140 /* unused */ -- cgit v1.2.3 From 8b977c5498b8336b0c61b0fa72f6353e71f938da Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 10 Mar 2020 13:22:46 +0000 Subject: nvmem: core: add nvmem_cell_read_u64 Add nvmem_cell_read_u64() helper to ease read of an u64 value on consumer side. This helper is useful on some sunxi platform that has 64 bits data cells stored in no volatile memory. Signed-off-by: Yangtao Li Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20200310132257.23358-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index d3776be48c53..1b311d27c9b8 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -63,6 +63,7 @@ void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len); int nvmem_cell_write(struct nvmem_cell *cell, void *buf, size_t len); int nvmem_cell_read_u16(struct device *dev, const char *cell_id, u16 *val); int nvmem_cell_read_u32(struct device *dev, const char *cell_id, u32 *val); +int nvmem_cell_read_u64(struct device *dev, const char *cell_id, u64 *val); /* direct nvmem device read/write interface */ struct nvmem_device *nvmem_device_get(struct device *dev, const char *name); @@ -138,6 +139,12 @@ static inline int nvmem_cell_read_u32(struct device *dev, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_u64(struct device *dev, + const char *cell_id, u64 *val) +{ + return -EOPNOTSUPP; +} + static inline struct nvmem_device *nvmem_device_get(struct device *dev, const char *name) { -- cgit v1.2.3 From 0cbf260820fa780a336e4a08cce1f81cd66a7ac1 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:40 +0530 Subject: bus: mhi: core: Add support for registering MHI controllers This commit adds support for registering MHI controller drivers with the MHI stack. MHI controller drivers manages the interaction with the MHI client devices such as the external modems and WiFi chipsets. They are also the MHI bus master in charge of managing the physical link between the host and client device. This is based on the patch submitted by Sujeev Dias: https://lkml.org/lkml/2018/7/9/987 Signed-off-by: Sujeev Dias Signed-off-by: Siddartha Mohanadoss [jhugo: added static config for controllers and fixed several bugs] Signed-off-by: Jeffrey Hugo [mani: removed DT dependency, splitted and cleaned up for upstream] Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200220095854.4804-3-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 400 ++++++++++++++++++++++++++++++++++++++++ include/linux/mod_devicetable.h | 12 ++ 2 files changed, 412 insertions(+) create mode 100644 include/linux/mhi.h (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h new file mode 100644 index 000000000000..a34aa50120c8 --- /dev/null +++ b/include/linux/mhi.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved. + * + */ +#ifndef _MHI_H_ +#define _MHI_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct mhi_chan; +struct mhi_event; +struct mhi_ctxt; +struct mhi_cmd; +struct mhi_buf_info; + +/** + * enum mhi_callback - MHI callback + * @MHI_CB_IDLE: MHI entered idle state + * @MHI_CB_PENDING_DATA: New data available for client to process + * @MHI_CB_LPM_ENTER: MHI host entered low power mode + * @MHI_CB_LPM_EXIT: MHI host about to exit low power mode + * @MHI_CB_EE_RDDM: MHI device entered RDDM exec env + * @MHI_CB_EE_MISSION_MODE: MHI device entered Mission Mode exec env + * @MHI_CB_SYS_ERROR: MHI device entered error state (may recover) + * @MHI_CB_FATAL_ERROR: MHI device entered fatal error state + */ +enum mhi_callback { + MHI_CB_IDLE, + MHI_CB_PENDING_DATA, + MHI_CB_LPM_ENTER, + MHI_CB_LPM_EXIT, + MHI_CB_EE_RDDM, + MHI_CB_EE_MISSION_MODE, + MHI_CB_SYS_ERROR, + MHI_CB_FATAL_ERROR, +}; + +/** + * enum mhi_flags - Transfer flags + * @MHI_EOB: End of buffer for bulk transfer + * @MHI_EOT: End of transfer + * @MHI_CHAIN: Linked transfer + */ +enum mhi_flags { + MHI_EOB, + MHI_EOT, + MHI_CHAIN, +}; + +/** + * enum mhi_device_type - Device types + * @MHI_DEVICE_XFER: Handles data transfer + * @MHI_DEVICE_CONTROLLER: Control device + */ +enum mhi_device_type { + MHI_DEVICE_XFER, + MHI_DEVICE_CONTROLLER, +}; + +/** + * enum mhi_ch_type - Channel types + * @MHI_CH_TYPE_INVALID: Invalid channel type + * @MHI_CH_TYPE_OUTBOUND: Outbound channel to the device + * @MHI_CH_TYPE_INBOUND: Inbound channel from the device + * @MHI_CH_TYPE_INBOUND_COALESCED: Coalesced channel for the device to combine + * multiple packets and send them as a single + * large packet to reduce CPU consumption + */ +enum mhi_ch_type { + MHI_CH_TYPE_INVALID = 0, + MHI_CH_TYPE_OUTBOUND = DMA_TO_DEVICE, + MHI_CH_TYPE_INBOUND = DMA_FROM_DEVICE, + MHI_CH_TYPE_INBOUND_COALESCED = 3, +}; + +/** + * enum mhi_ee_type - Execution environment types + * @MHI_EE_PBL: Primary Bootloader + * @MHI_EE_SBL: Secondary Bootloader + * @MHI_EE_AMSS: Modem, aka the primary runtime EE + * @MHI_EE_RDDM: Ram dump download mode + * @MHI_EE_WFW: WLAN firmware mode + * @MHI_EE_PTHRU: Passthrough + * @MHI_EE_EDL: Embedded downloader + */ +enum mhi_ee_type { + MHI_EE_PBL, + MHI_EE_SBL, + MHI_EE_AMSS, + MHI_EE_RDDM, + MHI_EE_WFW, + MHI_EE_PTHRU, + MHI_EE_EDL, + MHI_EE_MAX_SUPPORTED = MHI_EE_EDL, + MHI_EE_DISABLE_TRANSITION, /* local EE, not related to mhi spec */ + MHI_EE_NOT_SUPPORTED, + MHI_EE_MAX, +}; + +/** + * enum mhi_ch_ee_mask - Execution environment mask for channel + * @MHI_CH_EE_PBL: Allow channel to be used in PBL EE + * @MHI_CH_EE_SBL: Allow channel to be used in SBL EE + * @MHI_CH_EE_AMSS: Allow channel to be used in AMSS EE + * @MHI_CH_EE_RDDM: Allow channel to be used in RDDM EE + * @MHI_CH_EE_PTHRU: Allow channel to be used in PTHRU EE + * @MHI_CH_EE_WFW: Allow channel to be used in WFW EE + * @MHI_CH_EE_EDL: Allow channel to be used in EDL EE + */ +enum mhi_ch_ee_mask { + MHI_CH_EE_PBL = BIT(MHI_EE_PBL), + MHI_CH_EE_SBL = BIT(MHI_EE_SBL), + MHI_CH_EE_AMSS = BIT(MHI_EE_AMSS), + MHI_CH_EE_RDDM = BIT(MHI_EE_RDDM), + MHI_CH_EE_PTHRU = BIT(MHI_EE_PTHRU), + MHI_CH_EE_WFW = BIT(MHI_EE_WFW), + MHI_CH_EE_EDL = BIT(MHI_EE_EDL), +}; + +/** + * enum mhi_er_data_type - Event ring data types + * @MHI_ER_DATA: Only client data over this ring + * @MHI_ER_CTRL: MHI control data and client data + */ +enum mhi_er_data_type { + MHI_ER_DATA, + MHI_ER_CTRL, +}; + +/** + * enum mhi_db_brst_mode - Doorbell mode + * @MHI_DB_BRST_DISABLE: Burst mode disable + * @MHI_DB_BRST_ENABLE: Burst mode enable + */ +enum mhi_db_brst_mode { + MHI_DB_BRST_DISABLE = 0x2, + MHI_DB_BRST_ENABLE = 0x3, +}; + +/** + * struct mhi_channel_config - Channel configuration structure for controller + * @name: The name of this channel + * @num: The number assigned to this channel + * @num_elements: The number of elements that can be queued to this channel + * @local_elements: The local ring length of the channel + * @event_ring: The event rung index that services this channel + * @dir: Direction that data may flow on this channel + * @type: Channel type + * @ee_mask: Execution Environment mask for this channel + * @pollcfg: Polling configuration for burst mode. 0 is default. milliseconds + for UL channels, multiple of 8 ring elements for DL channels + * @doorbell: Doorbell mode + * @lpm_notify: The channel master requires low power mode notifications + * @offload_channel: The client manages the channel completely + * @doorbell_mode_switch: Channel switches to doorbell mode on M0 transition + * @auto_queue: Framework will automatically queue buffers for DL traffic + * @auto_start: Automatically start (open) this channel + */ +struct mhi_channel_config { + char *name; + u32 num; + u32 num_elements; + u32 local_elements; + u32 event_ring; + enum dma_data_direction dir; + enum mhi_ch_type type; + u32 ee_mask; + u32 pollcfg; + enum mhi_db_brst_mode doorbell; + bool lpm_notify; + bool offload_channel; + bool doorbell_mode_switch; + bool auto_queue; + bool auto_start; +}; + +/** + * struct mhi_event_config - Event ring configuration structure for controller + * @num_elements: The number of elements that can be queued to this ring + * @irq_moderation_ms: Delay irq for additional events to be aggregated + * @irq: IRQ associated with this ring + * @channel: Dedicated channel number. U32_MAX indicates a non-dedicated ring + * @priority: Priority of this ring. Use 1 for now + * @mode: Doorbell mode + * @data_type: Type of data this ring will process + * @hardware_event: This ring is associated with hardware channels + * @client_managed: This ring is client managed + * @offload_channel: This ring is associated with an offloaded channel + */ +struct mhi_event_config { + u32 num_elements; + u32 irq_moderation_ms; + u32 irq; + u32 channel; + u32 priority; + enum mhi_db_brst_mode mode; + enum mhi_er_data_type data_type; + bool hardware_event; + bool client_managed; + bool offload_channel; +}; + +/** + * struct mhi_controller_config - Root MHI controller configuration + * @max_channels: Maximum number of channels supported + * @timeout_ms: Timeout value for operations. 0 means use default + * @buf_len: Size of automatically allocated buffers. 0 means use default + * @num_channels: Number of channels defined in @ch_cfg + * @ch_cfg: Array of defined channels + * @num_events: Number of event rings defined in @event_cfg + * @event_cfg: Array of defined event rings + * @use_bounce_buf: Use a bounce buffer pool due to limited DDR access + * @m2_no_db: Host is not allowed to ring DB in M2 state + */ +struct mhi_controller_config { + u32 max_channels; + u32 timeout_ms; + u32 buf_len; + u32 num_channels; + struct mhi_channel_config *ch_cfg; + u32 num_events; + struct mhi_event_config *event_cfg; + bool use_bounce_buf; + bool m2_no_db; +}; + +/** + * struct mhi_controller - Master MHI controller structure + * @cntrl_dev: Pointer to the struct device of physical bus acting as the MHI + * controller (required) + * @mhi_dev: MHI device instance for the controller + * @regs: Base address of MHI MMIO register space (required) + * @iova_start: IOMMU starting address for data (required) + * @iova_stop: IOMMU stop address for data (required) + * @fw_image: Firmware image name for normal booting (required) + * @edl_image: Firmware image name for emergency download mode (optional) + * @sbl_size: SBL image size downloaded through BHIe (optional) + * @seg_len: BHIe vector size (optional) + * @mhi_chan: Points to the channel configuration table + * @lpm_chans: List of channels that require LPM notifications + * @irq: base irq # to request (required) + * @max_chan: Maximum number of channels the controller supports + * @total_ev_rings: Total # of event rings allocated + * @hw_ev_rings: Number of hardware event rings + * @sw_ev_rings: Number of software event rings + * @nr_irqs_req: Number of IRQs required to operate (optional) + * @nr_irqs: Number of IRQ allocated by bus master (required) + * @mhi_event: MHI event ring configurations table + * @mhi_cmd: MHI command ring configurations table + * @mhi_ctxt: MHI device context, shared memory between host and device + * @pm_mutex: Mutex for suspend/resume operation + * @pm_lock: Lock for protecting MHI power management state + * @timeout_ms: Timeout in ms for state transitions + * @pm_state: MHI power management state + * @db_access: DB access states + * @ee: MHI device execution environment + * @dev_wake: Device wakeup count + * @pending_pkts: Pending packets for the controller + * @transition_list: List of MHI state transitions + * @transition_lock: Lock for protecting MHI state transition list + * @wlock: Lock for protecting device wakeup + * @st_worker: State transition worker + * @fw_worker: Firmware download worker + * @syserr_worker: System error worker + * @state_event: State change event + * @status_cb: CB function to notify power states of the device (required) + * @link_status: CB function to query link status of the device (required) + * @wake_get: CB function to assert device wake (optional) + * @wake_put: CB function to de-assert device wake (optional) + * @wake_toggle: CB function to assert and de-assert device wake (optional) + * @runtime_get: CB function to controller runtime resume (required) + * @runtimet_put: CB function to decrement pm usage (required) + * @buffer_len: Bounce buffer length + * @bounce_buf: Use of bounce buffer + * @fbc_download: MHI host needs to do complete image transfer (optional) + * @pre_init: MHI host needs to do pre-initialization before power up + * @wake_set: Device wakeup set flag + * + * Fields marked as (required) need to be populated by the controller driver + * before calling mhi_register_controller(). For the fields marked as (optional) + * they can be populated depending on the usecase. + */ +struct mhi_controller { + struct device *cntrl_dev; + struct mhi_device *mhi_dev; + void __iomem *regs; + dma_addr_t iova_start; + dma_addr_t iova_stop; + const char *fw_image; + const char *edl_image; + size_t sbl_size; + size_t seg_len; + struct mhi_chan *mhi_chan; + struct list_head lpm_chans; + int *irq; + u32 max_chan; + u32 total_ev_rings; + u32 hw_ev_rings; + u32 sw_ev_rings; + u32 nr_irqs_req; + u32 nr_irqs; + + struct mhi_event *mhi_event; + struct mhi_cmd *mhi_cmd; + struct mhi_ctxt *mhi_ctxt; + + struct mutex pm_mutex; + rwlock_t pm_lock; + u32 timeout_ms; + u32 pm_state; + u32 db_access; + enum mhi_ee_type ee; + atomic_t dev_wake; + atomic_t pending_pkts; + struct list_head transition_list; + spinlock_t transition_lock; + spinlock_t wlock; + struct work_struct st_worker; + struct work_struct fw_worker; + struct work_struct syserr_worker; + wait_queue_head_t state_event; + + void (*status_cb)(struct mhi_controller *mhi_cntrl, + enum mhi_callback cb); + int (*link_status)(struct mhi_controller *mhi_cntrl); + void (*wake_get)(struct mhi_controller *mhi_cntrl, bool override); + void (*wake_put)(struct mhi_controller *mhi_cntrl, bool override); + void (*wake_toggle)(struct mhi_controller *mhi_cntrl); + int (*runtime_get)(struct mhi_controller *mhi_cntrl); + void (*runtime_put)(struct mhi_controller *mhi_cntrl); + + size_t buffer_len; + bool bounce_buf; + bool fbc_download; + bool pre_init; + bool wake_set; +}; + +/** + * struct mhi_device - Structure representing a MHI device which binds + * to channels + * @id: Pointer to MHI device ID struct + * @chan_name: Name of the channel to which the device binds + * @mhi_cntrl: Controller the device belongs to + * @ul_chan: UL channel for the device + * @dl_chan: DL channel for the device + * @dev: Driver model device node for the MHI device + * @dev_type: MHI device type + * @dev_wake: Device wakeup counter + */ +struct mhi_device { + const struct mhi_device_id *id; + const char *chan_name; + struct mhi_controller *mhi_cntrl; + struct mhi_chan *ul_chan; + struct mhi_chan *dl_chan; + struct device dev; + enum mhi_device_type dev_type; + u32 dev_wake; +}; + +/** + * struct mhi_result - Completed buffer information + * @buf_addr: Address of data buffer + * @bytes_xferd: # of bytes transferred + * @dir: Channel direction + * @transaction_status: Status of last transaction + */ +struct mhi_result { + void *buf_addr; + size_t bytes_xferd; + enum dma_data_direction dir; + int transaction_status; +}; + +#define to_mhi_device(dev) container_of(dev, struct mhi_device, dev) + +/** + * mhi_register_controller - Register MHI controller + * @mhi_cntrl: MHI controller to register + * @config: Configuration to use for the controller + */ +int mhi_register_controller(struct mhi_controller *mhi_cntrl, + struct mhi_controller_config *config); + +/** + * mhi_unregister_controller - Unregister MHI controller + * @mhi_cntrl: MHI controller to unregister + */ +void mhi_unregister_controller(struct mhi_controller *mhi_cntrl); + +#endif /* _MHI_H_ */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index e3596db077dc..be15e997fe39 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -821,4 +821,16 @@ struct wmi_device_id { const void *context; }; +#define MHI_NAME_SIZE 32 + +/** + * struct mhi_device_id - MHI device identification + * @chan: MHI channel name + * @driver_data: driver data; + */ +struct mhi_device_id { + const char chan[MHI_NAME_SIZE]; + kernel_ulong_t driver_data; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ -- cgit v1.2.3 From e755cadb0171ce78b29b89fe8bdd0179121a7827 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:41 +0530 Subject: bus: mhi: core: Add support for registering MHI client drivers This commit adds support for registering MHI client drivers with the MHI stack. MHI client drivers binds to one or more MHI devices inorder to sends and receive the upper-layer protocol packets like IP packets, modem control messages, and diagnostics messages over MHI bus. This is based on the patch submitted by Sujeev Dias: https://lkml.org/lkml/2018/7/9/987 Signed-off-by: Sujeev Dias Signed-off-by: Siddartha Mohanadoss [mani: splitted and cleaned up for upstream] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20200220095854.4804-4-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index a34aa50120c8..7e6b7743c705 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -354,6 +354,8 @@ struct mhi_controller { * @dl_chan: DL channel for the device * @dev: Driver model device node for the MHI device * @dev_type: MHI device type + * @ul_chan_id: MHI channel id for UL transfer + * @dl_chan_id: MHI channel id for DL transfer * @dev_wake: Device wakeup counter */ struct mhi_device { @@ -364,6 +366,8 @@ struct mhi_device { struct mhi_chan *dl_chan; struct device dev; enum mhi_device_type dev_type; + int ul_chan_id; + int dl_chan_id; u32 dev_wake; }; @@ -381,6 +385,29 @@ struct mhi_result { int transaction_status; }; +/** + * struct mhi_driver - Structure representing a MHI client driver + * @probe: CB function for client driver probe function + * @remove: CB function for client driver remove function + * @ul_xfer_cb: CB function for UL data transfer + * @dl_xfer_cb: CB function for DL data transfer + * @status_cb: CB functions for asynchronous status + * @driver: Device driver model driver + */ +struct mhi_driver { + const struct mhi_device_id *id_table; + int (*probe)(struct mhi_device *mhi_dev, + const struct mhi_device_id *id); + void (*remove)(struct mhi_device *mhi_dev); + void (*ul_xfer_cb)(struct mhi_device *mhi_dev, + struct mhi_result *result); + void (*dl_xfer_cb)(struct mhi_device *mhi_dev, + struct mhi_result *result); + void (*status_cb)(struct mhi_device *mhi_dev, enum mhi_callback mhi_cb); + struct device_driver driver; +}; + +#define to_mhi_driver(drv) container_of(drv, struct mhi_driver, driver) #define to_mhi_device(dev) container_of(dev, struct mhi_device, dev) /** @@ -397,4 +424,16 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl, */ void mhi_unregister_controller(struct mhi_controller *mhi_cntrl); +/** + * mhi_driver_register - Register driver with MHI framework + * @mhi_drv: Driver associated with the device + */ +int mhi_driver_register(struct mhi_driver *mhi_drv); + +/** + * mhi_driver_unregister - Unregister a driver for mhi_devices + * @mhi_drv: Driver associated with the device + */ +void mhi_driver_unregister(struct mhi_driver *mhi_drv); + #endif /* _MHI_H_ */ -- cgit v1.2.3 From da1c4f85692476ab038e3279209f07b8f4b7641e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:42 +0530 Subject: bus: mhi: core: Add support for creating and destroying MHI devices This commit adds support for creating and destroying MHI devices. The MHI devices binds to the MHI channels and are used to transfer data between MHI host and client device. This is based on the patch submitted by Sujeev Dias: https://lkml.org/lkml/2018/7/9/989 Signed-off-by: Sujeev Dias Signed-off-by: Siddartha Mohanadoss [mani: splitted from pm patch and cleaned up for upstream] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20200220095854.4804-5-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 7e6b7743c705..1ce2bdd5f2f4 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -163,6 +163,7 @@ enum mhi_db_brst_mode { * @doorbell_mode_switch: Channel switches to doorbell mode on M0 transition * @auto_queue: Framework will automatically queue buffers for DL traffic * @auto_start: Automatically start (open) this channel + * @wake-capable: Channel capable of waking up the system */ struct mhi_channel_config { char *name; @@ -180,6 +181,7 @@ struct mhi_channel_config { bool doorbell_mode_switch; bool auto_queue; bool auto_start; + bool wake_capable; }; /** -- cgit v1.2.3 From 6cd330ae76ffd5c8f6294c423cabde7eeef1b40c Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:43 +0530 Subject: bus: mhi: core: Add support for ringing channel/event ring doorbells This commit adds support for ringing channel and event ring doorbells by MHI host. The MHI host can use the channel and event ring doorbells for notifying the client device about processing transfer and event rings which it has queued using MMIO registers. This is based on the patch submitted by Sujeev Dias: https://lkml.org/lkml/2018/7/9/989 Signed-off-by: Sujeev Dias Signed-off-by: Siddartha Mohanadoss [mani: splitted from pm patch and cleaned up for upstream] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20200220095854.4804-6-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 1ce2bdd5f2f4..099d1643b072 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -240,6 +240,8 @@ struct mhi_controller_config { * controller (required) * @mhi_dev: MHI device instance for the controller * @regs: Base address of MHI MMIO register space (required) + * @bhi: Points to base of MHI BHI register space + * @wake_db: MHI WAKE doorbell register address * @iova_start: IOMMU starting address for data (required) * @iova_stop: IOMMU stop address for data (required) * @fw_image: Firmware image name for normal booting (required) @@ -294,6 +296,8 @@ struct mhi_controller { struct device *cntrl_dev; struct mhi_device *mhi_dev; void __iomem *regs; + void __iomem *bhi; + void __iomem *wake_db; dma_addr_t iova_start; dma_addr_t iova_stop; const char *fw_image; -- cgit v1.2.3 From a6e2e3522f29141b95c1ef8580c665a3582b3e66 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:44 +0530 Subject: bus: mhi: core: Add support for PM state transitions This commit adds support for transitioning the MHI states as a part of the power management operations. Helpers functions are provided for the state transitions, which will be consumed by the actual power management routines. This is based on the patch submitted by Sujeev Dias: https://lkml.org/lkml/2018/7/9/989 Signed-off-by: Sujeev Dias Signed-off-by: Siddartha Mohanadoss [jhugo: removed dma_zalloc_coherent() and fixed several bugs] Signed-off-by: Jeffrey Hugo [mani: splitted the pm patch and cleaned up for upstream] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20200220095854.4804-7-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 099d1643b072..fc0cd4af646c 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -105,6 +105,31 @@ enum mhi_ee_type { MHI_EE_MAX, }; +/** + * enum mhi_state - MHI states + * @MHI_STATE_RESET: Reset state + * @MHI_STATE_READY: Ready state + * @MHI_STATE_M0: M0 state + * @MHI_STATE_M1: M1 state + * @MHI_STATE_M2: M2 state + * @MHI_STATE_M3: M3 state + * @MHI_STATE_M3_FAST: M3 Fast state + * @MHI_STATE_BHI: BHI state + * @MHI_STATE_SYS_ERR: System Error state + */ +enum mhi_state { + MHI_STATE_RESET = 0x0, + MHI_STATE_READY = 0x1, + MHI_STATE_M0 = 0x2, + MHI_STATE_M1 = 0x3, + MHI_STATE_M2 = 0x4, + MHI_STATE_M3 = 0x5, + MHI_STATE_M3_FAST = 0x6, + MHI_STATE_BHI = 0x7, + MHI_STATE_SYS_ERR = 0xFF, + MHI_STATE_MAX, +}; + /** * enum mhi_ch_ee_mask - Execution environment mask for channel * @MHI_CH_EE_PBL: Allow channel to be used in PBL EE @@ -266,6 +291,7 @@ struct mhi_controller_config { * @pm_state: MHI power management state * @db_access: DB access states * @ee: MHI device execution environment + * @dev_state: MHI device state * @dev_wake: Device wakeup count * @pending_pkts: Pending packets for the controller * @transition_list: List of MHI state transitions @@ -298,6 +324,7 @@ struct mhi_controller { void __iomem *regs; void __iomem *bhi; void __iomem *wake_db; + dma_addr_t iova_start; dma_addr_t iova_stop; const char *fw_image; @@ -324,6 +351,7 @@ struct mhi_controller { u32 pm_state; u32 db_access; enum mhi_ee_type ee; + enum mhi_state dev_state; atomic_t dev_wake; atomic_t pending_pkts; struct list_head transition_list; @@ -391,6 +419,22 @@ struct mhi_result { int transaction_status; }; +/** + * struct mhi_buf - MHI Buffer description + * @buf: Virtual address of the buffer + * @name: Buffer label. For offload channel, configurations name must be: + * ECA - Event context array data + * CCA - Channel context array data + * @dma_addr: IOMMU address of the buffer + * @len: # of bytes + */ +struct mhi_buf { + void *buf; + const char *name; + dma_addr_t dma_addr; + size_t len; +}; + /** * struct mhi_driver - Structure representing a MHI client driver * @probe: CB function for client driver probe function @@ -442,4 +486,12 @@ int mhi_driver_register(struct mhi_driver *mhi_drv); */ void mhi_driver_unregister(struct mhi_driver *mhi_drv); +/** + * mhi_set_mhi_state - Set MHI device state + * @mhi_cntrl: MHI controller + * @state: State to set + */ +void mhi_set_mhi_state(struct mhi_controller *mhi_cntrl, + enum mhi_state state); + #endif /* _MHI_H_ */ -- cgit v1.2.3 From 3000f85b8f47b2c860add5cce4c201c83bde6468 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:45 +0530 Subject: bus: mhi: core: Add support for basic PM operations This commit adds support for basic MHI PM operations such as mhi_async_power_up, mhi_sync_power_up, and mhi_power_down. These routines places the MHI bus into respective power domain states and calls the state_transition APIs when necessary. The MHI controller driver is expected to call these PM routines for MHI powerup and powerdown. This is based on the patch submitted by Sujeev Dias: https://lkml.org/lkml/2018/7/9/989 Signed-off-by: Sujeev Dias Signed-off-by: Siddartha Mohanadoss [mani: splitted the pm patch and cleaned up for upstream] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20200220095854.4804-8-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index fc0cd4af646c..630643f6b4a4 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -81,6 +81,17 @@ enum mhi_ch_type { MHI_CH_TYPE_INBOUND_COALESCED = 3, }; +/** + * struct image_info - Firmware and RDDM table table + * @mhi_buf - Buffer for firmware and RDDM table + * @entries - # of entries in table + */ +struct image_info { + struct mhi_buf *mhi_buf; + struct bhi_vec_entry *bhi_vec; + u32 entries; +}; + /** * enum mhi_ee_type - Execution environment types * @MHI_EE_PBL: Primary Bootloader @@ -266,6 +277,7 @@ struct mhi_controller_config { * @mhi_dev: MHI device instance for the controller * @regs: Base address of MHI MMIO register space (required) * @bhi: Points to base of MHI BHI register space + * @bhie: Points to base of MHI BHIe register space * @wake_db: MHI WAKE doorbell register address * @iova_start: IOMMU starting address for data (required) * @iova_stop: IOMMU stop address for data (required) @@ -273,6 +285,7 @@ struct mhi_controller_config { * @edl_image: Firmware image name for emergency download mode (optional) * @sbl_size: SBL image size downloaded through BHIe (optional) * @seg_len: BHIe vector size (optional) + * @fbc_image: Points to firmware image buffer * @mhi_chan: Points to the channel configuration table * @lpm_chans: List of channels that require LPM notifications * @irq: base irq # to request (required) @@ -323,6 +336,7 @@ struct mhi_controller { struct mhi_device *mhi_dev; void __iomem *regs; void __iomem *bhi; + void __iomem *bhie; void __iomem *wake_db; dma_addr_t iova_start; @@ -331,6 +345,7 @@ struct mhi_controller { const char *edl_image; size_t sbl_size; size_t seg_len; + struct image_info *fbc_image; struct mhi_chan *mhi_chan; struct list_head lpm_chans; int *irq; @@ -494,4 +509,40 @@ void mhi_driver_unregister(struct mhi_driver *mhi_drv); void mhi_set_mhi_state(struct mhi_controller *mhi_cntrl, enum mhi_state state); +/** + * mhi_prepare_for_power_up - Do pre-initialization before power up. + * This is optional, call this before power up if + * the controller does not want bus framework to + * automatically free any allocated memory during + * shutdown process. + * @mhi_cntrl: MHI controller + */ +int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl); + +/** + * mhi_async_power_up - Start MHI power up sequence + * @mhi_cntrl: MHI controller + */ +int mhi_async_power_up(struct mhi_controller *mhi_cntrl); + +/** + * mhi_sync_power_up - Start MHI power up sequence and wait till the device + * device enters valid EE state + * @mhi_cntrl: MHI controller + */ +int mhi_sync_power_up(struct mhi_controller *mhi_cntrl); + +/** + * mhi_power_down - Start MHI power down sequence + * @mhi_cntrl: MHI controller + * @graceful: Link is still accessible, so do a graceful shutdown process + */ +void mhi_power_down(struct mhi_controller *mhi_cntrl, bool graceful); + +/** + * mhi_unprepare_after_power_down - Free any allocated memory after power down + * @mhi_cntrl: MHI controller + */ +void mhi_unprepare_after_power_down(struct mhi_controller *mhi_cntrl); + #endif /* _MHI_H_ */ -- cgit v1.2.3 From 6fdfdd27328ceef39f4b8daec3510874ad68e753 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:47 +0530 Subject: bus: mhi: core: Add support for downloading RDDM image during panic MHI protocol supports downloading RDDM (RAM Dump) image from the device through BHIE. This is useful to debugging as the RDDM image can capture the firmware state. This is based on the patch submitted by Sujeev Dias: https://lkml.org/lkml/2018/7/9/989 Signed-off-by: Sujeev Dias Signed-off-by: Siddartha Mohanadoss [mani: splitted the data transfer patch and cleaned up for upstream] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20200220095854.4804-10-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 630643f6b4a4..d3453a1de835 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -283,9 +283,11 @@ struct mhi_controller_config { * @iova_stop: IOMMU stop address for data (required) * @fw_image: Firmware image name for normal booting (required) * @edl_image: Firmware image name for emergency download mode (optional) + * @rddm_size: RAM dump size that host should allocate for debugging purpose * @sbl_size: SBL image size downloaded through BHIe (optional) * @seg_len: BHIe vector size (optional) * @fbc_image: Points to firmware image buffer + * @rddm_image: Points to RAM dump buffer * @mhi_chan: Points to the channel configuration table * @lpm_chans: List of channels that require LPM notifications * @irq: base irq # to request (required) @@ -343,9 +345,11 @@ struct mhi_controller { dma_addr_t iova_stop; const char *fw_image; const char *edl_image; + size_t rddm_size; size_t sbl_size; size_t seg_len; struct image_info *fbc_image; + struct image_info *rddm_image; struct mhi_chan *mhi_chan; struct list_head lpm_chans; int *irq; @@ -545,4 +549,24 @@ void mhi_power_down(struct mhi_controller *mhi_cntrl, bool graceful); */ void mhi_unprepare_after_power_down(struct mhi_controller *mhi_cntrl); +/** + * mhi_download_rddm_img - Download ramdump image from device for + * debugging purpose. + * @mhi_cntrl: MHI controller + * @in_panic: Download rddm image during kernel panic + */ +int mhi_download_rddm_img(struct mhi_controller *mhi_cntrl, bool in_panic); + +/** + * mhi_force_rddm_mode - Force device into rddm mode + * @mhi_cntrl: MHI controller + */ +int mhi_force_rddm_mode(struct mhi_controller *mhi_cntrl); + +/** + * mhi_get_mhi_state - Get MHI state of the device + * @mhi_cntrl: MHI controller + */ +enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl); + #endif /* _MHI_H_ */ -- cgit v1.2.3 From 1d3173a3bae7039b765a0956e3e4bf846dbaacb8 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:48 +0530 Subject: bus: mhi: core: Add support for processing events from client device This commit adds support for processing the MHI data and control events from the client device. The client device can report various events such as EE events, state change events by interrupting the host through IRQ and adding events to the event rings allocated by the host during initialization. This is based on the patch submitted by Sujeev Dias: https://lkml.org/lkml/2018/7/9/988 Signed-off-by: Sujeev Dias Signed-off-by: Siddartha Mohanadoss [mani: splitted the data transfer patch and cleaned up for upstream] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20200220095854.4804-11-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d3453a1de835..bf8921ee0805 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -31,6 +31,7 @@ struct mhi_buf_info; * @MHI_CB_EE_MISSION_MODE: MHI device entered Mission Mode exec env * @MHI_CB_SYS_ERROR: MHI device entered error state (may recover) * @MHI_CB_FATAL_ERROR: MHI device entered fatal error state + * @MHI_CB_BW_REQ: Received a bandwidth switch request from device */ enum mhi_callback { MHI_CB_IDLE, @@ -41,6 +42,7 @@ enum mhi_callback { MHI_CB_EE_MISSION_MODE, MHI_CB_SYS_ERROR, MHI_CB_FATAL_ERROR, + MHI_CB_BW_REQ, }; /** @@ -92,6 +94,16 @@ struct image_info { u32 entries; }; +/** + * struct mhi_link_info - BW requirement + * target_link_speed - Link speed as defined by TLS bits in LinkControl reg + * target_link_width - Link width as defined by NLW bits in LinkStatus reg + */ +struct mhi_link_info { + unsigned int target_link_speed; + unsigned int target_link_width; +}; + /** * enum mhi_ee_type - Execution environment types * @MHI_EE_PBL: Primary Bootloader @@ -312,6 +324,7 @@ struct mhi_controller_config { * @transition_list: List of MHI state transitions * @transition_lock: Lock for protecting MHI state transition list * @wlock: Lock for protecting device wakeup + * @mhi_link_info: Device bandwidth info * @st_worker: State transition worker * @fw_worker: Firmware download worker * @syserr_worker: System error worker @@ -376,6 +389,7 @@ struct mhi_controller { struct list_head transition_list; spinlock_t transition_lock; spinlock_t wlock; + struct mhi_link_info mhi_link_info; struct work_struct st_worker; struct work_struct fw_worker; struct work_struct syserr_worker; -- cgit v1.2.3 From 189ff97cca53e3fe2d8b38d64105040ce17fc62d Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:49 +0530 Subject: bus: mhi: core: Add support for data transfer Add support for transferring data between external modem and host processor using MHI protocol. This is based on the patch submitted by Sujeev Dias: https://lkml.org/lkml/2018/7/9/988 Signed-off-by: Sujeev Dias Signed-off-by: Siddartha Mohanadoss [mani: splitted the data transfer patch and cleaned up for upstream] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20200220095854.4804-12-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index bf8921ee0805..79cb9f898544 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -336,6 +337,8 @@ struct mhi_controller_config { * @wake_toggle: CB function to assert and de-assert device wake (optional) * @runtime_get: CB function to controller runtime resume (required) * @runtimet_put: CB function to decrement pm usage (required) + * @map_single: CB function to create TRE buffer + * @unmap_single: CB function to destroy TRE buffer * @buffer_len: Bounce buffer length * @bounce_buf: Use of bounce buffer * @fbc_download: MHI host needs to do complete image transfer (optional) @@ -403,6 +406,10 @@ struct mhi_controller { void (*wake_toggle)(struct mhi_controller *mhi_cntrl); int (*runtime_get)(struct mhi_controller *mhi_cntrl); void (*runtime_put)(struct mhi_controller *mhi_cntrl); + int (*map_single)(struct mhi_controller *mhi_cntrl, + struct mhi_buf_info *buf); + void (*unmap_single)(struct mhi_controller *mhi_cntrl, + struct mhi_buf_info *buf); size_t buffer_len; bool bounce_buf; @@ -583,4 +590,77 @@ int mhi_force_rddm_mode(struct mhi_controller *mhi_cntrl); */ enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl); +/** + * mhi_device_get - Disable device low power mode + * @mhi_dev: Device associated with the channel + */ +void mhi_device_get(struct mhi_device *mhi_dev); + +/** + * mhi_device_get_sync - Disable device low power mode. Synchronously + * take the controller out of suspended state + * @mhi_dev: Device associated with the channel + */ +int mhi_device_get_sync(struct mhi_device *mhi_dev); + +/** + * mhi_device_put - Re-enable device low power mode + * @mhi_dev: Device associated with the channel + */ +void mhi_device_put(struct mhi_device *mhi_dev); + +/** + * mhi_prepare_for_transfer - Setup channel for data transfer + * @mhi_dev: Device associated with the channels + */ +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); + +/** + * mhi_unprepare_from_transfer - Unprepare the channels + * @mhi_dev: Device associated with the channels + */ +void mhi_unprepare_from_transfer(struct mhi_device *mhi_dev); + +/** + * mhi_poll - Poll for any available data in DL direction + * @mhi_dev: Device associated with the channels + * @budget: # of events to process + */ +int mhi_poll(struct mhi_device *mhi_dev, u32 budget); + +/** + * mhi_queue_dma - Send or receive DMA mapped buffers from client device + * over MHI channel + * @mhi_dev: Device associated with the channels + * @dir: DMA direction for the channel + * @mhi_buf: Buffer for holding the DMA mapped data + * @len: Buffer length + * @mflags: MHI transfer flags used for the transfer + */ +int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir, + struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags); + +/** + * mhi_queue_buf - Send or receive raw buffers from client device over MHI + * channel + * @mhi_dev: Device associated with the channels + * @dir: DMA direction for the channel + * @buf: Buffer for holding the data + * @len: Buffer length + * @mflags: MHI transfer flags used for the transfer + */ +int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir, + void *buf, size_t len, enum mhi_flags mflags); + +/** + * mhi_queue_skb - Send or receive SKBs from client device over MHI channel + * @mhi_dev: Device associated with the channels + * @dir: DMA direction for the channel + * @skb: Buffer for holding SKBs + * @len: Buffer length + * @mflags: MHI transfer flags used for the transfer + */ +int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir, + struct sk_buff *skb, size_t len, enum mhi_flags mflags); + #endif /* _MHI_H_ */ -- cgit v1.2.3 From e6b0de469c5babfe29a86be289408ba2070ea44a Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 20 Feb 2020 15:28:50 +0530 Subject: bus: mhi: core: Add uevent support for module autoloading Add uevent support to MHI bus so that the client drivers can be autoloaded by udev when the MHI devices gets created. The client drivers are expected to provide MODULE_DEVICE_TABLE with the MHI id_table struct so that the alias can be exported. Signed-off-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Link: https://lore.kernel.org/r/20200220095854.4804-13-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mod_devicetable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index be15e997fe39..f10e779a3fd0 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -821,6 +821,7 @@ struct wmi_device_id { const void *context; }; +#define MHI_DEVICE_MODALIAS_FMT "mhi:%s" #define MHI_NAME_SIZE 32 /** -- cgit v1.2.3 From 0b04758b002bde9434053be2fff8064ac3d9d8bb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:08 +0000 Subject: irqchip/gic-v3: Use SGIs without active state if offered To allow the direct injection of SGIs into a guest, the GICv4.1 architecture has to sacrifice the Active state so that SGIs look a lot like LPIs (they are injected by the same mechanism). In order not to break existing software, the architecture gives offers guests OSs the choice: SGIs with or without an active state. It is the hypervisors duty to honor the guest's choice. For this, the architecture offers a discovery bit indicating whether the GIC supports GICv4.1 SGIs (GICD_TYPER2.nASSGIcap), and another bit indicating whether the guest wants Active-less SGIs or not (controlled by GICD_CTLR.nASSGIreq). A hypervisor not supporting GICv4.1 SGIs would leave nASSGIcap clear, and a guest not knowing about GICv4.1 SGIs (or definitely wanting an Active state) would leave nASSGIreq clear (both being thankfully backward compatible with older revisions of the GIC). Since Linux is perfectly happy without an active state on SGIs, inform the hypervisor that we'll use that if offered. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20200304203330.4967-2-maz@kernel.org --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 83439bfb6c5b..c29a02678a6f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -57,6 +57,7 @@ #define GICD_SPENDSGIR 0x0F20 #define GICD_CTLR_RWP (1U << 31) +#define GICD_CTLR_nASSGIreq (1U << 8) #define GICD_CTLR_DS (1U << 6) #define GICD_CTLR_ARE_NS (1U << 4) #define GICD_CTLR_ENABLE_G1A (1U << 1) @@ -90,6 +91,7 @@ #define GICD_TYPER_ESPIS(typer) \ (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0) +#define GICD_TYPER2_nASSGIcap (1U << 8) #define GICD_TYPER2_VIL (1U << 7) #define GICD_TYPER2_VID GENMASK(4, 0) -- cgit v1.2.3 From f3a059219bc718ccc3bf3ff894f089b7e9a93139 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:10 +0000 Subject: irqchip/gic-v4.1: Ensure mutual exclusion between vPE affinity change and RD access Before GICv4.1, all operations would be serialized with the affinity changes by virtue of using the same ITS command queue. With v4.1, things change, as invalidations (and a number of other operations) are issued using the redistributor MMIO frame. We must thus make sure that these redistributor accesses cannot race against aginst the affinity change, or we may end-up talking to the wrong redistributor. To ensure this, we expand the irq_to_cpuid() helper to take a spinlock when the LPI is mapped to a vLPI (a new per-VPE lock) on each operation that requires mutual exclusion. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20200304203330.4967-4-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index d9c34968467a..439963f4c66a 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -53,6 +53,11 @@ struct its_vpe { }; }; + /* + * Ensures mutual exclusion between affinity setting of the + * vPE and vLPI operations using vpe->col_idx. + */ + raw_spinlock_t vpe_lock; /* * This collection ID is used to indirect the target * redistributor for this VPE. The ID itself isn't involved in -- cgit v1.2.3 From 5a40fc4b934c1d1026bc401b1def8b6455ce20f0 Mon Sep 17 00:00:00 2001 From: Nagarjuna Kristam Date: Mon, 10 Feb 2020 13:41:31 +0530 Subject: phy: tegra: xusb: Add support to get companion USB 3 port Tegra XUSB host, device mode driver requires the USB 3 companion port number for corresponding USB 2 port. Add API to retrieve the same. Signed-off-by: Nagarjuna Kristam Reviewed-by: JC Kuo Acked-by: Kishon Vijay Abraham I Signed-off-by: Thierry Reding --- include/linux/phy/tegra/xusb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 1235865e7e2c..71d956935405 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -21,4 +21,6 @@ int tegra_xusb_padctl_usb3_set_lfps_detect(struct tegra_xusb_padctl *padctl, int tegra_xusb_padctl_set_vbus_override(struct tegra_xusb_padctl *padctl, bool val); int tegra_phy_xusb_utmi_port_reset(struct phy *phy); +int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, + unsigned int port); #endif /* PHY_TEGRA_XUSB_H */ -- cgit v1.2.3 From ff895103a84abc85a5f43ecabc7f67cf36e1348f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 17 Mar 2020 17:32:23 -0400 Subject: tracing: Save off entry when peeking at next entry In order to have the iterator read the buffer even when it's still updating, it requires that the ring buffer iterator saves each event in a separate location outside the ring buffer such that its use is immutable. There's one use case that saves off the event returned from the ring buffer interator and calls it again to look at the next event, before going back to use the first event. As the ring buffer iterator will only have a single copy, this use case will no longer be supported. Instead, have the one use case create its own buffer to store the first event when looking at the next event. This way, when looking at the first event again, it wont be corrupted by the second read. Link: http://lkml.kernel.org/r/20200317213415.722539921@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 6c7a10a6d71e..5c6943354049 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -85,6 +85,8 @@ struct trace_iterator { struct mutex mutex; struct ring_buffer_iter **buffer_iter; unsigned long iter_flags; + void *temp; /* temp holder */ + unsigned int temp_size; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; -- cgit v1.2.3 From bc1a72afdc4a91844928831cac85731566e03bc6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 17 Mar 2020 17:32:25 -0400 Subject: ring-buffer: Rename ring_buffer_read() to read_buffer_iter_advance() When the ring buffer was first created, the iterator followed the normal producer/consumer operations where it had both a peek() operation, that just returned the event at the current location, and a read(), that would return the event at the current location and also increment the iterator such that the next peek() or read() will return the next event. The only use of the ring_buffer_read() is currently to move the iterator to the next location and nothing now actually reads the event it returns. Rename this function to its actual use case to ring_buffer_iter_advance(), which also adds the "iter" part to the name, which is more meaningful. As the timestamp returned by ring_buffer_read() was never used, there's no reason that this new version should bother having returning it. It will also become a void function. Link: http://lkml.kernel.org/r/20200317213416.018928618@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index df0124eabece..0ae603b79b0e 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -135,8 +135,7 @@ void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts); -struct ring_buffer_event * -ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts); +void ring_buffer_iter_advance(struct ring_buffer_iter *iter); void ring_buffer_iter_reset(struct ring_buffer_iter *iter); int ring_buffer_iter_empty(struct ring_buffer_iter *iter); -- cgit v1.2.3 From 8511d72f14bc7b94a96bb2990615e1cd7c1dd21e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 19 Mar 2020 15:47:41 +0100 Subject: sysfs: fix static inline declaration of sysfs_groups_change_owner() The CONFIG_SYSFS declaration of sysfs_group_change_owner() is different from the !CONFIG_SYSFS version and thus causes build failurs when !CONFIG_SYSFS is set. Reported-by: Stephen Rothwell Fixes: 303a42769c4c ("sysfs: add sysfs_group{s}_change_owner()") Signed-off-by: Christian Brauner Reported-by: Randy Dunlap Acked-by: Randy Dunlap # build-tested Signed-off-by: David S. Miller --- include/linux/sysfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9e531ec76274..4beb51009b62 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -562,8 +562,8 @@ static inline int sysfs_groups_change_owner(struct kobject *kobj, } static inline int sysfs_group_change_owner(struct kobject *kobj, - const struct attribute_group **groups, - kuid_t kuid, kgid_t kgid) + const struct attribute_group *groups, + kuid_t kuid, kgid_t kgid) { return 0; } -- cgit v1.2.3 From e98ad464750c0894bc560d10503dae8ff90ccdac Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 Mar 2020 13:50:49 -0700 Subject: fscrypt: add FS_IOC_GET_ENCRYPTION_NONCE ioctl Add an ioctl FS_IOC_GET_ENCRYPTION_NONCE which retrieves the nonce from an encrypted file or directory. The nonce is the 16-byte random value stored in the inode's encryption xattr. It is normally used together with the master key to derive the inode's actual encryption key. The nonces are needed by automated tests that verify the correctness of the ciphertext on-disk. Except for the IV_INO_LBLK_64 case, there's no way to replicate a file's ciphertext without knowing that file's nonce. The nonces aren't secret, and the existing ciphertext verification tests in xfstests retrieve them from disk using debugfs or dump.f2fs. But in environments that lack these debugging tools, getting the nonces by manually parsing the filesystem structure would be very hard. To make this important type of testing much easier, let's just add an ioctl that retrieves the nonce. Link: https://lore.kernel.org/r/20200314205052.93294-2-ebiggers@kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 556f4adf5dc5..e3c2d2a15525 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -139,6 +139,7 @@ extern void fscrypt_free_bounce_page(struct page *bounce_page); extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); extern int fscrypt_ioctl_get_policy(struct file *, void __user *); extern int fscrypt_ioctl_get_policy_ex(struct file *, void __user *); +extern int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); extern int fscrypt_has_permitted_context(struct inode *, struct inode *); extern int fscrypt_inherit_context(struct inode *, struct inode *, void *, bool); @@ -300,6 +301,11 @@ static inline int fscrypt_ioctl_get_policy_ex(struct file *filp, return -EOPNOTSUPP; } +static inline int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) +{ + return -EOPNOTSUPP; +} + static inline int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { -- cgit v1.2.3 From 46a87b3851f0d6eb05e6d83d5c5a30df0eca8f76 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Tue, 10 Mar 2020 18:01:13 -0700 Subject: sched/core: Distribute tasks within affinity masks Currently, when updating the affinity of tasks via either cpusets.cpus, or, sched_setaffinity(); tasks not currently running within the newly specified mask will be arbitrarily assigned to the first CPU within the mask. This (particularly in the case that we are restricting masks) can result in many tasks being assigned to the first CPUs of their new masks. This: 1) Can induce scheduling delays while the load-balancer has a chance to spread them between their new CPUs. 2) Can antogonize a poor load-balancer behavior where it has a difficult time recognizing that a cross-socket imbalance has been forced by an affinity mask. This change adds a new cpumask interface to allow iterated calls to distribute within the intersection of the provided masks. The cases that this mainly affects are: - modifying cpuset.cpus - when tasks join a cpuset - when modifying a task's affinity via sched_setaffinity(2) Signed-off-by: Paul Turner Signed-off-by: Josh Don Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Qais Yousef Tested-by: Qais Yousef Link: https://lkml.kernel.org/r/20200311010113.136465-1-joshdon@google.com --- include/linux/cpumask.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d5cc88514aee..f0d895d6ac39 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -194,6 +194,11 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node) return 0; } +static inline int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) { + return cpumask_next_and(-1, src1p, src2p); +} + #define for_each_cpu(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_not(cpu, mask) \ @@ -245,6 +250,8 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); unsigned int cpumask_local_spread(unsigned int i, int node); +int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p); /** * for_each_cpu - iterate over every cpu in a mask -- cgit v1.2.3 From b05e75d611380881e73edc58a20fd8c6bb71720b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 16 Mar 2020 15:13:31 -0400 Subject: psi: Fix cpu.pressure for cpu.max and competing cgroups For simplicity, cpu pressure is defined as having more than one runnable task on a given CPU. This works on the system-level, but it has limitations in a cgrouped reality: When cpu.max is in use, it doesn't capture the time in which a task is not executing on the CPU due to throttling. Likewise, it doesn't capture the time in which a competing cgroup is occupying the CPU - meaning it only reflects cgroup-internal competitive pressure, not outside pressure. Enable tracking of currently executing tasks, and then change the definition of cpu pressure in a cgroup from NR_RUNNING > 1 to NR_RUNNING > ON_CPU which will capture the effects of cpu.max as well as competition from outside the cgroup. After this patch, a cgroup running `stress -c 1` with a cpu.max setting of 5000 10000 shows ~50% continuous CPU pressure. Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200316191333.115523-2-hannes@cmpxchg.org --- include/linux/psi_types.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 07aaf9b82241..4b7258495a04 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -14,13 +14,21 @@ enum psi_task_count { NR_IOWAIT, NR_MEMSTALL, NR_RUNNING, - NR_PSI_TASK_COUNTS = 3, + /* + * This can't have values other than 0 or 1 and could be + * implemented as a bit flag. But for now we still have room + * in the first cacheline of psi_group_cpu, and this way we + * don't have to special case any state tracking for it. + */ + NR_ONCPU, + NR_PSI_TASK_COUNTS = 4, }; /* Task state bitmasks */ #define TSK_IOWAIT (1 << NR_IOWAIT) #define TSK_MEMSTALL (1 << NR_MEMSTALL) #define TSK_RUNNING (1 << NR_RUNNING) +#define TSK_ONCPU (1 << NR_ONCPU) /* Resources that workloads could be stalled on */ enum psi_res { -- cgit v1.2.3 From 36b238d5717279163859fb6ba0f4360abcafab83 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 16 Mar 2020 15:13:32 -0400 Subject: psi: Optimize switching tasks inside shared cgroups When switching tasks running on a CPU, the psi state of a cgroup containing both of these tasks does not change. Right now, we don't exploit that, and can perform many unnecessary state changes in nested hierarchies, especially when most activity comes from one leaf cgroup. This patch implements an optimization where we only update cgroups whose state actually changes during a task switch. These are all cgroups that contain one task but not the other, up to the first shared ancestor. When both tasks are in the same group, we don't need to update anything at all. We can identify the first shared ancestor by walking the groups of the incoming task until we see TSK_ONCPU set on the local CPU; that's the first group that also contains the outgoing task. The new psi_task_switch() is similar to psi_task_change(). To allow code reuse, move the task flag maintenance code into a new function and the poll/avg worker wakeups into the shared psi_group_change(). Suggested-by: Peter Zijlstra Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200316191333.115523-3-hannes@cmpxchg.org --- include/linux/psi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psi.h b/include/linux/psi.h index 7b3de7321219..7361023f3fdd 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -17,6 +17,8 @@ extern struct psi_group psi_system; void psi_init(void); void psi_task_change(struct task_struct *task, int clear, int set); +void psi_task_switch(struct task_struct *prev, struct task_struct *next, + bool sleep); void psi_memstall_tick(struct task_struct *task, int cpu); void psi_memstall_enter(unsigned long *flags); -- cgit v1.2.3 From 1066d1b6974e095d5a6c472ad9180a957b496cd6 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Mon, 16 Mar 2020 21:28:05 -0400 Subject: psi: Move PF_MEMSTALL out of task->flags The task->flags is a 32-bits flag, in which 31 bits have already been consumed. So it is hardly to introduce other new per process flag. Currently there're still enough spaces in the bit-field section of task_struct, so we can define the memstall state as a single bit in task_struct instead. This patch also removes an out-of-date comment pointed by Matthew. Suggested-by: Johannes Weiner Signed-off-by: Yafang Shao Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/1584408485-1921-1-git-send-email-laoar.shao@gmail.com --- include/linux/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2e9199bf947b..09bddd9e69a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -785,9 +785,12 @@ struct task_struct { unsigned frozen:1; #endif #ifdef CONFIG_BLK_CGROUP - /* to be used once the psi infrastructure lands upstream. */ unsigned use_memdelay:1; #endif +#ifdef CONFIG_PSI + /* Stalled due to lack of memory */ + unsigned in_memstall:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ @@ -1480,7 +1483,6 @@ extern struct pid *cad_pid; #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -- cgit v1.2.3 From e4c2c0ff00ecaf8e245455a199b86ce22143becf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 15 Jan 2020 17:35:48 +0100 Subject: firmware: Add new platform fallback mechanism and firmware_request_platform() In some cases the platform's main firmware (e.g. the UEFI fw) may contain an embedded copy of device firmware which needs to be (re)loaded into the peripheral. Normally such firmware would be part of linux-firmware, but in some cases this is not feasible, for 2 reasons: 1) The firmware is customized for a specific use-case of the chipset / use with a specific hardware model, so we cannot have a single firmware file for the chipset. E.g. touchscreen controller firmwares are compiled specifically for the hardware model they are used with, as they are calibrated for a specific model digitizer. 2) Despite repeated attempts we have failed to get permission to redistribute the firmware. This is especially a problem with customized firmwares, these get created by the chip vendor for a specific ODM and the copyright may partially belong with the ODM, so the chip vendor cannot give a blanket permission to distribute these. This commit adds a new platform fallback mechanism to the firmware loader which will try to lookup a device fw copy embedded in the platform's main firmware if direct filesystem lookup fails. Drivers which need such embedded fw copies can enable this fallback mechanism by using the new firmware_request_platform() function. Note that for now this is only supported on EFI platforms and even on these platforms firmware_fallback_platform() only works if CONFIG_EFI_EMBEDDED_FIRMWARE is enabled (this gets selected by drivers which need this), in all other cases firmware_fallback_platform() simply always returns -ENOENT. Reported-by: Dave Olsthoorn Suggested-by: Peter Jones Acked-by: Luis Chamberlain Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200115163554.101315-5-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware.h | 9 +++++++++ include/linux/fs.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 2dd566c91d44..4bbd0afd91b7 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -44,6 +44,8 @@ int request_firmware(const struct firmware **fw, const char *name, struct device *device); int firmware_request_nowarn(const struct firmware **fw, const char *name, struct device *device); +int firmware_request_platform(const struct firmware **fw, const char *name, + struct device *device); int request_firmware_nowait( struct module *module, bool uevent, const char *name, struct device *device, gfp_t gfp, void *context, @@ -69,6 +71,13 @@ static inline int firmware_request_nowarn(const struct firmware **fw, return -EINVAL; } +static inline int firmware_request_platform(const struct firmware **fw, + const char *name, + struct device *device) +{ + return -EINVAL; +} + static inline int request_firmware_nowait( struct module *module, bool uevent, const char *name, struct device *device, gfp_t gfp, void *context, diff --git a/include/linux/fs.h b/include/linux/fs.h index f814ccd8d929..8e08f7f0cd5f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2982,6 +2982,7 @@ extern int do_pipe_flags(int *, int); id(UNKNOWN, unknown) \ id(FIRMWARE, firmware) \ id(FIRMWARE_PREALLOC_BUFFER, firmware) \ + id(FIRMWARE_EFI_EMBEDDED, firmware) \ id(MODULE, kernel-module) \ id(KEXEC_IMAGE, kexec-image) \ id(KEXEC_INITRAMFS, kexec-initramfs) \ -- cgit v1.2.3 From a9107de4b03604ce0d279315c91b31b8065ee4ea Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 11 Mar 2020 11:35:44 +0000 Subject: soundwire: stream: Add read_only_wordlength flag to port properties According to SoundWire Specification Version 1.2. "A Data Port number X (in the range 0-14) which supports only one value of WordLength may implement the WordLength field in the DPX_BlockCtrl1 Register as Read-Only, returning the fixed value of WordLength in response to reads." As WSA881x interfaces in PDM mode making the only field "WordLength" in DPX_BlockCtrl1" fixed and read-only. Behaviour of writing to this register on WSA881x soundwire slave with Qualcomm Soundwire Controller is throwing up an error. Not sure how other controllers deal with writing to readonly registers, but this patch provides a way to avoid writes to DPN_BlockCtrl1 register by providing a read_only_wordlength flag in struct sdw_dpn_prop Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20200311113545.23773-2-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index b451bb622335..2dfe14ed3bb0 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -284,6 +284,7 @@ struct sdw_dpn_audio_mode { * @max_async_buffer: Number of samples that this port can buffer in * asynchronous modes * @block_pack_mode: Type of block port mode supported + * @read_only_wordlength: Read Only wordlength field in DPN_BlockCtrl1 register * @port_encoding: Payload Channel Sample encoding schemes supported * @audio_modes: Audio modes supported */ @@ -307,6 +308,7 @@ struct sdw_dpn_prop { u32 modes; u32 max_async_buffer; bool block_pack_mode; + bool read_only_wordlength; u32 port_encoding; struct sdw_dpn_audio_mode *audio_modes; }; -- cgit v1.2.3 From 835e1b86ef8c9b466df5c9c949319690df700760 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 15 Jan 2020 17:35:53 +0100 Subject: platform/x86: touchscreen_dmi: Add EFI embedded firmware info support Sofar we have been unable to get permission from the vendors to put the firmware for touchscreens listed in touchscreen_dmi in linux-firmware. Some of the tablets with such a touchscreen have a touchscreen driver, and thus a copy of the firmware, as part of their EFI code. This commit adds the necessary info for the new EFI embedded-firmware code to extract these firmwares, making the touchscreen work OOTB without the user needing to manually add the firmware. Acked-by: Andy Shevchenko Acked-by: Ard Biesheuvel Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200115163554.101315-10-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/linux/efi_embedded_fw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi_embedded_fw.h b/include/linux/efi_embedded_fw.h index 3d066c6370c6..57eac5241303 100644 --- a/include/linux/efi_embedded_fw.h +++ b/include/linux/efi_embedded_fw.h @@ -36,6 +36,8 @@ struct efi_embedded_fw_desc { u8 sha256[32]; }; +extern const struct dmi_system_id touchscreen_dmi_table[]; + int efi_get_embedded_fw(const char *name, const u8 **dat, size_t *sz); #endif -- cgit v1.2.3 From 9058a4e980648e7d068a7f7726a8ea4c67d0e88a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:12 +0000 Subject: irqchip/gic-v4.1: Ensure mutual exclusion betwen invalidations on the same RD The GICv4.1 spec says that it is CONTRAINED UNPREDICTABLE to write to any of the GICR_INV{LPI,ALL}R registers if GICR_SYNCR.Busy == 1. To deal with it, we must ensure that only a single invalidation can happen at a time for a given redistributor. Add a per-RD lock to that effect and take it around the invalidation/syncr-read to deal with this. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200304203330.4967-6-maz@kernel.org --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c29a02678a6f..b28acfa71f82 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -652,6 +652,7 @@ struct rdists { struct { + raw_spinlock_t rd_lock; void __iomem *rd_base; struct page *pend_page; phys_addr_t phys_base; -- cgit v1.2.3 From 3c40706d05fdea421e991da50e72a29d41131a66 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:13 +0000 Subject: irqchip/gic-v4.1: Advertise support v4.1 to KVM Tell KVM that we support v4.1. Nothing uses this information so far. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200304203330.4967-7-maz@kernel.org --- include/linux/irqchip/arm-gic-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index b9850f5f1906..fa8c0455c352 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -32,6 +32,8 @@ struct gic_kvm_info { struct resource vctrl; /* vlpi support */ bool has_v4; + /* rvpeid support */ + bool has_v4_1; }; const struct gic_kvm_info *gic_get_kvm_info(void); -- cgit v1.2.3 From b2cb11f4f7643255b7703c0fcabc31a8ec478f3a Mon Sep 17 00:00:00 2001 From: Heyi Guo Date: Sat, 30 Nov 2019 15:38:49 +0800 Subject: irqchip/gic-v4: Use Inner-Shareable attributes for virtual pending tables There is no special reason to set virtual LPI pending table as non-shareable. If we choose to hard code the shareability without probing, Inner-Shareable is likely to be a better choice, as the VPEs can move around and benefit from having the redistributors snooping each other's cache, if that's something they can do. Furthermore, Hisilicon hip08 ends up with unspecified errors when mixing shareability attributes. So let's move to IS attributes for the VPT. This has also been tested on D05 and didn't show any regression. Signed-off-by: Heyi Guo [maz: rewrote commit message] Signed-off-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://lore.kernel.org/r/20191130073849.38378-1-guoheyi@huawei.com --- include/linux/irqchip/arm-gic-v3.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 83439bfb6c5b..85b105f6dc36 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -320,6 +320,9 @@ #define GICR_VPENDBASER_NonShareable \ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable) +#define GICR_VPENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable) + #define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) #define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) #define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) -- cgit v1.2.3 From 835d722ba10ac924adba1e8a46f2d80955222b4b Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Fri, 20 Mar 2020 10:52:52 -0600 Subject: coresight: cti: Initial CoreSight CTI Driver This introduces a baseline CTI driver and associated configuration files. Uses the platform agnostic naming standard for CoreSight devices, along with a generic platform probing method that currently supports device tree descriptions, but allows for the ACPI bindings to be added once these have been defined for the CTI devices. Driver will probe for the device on the AMBA bus, and load the CTI driver on CoreSight ID match to CTI IDs in tables. Initial sysfs support for enable / disable provided. Default CTI interconnection data is generated based on hardware register signal counts, with no additional connection information. Signed-off-by: Mike Leach Reviewed-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200320165303.13681-2-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 44e552de419c..b3e582d96a34 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -41,6 +41,7 @@ enum coresight_dev_type { CORESIGHT_DEV_TYPE_LINKSINK, CORESIGHT_DEV_TYPE_SOURCE, CORESIGHT_DEV_TYPE_HELPER, + CORESIGHT_DEV_TYPE_ECT, }; enum coresight_dev_subtype_sink { @@ -68,6 +69,12 @@ enum coresight_dev_subtype_helper { CORESIGHT_DEV_SUBTYPE_HELPER_CATU, }; +/* Embedded Cross Trigger (ECT) sub-types */ +enum coresight_dev_subtype_ect { + CORESIGHT_DEV_SUBTYPE_ECT_NONE, + CORESIGHT_DEV_SUBTYPE_ECT_CTI, +}; + /** * union coresight_dev_subtype - further characterisation of a type * @sink_subtype: type of sink this component is, as defined @@ -78,6 +85,8 @@ enum coresight_dev_subtype_helper { * by @coresight_dev_subtype_source. * @helper_subtype: type of helper this component is, as defined * by @coresight_dev_subtype_helper. + * @ect_subtype: type of cross trigger this component is, as + * defined by @coresight_dev_subtype_ect */ union coresight_dev_subtype { /* We have some devices which acts as LINK and SINK */ @@ -87,6 +96,7 @@ union coresight_dev_subtype { }; enum coresight_dev_subtype_source source_subtype; enum coresight_dev_subtype_helper helper_subtype; + enum coresight_dev_subtype_ect ect_subtype; }; /** @@ -196,6 +206,7 @@ static struct coresight_dev_list (var) = { \ #define sink_ops(csdev) csdev->ops->sink_ops #define link_ops(csdev) csdev->ops->link_ops #define helper_ops(csdev) csdev->ops->helper_ops +#define ect_ops(csdev) csdev->ops->ect_ops /** * struct coresight_ops_sink - basic operations for a sink @@ -262,11 +273,23 @@ struct coresight_ops_helper { int (*disable)(struct coresight_device *csdev, void *data); }; +/** + * struct coresight_ops_ect - Ops for an embedded cross trigger device + * + * @enable : Enable the device + * @disable : Disable the device + */ +struct coresight_ops_ect { + int (*enable)(struct coresight_device *csdev); + int (*disable)(struct coresight_device *csdev); +}; + struct coresight_ops { const struct coresight_ops_sink *sink_ops; const struct coresight_ops_link *link_ops; const struct coresight_ops_source *source_ops; const struct coresight_ops_helper *helper_ops; + const struct coresight_ops_ect *ect_ops; }; #ifdef CONFIG_CORESIGHT -- cgit v1.2.3 From 177af8285b59a3887e4430d2c782598083cddcd7 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Fri, 20 Mar 2020 10:52:59 -0600 Subject: coresight: cti: Enable CTI associated with devices The CoreSight subsystem enables a path of devices from source to sink. Any CTI devices associated with the path devices must be enabled at the same time. This patch adds an associated coresight_device element to the main coresight device structure, and uses this to create associations between the CTI and other devices based on the device tree data. The associated device element is used to enable CTI in conjunction with the path elements. CTI devices are reference counted so where a single CTI is associated with multiple elements on the path, it will be enabled on the first associated device enable, and disabled with the last associated device disable. Signed-off-by: Mike Leach Reviewed-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200320165303.13681-9-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index b3e582d96a34..193cc9dbf448 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -163,6 +163,8 @@ struct coresight_connection { * activated but not yet enabled. Enabling for a _sink_ * appens when a source has been selected for that it. * @ea: Device attribute for sink representation under PMU directory. + * @ect_dev: Associated cross trigger device. Not part of the trace data + * path or connections. */ struct coresight_device { struct coresight_platform_data *pdata; @@ -176,6 +178,8 @@ struct coresight_device { /* sink specific fields */ bool activated; /* true only if a sink is part of a path */ struct dev_ext_attribute *ea; + /* cross trigger handling */ + struct coresight_device *ect_dev; }; /* -- cgit v1.2.3 From 8165b57bca2167acc150b708a9a6b7322f235e91 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:26 +0000 Subject: linux/const.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split const.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-2-vincenzo.frascino@arm.com --- include/linux/const.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/const.h b/include/linux/const.h index 7b55a55f5911..81b8aae5a855 100644 --- a/include/linux/const.h +++ b/include/linux/const.h @@ -1,9 +1,6 @@ #ifndef _LINUX_CONST_H #define _LINUX_CONST_H -#include - -#define UL(x) (_UL(x)) -#define ULL(x) (_ULL(x)) +#include #endif /* _LINUX_CONST_H */ -- cgit v1.2.3 From 3945ff37d2f48d39fd1751d282c80176654049e4 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:27 +0000 Subject: linux/bits.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split bits.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-3-vincenzo.frascino@arm.com --- include/linux/bits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bits.h b/include/linux/bits.h index 669d69441a62..a740bbcf3cd2 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -3,9 +3,9 @@ #define __LINUX_BITS_H #include +#include #include -#define BIT(nr) (UL(1) << (nr)) #define BIT_ULL(nr) (ULL(1) << (nr)) #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -- cgit v1.2.3 From 3e0e9f8c6e3ca92154a74edc23a8872da921d2b6 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:28 +0000 Subject: linux/limits.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split limits.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-4-vincenzo.frascino@arm.com --- include/linux/limits.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/limits.h b/include/linux/limits.h index 76afcd24ff8c..7fc497ee1393 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -4,19 +4,8 @@ #include #include +#include -#define USHRT_MAX ((unsigned short)~0U) -#define SHRT_MAX ((short)(USHRT_MAX >> 1)) -#define SHRT_MIN ((short)(-SHRT_MAX - 1)) -#define INT_MAX ((int)(~0U >> 1)) -#define INT_MIN (-INT_MAX - 1) -#define UINT_MAX (~0U) -#define LONG_MAX ((long)(~0UL >> 1)) -#define LONG_MIN (-LONG_MAX - 1) -#define ULONG_MAX (~0UL) -#define LLONG_MAX ((long long)(~0ULL >> 1)) -#define LLONG_MIN (-LLONG_MAX - 1) -#define ULLONG_MAX (~0ULL) #define SIZE_MAX (~(size_t)0) #define PHYS_ADDR_MAX (~(phys_addr_t)0) -- cgit v1.2.3 From 14ee2ac618e49e767c9af4aa007f951eb9e35153 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:33 +0000 Subject: linux/clocksource.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split clocksource.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-9-vincenzo.frascino@arm.com --- include/linux/clocksource.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 02e3282719bd..86d143db6523 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -28,16 +28,7 @@ struct module; #include #endif -enum vdso_clock_mode { - VDSO_CLOCKMODE_NONE, -#ifdef CONFIG_GENERIC_GETTIMEOFDAY - VDSO_ARCH_CLOCKMODES, -#endif - VDSO_CLOCKMODE_MAX, - - /* Indicator for time namespace VDSO */ - VDSO_CLOCKMODE_TIMENS = INT_MAX -}; +#include /** * struct clocksource - hardware abstraction for a free running counter -- cgit v1.2.3 From b874b8358c756f2503c55686af842d99f5b1f312 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:34 +0000 Subject: linux/math64.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split math64.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-10-vincenzo.frascino@arm.com --- include/linux/math64.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/math64.h b/include/linux/math64.h index 65bef21cdddb..11a267413e8e 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -3,6 +3,7 @@ #define _LINUX_MATH64_H #include +#include #include #if BITS_PER_LONG == 64 @@ -142,25 +143,6 @@ static inline s64 div_s64(s64 dividend, s32 divisor) u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder); -static __always_inline u32 -__iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) -{ - u32 ret = 0; - - while (dividend >= divisor) { - /* The following asm() prevents the compiler from - optimising this loop into a modulo operation. */ - asm("" : "+rm"(dividend)); - - dividend -= divisor; - ret++; - } - - *remainder = dividend; - - return ret; -} - #ifndef mul_u32_u32 /* * Many a GCC version messes this up and generates a 64x64 mult :-( -- cgit v1.2.3 From 639fff1cce0f7462bb4440deeffc1048ecfc6ebb Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:35 +0000 Subject: linux/time.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split time.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-11-vincenzo.frascino@arm.com --- include/linux/time.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 8ef5e5cc9f57..4c325bf44ce0 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -111,9 +111,6 @@ static inline bool itimerspec64_valid(const struct itimerspec64 *its) */ #define time_between32(t, l, h) ((u32)(h) - (u32)(l) >= (u32)(t) - (u32)(l)) -struct timens_offset { - s64 sec; - u64 nsec; -}; +# include #endif -- cgit v1.2.3 From 9a4162316965818ea73701b611915deca97afece Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:36 +0000 Subject: linux/time32.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split time32.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-12-vincenzo.frascino@arm.com --- include/linux/time32.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time32.h b/include/linux/time32.h index cad4c3186002..0933f28214c0 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -12,19 +12,9 @@ #include #include -#define TIME_T_MAX (__kernel_old_time_t)((1UL << ((sizeof(__kernel_old_time_t) << 3) - 1)) - 1) - -typedef s32 old_time32_t; - -struct old_timespec32 { - old_time32_t tv_sec; - s32 tv_nsec; -}; +#include -struct old_timeval32 { - old_time32_t tv_sec; - s32 tv_usec; -}; +#define TIME_T_MAX (__kernel_old_time_t)((1UL << ((sizeof(__kernel_old_time_t) << 3) - 1)) - 1) struct old_itimerspec32 { struct old_timespec32 it_interval; -- cgit v1.2.3 From b72a9c5e023bf405e62b858eb5384eb34e2b7a74 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:37 +0000 Subject: linux/time64.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split time64.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-13-vincenzo.frascino@arm.com --- include/linux/time64.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index 19125489ae94..c9dcb3e5781f 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -3,6 +3,7 @@ #define _LINUX_TIME64_H #include +#include typedef __s64 time64_t; typedef __u64 timeu64_t; @@ -19,15 +20,6 @@ struct itimerspec64 { struct timespec64 it_value; }; -/* Parameters used to convert the timespec values: */ -#define MSEC_PER_SEC 1000L -#define USEC_PER_MSEC 1000L -#define NSEC_PER_USEC 1000L -#define NSEC_PER_MSEC 1000000L -#define USEC_PER_SEC 1000000L -#define NSEC_PER_SEC 1000000000L -#define FSEC_PER_SEC 1000000000000000LL - /* Located here for timespec[64]_valid_strict */ #define TIME64_MAX ((s64)~((u64)1 << 63)) #define TIME64_MIN (-TIME64_MAX - 1) -- cgit v1.2.3 From 97b01d2eabd8c0fa650a9d513febeae35dbc6de4 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:38 +0000 Subject: linux/jiffies.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split jiffies.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-14-vincenzo.frascino@arm.com --- include/linux/jiffies.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index e3279ef24d28..fed6ba96c527 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -8,6 +8,7 @@ #include #include #include +#include #include /* for HZ */ #include @@ -59,9 +60,6 @@ extern int register_refined_jiffies(long clock_tick_rate); -/* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */ -#define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ) - /* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ #define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) -- cgit v1.2.3 From cc56f32f00154ff3586e8346c91f5253882cd8a5 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:39 +0000 Subject: linux/ktime.h: Extract common header for vDSO The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Split ktime.h into linux and common headers to make the latter suitable for inclusion in the vDSO library. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-15-vincenzo.frascino@arm.com --- include/linux/ktime.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index b2bb44f87f5a..1fcfce97a020 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -253,14 +253,7 @@ static inline __must_check bool ktime_to_timespec64_cond(const ktime_t kt, } } -/* - * The resolution of the clocks. The resolution value is returned in - * the clock_getres() system call to give application programmers an - * idea of the (in)accuracy of timers. Timer values are rounded up to - * this resolution values. - */ -#define LOW_RES_NSEC TICK_NSEC -#define KTIME_LOW_RES (LOW_RES_NSEC) +#include static inline ktime_t ns_to_ktime(u64 ns) { -- cgit v1.2.3 From b558051725c5f7caa1f390ab28f3ab9db085cbfc Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 20 Mar 2020 14:53:42 +0000 Subject: linux/elfnote.h: Replace elf.h with UAPI equivalent The vDSO library should only include the necessary headers required for a userspace library (UAPI and a minimal set of kernel headers). To make this possible it is necessary to isolate from the kernel headers the common parts that are strictly necessary to build the library. Replace linux/elf.h with UAPI equivalent in elfnote.h to make the header suitable for vDSO inclusion. Signed-off-by: Vincenzo Frascino Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200320145351.32292-18-vincenzo.frascino@arm.com --- include/linux/elfnote.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h index f236f5b931b2..594d4e78654f 100644 --- a/include/linux/elfnote.h +++ b/include/linux/elfnote.h @@ -59,7 +59,7 @@ ELFNOTE_END #else /* !__ASSEMBLER__ */ -#include +#include /* * Use an anonymous structure which matches the shape of * Elf{32,64}_Nhdr, but includes the name and desc data. The size and -- cgit v1.2.3 From 80fbaf1c3f2926c834f8ca915441dfe27ce5487e Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Sat, 21 Mar 2020 12:25:55 +0100 Subject: rcuwait: Add @state argument to rcuwait_wait_event() Extend rcuwait_wait_event() with a state variable so that it is not restricted to UNINTERRUPTIBLE waits. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200321113241.824030968@linutronix.de --- include/linux/rcuwait.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 75c97e4bbc57..2ffe1ee6d482 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -3,6 +3,7 @@ #define _LINUX_RCUWAIT_H_ #include +#include /* * rcuwait provides a way of blocking and waking up a single @@ -30,23 +31,30 @@ extern void rcuwait_wake_up(struct rcuwait *w); * The caller is responsible for locking around rcuwait_wait_event(), * such that writes to @task are properly serialized. */ -#define rcuwait_wait_event(w, condition) \ +#define rcuwait_wait_event(w, condition, state) \ ({ \ + int __ret = 0; \ rcu_assign_pointer((w)->task, current); \ for (;;) { \ /* \ * Implicit barrier (A) pairs with (B) in \ * rcuwait_wake_up(). \ */ \ - set_current_state(TASK_UNINTERRUPTIBLE); \ + set_current_state(state); \ if (condition) \ break; \ \ + if (signal_pending_state(state, current)) { \ + __ret = -EINTR; \ + break; \ + } \ + \ schedule(); \ } \ \ WRITE_ONCE((w)->task, NULL); \ __set_current_state(TASK_RUNNING); \ + __ret; \ }) #endif /* _LINUX_RCUWAIT_H_ */ -- cgit v1.2.3 From a5c6234e10280b3ec65e2410ce34904a2580e5f8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Mar 2020 12:26:00 +0100 Subject: completion: Use simple wait queues completion uses a wait_queue_head_t to enqueue waiters. wait_queue_head_t contains a spinlock_t to protect the list of waiters which excludes it from being used in truly atomic context on a PREEMPT_RT enabled kernel. The spinlock in the wait queue head cannot be replaced by a raw_spinlock because: - wait queues can have custom wakeup callbacks, which acquire other spinlock_t locks and have potentially long execution times - wake_up() walks an unbounded number of list entries during the wake up and may wake an unbounded number of waiters. For simplicity and performance reasons complete() should be usable on PREEMPT_RT enabled kernels. completions do not use custom wakeup callbacks and are usually single waiter, except for a few corner cases. Replace the wait queue in the completion with a simple wait queue (swait), which uses a raw_spinlock_t for protecting the waiter list and therefore is safe to use inside truly atomic regions on PREEMPT_RT. There is no semantical or functional change: - completions use the exclusive wait mode which is what swait provides - complete() wakes one exclusive waiter - complete_all() wakes all waiters while holding the lock which protects the wait queue against newly incoming waiters. The conversion to swait preserves this behaviour. complete_all() might cause unbound latencies with a large number of waiters being woken at once, but most complete_all() usage sites are either in testing or initialization code or have only a really small number of concurrent waiters which for now does not cause a latency problem. Keep it simple for now. The fixup of the warning check in the USB gadget driver is just a straight forward conversion of the lockless waiter check from one waitqueue type to the other. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Greg Kroah-Hartman Reviewed-by: Davidlohr Bueso Reviewed-by: Joel Fernandes (Google) Acked-by: Linus Torvalds Link: https://lkml.kernel.org/r/20200321113242.317954042@linutronix.de --- include/linux/completion.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index 519e94915d18..bf8e77001f18 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -9,7 +9,7 @@ * See kernel/sched/completion.c for details. */ -#include +#include /* * struct completion - structure used to maintain state for a "completion" @@ -25,7 +25,7 @@ */ struct completion { unsigned int done; - wait_queue_head_t wait; + struct swait_queue_head wait; }; #define init_completion_map(x, m) __init_completion(x) @@ -34,7 +34,7 @@ static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} #define COMPLETION_INITIALIZER(work) \ - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \ (*({ init_completion_map(&(work), &(map)); &(work); })) @@ -85,7 +85,7 @@ static inline void complete_release(struct completion *x) {} static inline void __init_completion(struct completion *x) { x->done = 0; - init_waitqueue_head(&x->wait); + init_swait_queue_head(&x->wait); } /** -- cgit v1.2.3 From de8f5e4f2dc1f032b46afda0a78cab5456974f89 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 21 Mar 2020 12:26:01 +0100 Subject: lockdep: Introduce wait-type checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend lockdep to validate lock wait-type context. The current wait-types are: LD_WAIT_FREE, /* wait free, rcu etc.. */ LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ Where lockdep validates that the current lock (the one being acquired) fits in the current wait-context (as generated by the held stack). This ensures that there is no attempt to acquire mutexes while holding spinlocks, to acquire spinlocks while holding raw_spinlocks and so on. In other words, its a more fancy might_sleep(). Obviously RCU made the entire ordeal more complex than a simple single value test because RCU can be acquired in (pretty much) any context and while it presents a context to nested locks it is not the same as it got acquired in. Therefore its necessary to split the wait_type into two values, one representing the acquire (outer) and one representing the nested context (inner). For most 'normal' locks these two are the same. [ To make static initialization easier we have the rule that: .outer == INV means .outer == .inner; because INV == 0. ] It further means that its required to find the minimal .inner of the held stack to compare against the outer of the new lock; because while 'normal' RCU presents a CONFIG type to nested locks, if it is taken while already holding a SPIN type it obviously doesn't relax the rules. Below is an example output generated by the trivial test code: raw_spin_lock(&foo); spin_lock(&bar); spin_unlock(&bar); raw_spin_unlock(&foo); [ BUG: Invalid wait context ] ----------------------------- swapper/0/1 is trying to lock: ffffc90000013f20 (&bar){....}-{3:3}, at: kernel_init+0xdb/0x187 other info that might help us debug this: 1 lock held by swapper/0/1: #0: ffffc90000013ee0 (&foo){+.+.}-{2:2}, at: kernel_init+0xd1/0x187 The way to read it is to look at the new -{n,m} part in the lock description; -{3:3} for the attempted lock, and try and match that up to the held locks, which in this case is the one: -{2,2}. This tells that the acquiring lock requires a more relaxed environment than presented by the lock stack. Currently only the normal locks and RCU are converted, the rest of the lockdep users defaults to .inner = INV which is ignored. More conversions can be done when desired. The check for spinlock_t nesting is not enabled by default. It's a separate config option for now as there are known problems which are currently addressed. The config option allows to identify these problems and to verify that the solutions found are indeed solving them. The config switch will be removed and the checks will permanently enabled once the vast majority of issues has been addressed. [ bigeasy: Move LD_WAIT_FREE,… out of CONFIG_LOCKDEP to avoid compile failure with CONFIG_DEBUG_SPINLOCK + !CONFIG_LOCKDEP] [ tglx: Add the config option ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200321113242.427089655@linutronix.de --- include/linux/irqflags.h | 8 ++++- include/linux/lockdep.h | 71 +++++++++++++++++++++++++++++++++++------- include/linux/mutex.h | 7 +++-- include/linux/rwlock_types.h | 6 +++- include/linux/rwsem.h | 6 +++- include/linux/sched.h | 1 + include/linux/spinlock.h | 35 +++++++++++++++------ include/linux/spinlock_types.h | 24 +++++++++++--- 8 files changed, 128 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 21619c92c377..fdaf28601cbe 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -37,7 +37,12 @@ # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) # define trace_hardirq_enter() \ do { \ - current->hardirq_context++; \ + if (!current->hardirq_context++) \ + current->hardirq_threaded = 0; \ +} while (0) +# define trace_hardirq_threaded() \ +do { \ + current->hardirq_threaded = 1; \ } while (0) # define trace_hardirq_exit() \ do { \ @@ -59,6 +64,7 @@ do { \ # define trace_hardirqs_enabled(p) 0 # define trace_softirqs_enabled(p) 0 # define trace_hardirq_enter() do { } while (0) +# define trace_hardirq_threaded() do { } while (0) # define trace_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 664f52c6dd4c..425b4ceb7cd0 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -21,6 +21,22 @@ extern int lock_stat; #include +enum lockdep_wait_type { + LD_WAIT_INV = 0, /* not checked, catch all */ + + LD_WAIT_FREE, /* wait free, rcu etc.. */ + LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ + +#ifdef CONFIG_PROVE_RAW_LOCK_NESTING + LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ +#else + LD_WAIT_CONFIG = LD_WAIT_SPIN, +#endif + LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ + + LD_WAIT_MAX, /* must be last */ +}; + #ifdef CONFIG_LOCKDEP #include @@ -111,6 +127,9 @@ struct lock_class { int name_version; const char *name; + short wait_type_inner; + short wait_type_outer; + #ifdef CONFIG_LOCK_STAT unsigned long contention_point[LOCKSTAT_POINTS]; unsigned long contending_point[LOCKSTAT_POINTS]; @@ -158,6 +177,8 @@ struct lockdep_map { struct lock_class_key *key; struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; const char *name; + short wait_type_outer; /* can be taken in this context */ + short wait_type_inner; /* presents this context */ #ifdef CONFIG_LOCK_STAT int cpu; unsigned long ip; @@ -299,8 +320,21 @@ extern void lockdep_unregister_key(struct lock_class_key *key); * to lockdep: */ -extern void lockdep_init_map(struct lockdep_map *lock, const char *name, - struct lock_class_key *key, int subclass); +extern void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass, short inner, short outer); + +static inline void +lockdep_init_map_wait(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass, short inner) +{ + lockdep_init_map_waits(lock, name, key, subclass, inner, LD_WAIT_INV); +} + +static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass) +{ + lockdep_init_map_wait(lock, name, key, subclass, LD_WAIT_INV); +} /* * Reinitialize a lock key - for cases where there is special locking or @@ -308,18 +342,29 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, * of dependencies wrong: they are either too broad (they need a class-split) * or they are too narrow (they suffer from a false class-split): */ -#define lockdep_set_class(lock, key) \ - lockdep_init_map(&(lock)->dep_map, #key, key, 0) -#define lockdep_set_class_and_name(lock, key, name) \ - lockdep_init_map(&(lock)->dep_map, name, key, 0) -#define lockdep_set_class_and_subclass(lock, key, sub) \ - lockdep_init_map(&(lock)->dep_map, #key, key, sub) -#define lockdep_set_subclass(lock, sub) \ - lockdep_init_map(&(lock)->dep_map, #lock, \ - (lock)->dep_map.key, sub) +#define lockdep_set_class(lock, key) \ + lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer) + +#define lockdep_set_class_and_name(lock, key, name) \ + lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer) + +#define lockdep_set_class_and_subclass(lock, key, sub) \ + lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer) + +#define lockdep_set_subclass(lock, sub) \ + lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer) #define lockdep_set_novalidate_class(lock) \ lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) + /* * Compare locking classes */ @@ -432,6 +477,10 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) # define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) +# define lockdep_init_map_waits(lock, name, key, sub, inner, outer) \ + do { (void)(name); (void)(key); } while (0) +# define lockdep_init_map_wait(lock, name, key, sub, inner) \ + do { (void)(name); (void)(key); } while (0) # define lockdep_init_map(lock, name, key, sub) \ do { (void)(name); (void)(key); } while (0) # define lockdep_set_class(lock, key) do { (void)(key); } while (0) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index aca8f36dfac9..ae197cc00cc8 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -109,8 +109,11 @@ do { \ } while (0) #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ - , .dep_map = { .name = #lockname } +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ + , .dep_map = { \ + .name = #lockname, \ + .wait_type_inner = LD_WAIT_SLEEP, \ + } #else # define __DEP_MAP_MUTEX_INITIALIZER(lockname) #endif diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h index 857a72ceb794..3bd03e18061c 100644 --- a/include/linux/rwlock_types.h +++ b/include/linux/rwlock_types.h @@ -22,7 +22,11 @@ typedef struct { #define RWLOCK_MAGIC 0xdeaf1eed #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +# define RW_DEP_MAP_INIT(lockname) \ + .dep_map = { \ + .name = #lockname, \ + .wait_type_inner = LD_WAIT_CONFIG, \ + } #else # define RW_DEP_MAP_INIT(lockname) #endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 8a418d9eeb7a..7e5b2a4eb560 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -65,7 +65,11 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) /* Common initializer macros and functions */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_DEP_MAP_INIT(lockname) \ + , .dep_map = { \ + .name = #lockname, \ + .wait_type_inner = LD_WAIT_SLEEP, \ + } #else # define __RWSEM_DEP_MAP_INIT(lockname) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 04278493bf15..4d3b9ecce074 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -970,6 +970,7 @@ struct task_struct { #ifdef CONFIG_TRACE_IRQFLAGS unsigned int irq_events; + unsigned int hardirq_threaded; unsigned long hardirq_enable_ip; unsigned long hardirq_disable_ip; unsigned int hardirq_enable_event; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 031ce8617df8..d3770b3f9d9a 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -93,12 +93,13 @@ #ifdef CONFIG_DEBUG_SPINLOCK extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, - struct lock_class_key *key); -# define raw_spin_lock_init(lock) \ -do { \ - static struct lock_class_key __key; \ - \ - __raw_spin_lock_init((lock), #lock, &__key); \ + struct lock_class_key *key, short inner); + +# define raw_spin_lock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __raw_spin_lock_init((lock), #lock, &__key, LD_WAIT_SPIN); \ } while (0) #else @@ -327,12 +328,26 @@ static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock) return &lock->rlock; } -#define spin_lock_init(_lock) \ -do { \ - spinlock_check(_lock); \ - raw_spin_lock_init(&(_lock)->rlock); \ +#ifdef CONFIG_DEBUG_SPINLOCK + +# define spin_lock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __raw_spin_lock_init(spinlock_check(lock), \ + #lock, &__key, LD_WAIT_CONFIG); \ +} while (0) + +#else + +# define spin_lock_init(_lock) \ +do { \ + spinlock_check(_lock); \ + *(_lock) = __SPIN_LOCK_UNLOCKED(_lock); \ } while (0) +#endif + static __always_inline void spin_lock(spinlock_t *lock) { raw_spin_lock(&lock->rlock); diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 24b4e6f2c1a2..6102e6bff3ae 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -33,8 +33,18 @@ typedef struct raw_spinlock { #define SPINLOCK_OWNER_INIT ((void *)-1L) #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +# define RAW_SPIN_DEP_MAP_INIT(lockname) \ + .dep_map = { \ + .name = #lockname, \ + .wait_type_inner = LD_WAIT_SPIN, \ + } +# define SPIN_DEP_MAP_INIT(lockname) \ + .dep_map = { \ + .name = #lockname, \ + .wait_type_inner = LD_WAIT_CONFIG, \ + } #else +# define RAW_SPIN_DEP_MAP_INIT(lockname) # define SPIN_DEP_MAP_INIT(lockname) #endif @@ -51,7 +61,7 @@ typedef struct raw_spinlock { { \ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ SPIN_DEBUG_INIT(lockname) \ - SPIN_DEP_MAP_INIT(lockname) } + RAW_SPIN_DEP_MAP_INIT(lockname) } #define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) @@ -72,11 +82,17 @@ typedef struct spinlock { }; } spinlock_t; +#define ___SPIN_LOCK_INITIALIZER(lockname) \ + { \ + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ + SPIN_DEBUG_INIT(lockname) \ + SPIN_DEP_MAP_INIT(lockname) } + #define __SPIN_LOCK_INITIALIZER(lockname) \ - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } + { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } } #define __SPIN_LOCK_UNLOCKED(lockname) \ - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) + (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname) #define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) -- cgit v1.2.3 From 40db173965c05a1d803451240ed41707d5bd978d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 21 Mar 2020 12:26:02 +0100 Subject: lockdep: Add hrtimer context tracing bits Set current->irq_config = 1 for hrtimers which are not marked to expire in hard interrupt context during hrtimer_init(). These timers will expire in softirq context on PREEMPT_RT. Setting this allows lockdep to differentiate these timers. If a timer is marked to expire in hard interrupt context then the timer callback is not supposed to acquire a regular spinlock instead of a raw_spinlock in the expiry callback. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200321113242.534508206@linutronix.de --- include/linux/irqflags.h | 15 +++++++++++++++ include/linux/sched.h | 1 + 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index fdaf28601cbe..9c17f9c827aa 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -56,6 +56,19 @@ do { \ do { \ current->softirq_context--; \ } while (0) + +# define lockdep_hrtimer_enter(__hrtimer) \ + do { \ + if (!__hrtimer->is_hard) \ + current->irq_config = 1; \ + } while (0) + +# define lockdep_hrtimer_exit(__hrtimer) \ + do { \ + if (!__hrtimer->is_hard) \ + current->irq_config = 0; \ + } while (0) + #else # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) @@ -68,6 +81,8 @@ do { \ # define trace_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) +# define lockdep_hrtimer_enter(__hrtimer) do { } while (0) +# define lockdep_hrtimer_exit(__hrtimer) do { } while (0) #endif #if defined(CONFIG_IRQSOFF_TRACER) || \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d3b9ecce074..933914cdf219 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -983,6 +983,7 @@ struct task_struct { unsigned int softirq_enable_event; int softirqs_enabled; int softirq_context; + int irq_config; #endif #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 49915ac35ca7b07c54295a72d905be5064afb89e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 21 Mar 2020 12:26:03 +0100 Subject: lockdep: Annotate irq_work Mark irq_work items with IRQ_WORK_HARD_IRQ which should be invoked in hardirq context even on PREEMPT_RT. IRQ_WORK without this flag will be invoked in softirq context on PREEMPT_RT. Set ->irq_config to 1 for the IRQ_WORK items which are invoked in softirq context so lockdep knows that these can safely acquire a spinlock_t. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200321113242.643576700@linutronix.de --- include/linux/irq_work.h | 2 ++ include/linux/irqflags.h | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 02da997ad12c..3b752e80c017 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -18,6 +18,8 @@ /* Doesn't want IPI, wait for tick: */ #define IRQ_WORK_LAZY BIT(2) +/* Run hard IRQ context, even on RT */ +#define IRQ_WORK_HARD_IRQ BIT(3) #define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 9c17f9c827aa..f23f540e0ebb 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -69,6 +69,17 @@ do { \ current->irq_config = 0; \ } while (0) +# define lockdep_irq_work_enter(__work) \ + do { \ + if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ + current->irq_config = 1; \ + } while (0) +# define lockdep_irq_work_exit(__work) \ + do { \ + if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ + current->irq_config = 0; \ + } while (0) + #else # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) @@ -83,6 +94,8 @@ do { \ # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) do { } while (0) # define lockdep_hrtimer_exit(__hrtimer) do { } while (0) +# define lockdep_irq_work_enter(__work) do { } while (0) +# define lockdep_irq_work_exit(__work) do { } while (0) #endif #if defined(CONFIG_IRQSOFF_TRACER) || \ -- cgit v1.2.3 From d53f2b62fcb63f6547c10d8c62bca19e957b0eef Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 21 Mar 2020 12:26:04 +0100 Subject: lockdep: Add posixtimer context tracing bits Splitting run_posix_cpu_timers() into two parts is work in progress which is stuck on other entry code related problems. The heavy lifting which involves locking of sighand lock will be moved into task context so the necessary execution time is burdened on the task and not on interrupt context. Until this work completes lockdep with the spinlock nesting rules enabled would emit warnings for this known context. Prevent it by setting "->irq_config = 1" for the invocation of run_posix_cpu_timers() so lockdep does not complain when sighand lock is acquried. This will be removed once the split is completed. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200321113242.751182723@linutronix.de --- include/linux/irqflags.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index f23f540e0ebb..a16adbb58f66 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -69,6 +69,16 @@ do { \ current->irq_config = 0; \ } while (0) +# define lockdep_posixtimer_enter() \ + do { \ + current->irq_config = 1; \ + } while (0) + +# define lockdep_posixtimer_exit() \ + do { \ + current->irq_config = 0; \ + } while (0) + # define lockdep_irq_work_enter(__work) \ do { \ if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ @@ -94,6 +104,8 @@ do { \ # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) do { } while (0) # define lockdep_hrtimer_exit(__hrtimer) do { } while (0) +# define lockdep_posixtimer_enter() do { } while (0) +# define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) # define lockdep_irq_work_exit(__work) do { } while (0) #endif -- cgit v1.2.3 From 2502ec37a7b228b34c1e2e89480f98b92f53046a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 12:56:40 +0100 Subject: lockdep: Rename trace_hardirq_{enter,exit}() Continue what commit: d820ac4c2fa8 ("locking: rename trace_softirq_[enter|exit] => lockdep_softirq_[enter|exit]") started, rename these to avoid confusing them with tracepoints. Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Will Deacon Link: https://lkml.kernel.org/r/20200320115859.060481361@infradead.org --- include/linux/hardirq.h | 8 ++++---- include/linux/irqflags.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index da0af631ded5..7c8b82f69288 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -37,7 +37,7 @@ extern void rcu_nmi_exit(void); do { \ account_irq_enter_time(current); \ preempt_count_add(HARDIRQ_OFFSET); \ - trace_hardirq_enter(); \ + lockdep_hardirq_enter(); \ } while (0) /* @@ -50,7 +50,7 @@ extern void irq_enter(void); */ #define __irq_exit() \ do { \ - trace_hardirq_exit(); \ + lockdep_hardirq_exit(); \ account_irq_exit_time(current); \ preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) @@ -74,12 +74,12 @@ extern void irq_exit(void); BUG_ON(in_nmi()); \ preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ - trace_hardirq_enter(); \ + lockdep_hardirq_enter(); \ } while (0) #define nmi_exit() \ do { \ - trace_hardirq_exit(); \ + lockdep_hardirq_exit(); \ rcu_nmi_exit(); \ BUG_ON(!in_nmi()); \ preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 21619c92c377..7c4e64589250 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -35,11 +35,11 @@ # define trace_softirq_context(p) ((p)->softirq_context) # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) -# define trace_hardirq_enter() \ +# define lockdep_hardirq_enter() \ do { \ current->hardirq_context++; \ } while (0) -# define trace_hardirq_exit() \ +# define lockdep_hardirq_exit() \ do { \ current->hardirq_context--; \ } while (0) @@ -58,8 +58,8 @@ do { \ # define trace_softirq_context(p) 0 # define trace_hardirqs_enabled(p) 0 # define trace_softirqs_enabled(p) 0 -# define trace_hardirq_enter() do { } while (0) -# define trace_hardirq_exit() do { } while (0) +# define lockdep_hardirq_enter() do { } while (0) +# define lockdep_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) #endif -- cgit v1.2.3 From 0d38453c85b426e47375346812d2271680c47988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Mar 2020 12:56:41 +0100 Subject: lockdep: Rename trace_softirqs_{on,off}() Continue what commit: d820ac4c2fa8 ("locking: rename trace_softirq_[enter|exit] => lockdep_softirq_[enter|exit]") started, rename these to avoid confusing them with tracepoints. git grep -l "trace_softirqs_\(on\|off\)" | while read file; do sed -ie 's/trace_softirqs_\(on\|off\)/lockdep_softirqs_\1/g' $file; done Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Will Deacon Link: https://lkml.kernel.org/r/20200320115859.119434738@infradead.org --- include/linux/irqflags.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 7c4e64589250..7ca1f2126ae1 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -15,15 +15,15 @@ #include #include -/* Currently trace_softirqs_on/off is used only by lockdep */ +/* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING - extern void trace_softirqs_on(unsigned long ip); - extern void trace_softirqs_off(unsigned long ip); + extern void lockdep_softirqs_on(unsigned long ip); + extern void lockdep_softirqs_off(unsigned long ip); extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); #else - static inline void trace_softirqs_on(unsigned long ip) { } - static inline void trace_softirqs_off(unsigned long ip) { } + static inline void lockdep_softirqs_on(unsigned long ip) { } + static inline void lockdep_softirqs_off(unsigned long ip) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } #endif -- cgit v1.2.3 From ef996916e78e03d25e56c2d372e5e21fdb471882 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Mar 2020 12:56:42 +0100 Subject: lockdep: Rename trace_{hard,soft}{irq_context,irqs_enabled}() Continue what commit: d820ac4c2fa8 ("locking: rename trace_softirq_[enter|exit] => lockdep_softirq_[enter|exit]") started, rename these to avoid confusing them with tracepoints. git grep -l "trace_\(soft\|hard\)\(irq_context\|irqs_enabled\)" | while read file; do sed -ie 's/trace_\(soft\|hard\)\(irq_context\|irqs_enabled\)/lockdep_\1\2/g' $file; done Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Acked-by: Will Deacon Link: https://lkml.kernel.org/r/20200320115859.178626842@infradead.org --- include/linux/irqflags.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 7ca1f2126ae1..f4c3907e241a 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -31,10 +31,10 @@ #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define trace_hardirq_context(p) ((p)->hardirq_context) -# define trace_softirq_context(p) ((p)->softirq_context) -# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) -# define trace_softirqs_enabled(p) ((p)->softirqs_enabled) +# define lockdep_hardirq_context(p) ((p)->hardirq_context) +# define lockdep_softirq_context(p) ((p)->softirq_context) +# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) # define lockdep_hardirq_enter() \ do { \ current->hardirq_context++; \ @@ -54,10 +54,10 @@ do { \ #else # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) -# define trace_hardirq_context(p) 0 -# define trace_softirq_context(p) 0 -# define trace_hardirqs_enabled(p) 0 -# define trace_softirqs_enabled(p) 0 +# define lockdep_hardirq_context(p) 0 +# define lockdep_softirq_context(p) 0 +# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) -- cgit v1.2.3 From 141f15c66d9472c642f38aae010ed68289036d7c Mon Sep 17 00:00:00 2001 From: Denis Osterland-Heim Date: Sat, 21 Mar 2020 08:15:53 +0000 Subject: leds: pwm: remove header The header is only used by leds_pwm.c, so move contents to leds_pwm.c and remove it. Apply minor changes suggested by checkpatch. Remove deprecated and unused pwm_id member. Suggested-by: Pavel Machek Signed-off-by: Denis Osterland-Heim Signed-off-by: Pavel Machek --- include/linux/leds_pwm.h | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 include/linux/leds_pwm.h (limited to 'include/linux') diff --git a/include/linux/leds_pwm.h b/include/linux/leds_pwm.h deleted file mode 100644 index 93d101d28943..000000000000 --- a/include/linux/leds_pwm.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * PWM LED driver data - see drivers/leds/leds-pwm.c - */ -#ifndef __LINUX_LEDS_PWM_H -#define __LINUX_LEDS_PWM_H - -struct led_pwm { - const char *name; - const char *default_trigger; - unsigned pwm_id __deprecated; - u8 active_low; - unsigned max_brightness; - unsigned pwm_period_ns; -}; - -struct led_pwm_platform_data { - int num_leds; - struct led_pwm *leds; -}; - -#endif -- cgit v1.2.3 From 9c40365a65d62d7c06a95fb331b3442cb02d2fd9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 2 Mar 2020 12:29:39 +0100 Subject: threads: Update PID limit comment according to futex UAPI change The futex UAPI changed back in commit 76b81e2b0e22 ("[PATCH] lightweight robust futexes updates 2"), which landed in v2.6.17: FUTEX_TID_MASK is now 0x3fffffff instead of 0x1fffffff. Update the corresponding comment in include/linux/threads.h. Documentation mentions that only the lower 29 bits are available for TID storage, but as the UAPI header released the bit already via FUTEX_TID_MASK, this is moot as well. Fix it up. [ tglx: Fixed up documentation ] Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200302112939.8068-1-jannh@google.com --- include/linux/threads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/threads.h b/include/linux/threads.h index 3086dba525e2..18d5a74bcc3d 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h @@ -29,7 +29,7 @@ /* * A maximum of 4 million PIDs should be enough for a while. - * [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.] + * [NOTE: PID/TIDs are limited to 2^30 ~= 1 billion, see FUTEX_TID_MASK.] */ #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \ (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT)) -- cgit v1.2.3 From fad7c9020948eab2bc4661eade4e1ef357279590 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 22 Mar 2020 17:57:02 +0100 Subject: err.h: remove deprecated PTR_RET for good MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initially, commit fa9ee9c4b988 ("include/linux/err.h: add a function to cast error-pointers to a return value") from Uwe Kleine-König introduced PTR_RET in 03/2011. Then, in 07/2013, commit 6e8b8726ad50 ("PTR_RET is now PTR_ERR_OR_ZERO") from Rusty Russell renamed PTR_RET to PTR_ERR_OR_ZERO, and left PTR_RET as deprecated-marked alias. After six years since the renaming and various repeated cleanups in the meantime, it is time to finally remove the deprecated PTR_RET for good. Signed-off-by: Lukas Bulwahn Acked-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- include/linux/err.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/err.h b/include/linux/err.h index 87be24350e91..a139c64aef2a 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -62,9 +62,6 @@ static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) return 0; } -/* Deprecated */ -#define PTR_RET(p) PTR_ERR_OR_ZERO(p) - #endif #endif /* _LINUX_ERR_H */ -- cgit v1.2.3 From c84ef3c5e65ccf99a7a91a4d731ebb5d6331a178 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 3 Mar 2020 19:59:26 +0530 Subject: f2fs: Add a new CP flag to help fsck fix resize SPO issues Add and set a new CP flag CP_RESIZEFS_FLAG during online resize FS to help fsck fix the metadata mismatch that may happen due to SPO during resize, where SB got updated but CP data couldn't be written yet. fsck errors - Info: CKPT version = 6ed7bccb Wrong user_block_count(2233856) [f2fs_do_mount:3365] Checkpoint is polluted Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ac3f4888b3df..3c383ddd92dd 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -125,6 +125,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_RESIZEFS_FLAG 0x00004000 #define CP_DISABLED_QUICK_FLAG 0x00002000 #define CP_DISABLED_FLAG 0x00001000 #define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 -- cgit v1.2.3 From 086b2d78375cffe58f5341359bebec0650793811 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Mar 2020 20:55:20 +0100 Subject: PM: remove s390 specific callbacks ARCH_SAVE_PAGE_KEYS has been introduced in order to be able to save and restore s390 specific storage keys into a hibernation image. With hibernation support removed from s390 there is no point in keeping the callbacks. Acked-by: Christian Borntraeger Acked-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- include/linux/suspend.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 2b2055b035ee..4fcc6fd0cbd6 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -566,38 +566,4 @@ static inline void queue_up_suspend_work(void) {} #endif /* !CONFIG_PM_AUTOSLEEP */ -#ifdef CONFIG_ARCH_SAVE_PAGE_KEYS -/* - * The ARCH_SAVE_PAGE_KEYS functions can be used by an architecture - * to save/restore additional information to/from the array of page - * frame numbers in the hibernation image. For s390 this is used to - * save and restore the storage key for each page that is included - * in the hibernation image. - */ -unsigned long page_key_additional_pages(unsigned long pages); -int page_key_alloc(unsigned long pages); -void page_key_free(void); -void page_key_read(unsigned long *pfn); -void page_key_memorize(unsigned long *pfn); -void page_key_write(void *address); - -#else /* !CONFIG_ARCH_SAVE_PAGE_KEYS */ - -static inline unsigned long page_key_additional_pages(unsigned long pages) -{ - return 0; -} - -static inline int page_key_alloc(unsigned long pages) -{ - return 0; -} - -static inline void page_key_free(void) {} -static inline void page_key_read(unsigned long *pfn) {} -static inline void page_key_memorize(unsigned long *pfn) {} -static inline void page_key_write(void *address) {} - -#endif /* !CONFIG_ARCH_SAVE_PAGE_KEYS */ - #endif /* _LINUX_SUSPEND_H */ -- cgit v1.2.3 From d01cd62400b3a42d012069518614d944c35f9c99 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Feb 2020 17:37:49 +0200 Subject: uuid: Add inline helpers to import / export UUIDs Sometimes we may need to import UUID from or export to the raw buffer, which is provided outside of kernel and can't be declared as UUID type. With current API this operation will require an explicit casting to one of UUID types and length, that is always a constant derived as sizeof the certain UUID type. Provide a helpful set of inline helpers to minimize developer's effort in the cases when raw buffers are involved. Suggested-by: David Sterba Acked-by: Christoph Hellwig Signed-off-by: Andy Shevchenko Signed-off-by: David Sterba --- include/linux/uuid.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 0c631e2a73b6..8e4a5000da03 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -43,6 +43,16 @@ static inline void guid_copy(guid_t *dst, const guid_t *src) memcpy(dst, src, sizeof(guid_t)); } +static inline void import_guid(guid_t *dst, const __u8 *src) +{ + memcpy(dst, src, sizeof(guid_t)); +} + +static inline void export_guid(__u8 *dst, const guid_t *src) +{ + memcpy(dst, src, sizeof(guid_t)); +} + static inline bool guid_is_null(const guid_t *guid) { return guid_equal(guid, &guid_null); @@ -58,6 +68,16 @@ static inline void uuid_copy(uuid_t *dst, const uuid_t *src) memcpy(dst, src, sizeof(uuid_t)); } +static inline void import_uuid(uuid_t *dst, const __u8 *src) +{ + memcpy(dst, src, sizeof(uuid_t)); +} + +static inline void export_uuid(__u8 *dst, const uuid_t *src) +{ + memcpy(dst, src, sizeof(uuid_t)); +} + static inline bool uuid_is_null(const uuid_t *uuid) { return uuid_equal(uuid, &uuid_null); -- cgit v1.2.3 From 48a2e88f53aea4dface64883157ad3c428132f75 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Feb 2020 17:37:50 +0200 Subject: uuid: Provide a GUID generator for raw buffer In some cases we would like to generate a GUID and export it. Though it would require either casting to internal kernel types or an intermediate buffer. Instead we may achieve this by supplying a pointer to raw buffer and make a complimentary API to existing one for UUIDs. Reviewed-by: Christoph Hellwig Signed-off-by: Andy Shevchenko Signed-off-by: David Sterba --- include/linux/uuid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 8e4a5000da03..3780460a9a85 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -84,6 +84,7 @@ static inline bool uuid_is_null(const uuid_t *uuid) } void generate_random_uuid(unsigned char uuid[16]); +void generate_random_guid(unsigned char guid[16]); extern void guid_gen(guid_t *u); extern void uuid_gen(uuid_t *u); -- cgit v1.2.3 From 86eba9d34c41d5c2cffd82febafa34de574da6f7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Feb 2020 17:37:52 +0200 Subject: uuid: Remove no more needed macro uuid_le_gen() is no used anymore, remove it for good. Reviewed-by: Christoph Hellwig Signed-off-by: Andy Shevchenko Signed-off-by: David Sterba --- include/linux/uuid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 3780460a9a85..d41b0d3e9474 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -98,7 +98,6 @@ int guid_parse(const char *uuid, guid_t *u); int uuid_parse(const char *uuid, uuid_t *u); /* backwards compatibility, don't use in new code */ -#define uuid_le_gen(u) guid_gen(u) #define uuid_le_to_bin(guid, u) guid_parse(guid, u) static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) -- cgit v1.2.3 From 6473ea760ca1707ade74de5b57e74189d14f8e10 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:09 +0200 Subject: fsnotify: tidy up FS_ and FAN_ constants Order by value, so the free value ranges are easier to find. Link: https://lore.kernel.org/r/20200319151022.31456-2-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1915bdba2fad..db3cabb4600e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -49,16 +49,15 @@ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ -#define FS_ISDIR 0x40000000 /* event occurred against dir */ -#define FS_IN_ONESHOT 0x80000000 /* only send event once */ - -#define FS_DN_RENAME 0x10000000 /* file renamed */ -#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ - /* This inode cares about things that happen to its children. Always set for * dnotify and inotify. */ #define FS_EVENT_ON_CHILD 0x08000000 +#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ +#define FS_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_IN_ONESHOT 0x80000000 /* only send event once */ + #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) /* -- cgit v1.2.3 From eae36a2b8324c9dd0a66b2ae32abd4a456e49c39 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:10 +0200 Subject: fsnotify: factor helpers fsnotify_dentry() and fsnotify_file() Most of the code in fsnotify hooks is boiler plate of one or the other. Link: https://lore.kernel.org/r/20200319151022.31456-3-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 99 ++++++++++++++++++------------------------------ 1 file changed, 37 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a2d5d175d3c1..f54936aa0365 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -41,16 +41,36 @@ static inline int fsnotify_parent(const struct path *path, } /* - * Simple wrapper to consolidate calls fsnotify_parent()/fsnotify() when - * an event is on a path. + * Simple wrappers to consolidate calls fsnotify_parent()/fsnotify() when + * an event is on a file/dentry. */ -static inline int fsnotify_path(struct inode *inode, const struct path *path, - __u32 mask) +static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) { - int ret = fsnotify_parent(path, NULL, mask); + struct inode *inode = d_inode(dentry); + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify_parent(NULL, dentry, mask); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); +} + +static inline int fsnotify_file(struct file *file, __u32 mask) +{ + const struct path *path = &file->f_path; + struct inode *inode = file_inode(file); + int ret; + + if (file->f_mode & FMODE_NONOTIFY) + return 0; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + ret = fsnotify_parent(path, NULL, mask); if (ret) return ret; + return fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } @@ -58,19 +78,16 @@ static inline int fsnotify_path(struct inode *inode, const struct path *path, static inline int fsnotify_perm(struct file *file, int mask) { int ret; - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); __u32 fsnotify_mask = 0; - if (file->f_mode & FMODE_NONOTIFY) - return 0; if (!(mask & (MAY_READ | MAY_OPEN))) return 0; + if (mask & MAY_OPEN) { fsnotify_mask = FS_OPEN_PERM; if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_path(inode, path, FS_OPEN_EXEC_PERM); + ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); if (ret) return ret; @@ -79,10 +96,7 @@ static inline int fsnotify_perm(struct file *file, int mask) fsnotify_mask = FS_ACCESS_PERM; } - if (S_ISDIR(inode->i_mode)) - fsnotify_mask |= FS_ISDIR; - - return fsnotify_path(inode, path, fsnotify_mask); + return fsnotify_file(file, fsnotify_mask); } /* @@ -229,15 +243,7 @@ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) */ static inline void fsnotify_access(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); - __u32 mask = FS_ACCESS; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - if (!(file->f_mode & FMODE_NONOTIFY)) - fsnotify_path(inode, path, mask); + fsnotify_file(file, FS_ACCESS); } /* @@ -245,15 +251,7 @@ static inline void fsnotify_access(struct file *file) */ static inline void fsnotify_modify(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); - __u32 mask = FS_MODIFY; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - if (!(file->f_mode & FMODE_NONOTIFY)) - fsnotify_path(inode, path, mask); + fsnotify_file(file, FS_MODIFY); } /* @@ -261,16 +259,12 @@ static inline void fsnotify_modify(struct file *file) */ static inline void fsnotify_open(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); __u32 mask = FS_OPEN; - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; if (file->f_flags & __FMODE_EXEC) mask |= FS_OPEN_EXEC; - fsnotify_path(inode, path, mask); + fsnotify_file(file, mask); } /* @@ -278,16 +272,10 @@ static inline void fsnotify_open(struct file *file) */ static inline void fsnotify_close(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); - fmode_t mode = file->f_mode; - __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; + __u32 mask = (file->f_mode & FMODE_WRITE) ? FS_CLOSE_WRITE : + FS_CLOSE_NOWRITE; - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - if (!(file->f_mode & FMODE_NONOTIFY)) - fsnotify_path(inode, path, mask); + fsnotify_file(file, mask); } /* @@ -295,14 +283,7 @@ static inline void fsnotify_close(struct file *file) */ static inline void fsnotify_xattr(struct dentry *dentry) { - struct inode *inode = dentry->d_inode; - __u32 mask = FS_ATTRIB; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - fsnotify_parent(NULL, dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify_dentry(dentry, FS_ATTRIB); } /* @@ -311,7 +292,6 @@ static inline void fsnotify_xattr(struct dentry *dentry) */ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { - struct inode *inode = dentry->d_inode; __u32 mask = 0; if (ia_valid & ATTR_UID) @@ -332,13 +312,8 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) if (ia_valid & ATTR_MODE) mask |= FS_ATTRIB; - if (mask) { - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - fsnotify_parent(NULL, dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - } + if (mask) + fsnotify_dentry(dentry, mask); } #endif /* _LINUX_FS_NOTIFY_H */ -- cgit v1.2.3 From a1aae0570a2b806937120921db2c5d3100ca55fc Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:11 +0200 Subject: fsnotify: funnel all dirent events through fsnotify_name() Factor out fsnotify_name() from fsnotify_dirent(), so it can also serve link and rename events and use this helper to report all directory entry change events. Both helpers return void because no caller checks their return value. Link: https://lore.kernel.org/r/20200319151022.31456-4-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index f54936aa0365..751da17e003d 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -18,16 +18,24 @@ #include /* - * Notify this @dir inode about a change in the directory entry @dentry. + * Notify this @dir inode about a change in a child directory entry. + * The directory entry may have turned positive or negative or its inode may + * have changed (i.e. renamed over). * * Unlike fsnotify_parent(), the event will be reported regardless of the * FS_EVENT_ON_CHILD mask on the parent inode. */ -static inline int fsnotify_dirent(struct inode *dir, struct dentry *dentry, - __u32 mask) +static inline void fsnotify_name(struct inode *dir, __u32 mask, + struct inode *child, + const struct qstr *name, u32 cookie) { - return fsnotify(dir, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, - &dentry->d_name, 0); + fsnotify(dir, mask, child, FSNOTIFY_EVENT_INODE, name, cookie); +} + +static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, + __u32 mask) +{ + fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); } /* Notify this dentry's parent about a child's events. */ @@ -136,10 +144,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, mask |= FS_ISDIR; } - fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name, - fs_cookie); - fsnotify(new_dir, new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_name, - fs_cookie); + fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie); + fsnotify_name(new_dir, new_dir_mask, source, new_name, fs_cookie); if (target) fsnotify_link_count(target); @@ -194,12 +200,13 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) * Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ -static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) +static inline void fsnotify_link(struct inode *dir, struct inode *inode, + struct dentry *new_dentry) { fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, &new_dentry->d_name, 0); + fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); } /* -- cgit v1.2.3 From aa93bdc5500cc93ba31afeda1a61610d117947ad Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:12 +0200 Subject: fsnotify: use helpers to access data by data_type Create helpers to access path and inode from different data types. Link: https://lore.kernel.org/r/20200319151022.31456-5-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index db3cabb4600e..5cc838db422a 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -212,10 +212,36 @@ struct fsnotify_group { }; }; -/* when calling fsnotify tell it if the data is a path or inode */ -#define FSNOTIFY_EVENT_NONE 0 -#define FSNOTIFY_EVENT_PATH 1 -#define FSNOTIFY_EVENT_INODE 2 +/* When calling fsnotify tell it if the data is a path or inode */ +enum fsnotify_data_type { + FSNOTIFY_EVENT_NONE, + FSNOTIFY_EVENT_PATH, + FSNOTIFY_EVENT_INODE, +}; + +static inline const struct inode *fsnotify_data_inode(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_INODE: + return data; + case FSNOTIFY_EVENT_PATH: + return d_inode(((const struct path *)data)->dentry); + default: + return NULL; + } +} + +static inline const struct path *fsnotify_data_path(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_PATH: + return data; + default: + return NULL; + } +} enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, -- cgit v1.2.3 From 017de65fe58f2b0ca428b5830609520ded5898b9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:13 +0200 Subject: fsnotify: simplify arguments passing to fsnotify_parent() Instead of passing both dentry and path and having to figure out which one to use, pass data/data_type to simplify the code. Link: https://lore.kernel.org/r/20200319151022.31456-6-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 14 ++------------ include/linux/fsnotify_backend.h | 14 ++++++++------ 2 files changed, 10 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 751da17e003d..860018f3e545 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -38,16 +38,6 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); } -/* Notify this dentry's parent about a child's events. */ -static inline int fsnotify_parent(const struct path *path, - struct dentry *dentry, __u32 mask) -{ - if (!dentry) - dentry = path->dentry; - - return __fsnotify_parent(path, dentry, mask); -} - /* * Simple wrappers to consolidate calls fsnotify_parent()/fsnotify() when * an event is on a file/dentry. @@ -59,7 +49,7 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - fsnotify_parent(NULL, dentry, mask); + fsnotify_parent(dentry, mask, inode, FSNOTIFY_EVENT_INODE); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } @@ -75,7 +65,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - ret = fsnotify_parent(path, NULL, mask); + ret = fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); if (ret) return ret; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 5cc838db422a..337c87cf34d6 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -376,9 +376,10 @@ struct fsnotify_mark { /* called from the vfs helpers */ /* main fsnotify call to send events */ -extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, - const struct qstr *name, u32 cookie); -extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask); +extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, + int data_type, const struct qstr *name, u32 cookie); +extern int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, + int data_type); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); @@ -533,13 +534,14 @@ static inline void fsnotify_init_event(struct fsnotify_event *event, #else -static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, - const struct qstr *name, u32 cookie) +static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, + int data_type, const struct qstr *name, u32 cookie) { return 0; } -static inline int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask) +static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, + const void *data, int data_type) { return 0; } -- cgit v1.2.3 From 8bf6c677ddb9c922423ea3bf494fe7c508bfbb8c Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Mon, 23 Mar 2020 16:20:19 +0100 Subject: completion: Use lockdep_assert_RT_in_threaded_ctx() in complete_all() The warning was intended to spot complete_all() users from hardirq context on PREEMPT_RT. The warning as-is will also trigger in interrupt handlers, which are threaded on PREEMPT_RT, which was not intended. Use lockdep_assert_RT_in_threaded_ctx() which triggers in non-preemptive context on PREEMPT_RT. Fixes: a5c6234e1028 ("completion: Use simple wait queues") Reported-by: kernel test robot Suggested-by: Peter Zijlstra Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200323152019.4qjwluldohuh3by5@linutronix.de --- include/linux/lockdep.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 425b4ceb7cd0..206774ac6946 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -711,6 +711,21 @@ do { \ # define lockdep_assert_in_irq() do { } while (0) #endif +#ifdef CONFIG_PROVE_RAW_LOCK_NESTING + +# define lockdep_assert_RT_in_threaded_ctx() do { \ + WARN_ONCE(debug_locks && !current->lockdep_recursion && \ + current->hardirq_context && \ + !(current->hardirq_threaded || current->irq_config), \ + "Not in threaded context on PREEMPT_RT as expected\n"); \ +} while (0) + +#else + +# define lockdep_assert_RT_in_threaded_ctx() do { } while (0) + +#endif + #ifdef CONFIG_LOCKDEP void lockdep_rcu_suspicious(const char *file, const int line, const char *s); #else -- cgit v1.2.3 From 5eee3bb7103f4a66e4b90c2817f5e72509a2a607 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 20 Mar 2020 17:51:38 +0100 Subject: net: phy: add and use phy_check_downshift So far PHY drivers have to check whether a downshift occurred to be able to notify the user. To make life of drivers authors a little bit easier move the downshift notification to phylib. phy_check_downshift() compares the highest mutually advertised speed with the actual value of phydev->speed (typically read by the PHY driver from a vendor-specific register) to detect a downshift. v2: - Add downshift hint to phy_print_status Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 36d9dea04016..99b5e3c4b621 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -365,6 +365,7 @@ struct macsec_ops; * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * loopback_enabled: Set true if this phy has been loopbacked successfully. + * downshifted_rate: Set true if link speed has been downshifted. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * irq: IRQ number of the PHY's interrupt (-1 if none) @@ -405,6 +406,7 @@ struct phy_device { unsigned suspended_by_mdio_bus:1; unsigned sysfs_links:1; unsigned loopback_enabled:1; + unsigned downshifted_rate:1; unsigned autoneg:1; /* The most recently read link state */ @@ -698,6 +700,7 @@ static inline bool phy_is_started(struct phy_device *phydev) void phy_resolve_aneg_pause(struct phy_device *phydev); void phy_resolve_aneg_linkmode(struct phy_device *phydev); +void phy_check_downshift(struct phy_device *phydev); /** * phy_read - Convenience function for reading a given PHY register -- cgit v1.2.3 From 5f5323a14cad19323060a8cbf9d96f2280a462dd Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Mon, 23 Mar 2020 23:05:51 +0800 Subject: iopoll: introduce read_poll_timeout macro this macro is an extension of readx_poll_timeout macro. the accessor function op just supports only one parameter in the readx_poll_timeout macro, but this macro can supports multiple variable parameters for it. so functions like phy_read(struct phy_device *phydev, u32 regnum) and phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) can also use this poll timeout core. and also expand it can sleep some time before read operation. Signed-off-by: Dejin Zheng Signed-off-by: David S. Miller --- include/linux/iopoll.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 35e15dfd4155..70f89b389648 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -13,6 +13,50 @@ #include #include +/** + * read_poll_timeout - Periodically poll an address until a condition is + * met or a timeout occurs + * @op: accessor function (takes @args as its arguments) + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 + * tight-loops). Should be less than ~20ms since usleep_range + * is used (see Documentation/timers/timers-howto.rst). + * @timeout_us: Timeout in us, 0 means never timeout + * @sleep_before_read: if it is true, sleep @sleep_us before read. + * @args: arguments for @op poll + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ + sleep_before_read, args...) \ +({ \ + u64 __timeout_us = (timeout_us); \ + unsigned long __sleep_us = (sleep_us); \ + ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ + might_sleep_if((__sleep_us) != 0); \ + if (sleep_before_read && __sleep_us) \ + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ + for (;;) { \ + (val) = op(args); \ + if (cond) \ + break; \ + if (__timeout_us && \ + ktime_compare(ktime_get(), __timeout) > 0) { \ + (val) = op(args); \ + break; \ + } \ + if (__sleep_us) \ + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + /** * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs * @op: accessor function (takes @addr as its only argument) -- cgit v1.2.3 From eaa6b01024a74ab5f3064f17dd88596284f497c4 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Mon, 23 Mar 2020 23:05:52 +0800 Subject: iopoll: redefined readx_poll_timeout macro to simplify the code redefined readx_poll_timeout macro by read_poll_timeout to simplify the code. Signed-off-by: Dejin Zheng Signed-off-by: David S. Miller --- include/linux/iopoll.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 70f89b389648..cb20c733b15a 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -76,25 +76,7 @@ * macros defined below rather than this macro directly. */ #define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ -({ \ - u64 __timeout_us = (timeout_us); \ - unsigned long __sleep_us = (sleep_us); \ - ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ - might_sleep_if((__sleep_us) != 0); \ - for (;;) { \ - (val) = op(addr); \ - if (cond) \ - break; \ - if (__timeout_us && \ - ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = op(addr); \ - break; \ - } \ - if (__sleep_us) \ - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ - } \ - (cond) ? 0 : -ETIMEDOUT; \ -}) + read_poll_timeout(op, val, cond, sleep_us, timeout_us, false, addr) /** * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs -- cgit v1.2.3 From bd971ff0b73927b91f4520621d49d3a801ee4837 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Mon, 23 Mar 2020 23:05:53 +0800 Subject: net: phy: introduce phy_read_mmd_poll_timeout macro it is sometimes necessary to poll a phy register by phy_read_mmd() function until its value satisfies some condition. introduce phy_read_mmd_poll_timeout() macros that do this. Suggested-by: Andrew Lunn Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Dejin Zheng Signed-off-by: David S. Miller --- include/linux/phy.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 99b5e3c4b621..3984f375126e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -787,6 +788,19 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, */ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); +#define phy_read_mmd_poll_timeout(phydev, devaddr, regnum, val, cond, \ + sleep_us, timeout_us, sleep_before_read) \ +({ \ + int __ret = read_poll_timeout(phy_read_mmd, val, (cond) || val < 0, \ + sleep_us, timeout_us, sleep_before_read, \ + phydev, devaddr, regnum); \ + if (val < 0) \ + __ret = val; \ + if (__ret) \ + phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \ + __ret; \ +}) + /** * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. -- cgit v1.2.3 From fcbd30d09ba05389cb40cc1769b565df62aead35 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Mon, 23 Mar 2020 23:05:57 +0800 Subject: net: phy: introduce phy_read_poll_timeout macro it is sometimes necessary to poll a phy register by phy_read() function until its value satisfies some condition. introduce phy_read_poll_timeout() macros that do this. Suggested-by: Andrew Lunn Reviewed-by: Andrew Lunn Signed-off-by: Dejin Zheng Signed-off-by: David S. Miller --- include/linux/phy.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 3984f375126e..2432ca463ddc 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -717,6 +717,19 @@ static inline int phy_read(struct phy_device *phydev, u32 regnum) return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, regnum); } +#define phy_read_poll_timeout(phydev, regnum, val, cond, sleep_us, \ + timeout_us, sleep_before_read) \ +({ \ + int __ret = read_poll_timeout(phy_read, val, (cond) || val < 0, \ + sleep_us, timeout_us, sleep_before_read, phydev, regnum); \ + if (val < 0) \ + __ret = val; \ + if (__ret) \ + phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \ + __ret; \ +}) + + /** * __phy_read - convenience function for reading a given PHY register * @phydev: the phy_device struct -- cgit v1.2.3 From 05635c14a292de0e1a221dc31c04aba3913f03c8 Mon Sep 17 00:00:00 2001 From: Jungseung Lee Date: Wed, 18 Mar 2020 21:06:14 +0900 Subject: mtd: spi-nor: Add SR 4bit block protection support Currently we are supporting block protection only for flash chips with 3 block protection bits (BP0-2) in the SR register. Enable block protection support for flashes with 4 block protection bits (BP0-3). Add a flash_info flag for flashes that describe 4 block protection bits. Add another flash_info flag for flashes in which BP3 bit is not adjacent to the BP0-2 bits. Tested with a n25q512ax3 (BP0-3) and w25q128 (BP0-2). Signed-off-by: Jungseung Lee Reviewed-by: Michael Walle Tested-by: Michael Walle Signed-off-by: Tudor Ambarus --- include/linux/mtd/spi-nor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index e656858b50a5..1e2af0ec1f03 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -111,7 +111,9 @@ #define SR_BP0 BIT(2) /* Block protect 0 */ #define SR_BP1 BIT(3) /* Block protect 1 */ #define SR_BP2 BIT(4) /* Block protect 2 */ +#define SR_BP3 BIT(5) /* Block protect 3 */ #define SR_TB_BIT5 BIT(5) /* Top/Bottom protect */ +#define SR_BP3_BIT6 BIT(6) /* Block protect 3 */ #define SR_TB_BIT6 BIT(6) /* Top/Bottom protect */ #define SR_SRWD BIT(7) /* SR write protect */ /* Spansion/Cypress specific status bits */ -- cgit v1.2.3 From dfc2d2594e4a79204a3967585245f00644b8f838 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:15 +0200 Subject: fsnotify: replace inode pointer with an object id The event inode field is used only for comparison in queue merges and cannot be dereferenced after handle_event(), because it does not hold a refcount on the inode. Replace it with an abstract id to do the same thing. Link: https://lore.kernel.org/r/20200319151022.31456-8-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 337c87cf34d6..c72cbea20ef7 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -132,8 +132,7 @@ struct fsnotify_ops { */ struct fsnotify_event { struct list_head list; - /* inode may ONLY be dereferenced during handle_event(). */ - struct inode *inode; /* either the inode the event happened to or its parent */ + unsigned long objectid; /* identifier for queue merges */ }; /* @@ -526,10 +525,10 @@ extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); static inline void fsnotify_init_event(struct fsnotify_event *event, - struct inode *inode) + unsigned long objectid) { INIT_LIST_HEAD(&event->list); - event->inode = inode; + event->objectid = objectid; } #else -- cgit v1.2.3 From 890cc39a879906b63912482dfc41944579df2dc6 Mon Sep 17 00:00:00 2001 From: Dejin Zheng Date: Tue, 24 Mar 2020 00:06:08 +0800 Subject: drivers: provide devm_platform_get_and_ioremap_resource() Since commit "drivers: provide devm_platform_ioremap_resource()", it was wrap platform_get_resource() and devm_ioremap_resource() as single helper devm_platform_ioremap_resource(). but now, many drivers still used platform_get_resource() and devm_ioremap_resource() together in the kernel tree. The reason can not be replaced is they still need use the resource variables obtained by platform_get_resource(). so provide this helper. Suggested-by: Geert Uytterhoeven Suggested-by: Sergei Shtylyov Reviewed-by: Geert Uytterhoeven Signed-off-by: Dejin Zheng Link: https://lore.kernel.org/r/20200323160612.17277-2-zhengdejin5@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 041bfa412aa0..f242f66db19d 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -55,6 +55,9 @@ extern struct device * platform_find_device_by_driver(struct device *start, const struct device_driver *drv); extern void __iomem * +devm_platform_get_and_ioremap_resource(struct platform_device *pdev, + unsigned int index, struct resource **res); +extern void __iomem * devm_platform_ioremap_resource(struct platform_device *pdev, unsigned int index); extern void __iomem * -- cgit v1.2.3 From 166cba71818cd49d7d815fdc6f97c63395e94fc5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:15 +0000 Subject: irqchip/gic-v4.1: Plumb skeletal VSGI irqchip Since GICv4.1 has the capability to inject 16 SGIs into each VPE, and that I'm keen not to invent too many specific interfaces to manipulate these interrupts, let's pretend that each of these SGIs is an actual Linux interrupt. For that matter, let's introduce a minimal irqchip and irqdomain setup that will get fleshed up in the following patches. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200304203330.4967-9-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 439963f4c66a..44e8c19e3d56 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -49,6 +49,11 @@ struct its_vpe { }; /* GICv4.1 implementations */ struct { + struct { + u8 priority; + bool enabled; + bool group; + } sgi_config[16]; atomic_t vmapp_count; }; }; @@ -123,6 +128,8 @@ int its_unmap_vlpi(int irq); int its_prop_update_vlpi(int irq, u8 config, bool inv); struct irq_domain_ops; -int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *ops); +int its_init_v4(struct irq_domain *domain, + const struct irq_domain_ops *vpe_ops, + const struct irq_domain_ops *sgi_ops); #endif -- cgit v1.2.3 From e252cf8a34d92adf41124cb59b19b49d25395548 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:16 +0000 Subject: irqchip/gic-v4.1: Add initial SGI configuration The GICv4.1 ITS has yet another new command (VSGI) which allows a VPE-targeted SGI to be configured (or have its pending state cleared). Add support for this command and plumb it into the activate irqdomain callback so that it is ready to be used. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20200304203330.4967-10-maz@kernel.org --- include/linux/irqchip/arm-gic-v3.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index b28acfa71f82..fd3be49ac9a5 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -502,8 +502,9 @@ #define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI) #define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI) #define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC) -/* VMOVP and INVDB are the odd ones, as they dont have a physical counterpart */ +/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */ #define GITS_CMD_VMOVP GITS_CMD_GICv4(2) +#define GITS_CMD_VSGI GITS_CMD_GICv4(3) #define GITS_CMD_INVDB GITS_CMD_GICv4(0xe) /* -- cgit v1.2.3 From 7017ff0ee1de9d45fafee88a4e7890cce92f482e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:18 +0000 Subject: irqchip/gic-v4.1: Plumb get/set_irqchip_state SGI callbacks To implement the get/set_irqchip_state callbacks (limited to the PENDING state), we have to use a particular set of hacks: - Reading the pending state is done by using a pair of new redistributor registers (GICR_VSGIR, GICR_VSGIPENDR), which allow the 16 interrupts state to be retrieved. - Setting the pending state is done by generating it as we'd otherwise do for a guest (writing to GITS_SGIR). - Clearing the pending state is done by emitting a VSGI command with the "clear" bit set. This requires some interesting locking though: - When talking to the redistributor, we must make sure that the VPE affinity doesn't change, hence taking the VPE lock. - At the same time, we must ensure that nobody accesses the same redistributor's GICR_VSGIR registers for a different VPE, which would corrupt the reading of the pending bits. We thus take the per-RD spinlock. Much fun. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20200304203330.4967-12-maz@kernel.org --- include/linux/irqchip/arm-gic-v3.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index fd3be49ac9a5..590cdbeba9d5 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -345,6 +345,15 @@ #define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58) #define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0) +#define GICR_VSGIR 0x0080 + +#define GICR_VSGIR_VPEID GENMASK(15, 0) + +#define GICR_VSGIPENDR 0x0088 + +#define GICR_VSGIPENDR_BUSY (1U << 31) +#define GICR_VSGIPENDR_PENDING GENMASK(15, 0) + /* * ITS registers, offsets from ITS_base */ @@ -368,6 +377,11 @@ #define GITS_TRANSLATER 0x10040 +#define GITS_SGIR 0x20020 + +#define GITS_SGIR_VPEID GENMASK_ULL(47, 32) +#define GITS_SGIR_VINTID GENMASK_ULL(3, 0) + #define GITS_CTLR_ENABLE (1U << 0) #define GITS_CTLR_ImDe (1U << 1) #define GITS_CTLR_ITS_NUMBER_SHIFT 4 -- cgit v1.2.3 From 05d32df13c6b3c0850b68928048536e9a736d520 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:19 +0000 Subject: irqchip/gic-v4.1: Plumb set_vcpu_affinity SGI callbacks Just like for vLPIs, there is some configuration information that cannot be directly communicated through the normal irqchip API, and we have to use our good old friend set_vcpu_affinity as a side-band communication mechanism. This is used to configure group and priority for a given vSGI. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200304203330.4967-13-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 44e8c19e3d56..1b34100e3536 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -103,6 +103,7 @@ enum its_vcpu_info_cmd_type { SCHEDULE_VPE, DESCHEDULE_VPE, INVALL_VPE, + PROP_UPDATE_VSGI, }; struct its_cmd_info { @@ -115,6 +116,10 @@ struct its_cmd_info { bool g0en; bool g1en; }; + struct { + u8 priority; + bool group; + }; }; }; -- cgit v1.2.3 From ae699ad348cdcd416cbf28e8a02fc468780161f7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:20 +0000 Subject: irqchip/gic-v4.1: Move doorbell management to the GICv4 abstraction layer In order to hide some of the differences between v4.0 and v4.1, move the doorbell management out of the KVM code, and into the GICv4-specific layer. This allows the calling code to ask for the doorbell when blocking, and otherwise to leave the doorbell permanently disabled. This matches the v4.1 code perfectly, and only results in a minor refactoring of the v4.0 code. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20200304203330.4967-14-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 1b34100e3536..34ed4b5754dd 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -125,7 +125,8 @@ struct its_cmd_info { int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); -int its_schedule_vpe(struct its_vpe *vpe, bool on); +int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); +int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); -- cgit v1.2.3 From 6d31b6ff985dbd144b2c4d519cf573b8f81865d9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:21 +0000 Subject: irqchip/gic-v4.1: Add VSGI allocation/teardown Allocate per-VPE SGIs when initializing the GIC-specific part of the VPE data structure. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20200304203330.4967-15-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 34ed4b5754dd..b120a01952fe 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -49,6 +49,8 @@ struct its_vpe { }; /* GICv4.1 implementations */ struct { + struct fwnode_handle *fwnode; + struct irq_domain *sgi_domain; struct { u8 priority; bool enabled; -- cgit v1.2.3 From d50676f5ce8481b98f9bbc1514b5d3f8747dd3c2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 20:33:22 +0000 Subject: irqchip/gic-v4.1: Add VSGI property setup Add the SGI configuration entry point for KVM to use. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20200304203330.4967-16-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index b120a01952fe..6976b8331b60 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -134,6 +134,7 @@ int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); int its_unmap_vlpi(int irq); int its_prop_update_vlpi(int irq, u8 config, bool inv); +int its_prop_update_vsgi(int irq, u8 priority, bool group); struct irq_domain_ops; int its_init_v4(struct irq_domain *domain, -- cgit v1.2.3 From b276527539188f1f61c082ebef27803db93e536d Mon Sep 17 00:00:00 2001 From: Christian Gromm Date: Tue, 10 Mar 2020 14:02:40 +0100 Subject: staging: most: move core files out of the staging area This patch moves the core module to the /drivers/most directory and makes all necessary changes in order to not break the build. Signed-off-by: Christian Gromm Link: https://lore.kernel.org/r/1583845362-26707-2-git-send-email-christian.gromm@microchip.com Signed-off-by: Greg Kroah-Hartman --- include/linux/most.h | 337 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 include/linux/most.h (limited to 'include/linux') diff --git a/include/linux/most.h b/include/linux/most.h new file mode 100644 index 000000000000..232e01b7f5d2 --- /dev/null +++ b/include/linux/most.h @@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * most.h - API for component and adapter drivers + * + * Copyright (C) 2013-2015, Microchip Technology Germany II GmbH & Co. KG + */ + +#ifndef __MOST_CORE_H__ +#define __MOST_CORE_H__ + +#include +#include + +struct module; +struct interface_private; + +/** + * Interface type + */ +enum most_interface_type { + ITYPE_LOOPBACK = 1, + ITYPE_I2C, + ITYPE_I2S, + ITYPE_TSI, + ITYPE_HBI, + ITYPE_MEDIALB_DIM, + ITYPE_MEDIALB_DIM2, + ITYPE_USB, + ITYPE_PCIE +}; + +/** + * Channel direction. + */ +enum most_channel_direction { + MOST_CH_RX = 1 << 0, + MOST_CH_TX = 1 << 1, +}; + +/** + * Channel data type. + */ +enum most_channel_data_type { + MOST_CH_CONTROL = 1 << 0, + MOST_CH_ASYNC = 1 << 1, + MOST_CH_ISOC = 1 << 2, + MOST_CH_SYNC = 1 << 5, +}; + +enum most_status_flags { + /* MBO was processed successfully (data was send or received )*/ + MBO_SUCCESS = 0, + /* The MBO contains wrong or missing information. */ + MBO_E_INVAL, + /* MBO was completed as HDM Channel will be closed */ + MBO_E_CLOSE, +}; + +/** + * struct most_channel_capability - Channel capability + * @direction: Supported channel directions. + * The value is bitwise OR-combination of the values from the + * enumeration most_channel_direction. Zero is allowed value and means + * "channel may not be used". + * @data_type: Supported channel data types. + * The value is bitwise OR-combination of the values from the + * enumeration most_channel_data_type. Zero is allowed value and means + * "channel may not be used". + * @num_buffers_packet: Maximum number of buffers supported by this channel + * for packet data types (Async,Control,QoS) + * @buffer_size_packet: Maximum buffer size supported by this channel + * for packet data types (Async,Control,QoS) + * @num_buffers_streaming: Maximum number of buffers supported by this channel + * for streaming data types (Sync,AV Packetized) + * @buffer_size_streaming: Maximum buffer size supported by this channel + * for streaming data types (Sync,AV Packetized) + * @name_suffix: Optional suffix providean by an HDM that is attached to the + * regular channel name. + * + * Describes the capabilities of a MOST channel like supported Data Types + * and directions. This information is provided by an HDM for the MostCore. + * + * The Core creates read only sysfs attribute files in + * /sys/devices/most/mdev#// with the + * following attributes: + * -available_directions + * -available_datatypes + * -number_of_packet_buffers + * -number_of_stream_buffers + * -size_of_packet_buffer + * -size_of_stream_buffer + * where content of each file is a string with all supported properties of this + * very channel attribute. + */ +struct most_channel_capability { + u16 direction; + u16 data_type; + u16 num_buffers_packet; + u16 buffer_size_packet; + u16 num_buffers_streaming; + u16 buffer_size_streaming; + const char *name_suffix; +}; + +/** + * struct most_channel_config - stores channel configuration + * @direction: direction of the channel + * @data_type: data type travelling over this channel + * @num_buffers: number of buffers + * @buffer_size: size of a buffer for AIM. + * Buffer size may be cutted down by HDM in a configure callback + * to match to a given interface and channel type. + * @extra_len: additional buffer space for internal HDM purposes like padding. + * May be set by HDM in a configure callback if needed. + * @subbuffer_size: size of a subbuffer + * @packets_per_xact: number of MOST frames that are packet inside one USB + * packet. This is USB specific + * + * Describes the configuration for a MOST channel. This information is + * provided from the MostCore to a HDM (like the Medusa PCIe Interface) as a + * parameter of the "configure" function call. + */ +struct most_channel_config { + enum most_channel_direction direction; + enum most_channel_data_type data_type; + u16 num_buffers; + u16 buffer_size; + u16 extra_len; + u16 subbuffer_size; + u16 packets_per_xact; + u16 dbr_size; +}; + +/* + * struct mbo - MOST Buffer Object. + * @context: context for core completion handler + * @priv: private data for HDM + * + * public: documented fields that are used for the communications + * between MostCore and HDMs + * + * @list: list head for use by the mbo's current owner + * @ifp: (in) associated interface instance + * @num_buffers_ptr: amount of pool buffers + * @hdm_channel_id: (in) HDM channel instance + * @virt_address: (in) kernel virtual address of the buffer + * @bus_address: (in) bus address of the buffer + * @buffer_length: (in) buffer payload length + * @processed_length: (out) processed length + * @status: (out) transfer status + * @complete: (in) completion routine + * + * The core allocates and initializes the MBO. + * + * The HDM receives MBO for transfer from the core with the call to enqueue(). + * The HDM copies the data to- or from the buffer depending on configured + * channel direction, set "processed_length" and "status" and completes + * the transfer procedure by calling the completion routine. + * + * Finally, the MBO is being deallocated or recycled for further + * transfers of the same or a different HDM. + * + * Directions of usage: + * The core driver should never access any MBO fields (even if marked + * as "public") while the MBO is owned by an HDM. The ownership starts with + * the call of enqueue() and ends with the call of its complete() routine. + * + * II. + * Every HDM attached to the core driver _must_ ensure that it returns any MBO + * it owns (due to a previous call to enqueue() by the core driver) before it + * de-registers an interface or gets unloaded from the kernel. If this direction + * is violated memory leaks will occur, since the core driver does _not_ track + * MBOs it is currently not in control of. + * + */ +struct mbo { + void *context; + void *priv; + struct list_head list; + struct most_interface *ifp; + int *num_buffers_ptr; + u16 hdm_channel_id; + void *virt_address; + dma_addr_t bus_address; + u16 buffer_length; + u16 processed_length; + enum most_status_flags status; + void (*complete)(struct mbo *mbo); +}; + +/** + * Interface instance description. + * + * Describes an interface of a MOST device the core driver is bound to. + * This structure is allocated and initialized in the HDM. MostCore may not + * modify this structure. + * + * @dev: the actual device + * @mod: module + * @interface Interface type. \sa most_interface_type. + * @description PRELIMINARY. + * Unique description of the device instance from point of view of the + * interface in free text form (ASCII). + * It may be a hexadecimal presentation of the memory address for the MediaLB + * IP or USB device ID with USB properties for USB interface, etc. + * @num_channels Number of channels and size of the channel_vector. + * @channel_vector Properties of the channels. + * Array index represents channel ID by the driver. + * @configure Callback to change data type for the channel of the + * interface instance. May be zero if the instance of the interface is not + * configurable. Parameter channel_config describes direction and data + * type for the channel, configured by the higher level. The content of + * @enqueue Delivers MBO to the HDM for processing. + * After HDM completes Rx- or Tx- operation the processed MBO shall + * be returned back to the MostCore using completion routine. + * The reason to get the MBO delivered from the MostCore after the channel + * is poisoned is the re-opening of the channel by the application. + * In this case the HDM shall hold MBOs and service the channel as usual. + * The HDM must be able to hold at least one MBO for each channel. + * The callback returns a negative value on error, otherwise 0. + * @poison_channel Informs HDM about closing the channel. The HDM shall + * cancel all transfers and synchronously or asynchronously return + * all enqueued for this channel MBOs using the completion routine. + * The callback returns a negative value on error, otherwise 0. + * @request_netinfo: triggers retrieving of network info from the HDM by + * means of "Message exchange over MDP/MEP" + * The call of the function request_netinfo with the parameter on_netinfo as + * NULL prohibits use of the previously obtained function pointer. + * @priv Private field used by mostcore to store context information. + */ +struct most_interface { + struct device *dev; + struct device *driver_dev; + struct module *mod; + enum most_interface_type interface; + const char *description; + unsigned int num_channels; + struct most_channel_capability *channel_vector; + void *(*dma_alloc)(struct mbo *mbo, u32 size); + void (*dma_free)(struct mbo *mbo, u32 size); + int (*configure)(struct most_interface *iface, int channel_idx, + struct most_channel_config *channel_config); + int (*enqueue)(struct most_interface *iface, int channel_idx, + struct mbo *mbo); + int (*poison_channel)(struct most_interface *iface, int channel_idx); + void (*request_netinfo)(struct most_interface *iface, int channel_idx, + void (*on_netinfo)(struct most_interface *iface, + unsigned char link_stat, + unsigned char *mac_addr)); + void *priv; + struct interface_private *p; +}; + +/** + * struct most_component - identifies a loadable component for the mostcore + * @list: list_head + * @name: component name + * @probe_channel: function for core to notify driver about channel connection + * @disconnect_channel: callback function to disconnect a certain channel + * @rx_completion: completion handler for received packets + * @tx_completion: completion handler for transmitted packets + */ +struct most_component { + struct list_head list; + const char *name; + struct module *mod; + int (*probe_channel)(struct most_interface *iface, int channel_idx, + struct most_channel_config *cfg, char *name, + char *param); + int (*disconnect_channel)(struct most_interface *iface, + int channel_idx); + int (*rx_completion)(struct mbo *mbo); + int (*tx_completion)(struct most_interface *iface, int channel_idx); + int (*cfg_complete)(void); +}; + +/** + * most_register_interface - Registers instance of the interface. + * @iface: Pointer to the interface instance description. + * + * Returns a pointer to the kobject of the generated instance. + * + * Note: HDM has to ensure that any reference held on the kobj is + * released before deregistering the interface. + */ +int most_register_interface(struct most_interface *iface); + +/** + * Deregisters instance of the interface. + * @intf_instance Pointer to the interface instance description. + */ +void most_deregister_interface(struct most_interface *iface); +void most_submit_mbo(struct mbo *mbo); + +/** + * most_stop_enqueue - prevents core from enqueing MBOs + * @iface: pointer to interface + * @channel_idx: channel index + */ +void most_stop_enqueue(struct most_interface *iface, int channel_idx); + +/** + * most_resume_enqueue - allow core to enqueue MBOs again + * @iface: pointer to interface + * @channel_idx: channel index + * + * This clears the enqueue halt flag and enqueues all MBOs currently + * in wait fifo. + */ +void most_resume_enqueue(struct most_interface *iface, int channel_idx); +int most_register_component(struct most_component *comp); +int most_deregister_component(struct most_component *comp); +struct mbo *most_get_mbo(struct most_interface *iface, int channel_idx, + struct most_component *comp); +void most_put_mbo(struct mbo *mbo); +int channel_has_mbo(struct most_interface *iface, int channel_idx, + struct most_component *comp); +int most_start_channel(struct most_interface *iface, int channel_idx, + struct most_component *comp); +int most_stop_channel(struct most_interface *iface, int channel_idx, + struct most_component *comp); +int __init configfs_init(void); +int most_register_configfs_subsys(struct most_component *comp); +void most_deregister_configfs_subsys(struct most_component *comp); +int most_add_link(char *mdev, char *mdev_ch, char *comp_name, char *link_name, + char *comp_param); +int most_remove_link(char *mdev, char *mdev_ch, char *comp_name); +int most_set_cfg_buffer_size(char *mdev, char *mdev_ch, u16 val); +int most_set_cfg_subbuffer_size(char *mdev, char *mdev_ch, u16 val); +int most_set_cfg_dbr_size(char *mdev, char *mdev_ch, u16 val); +int most_set_cfg_num_buffers(char *mdev, char *mdev_ch, u16 val); +int most_set_cfg_datatype(char *mdev, char *mdev_ch, char *buf); +int most_set_cfg_direction(char *mdev, char *mdev_ch, char *buf); +int most_set_cfg_packets_xact(char *mdev, char *mdev_ch, u16 val); +int most_cfg_complete(char *comp_name); +void most_interface_register_notify(const char *mdev_name); +#endif /* MOST_CORE_H_ */ -- cgit v1.2.3 From 821747386cb6cd75593a8854208b8af188b4caed Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 24 Mar 2020 11:40:44 +0530 Subject: bus: mhi: core: Pass module owner during client driver registration The module owner field can be used to prevent the removal of kernel modules when there are any device files associated with it opened in userspace. Hence, modify the API to pass module owner field. For convenience, module_mhi_driver() macro is used which takes care of passing the module owner through THIS_MODULE of the module of the driver and also avoiding the use of specifying the default MHI client driver register/unregister routines. Suggested-by: Greg Kroah-Hartman Signed-off-by: Manivannan Sadhasivam Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20200324061050.14845-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 79cb9f898544..d83e7772681b 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -514,11 +514,28 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl, */ void mhi_unregister_controller(struct mhi_controller *mhi_cntrl); +/* + * module_mhi_driver() - Helper macro for drivers that don't do + * anything special other than using default mhi_driver_register() and + * mhi_driver_unregister(). This eliminates a lot of boilerplate. + * Each module may only use this macro once. + */ +#define module_mhi_driver(mhi_drv) \ + module_driver(mhi_drv, mhi_driver_register, \ + mhi_driver_unregister) + +/* + * Macro to avoid include chaining to get THIS_MODULE + */ +#define mhi_driver_register(mhi_drv) \ + __mhi_driver_register(mhi_drv, THIS_MODULE) + /** - * mhi_driver_register - Register driver with MHI framework + * __mhi_driver_register - Register driver with MHI framework * @mhi_drv: Driver associated with the device + * @owner: The module owner */ -int mhi_driver_register(struct mhi_driver *mhi_drv); +int __mhi_driver_register(struct mhi_driver *mhi_drv, struct module *owner); /** * mhi_driver_unregister - Unregister a driver for mhi_devices -- cgit v1.2.3 From 0dc64c2b9496ae720809554070ff1ef5b38cbb91 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 21 Jan 2020 15:51:33 +0530 Subject: firmware: xilinx: Add DLL reset support SD DLL resets are required for some of the operations on ZynqMP platform. Add DLL reset support in ZynqMP firmware driver for SD DLL reset. Signed-off-by: Manish Narani Acked-by: Michal Simek Link: https://lore.kernel.org/r/1579602095-30060-3-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- include/linux/firmware/xlnx-zynqmp.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 2cd12ebd6826..c8c42214e7fb 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -100,7 +100,8 @@ enum pm_ret_status { }; enum pm_ioctl_id { - IOCTL_SET_SD_TAPDELAY = 7, + IOCTL_SD_DLL_RESET = 6, + IOCTL_SET_SD_TAPDELAY, IOCTL_SET_PLL_FRAC_MODE, IOCTL_GET_PLL_FRAC_MODE, IOCTL_SET_PLL_FRAC_DATA, @@ -271,6 +272,12 @@ enum tap_delay_type { PM_TAPDELAY_OUTPUT, }; +enum dll_reset_type { + PM_DLL_RESET_ASSERT, + PM_DLL_RESET_RELEASE, + PM_DLL_RESET_PULSE, +}; + /** * struct zynqmp_pm_query_data - PM query data * @qid: query ID -- cgit v1.2.3 From 511ce378e16f07b66ab78118587b7cc6ac197364 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 12 Feb 2020 12:12:56 +0800 Subject: mmc: Add MMC host software queue support Now the MMC read/write stack will always wait for previous request is completed by mmc_blk_rw_wait(), before sending a new request to hardware, or queue a work to complete request, that will bring context switching overhead and spend some extra time to poll the card for busy completion for I/O writes via sending CMD13, especially for high I/O per second rates, to affect the IO performance. Thus this patch introduces MMC software queue interface based on the hardware command queue engine's interfaces, which is similar with the hardware command queue engine's idea, that can remove the context switching. Moreover we set the default queue depth as 64 for software queue, which allows more requests to be prepared, merged and inserted into IO scheduler to improve performance, but we only allow 2 requests in flight, that is enough to let the irq handler always trigger the next request without a context switch, as well as avoiding a long latency. Moreover the host controller should support HW busy detection for I/O operations when enabling the host software queue. That means, the host controller must not complete a data transfer request, until after the card stops signals busy. From the fio testing data in cover letter, we can see the software queue can improve some performance with 4K block size, increasing about 16% for random read, increasing about 90% for random write, though no obvious improvement for sequential read and write. Moreover we can expand the software queue interface to support MMC packed request or packed command in future. Reviewed-by: Arnd Bergmann Signed-off-by: Baolin Wang Signed-off-by: Baolin Wang Link: https://lore.kernel.org/r/4409c1586a9b3ed20d57ad2faf6c262fc3ccb6e2.1581478568.git.baolin.wang7@gmail.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 4c5eb3aa8e72..94e3c87c1a58 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -463,6 +463,9 @@ struct mmc_host { bool cqe_enabled; bool cqe_on; + /* Host Software Queue support */ + bool hsq_enabled; + unsigned long private[0] ____cacheline_aligned; }; -- cgit v1.2.3 From 40c96853fef1bdef13d2cd05b21d4f06b2de6321 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 4 Feb 2020 09:54:43 +0100 Subject: mmc: core: Enable re-use of mmc_blk_in_tran_state() To allow subsequent changes to re-use the code from the static function mmc_blk_in_tran_state(), let's move it to a public header. While at it, let's also rename it to mmc_ready_for_data(), as to try to better describe its purpose. Signed-off-by: Ulf Hansson Tested-by: Baolin Wang Tested-by: Ludovic Barre Reviewed-by: Ludovic Barre Link: https://lore.kernel.org/r/20200204085449.32585-7-ulf.hansson@linaro.org --- include/linux/mmc/mmc.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 897a87c4c827..4b85ef05a906 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -161,6 +161,16 @@ static inline bool mmc_op_multi(u32 opcode) #define R1_STATE_PRG 7 #define R1_STATE_DIS 8 +static inline bool mmc_ready_for_data(u32 status) +{ + /* + * Some cards mishandle the status bits, so make sure to check both the + * busy indication and the card state. + */ + return status & R1_READY_FOR_DATA && + R1_CURRENT_STATE(status) == R1_STATE_TRAN; +} + /* * MMC/SD in SPI mode reports R1 status always, and R2 for SEND_STATUS * R1 is the low order byte; R2 is the next highest byte, when present. -- cgit v1.2.3 From 8ee5fc0e0b3bed087b38f46f4c272b64c0b33d47 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Wed, 8 Jan 2020 20:39:19 +0530 Subject: mmc: sdhci_am654: Update OTAPDLY writes According to the latest AM65x Data Manual[1], a different output tap delay value is optimum for a given speed mode. Therefore, deprecate the ti,otap-del-sel binding and introduce a new binding for each of the possible MMC/SD speed modes. If the legacy mode is not found, fall back to old binding to maintain dts compatibility. [1] http://www.ti.com/lit/gpn/am6526 Signed-off-by: Faiz Abbas Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20200108150920.14547-3-faiz_abbas@ti.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 94e3c87c1a58..f5db24a6a860 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -322,6 +322,8 @@ struct mmc_host { #define MMC_CAP_3_3V_DDR (1 << 11) /* Host supports eMMC DDR 3.3V */ #define MMC_CAP_1_8V_DDR (1 << 12) /* Host supports eMMC DDR 1.8V */ #define MMC_CAP_1_2V_DDR (1 << 13) /* Host supports eMMC DDR 1.2V */ +#define MMC_CAP_DDR (MMC_CAP_3_3V_DDR | MMC_CAP_1_8V_DDR | \ + MMC_CAP_1_2V_DDR) #define MMC_CAP_POWER_OFF_CARD (1 << 14) /* Can power off after boot */ #define MMC_CAP_BUS_WIDTH_TEST (1 << 15) /* CMD14/CMD19 bus width ok */ #define MMC_CAP_UHS_SDR12 (1 << 16) /* Host supports UHS SDR12 mode */ -- cgit v1.2.3 From 5bd2acdcdde22231057b12ee8978dd87c29ff322 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 19 Feb 2020 16:25:49 +0800 Subject: mmc: sdhci-esdhc-imx: add strobe-dll-delay-target support strobe-dll-delay-target is the delay cell add on the strobe line. Strobe line the the uSDHC loopback read clock which is use in HS400 mode. Different strobe-dll-delay-target may need to set for different board/SoC. If this delay cell is not set to an appropriate value, we may see some read operation meet CRC error after HS400 mode select which already pass the tuning. This patch add the strobe-dll-delay-target setting in driver, so that user can easily config this delay cell in dts file. Signed-off-by: Haibo Chen Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1582100757-20683-1-git-send-email-haibo.chen@nxp.com Signed-off-by: Ulf Hansson --- include/linux/platform_data/mmc-esdhc-imx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h index 6c006078c8a1..0434f68eda86 100644 --- a/include/linux/platform_data/mmc-esdhc-imx.h +++ b/include/linux/platform_data/mmc-esdhc-imx.h @@ -37,5 +37,6 @@ struct esdhc_platform_data { unsigned int delay_line; unsigned int tuning_step; /* The delay cell steps in tuning procedure */ unsigned int tuning_start_tap; /* The start delay cell point in tuning procedure */ + unsigned int strobe_dll_delay_target; /* The delay cell for strobe pad (read clock) */ }; #endif /* __ASM_ARCH_IMX_ESDHC_H */ -- cgit v1.2.3 From 1a91a36aba9c232c73e4a5fce038147f5d29e566 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 26 Feb 2020 16:31:25 -0600 Subject: mmc: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20200226223125.GA20630@embeddedor Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 2 +- include/linux/mmc/sdio_func.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f5db24a6a860..c318fb5b6a94 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -468,7 +468,7 @@ struct mmc_host { /* Host Software Queue support */ bool hsq_enabled; - unsigned long private[0] ____cacheline_aligned; + unsigned long private[] ____cacheline_aligned; }; struct device_node; diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 5a177f7a83c3..fa2aaab5e57a 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -25,7 +25,7 @@ struct sdio_func_tuple { struct sdio_func_tuple *next; unsigned char code; unsigned char size; - unsigned char data[0]; + unsigned char data[]; }; /* -- cgit v1.2.3 From ea3edd4dc23027083fbb4a73b65114d08fe73a76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:11 +0100 Subject: block: remove __bdevname There is no good reason for __bdevname to exist. Just open code printing the string in the callers. For three of them the format string can be trivially merged into existing printk statements, and in init/do_mounts.c we can at least do the scnprintf once at the start of the function, and unconditional of CONFIG_BLOCK to make the output for tiny configfs a little more helpful. Acked-by: Theodore Ts'o # for ext4 Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3cd4fe6b845e..561b35e3b95b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2699,7 +2699,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name) #ifdef CONFIG_BLOCK #define BLKDEV_MAJOR_MAX 512 -extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern void blkdev_show(struct seq_file *,off_t); -- cgit v1.2.3 From 3ad5cee5cd000dc05e6c2410b06fc1d818e7b1e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:13 +0100 Subject: block: move sysfs methods shared by disks and partitions to genhd.c Move the sysfs _show methods that are used both on the full disk and partition nodes to genhd.c instead of hiding them in the partitioning code. Also move the declaration for these methods to block/blk.h so that we don't expose them to drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3c1a816785c8..612d38fce55c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -706,20 +706,6 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -extern ssize_t part_size_show(struct device *dev, - struct device_attribute *attr, char *buf); -extern ssize_t part_stat_show(struct device *dev, - struct device_attribute *attr, char *buf); -extern ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf); -#ifdef CONFIG_FAIL_MAKE_REQUEST -extern ssize_t part_fail_show(struct device *dev, - struct device_attribute *attr, char *buf); -extern ssize_t part_fail_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count); -#endif /* CONFIG_FAIL_MAKE_REQUEST */ - #define alloc_disk_node(minors, node_id) \ ({ \ static struct lock_class_key __key; \ -- cgit v1.2.3 From f17c21c1ecb80e957bafa07d6454836854be7cf2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:14 +0100 Subject: block: remove alloc_part_info and free_part_info There isn't any good reason not to simply open code the allocation and freeing of the partition_meta_info structure. Especially as one of the branches in alloc_part_info is entirely dead code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 612d38fce55c..77f8bb8edfcd 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -465,19 +465,6 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw); -static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) -{ - if (disk) - return kzalloc_node(sizeof(struct partition_meta_info), - GFP_KERNEL, disk->node_id); - return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL); -} - -static inline void free_part_info(struct hd_struct *part) -{ - kfree(part->info); -} - void update_io_ticks(struct hd_struct *part, unsigned long now); /* block/genhd.c */ @@ -755,7 +742,7 @@ static inline void hd_struct_kill(struct hd_struct *part) static inline void hd_free_part(struct hd_struct *part) { free_part_stats(part); - free_part_info(part); + kfree(part->info); percpu_ref_exit(&part->ref); } -- cgit v1.2.3 From 1a9fba3a77a5b39d1c9e1611758303f2649474e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:18 +0100 Subject: block: unexport read_dev_sector and put_dev_sector read_dev_sector and put_dev_sector are now only used by the partition parsing code. Remove the export for read_dev_sector and merge it into the only caller. Clean the mess up a bit by using goto labels and the SECTOR_SHIFT constant. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f629d40c645c..53a1325efbc3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1484,15 +1484,6 @@ static inline unsigned int block_size(struct block_device *bdev) return bdev->bd_block_size; } -typedef struct {struct page *v;} Sector; - -unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); - -static inline void put_dev_sector(Sector p) -{ - put_page(p.v); -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); -- cgit v1.2.3 From 74cc979c3c7f8328b24651daf15280f07533e735 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:19 +0100 Subject: block: cleanup how md_autodetect_dev is called Add a new include/linux/raid/detect.h header to declare the md_autodetect_dev prototype which can be shared between md and the partition code. Then use IS_BUILTIN to call it instead of the ifdef magic. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/raid/detect.h | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 include/linux/raid/detect.h (limited to 'include/linux') diff --git a/include/linux/raid/detect.h b/include/linux/raid/detect.h new file mode 100644 index 000000000000..37dd3f40cd31 --- /dev/null +++ b/include/linux/raid/detect.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +void md_autodetect_dev(dev_t dev); -- cgit v1.2.3 From 1442f76d4317b420580e11238d20789708c742a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:26 +0100 Subject: block: move struct partition out of genhd.h struct partition is the on-disk format of a MSDOS partition table entry. Move it out of genhd.h into a new msdos_partition.h header and give it a msdos_ prefix to avoid confusion. Also move the magic number from block/partitions/msdos.h to the new header so that it can be used by the SCSI drivers looking at the DOS partition tables. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ------------- include/linux/msdos_partition.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 include/linux/msdos_partition.h (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 77f8bb8edfcd..3d84da9ec0fa 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -70,19 +70,6 @@ enum { #include #include -struct partition { - unsigned char boot_ind; /* 0x80 - active */ - unsigned char head; /* starting head */ - unsigned char sector; /* starting sector */ - unsigned char cyl; /* starting cylinder */ - unsigned char sys_ind; /* What partition type */ - unsigned char end_head; /* end head */ - unsigned char end_sector; /* end sector */ - unsigned char end_cyl; /* end cylinder */ - __le32 start_sect; /* starting sector counting from 0 */ - __le32 nr_sects; /* nr of sectors in partition */ -} __attribute__((packed)); - struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; unsigned long sectors[NR_STAT_GROUPS]; diff --git a/include/linux/msdos_partition.h b/include/linux/msdos_partition.h new file mode 100644 index 000000000000..a8e2c1b4bc66 --- /dev/null +++ b/include/linux/msdos_partition.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MSDOS_PARTITION_H +#define _LINUX_MSDOS_PARTITION_H + +#define MSDOS_LABEL_MAGIC 0xAA55 + +struct msdos_partition { + u8 boot_ind; /* 0x80 - active */ + u8 head; /* starting head */ + u8 sector; /* starting sector */ + u8 cyl; /* starting cylinder */ + u8 sys_ind; /* What partition type */ + u8 end_head; /* end head */ + u8 end_sector; /* end sector */ + u8 end_cyl; /* end cylinder */ + __le32 start_sect; /* starting sector counting from 0 */ + __le32 nr_sects; /* nr of sectors in partition */ +} __packed; + +#endif /* LINUX_MSDOS_PARTITION_H */ -- cgit v1.2.3 From 0226e9ead44b2eb8f2471d24e0b0c5ff60bc322c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:27 +0100 Subject: block: move the *_PARTITION enum out of genhd.h The enum containing the *_PARTITION symbolic names is only relevant for the partition parser. More specifically most values are MSDOS partition table system indicators and thus should go straight into msdos.c. One value is only used by the sun partition parser, and the sun and sgi partition parsers use the same value as the x86 Linux RAID indicator to also indicate RAID autodetection. Duplicate them in sun.c and sgi.c given that the different partition types use entirely different values otherwise. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 30 ------------------------------ include/linux/msdos_partition.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3d84da9ec0fa..3050b0ee9cc7 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -30,36 +30,6 @@ extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; -enum { -/* These three have identical behaviour; use the second one if DOS FDISK gets - confused about extended/logical partitions starting past cylinder 1023. */ - DOS_EXTENDED_PARTITION = 5, - LINUX_EXTENDED_PARTITION = 0x85, - WIN98_EXTENDED_PARTITION = 0x0f, - - SUN_WHOLE_DISK = DOS_EXTENDED_PARTITION, - - LINUX_SWAP_PARTITION = 0x82, - LINUX_DATA_PARTITION = 0x83, - LINUX_LVM_PARTITION = 0x8e, - LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ - - SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION, - NEW_SOLARIS_X86_PARTITION = 0xbf, - - DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */ - DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */ - DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */ - EZD_PARTITION = 0x55, /* EZ-DRIVE */ - - FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */ - OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */ - NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */ - BSDI_PARTITION = 0xb7, /* BSDI Partition ID */ - MINIX_PARTITION = 0x81, /* Minix Partition ID */ - UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ -}; - #define DISK_MAX_PARTS 256 #define DISK_NAME_LEN 32 diff --git a/include/linux/msdos_partition.h b/include/linux/msdos_partition.h index a8e2c1b4bc66..e151af072cd1 100644 --- a/include/linux/msdos_partition.h +++ b/include/linux/msdos_partition.h @@ -17,4 +17,35 @@ struct msdos_partition { __le32 nr_sects; /* nr of sectors in partition */ } __packed; +enum msdos_sys_ind { + /* + * These three have identical behaviour; use the second one if DOS FDISK + * gets confused about extended/logical partitions starting past + * cylinder 1023. + */ + DOS_EXTENDED_PARTITION = 5, + LINUX_EXTENDED_PARTITION = 0x85, + WIN98_EXTENDED_PARTITION = 0x0f, + + LINUX_SWAP_PARTITION = 0x82, + LINUX_DATA_PARTITION = 0x83, + LINUX_LVM_PARTITION = 0x8e, + LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ + + SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION, + NEW_SOLARIS_X86_PARTITION = 0xbf, + + DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */ + DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */ + DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */ + EZD_PARTITION = 0x55, /* EZ-DRIVE */ + + FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */ + OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */ + NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */ + BSDI_PARTITION = 0xb7, /* BSDI Partition ID */ + MINIX_PARTITION = 0x81, /* Minix Partition ID */ + UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ +}; + #endif /* LINUX_MSDOS_PARTITION_H */ -- cgit v1.2.3 From cb0ab52652123c47cb72e665ce9fdd3029dcb175 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:28 +0100 Subject: partitions/msdos: remove LINUX_SWAP_PARTITION Just always use NEW_SOLARIS_X86_PARTITION and explain the situation, as that is less confusing than two names for a single value. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/msdos_partition.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msdos_partition.h b/include/linux/msdos_partition.h index e151af072cd1..2cb82db2a43c 100644 --- a/include/linux/msdos_partition.h +++ b/include/linux/msdos_partition.h @@ -27,12 +27,11 @@ enum msdos_sys_ind { LINUX_EXTENDED_PARTITION = 0x85, WIN98_EXTENDED_PARTITION = 0x0f, - LINUX_SWAP_PARTITION = 0x82, LINUX_DATA_PARTITION = 0x83, LINUX_LVM_PARTITION = 0x8e, LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ - SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION, + SOLARIS_X86_PARTITION = 0x82, /* also Linux swap partitions */ NEW_SOLARIS_X86_PARTITION = 0xbf, DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */ -- cgit v1.2.3 From 3f4fc59c1321b74df27dcd5d77b37989ed93265b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Mar 2020 08:25:29 +0100 Subject: block: move the various x86 Unix label formats out of genhd.h All these are just used in block/partitions/msdos.c, so move them out of the genhd.h driver included by every driver. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 143 -------------------------------------------------- 1 file changed, 143 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3050b0ee9cc7..da62b44b15be 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -473,149 +473,6 @@ static inline void set_capacity(struct gendisk *disk, sector_t size) disk->part0.nr_sects = size; } -#ifdef CONFIG_SOLARIS_X86_PARTITION - -#define SOLARIS_X86_NUMSLICE 16 -#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL) - -struct solaris_x86_slice { - __le16 s_tag; /* ID tag of partition */ - __le16 s_flag; /* permission flags */ - __le32 s_start; /* start sector no of partition */ - __le32 s_size; /* # of blocks in partition */ -}; - -struct solaris_x86_vtoc { - unsigned int v_bootinfo[3]; /* info needed by mboot (unsupported) */ - __le32 v_sanity; /* to verify vtoc sanity */ - __le32 v_version; /* layout version */ - char v_volume[8]; /* volume name */ - __le16 v_sectorsz; /* sector size in bytes */ - __le16 v_nparts; /* number of partitions */ - unsigned int v_reserved[10]; /* free space */ - struct solaris_x86_slice - v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */ - unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */ - char v_asciilabel[128]; /* for compatibility */ -}; - -#endif /* CONFIG_SOLARIS_X86_PARTITION */ - -#ifdef CONFIG_BSD_DISKLABEL -/* - * BSD disklabel support by Yossi Gottlieb - * updated by Marc Espie - */ - -/* check against BSD src/sys/sys/disklabel.h for consistency */ - -#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */ -#define BSD_MAXPARTITIONS 16 -#define OPENBSD_MAXPARTITIONS 16 -#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */ -struct bsd_disklabel { - __le32 d_magic; /* the magic number */ - __s16 d_type; /* drive type */ - __s16 d_subtype; /* controller/d_type specific */ - char d_typename[16]; /* type name, e.g. "eagle" */ - char d_packname[16]; /* pack identifier */ - __u32 d_secsize; /* # of bytes per sector */ - __u32 d_nsectors; /* # of data sectors per track */ - __u32 d_ntracks; /* # of tracks per cylinder */ - __u32 d_ncylinders; /* # of data cylinders per unit */ - __u32 d_secpercyl; /* # of data sectors per cylinder */ - __u32 d_secperunit; /* # of data sectors per unit */ - __u16 d_sparespertrack; /* # of spare sectors per track */ - __u16 d_sparespercyl; /* # of spare sectors per cylinder */ - __u32 d_acylinders; /* # of alt. cylinders per unit */ - __u16 d_rpm; /* rotational speed */ - __u16 d_interleave; /* hardware sector interleave */ - __u16 d_trackskew; /* sector 0 skew, per track */ - __u16 d_cylskew; /* sector 0 skew, per cylinder */ - __u32 d_headswitch; /* head switch time, usec */ - __u32 d_trkseek; /* track-to-track seek, usec */ - __u32 d_flags; /* generic flags */ -#define NDDATA 5 - __u32 d_drivedata[NDDATA]; /* drive-type specific information */ -#define NSPARE 5 - __u32 d_spare[NSPARE]; /* reserved for future use */ - __le32 d_magic2; /* the magic number (again) */ - __le16 d_checksum; /* xor of data incl. partitions */ - - /* filesystem and partition information: */ - __le16 d_npartitions; /* number of partitions in following */ - __le32 d_bbsize; /* size of boot area at sn0, bytes */ - __le32 d_sbsize; /* max size of fs superblock, bytes */ - struct bsd_partition { /* the partition table */ - __le32 p_size; /* number of sectors in partition */ - __le32 p_offset; /* starting sector */ - __le32 p_fsize; /* filesystem basic fragment size */ - __u8 p_fstype; /* filesystem type, see below */ - __u8 p_frag; /* filesystem fragments per block */ - __le16 p_cpg; /* filesystem cylinders per group */ - } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */ -}; - -#endif /* CONFIG_BSD_DISKLABEL */ - -#ifdef CONFIG_UNIXWARE_DISKLABEL -/* - * Unixware slices support by Andrzej Krzysztofowicz - * and Krzysztof G. Baranowski - */ - -#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */ -#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */ -#define UNIXWARE_NUMSLICE 16 -#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */ - -struct unixware_slice { - __le16 s_label; /* label */ - __le16 s_flags; /* permission flags */ - __le32 start_sect; /* starting sector */ - __le32 nr_sects; /* number of sectors in slice */ -}; - -struct unixware_disklabel { - __le32 d_type; /* drive type */ - __le32 d_magic; /* the magic number */ - __le32 d_version; /* version number */ - char d_serial[12]; /* serial number of the device */ - __le32 d_ncylinders; /* # of data cylinders per device */ - __le32 d_ntracks; /* # of tracks per cylinder */ - __le32 d_nsectors; /* # of data sectors per track */ - __le32 d_secsize; /* # of bytes per sector */ - __le32 d_part_start; /* # of first sector of this partition */ - __le32 d_unknown1[12]; /* ? */ - __le32 d_alt_tbl; /* byte offset of alternate table */ - __le32 d_alt_len; /* byte length of alternate table */ - __le32 d_phys_cyl; /* # of physical cylinders per device */ - __le32 d_phys_trk; /* # of physical tracks per cylinder */ - __le32 d_phys_sec; /* # of physical sectors per track */ - __le32 d_phys_bytes; /* # of physical bytes per sector */ - __le32 d_unknown2; /* ? */ - __le32 d_unknown3; /* ? */ - __le32 d_pad[8]; /* pad */ - - struct unixware_vtoc { - __le32 v_magic; /* the magic number */ - __le32 v_version; /* version number */ - char v_name[8]; /* volume name */ - __le16 v_nslices; /* # of slices */ - __le16 v_unknown1; /* ? */ - __le32 v_reserved[10]; /* reserved */ - struct unixware_slice - v_slice[UNIXWARE_NUMSLICE]; /* slice headers */ - } vtoc; - -}; /* 408 */ - -#endif /* CONFIG_UNIXWARE_DISKLABEL */ - -#ifdef CONFIG_MINIX_SUBPARTITION -# define MINIX_NR_SUBPARTITIONS 4 -#endif /* CONFIG_MINIX_SUBPARTITION */ - #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -- cgit v1.2.3 From c0560f51cf77472f4ed113539b0a02ca6cda7961 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 24 Mar 2020 09:27:56 -0600 Subject: vfio: allow external user to get vfio group from device external user calls vfio_group_get_external_user_from_dev() with a device pointer to get the VFIO group associated with this device. The VFIO group is checked to be vialbe and have IOMMU set. Then container user counter is increased and VFIO group reference is hold to prevent the VFIO group from disposal before external user exits. when the external user finishes using of the VFIO group, it calls vfio_group_put_external_user() to dereference the VFIO group and the container user counter. Suggested-by: Alex Williamson Signed-off-by: Yan Zhao Signed-off-by: Alex Williamson --- include/linux/vfio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e42a711a2800..fb71e0ac0e76 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -94,6 +94,8 @@ extern void vfio_unregister_iommu_driver( */ extern struct vfio_group *vfio_group_get_external_user(struct file *filep); extern void vfio_group_put_external_user(struct vfio_group *group); +extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device + *dev); extern bool vfio_external_group_match_file(struct vfio_group *group, struct file *filep); extern int vfio_external_user_iommu_id(struct vfio_group *group); -- cgit v1.2.3 From 8d46c0cca5f4dc0538173d62cd36b1119b5105bc Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 24 Mar 2020 09:27:57 -0600 Subject: vfio: introduce vfio_dma_rw to read/write a range of IOVAs vfio_dma_rw will read/write a range of user space memory pointed to by IOVA into/from a kernel buffer without enforcing pinning the user space memory. TODO: mark the IOVAs to user space memory dirty if they are written in vfio_dma_rw(). Cc: Kevin Tian Signed-off-by: Yan Zhao Signed-off-by: Alex Williamson --- include/linux/vfio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index fb71e0ac0e76..34b2fdf4de6e 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -82,6 +82,8 @@ struct vfio_iommu_driver_ops { struct notifier_block *nb); int (*unregister_notifier)(void *iommu_data, struct notifier_block *nb); + int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, + void *data, size_t count, bool write); }; extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); @@ -109,6 +111,9 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage); +extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, + void *data, size_t len, bool write); + /* each type has independent events */ enum vfio_notify_type { VFIO_IOMMU_NOTIFY = 0, -- cgit v1.2.3 From 40280cf7e8ca7d31bb0a9d626f36f458fec32815 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 24 Mar 2020 09:27:57 -0600 Subject: vfio: avoid inefficient operations on VFIO group in vfio_pin/unpin_pages vfio_group_pin_pages() and vfio_group_unpin_pages() are introduced to avoid inefficient search/check/ref/deref opertions associated with VFIO group as those in each calling into vfio_pin_pages() and vfio_unpin_pages(). VFIO group is taken as arg directly. The callers combine search/check/ref/deref operations associated with VFIO group by calling vfio_group_get_external_user()/vfio_group_get_external_user_from_dev() beforehand, and vfio_group_put_external_user() afterwards. Suggested-by: Alex Williamson Signed-off-by: Yan Zhao Signed-off-by: Alex Williamson --- include/linux/vfio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 34b2fdf4de6e..be2bd358b952 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -111,6 +111,12 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage); +extern int vfio_group_pin_pages(struct vfio_group *group, + unsigned long *user_iova_pfn, int npage, + int prot, unsigned long *phys_pfn); +extern int vfio_group_unpin_pages(struct vfio_group *group, + unsigned long *user_iova_pfn, int npage); + extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, void *data, size_t len, bool write); -- cgit v1.2.3 From 5f3874c2a2310d9bd6969ca6764961d27a843b9d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 24 Mar 2020 09:28:25 -0600 Subject: vfio: Include optional device match in vfio_device_ops callbacks Allow bus drivers to provide their own callback to match a device to the user provided string. Reviewed-by: Cornelia Huck Reviewed-by: Kevin Tian Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e42a711a2800..029694b977f2 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -26,6 +26,9 @@ * operations documented below * @mmap: Perform mmap(2) on a region of the device file descriptor * @request: Request for the bus driver to release the device + * @match: Optional device name match callback (return: 0 for no-match, >0 for + * match, -errno for abort (ex. match with insufficient or incorrect + * additional args) */ struct vfio_device_ops { char *name; @@ -39,6 +42,7 @@ struct vfio_device_ops { unsigned long arg); int (*mmap)(void *device_data, struct vm_area_struct *vma); void (*request)(void *device_data, unsigned int count); + int (*match)(void *device_data, char *buf); }; extern struct iommu_group *vfio_iommu_group_get(struct device *dev); -- cgit v1.2.3 From 86332c343491c6d2228a1e0c80b1ea98a2653d20 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 23 Mar 2020 21:14:24 -0700 Subject: regulator: qcom_smd: Add pmi8994 regulator support The pmi8994 is commonly found on MSM8996 based devices, such as the Dragonboard 820c, where it supplies power to a number of LDOs on the primary PMIC. Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20200324041424.518160-1-bjorn.andersson@linaro.org Signed-off-by: Mark Brown --- include/linux/soc/qcom/smd-rpm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 9e4fdd861a51..da304ce8c8f7 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -10,6 +10,7 @@ struct qcom_smd_rpm; /* * Constants used for addressing resources in the RPM. */ +#define QCOM_SMD_RPM_BBYB 0x62796262 #define QCOM_SMD_RPM_BOBB 0x62626f62 #define QCOM_SMD_RPM_BOOST 0x61747362 #define QCOM_SMD_RPM_BUS_CLK 0x316b6c63 -- cgit v1.2.3 From f05a3849f6449f67843113778bf56e02f2b4ddf8 Mon Sep 17 00:00:00 2001 From: "Thomas Hellstrom (VMware)" Date: Tue, 24 Mar 2020 18:46:48 +0100 Subject: fs: Constify vma argument to vma_is_dax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function is used by upcoming vma_is_special_huge() with which we want to use a const vma argument. Since for vma_is_dax() the vma argument is only dereferenced for reading, constify it. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom (VMware) Reviewed-by: Roland Scheidegger Acked-by: Christian König --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3cd4fe6b845e..2b38ce5b73ad 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3391,7 +3391,7 @@ static inline bool io_is_direct(struct file *filp) return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host); } -static inline bool vma_is_dax(struct vm_area_struct *vma) +static inline bool vma_is_dax(const struct vm_area_struct *vma) { return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } -- cgit v1.2.3 From 2484ca9b6a20451debb789d0a89af6f15de99826 Mon Sep 17 00:00:00 2001 From: "Thomas Hellstrom (VMware)" Date: Tue, 24 Mar 2020 18:47:17 +0100 Subject: mm: Introduce vma_is_special_huge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For VM_PFNMAP and VM_MIXEDMAP vmas that want to support transhuge pages and -page table entries, introduce vma_is_special_huge() that takes the same codepaths as vma_is_dax(). The use of "special" follows the definition in memory.c, vm_normal_page(): "Special" mappings do not wish to be associated with a "struct page" (either it doesn't exist, or it exists but they don't want to touch it) For PAGE_SIZE pages, "special" is determined per page table entry to be able to deal with COW pages. But since we don't have huge COW pages, we can classify a vma as either "special huge" or "normal huge". Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom (VMware) Acked-by: Christian König Acked-by: Andrew Morton --- include/linux/mm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c54fb96cb1e6..bdd79a72bb42 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2867,6 +2867,23 @@ extern long copy_huge_page_from_user(struct page *dst_page, const void __user *usr_src, unsigned int pages_per_huge_page, bool allow_pagefault); + +/** + * vma_is_special_huge - Are transhuge page-table entries considered special? + * @vma: Pointer to the struct vm_area_struct to consider + * + * Whether transhuge page-table entries are considered "special" following + * the definition in vm_normal_page(). + * + * Return: true if transhuge page-table entries should be considered special, + * false otherwise. + */ +static inline bool vma_is_special_huge(const struct vm_area_struct *vma) +{ + return vma_is_dax(vma) || (vma->vm_file && + (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))); +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #ifdef CONFIG_DEBUG_PAGEALLOC -- cgit v1.2.3 From 9a9731b18c9bb70c023f0b2c731726fd5167673e Mon Sep 17 00:00:00 2001 From: "Thomas Hellstrom (VMware)" Date: Tue, 24 Mar 2020 18:48:09 +0100 Subject: mm: Add vmf_insert_pfn_xxx_prot() for huge page-table entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For graphics drivers needing to modify the page-protection, add huge page-table entries counterparts to vmf_insert_pfn_prot(). Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom (VMware) Acked-by: Christian König Acked-by: Andrew Morton --- include/linux/huge_mm.h | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 5aca3d1bdb32..f63b0882c1b3 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -47,8 +47,45 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); -vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); -vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, + pgprot_t pgprot, bool write); + +/** + * vmf_insert_pfn_pmd - insert a pmd size pfn + * @vmf: Structure describing the fault + * @pfn: pfn to insert + * @pgprot: page protection to use + * @write: whether it's a write fault + * + * Insert a pmd size pfn. See vmf_insert_pfn() for additional info. + * + * Return: vm_fault_t value. + */ +static inline vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, + bool write) +{ + return vmf_insert_pfn_pmd_prot(vmf, pfn, vmf->vma->vm_page_prot, write); +} +vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn, + pgprot_t pgprot, bool write); + +/** + * vmf_insert_pfn_pud - insert a pud size pfn + * @vmf: Structure describing the fault + * @pfn: pfn to insert + * @pgprot: page protection to use + * @write: whether it's a write fault + * + * Insert a pud size pfn. See vmf_insert_pfn() for additional info. + * + * Return: vm_fault_t value. + */ +static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, + bool write) +{ + return vmf_insert_pfn_pud_prot(vmf, pfn, vmf->vma->vm_page_prot, write); +} + enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, -- cgit v1.2.3 From ba5bade4cc0d2013cdf5634dae554693c968a090 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Mar 2020 14:13:46 +0100 Subject: x86/devicetable: Move x86 specific macro out of generic code There is no reason that this gunk is in a generic header file. The wildcard defines need to stay as they are required by file2alias. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/20200320131508.736205164@linutronix.de --- include/linux/mod_devicetable.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index e3596db077dc..f8b66d43acf6 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -667,9 +667,7 @@ struct x86_cpu_id { kernel_ulong_t driver_data; }; -#define X86_FEATURE_MATCH(x) \ - { X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, x } - +/* Wild cards for x86_cpu_id::vendor, family, model and feature */ #define X86_VENDOR_ANY 0xffff #define X86_FAMILY_ANY 0 #define X86_MODEL_ANY 0 -- cgit v1.2.3 From e6282fc6f889debe4d6eb6332dc6e49739faa5cb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 24 Mar 2020 14:32:11 +0200 Subject: i2c: core: Provide generic definitions for bus frequencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are few maximum bus frequencies being used in the I²C core code. Provide generic definitions for bus frequencies and use them in the core. The drivers may use predefined constants where it is appropriate. Some of them are already using these under slightly different names. We will convert them later to use newly introduced defines. Note, the name of modes are chosen to follow well established naming scheme [1]. These definitions will also help to avoid typos in the numbers that may lead to subtle errors. [1]: https://en.wikipedia.org/wiki/I%C2%B2C#Differences_between_modes Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f834687989f7..72e759328cee 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -39,6 +39,14 @@ enum i2c_slave_event; typedef int (*i2c_slave_cb_t)(struct i2c_client *client, enum i2c_slave_event event, u8 *val); +/* I2C Frequency Modes */ +#define I2C_MAX_STANDARD_MODE_FREQ 100000 +#define I2C_MAX_FAST_MODE_FREQ 400000 +#define I2C_MAX_FAST_MODE_PLUS_FREQ 1000000 +#define I2C_MAX_TURBO_MODE_FREQ 1400000 +#define I2C_MAX_HIGH_SPEED_MODE_FREQ 3400000 +#define I2C_MAX_ULTRA_FAST_MODE_FREQ 5000000 + struct module; struct property_entry; -- cgit v1.2.3 From adc6162b9a0c60a81cf6a107196924526cd186f6 Mon Sep 17 00:00:00 2001 From: Mason Yang Date: Wed, 18 Mar 2020 15:42:27 +0800 Subject: mtd: rawnand: Add support for manufacturer specific suspend/resume operation Patch nand_suspend() & nand_resume() to let manufacturers overwrite suspend/resume operations. Signed-off-by: Mason Yang Reviewed-by: Miquel Raynal Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/1584517348-14486-2-git-send-email-masonccyang@mxic.com.tw --- include/linux/mtd/rawnand.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 49ed50fb44ab..1e76196f9829 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1064,6 +1064,8 @@ struct nand_legacy { * @lock: lock protecting the suspended field. Also used to * serialize accesses to the NAND device. * @suspended: set to 1 when the device is suspended, 0 when it's not. + * @suspend: [REPLACEABLE] specific NAND device suspend operation + * @resume: [REPLACEABLE] specific NAND device resume operation * @bbt: [INTERN] bad block table pointer * @bbt_td: [REPLACEABLE] bad block table descriptor for flash * lookup. @@ -1119,6 +1121,8 @@ struct nand_chip { struct mutex lock; unsigned int suspended : 1; + int (*suspend)(struct nand_chip *chip); + void (*resume)(struct nand_chip *chip); uint8_t *oob_poi; struct nand_controller *controller; -- cgit v1.2.3 From 995bb1092326b8ba8fa29456c334ac6a49765ccd Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Mon, 16 Mar 2020 13:14:32 -0700 Subject: extcon: Mark extcon_get_edev_name() function as exported symbol extcon_get_edev_name() function provides client driver to request extcon dev's name. If extcon driver and client driver are compiled as loadable modules, extcon_get_edev_name() function symbol is not visible to client driver. Hence mark extcon_find_edev_name() function as exported symbol. Signed-off-by: Mayank Rana Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 1b1d77ec2114..fd183fb9c20f 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -286,6 +286,11 @@ static inline struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, { return ERR_PTR(-ENODEV); } + +static inline const char *extcon_get_edev_name(struct extcon_dev *edev) +{ + return NULL; +} #endif /* CONFIG_EXTCON */ /* -- cgit v1.2.3 From 6d7434931ac36ff649c1ba09380d0799fea84795 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 8 Jan 2020 19:35:49 +0900 Subject: PM / devfreq: Remove unneeded extern keyword Remove unneeded extern keyword from devfreq-related header file and adjust the indentation of function parameter to keep the consistency in header file Reviewed-by: Lukasz Luba Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 100 ++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index c6f82d4bec9f..82630fafacde 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -201,24 +201,23 @@ struct devfreq_freqs { }; #if defined(CONFIG_PM_DEVFREQ) -extern struct devfreq *devfreq_add_device(struct device *dev, - struct devfreq_dev_profile *profile, - const char *governor_name, - void *data); -extern int devfreq_remove_device(struct devfreq *devfreq); -extern struct devfreq *devm_devfreq_add_device(struct device *dev, - struct devfreq_dev_profile *profile, - const char *governor_name, - void *data); -extern void devm_devfreq_remove_device(struct device *dev, - struct devfreq *devfreq); +struct devfreq *devfreq_add_device(struct device *dev, + struct devfreq_dev_profile *profile, + const char *governor_name, + void *data); +int devfreq_remove_device(struct devfreq *devfreq); +struct devfreq *devm_devfreq_add_device(struct device *dev, + struct devfreq_dev_profile *profile, + const char *governor_name, + void *data); +void devm_devfreq_remove_device(struct device *dev, struct devfreq *devfreq); /* Supposed to be called by PM callbacks */ -extern int devfreq_suspend_device(struct devfreq *devfreq); -extern int devfreq_resume_device(struct devfreq *devfreq); +int devfreq_suspend_device(struct devfreq *devfreq); +int devfreq_resume_device(struct devfreq *devfreq); -extern void devfreq_suspend(void); -extern void devfreq_resume(void); +void devfreq_suspend(void); +void devfreq_resume(void); /** * update_devfreq() - Reevaluate the device and configure frequency @@ -226,35 +225,34 @@ extern void devfreq_resume(void); * * Note: devfreq->lock must be held */ -extern int update_devfreq(struct devfreq *devfreq); +int update_devfreq(struct devfreq *devfreq); /* Helper functions for devfreq user device driver with OPP. */ -extern struct dev_pm_opp *devfreq_recommended_opp(struct device *dev, - unsigned long *freq, u32 flags); -extern int devfreq_register_opp_notifier(struct device *dev, - struct devfreq *devfreq); -extern int devfreq_unregister_opp_notifier(struct device *dev, - struct devfreq *devfreq); -extern int devm_devfreq_register_opp_notifier(struct device *dev, - struct devfreq *devfreq); -extern void devm_devfreq_unregister_opp_notifier(struct device *dev, - struct devfreq *devfreq); -extern int devfreq_register_notifier(struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list); -extern int devfreq_unregister_notifier(struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list); -extern int devm_devfreq_register_notifier(struct device *dev, +struct dev_pm_opp *devfreq_recommended_opp(struct device *dev, + unsigned long *freq, u32 flags); +int devfreq_register_opp_notifier(struct device *dev, + struct devfreq *devfreq); +int devfreq_unregister_opp_notifier(struct device *dev, + struct devfreq *devfreq); +int devm_devfreq_register_opp_notifier(struct device *dev, + struct devfreq *devfreq); +void devm_devfreq_unregister_opp_notifier(struct device *dev, + struct devfreq *devfreq); +int devfreq_register_notifier(struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list); +int devfreq_unregister_notifier(struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list); +int devm_devfreq_register_notifier(struct device *dev, struct devfreq *devfreq, struct notifier_block *nb, unsigned int list); -extern void devm_devfreq_unregister_notifier(struct device *dev, +void devm_devfreq_unregister_notifier(struct device *dev, struct devfreq *devfreq, struct notifier_block *nb, unsigned int list); -extern struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, - int index); +struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index); #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) /** @@ -311,9 +309,9 @@ struct devfreq_passive_data { #else /* !CONFIG_PM_DEVFREQ */ static inline struct devfreq *devfreq_add_device(struct device *dev, - struct devfreq_dev_profile *profile, - const char *governor_name, - void *data) + struct devfreq_dev_profile *profile, + const char *governor_name, + void *data) { return ERR_PTR(-ENOSYS); } @@ -350,31 +348,31 @@ static inline void devfreq_suspend(void) {} static inline void devfreq_resume(void) {} static inline struct dev_pm_opp *devfreq_recommended_opp(struct device *dev, - unsigned long *freq, u32 flags) + unsigned long *freq, u32 flags) { return ERR_PTR(-EINVAL); } static inline int devfreq_register_opp_notifier(struct device *dev, - struct devfreq *devfreq) + struct devfreq *devfreq) { return -EINVAL; } static inline int devfreq_unregister_opp_notifier(struct device *dev, - struct devfreq *devfreq) + struct devfreq *devfreq) { return -EINVAL; } static inline int devm_devfreq_register_opp_notifier(struct device *dev, - struct devfreq *devfreq) + struct devfreq *devfreq) { return -EINVAL; } static inline void devm_devfreq_unregister_opp_notifier(struct device *dev, - struct devfreq *devfreq) + struct devfreq *devfreq) { } @@ -393,22 +391,22 @@ static inline int devfreq_unregister_notifier(struct devfreq *devfreq, } static inline int devm_devfreq_register_notifier(struct device *dev, - struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list) + struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list) { return 0; } static inline void devm_devfreq_unregister_notifier(struct device *dev, - struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list) + struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list) { } static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, - int index) + int index) { return ERR_PTR(-ENODEV); } -- cgit v1.2.3 From 7a51320ecd394202d80131ad7837a72ca2213e64 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2020 15:54:14 +0100 Subject: PM / devfreq: Get rid of some doc warnings Mark "void *data" as literal, in order to avoid those doc warnings: ./include/linux/devfreq.h:156: WARNING: Inline emphasis start-string without end-string. ./include/linux/devfreq.h:259: WARNING: Inline emphasis start-string without end-string. ./include/linux/devfreq.h:279: WARNING: Inline emphasis start-string without end-string. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 82630fafacde..57e871a559a9 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -158,7 +158,7 @@ struct devfreq_stats { * functions except for the context of callbacks defined in struct * devfreq_governor, the governor should protect its access with the * struct mutex lock in struct devfreq. A governor may use this mutex - * to protect its own private data in void *data as well. + * to protect its own private data in ``void *data`` as well. */ struct devfreq { struct list_head node; @@ -256,7 +256,7 @@ struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index); #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) /** - * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq + * struct devfreq_simple_ondemand_data - ``void *data`` fed to struct devfreq * and devfreq_add_device * @upthreshold: If the load is over this value, the frequency jumps. * Specify 0 to use the default. Valid value = 0 to 100. @@ -276,7 +276,7 @@ struct devfreq_simple_ondemand_data { #if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE) /** - * struct devfreq_passive_data - void *data fed to struct devfreq + * struct devfreq_passive_data - ``void *data`` fed to struct devfreq * and devfreq_add_device * @parent: the devfreq instance of parent device. * @get_target_freq: Optional callback, Returns desired operating frequency -- cgit v1.2.3 From d19d2de61fb131abcd29f7c61d3f168f687bfd6e Mon Sep 17 00:00:00 2001 From: Chuanhong Guo Date: Sun, 15 Mar 2020 20:13:37 +0800 Subject: gpio: mmio: introduce BGPIOF_NO_SET_ON_INPUT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some gpio controllers ignores pin value writing when that pin is configured as input mode. As a result, bgpio_dir_out should set pin to output before configuring pin values or gpio pin values can't be set up properly. Introduce two variants of bgpio_dir_out: bgpio_dir_out_val_first and bgpio_dir_out_dir_first, and assign direction_output according to a new flag: BGPIOF_NO_SET_ON_INPUT. Signed-off-by: Chuanhong Guo Tested-by: René van Dorst Reviewed-by: Sergio Paracuellos Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 6ef05bccc0a6..ed65e00ee977 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -572,6 +572,7 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, #define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) #define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ #define BGPIOF_NO_OUTPUT BIT(5) /* only input */ +#define BGPIOF_NO_SET_ON_INPUT BIT(6) int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq); -- cgit v1.2.3 From 9e2ba2c34f1922ca1e0c7d31b30ace5842c2e7d1 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:19 +0200 Subject: fanotify: send FAN_DIR_MODIFY event flavor with dir inode and name Dirent events are going to be supported in two flavors: 1. Directory fid info + mask that includes the specific event types (e.g. FAN_CREATE) and an optional FAN_ONDIR flag. 2. Directory fid info + name + mask that includes only FAN_DIR_MODIFY. To request the second event flavor, user needs to set the event type FAN_DIR_MODIFY in the mark mask. The first flavor is supported since kernel v5.1 for groups initialized with flag FAN_REPORT_FID. It is intended to be used for watching directories in "batch mode" - the watcher is notified when directory is changed and re-scans the directory content in response. This event flavor is stored more compactly in the event queue, so it is optimal for workloads with frequent directory changes. The second event flavor is intended to be used for watching large directories, where the cost of re-scan of the directory on every change is considered too high. The watcher getting the event with the directory fid and entry name is expected to call fstatat(2) to query the content of the entry after the change. Legacy inotify events are reported with name and event mask (e.g. "foo", FAN_CREATE | FAN_ONDIR). That can lead users to the conclusion that there is *currently* an entry "foo" that is a sub-directory, when in fact "foo" may be negative or non-dir by the time user gets the event. To make it clear that the current state of the named entry is unknown, when reporting an event with name info, fanotify obfuscates the specific event types (e.g. create,delete,rename) and uses a common event type - FAN_DIR_MODIFY to describe the change. This should make it harder for users to make wrong assumptions and write buggy filesystem monitors. At this point, name info reporting is not yet implemented, so trying to set FAN_DIR_MODIFY in mark mask will return -EINVAL. Link: https://lore.kernel.org/r/20200319151022.31456-12-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 6 ++++++ include/linux/fsnotify_backend.h | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 860018f3e545..5ab28f6c7d26 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -30,6 +30,12 @@ static inline void fsnotify_name(struct inode *dir, __u32 mask, const struct qstr *name, u32 cookie) { fsnotify(dir, mask, child, FSNOTIFY_EVENT_INODE, name, cookie); + /* + * Send another flavor of the event without child inode data and + * without the specific event type (e.g. FS_CREATE|FS_IS_DIR). + * The name is relative to the dir inode the event is reported to. + */ + fsnotify(dir, FS_DIR_MODIFY, dir, FSNOTIFY_EVENT_INODE, name, 0); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c72cbea20ef7..f0c506405b54 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -47,6 +47,7 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +#define FS_DIR_MODIFY 0x00080000 /* Directory entry was modified */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ /* This inode cares about things that happen to its children. Always set for @@ -66,7 +67,8 @@ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ -#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | \ + FS_DIR_MODIFY) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) -- cgit v1.2.3 From ee0c8e494cc3c135350cd5c4752e82af3feae1ab Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 20 Feb 2020 10:00:32 +0100 Subject: backlight: corgi: Convert to use GPIO descriptors The code in the Corgi backlight driver can be considerably simplified by moving to GPIO descriptors and lookup tables from the board files instead of passing GPIO numbers using the old API. Make sure to encode inversion semantics for the Akita and Spitz platforms inside the GPIO lookup table and drop the custom inversion semantics from the driver. All in-tree users are converted in this patch. Signed-off-by: Linus Walleij Acked-by: Robert Jarzmik Reviewed-by: Daniel Thompson Signed-off-by: Lee Jones --- include/linux/spi/corgi_lcd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/corgi_lcd.h b/include/linux/spi/corgi_lcd.h index edf4beccdadb..0b857616919c 100644 --- a/include/linux/spi/corgi_lcd.h +++ b/include/linux/spi/corgi_lcd.h @@ -11,9 +11,6 @@ struct corgi_lcd_platform_data { int default_intensity; int limit_mask; - int gpio_backlight_on; /* -1 if n/a */ - int gpio_backlight_cont; /* -1 if n/a */ - void (*notify)(int intensity); void (*kick_battery)(void); }; -- cgit v1.2.3 From 93ef1429e556f739a208e3968883ed51480580e8 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 23 Mar 2020 13:50:54 +0000 Subject: cpu/hotplug: Add new {add,remove}_cpu() functions The new functions use device_{online,offline}() which are userspace safe. This is in preparation to move cpu_{up, down} kernel users to use a safer interface that is not racy with userspace. Suggested-by: "Paul E. McKenney" Signed-off-by: Qais Yousef Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Link: https://lkml.kernel.org/r/20200323135110.30522-2-qais.yousef@arm.com --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 1ca2baf817ed..cf8cf38dca43 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -89,6 +89,7 @@ extern ssize_t arch_cpu_release(const char *, size_t); #ifdef CONFIG_SMP extern bool cpuhp_tasks_frozen; int cpu_up(unsigned int cpu); +int add_cpu(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); @@ -118,6 +119,7 @@ extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); int cpu_down(unsigned int cpu); +int remove_cpu(unsigned int cpu); #else /* CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 0441a5597c5d02ed7abed1d989eaaa1595dedac5 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 23 Mar 2020 13:50:55 +0000 Subject: cpu/hotplug: Create a new function to shutdown nonboot cpus This function will be used later in machine_shutdown() for some architectures. disable_nonboot_cpus() is not safe to use when doing machine_down(), because it relies on freeze_secondary_cpus() which in turn is a suspend/resume related freeze and could abort if the logic detects any pending activities that can prevent finishing the offlining process. Signed-off-by: Qais Yousef Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200323135110.30522-3-qais.yousef@arm.com --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index cf8cf38dca43..64a246e9c8db 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -120,6 +120,7 @@ extern void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); int cpu_down(unsigned int cpu); int remove_cpu(unsigned int cpu); +extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); #else /* CONFIG_HOTPLUG_CPU */ @@ -131,6 +132,7 @@ static inline int cpus_read_trylock(void) { return true; } static inline void lockdep_assert_cpus_held(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } +static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ /* Wrappers which go away once all code is converted */ -- cgit v1.2.3 From d720f98604391dab6aa3cb4c1bc005ed1aba4703 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 23 Mar 2020 13:51:01 +0000 Subject: cpu/hotplug: Provide bringup_hibernate_cpu() arm64 uses cpu_up() in the resume from hibernation code to ensure that the CPU on which the system hibernated is online. Provide a core function for this. [ tglx: Split out from the combo arm64 patch ] Signed-off-by: Qais Yousef Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Cc: Will Deacon Link: https://lkml.kernel.org/r/20200323135110.30522-9-qais.yousef@arm.com --- include/linux/cpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 64a246e9c8db..9dc1e892e193 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -93,6 +93,7 @@ int add_cpu(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); +int bringup_hibernate_cpu(unsigned int sleep_cpu); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 -- cgit v1.2.3 From b99a26593b5190fac6b5c1f81a7f8cc128a25c98 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 23 Mar 2020 13:51:09 +0000 Subject: cpu/hotplug: Move bringup of secondary CPUs out of smp_init() This is the last direct user of cpu_up() before it can become an internal implementation detail of the cpu subsystem. Signed-off-by: Qais Yousef Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200323135110.30522-17-qais.yousef@arm.com --- include/linux/cpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 9dc1e892e193..8b295f78da25 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -94,6 +94,7 @@ void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); +void bringup_nonboot_cpus(unsigned int setup_max_cpus); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 -- cgit v1.2.3 From 33c3736ec88811b9b6f6ce2cc8967f6b97c3db5e Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 23 Mar 2020 13:51:10 +0000 Subject: cpu/hotplug: Hide cpu_up/down() Use separate functions for the device core to bring a CPU up and down. Users outside the device core must use add/remove_cpu() which will take care of extra housekeeping work like keeping sysfs in sync. Make cpu_up/down() static and replace the extra layer of indirection. [ tglx: Removed the extra wrapper functions and adjusted function names ] Signed-off-by: Qais Yousef Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200323135110.30522-18-qais.yousef@arm.com --- include/linux/cpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8b295f78da25..9ead281157d3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -88,8 +88,8 @@ extern ssize_t arch_cpu_release(const char *, size_t); #ifdef CONFIG_SMP extern bool cpuhp_tasks_frozen; -int cpu_up(unsigned int cpu); int add_cpu(unsigned int cpu); +int cpu_device_up(struct device *dev); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); @@ -120,8 +120,8 @@ extern void lockdep_assert_cpus_held(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); -int cpu_down(unsigned int cpu); int remove_cpu(unsigned int cpu); +int cpu_device_down(struct device *dev); extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); #else /* CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 2b8bd423614c595540eaadcfbc702afe8e155e50 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 25 Mar 2020 16:07:04 +0300 Subject: block/diskstats: more accurate approximation of io_ticks for slow disks Currently io_ticks is approximated by adding one at each start and end of requests if jiffies counter has changed. This works perfectly for requests shorter than a jiffy or if one of requests starts/ends at each jiffy. If disk executes just one request at a time and they are longer than two jiffies then only first and last jiffies will be accounted. Fix is simple: at the end of request add up into io_ticks jiffies passed since last update rather than just one jiffy. Example: common HDD executes random read 4k requests around 12ms. fio --name=test --filename=/dev/sdb --rw=randread --direct=1 --runtime=30 & iostat -x 10 sdb Note changes of iostat's "%util" 8,43% -> 99,99% before/after patch: Before: Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util sdb 0,00 0,00 82,60 0,00 330,40 0,00 8,00 0,96 12,09 12,09 0,00 1,02 8,43 After: Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util sdb 0,00 0,00 82,50 0,00 330,00 0,00 8,00 1,00 12,10 12,10 0,00 12,12 99,99 Now io_ticks does not loose time between start and end of requests, but for queue-depth > 1 some I/O time between adjacent starts might be lost. For load estimation "%util" is not as useful as average queue length, but it clearly shows how often disk queue is completely empty. Fixes: 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting") Signed-off-by: Konstantin Khlebnikov Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index da62b44b15be..13bb51f37b3f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -422,7 +422,7 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw); -void update_io_ticks(struct hd_struct *part, unsigned long now); +void update_io_ticks(struct hd_struct *part, unsigned long now, bool end); /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, -- cgit v1.2.3 From ea18e0f0a63af9064db3d4065d90fa743ae0991b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 25 Mar 2020 16:07:06 +0300 Subject: block/diskstats: accumulate all per-cpu counters in one pass Reading /proc/diskstats iterates over all cpus for summing each field. It's faster to sum all fields in one pass. Hammering /proc/diskstats with fio shows 2x performance improvement: fio --name=test --numjobs=$JOBS --filename=/proc/diskstats \ --size=1k --bs=1k --fallocate=none --create_on_open=1 \ --time_based=1 --runtime=10 --invalidate=0 --group_report JOBS=1 JOBS=10 Before: 7k iops 64k iops After: 18k iops 120k iops Also this way code is more compact: add/remove: 1/0 grow/shrink: 0/2 up/down: 194/-1540 (-1346) Function old new delta part_stat_read_all - 194 +194 diskstats_show 1344 631 -713 part_stat_show 1219 392 -827 Total: Before=14966947, After=14965601, chg -0.01% Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 13bb51f37b3f..b0c588d1aa29 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -380,9 +380,6 @@ static inline void free_part_stats(struct hd_struct *part) #endif /* CONFIG_SMP */ -#define part_stat_read_msecs(part, which) \ - div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC) - #define part_stat_read_accum(part, field) \ (part_stat_read(part, field[STAT_READ]) + \ part_stat_read(part, field[STAT_WRITE]) + \ -- cgit v1.2.3 From 8cd5b8fc00716fb71f6b32d594b38a8f286d6c20 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 25 Mar 2020 16:07:08 +0300 Subject: block/diskstats: replace time_in_queue with sum of request times Column "time_in_queue" in diskstats is supposed to show total waiting time of all requests. I.e. value should be equal to the sum of times from other columns. But this is not true, because column "time_in_queue" is counted separately in jiffies rather than in nanoseconds as other times. This patch removes redundant counter for "time_in_queue" and shows total time of read, write, discard and flush requests. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b0c588d1aa29..790fdc3e0b3d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -46,7 +46,6 @@ struct disk_stats { unsigned long ios[NR_STAT_GROUPS]; unsigned long merges[NR_STAT_GROUPS]; unsigned long io_ticks; - unsigned long time_in_queue; local_t in_flight[2]; }; -- cgit v1.2.3 From eea9673250db4e854e9998ef9da6d4584857f0ea Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 25 Mar 2020 10:03:36 -0500 Subject: exec: Add exec_update_mutex to replace cred_guard_mutex The cred_guard_mutex is problematic as it is held over possibly indefinite waits for userspace. The possible indefinite waits for userspace that I have identified are: The cred_guard_mutex is held in PTRACE_EVENT_EXIT waiting for the tracer. The cred_guard_mutex is held over "put_user(0, tsk->clear_child_tid)" in exit_mm(). The cred_guard_mutex is held over "get_user(futex_offset, ...") in exit_robust_list. The cred_guard_mutex held over copy_strings. The functions get_user and put_user can trigger a page fault which can potentially wait indefinitely in the case of userfaultfd or if userspace implements part of the page fault path. In any of those cases the userspace process that the kernel is waiting for might make a different system call that winds up taking the cred_guard_mutex and result in deadlock. Holding a mutex over any of those possibly indefinite waits for userspace does not appear necessary. Add exec_update_mutex that will just cover updating the process during exec where the permissions and the objects pointed to by the task struct may be out of sync. The plan is to switch the users of cred_guard_mutex to exec_update_mutex one by one. This lets us move forward while still being careful and not introducing any regressions. Link: https://lore.kernel.org/lkml/20160921152946.GA24210@dhcp22.suse.cz/ Link: https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/ Link: https://lore.kernel.org/linux-fsdevel/20161102181806.GB1112@redhat.com/ Link: https://lore.kernel.org/lkml/20160923095031.GA14923@redhat.com/ Link: https://lore.kernel.org/lkml/20170213141452.GA30203@redhat.com/ Ref: 45c1a159b85b ("Add PTRACE_O_TRACEVFORKDONE and PTRACE_O_TRACEEXIT facilities.") Ref: 456f17cd1a28 ("[PATCH] user-vm-unlock-2.5.31-A2") Reviewed-by: Kirill Tkhai Signed-off-by: "Eric W. Biederman" Signed-off-by: Bernd Edlinger Signed-off-by: Eric W. Biederman --- include/linux/binfmts.h | 8 +++++++- include/linux/sched/signal.h | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b40fc633f3be..a345d9fed3d8 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -44,7 +44,13 @@ struct linux_binprm { * exec has happened. Used to sanitize execution environment * and to set AT_SECURE auxv for glibc. */ - secureexec:1; + secureexec:1, + /* + * Set by flush_old_exec, when exec_mmap has been called. + * This is past the point of no return, when the + * exec_update_mutex has been taken. + */ + called_exec_mmap:1; #ifdef __alpha__ unsigned int taso:1; #endif diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 88050259c466..a29df79540ce 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -224,7 +224,14 @@ struct signal_struct { struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations - * (notably. ptrace) */ + * (notably. ptrace) + * Deprecated do not use in new code. + * Use exec_update_mutex instead. + */ + struct mutex exec_update_mutex; /* Held while task_struct is being + * updated during exec, and may have + * inconsistent permissions. + */ } __randomize_layout; /* -- cgit v1.2.3 From 31eb6186797c149665fd44f3847bdf8d539efa59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:35 +0100 Subject: block: mark block_depr static Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 790fdc3e0b3d..b322e805a916 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -27,7 +27,6 @@ #define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; -extern struct kobject *block_depr; extern struct class block_class; #define DISK_MAX_PARTS 256 -- cgit v1.2.3 From 6005771c17db56b6a9acc12bd084134191560e18 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:36 +0100 Subject: block: mark part_in_flight and part_in_flight_rw static Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b322e805a916..c0c5bb51fa56 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -409,9 +409,6 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_local_read_cpu(gendiskp, field, cpu) \ local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) -unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part); -void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw); void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, -- cgit v1.2.3 From 29125ed624eeb3ac2eb7bca313a8de29c1c84dcd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:40 +0100 Subject: block: move guard_bio_eod to bio.c This is bio layer functionality and not related to buffer heads. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 853d92ceee64..a430e9c1c2d2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -471,6 +471,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, extern int bio_uncopy_user(struct bio *); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); void bio_truncate(struct bio *bio, unsigned new_size); +void guard_bio_eod(struct bio *bio); static inline void zero_fill_bio(struct bio *bio) { -- cgit v1.2.3 From 581e26004a09c50e5017caadc850ea17e374a5ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:41 +0100 Subject: block: move block layer internals out of include/linux/genhd.h None of this needs to be exposed to drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 120 -------------------------------------------------- 1 file changed, 120 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c0c5bb51fa56..14354a6e89c2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -298,9 +298,6 @@ extern void disk_part_iter_init(struct disk_part_iter *piter, extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); -extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, - sector_t sector); - /* * Macros to operate on percpu disk statistics: * @@ -409,13 +406,6 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_local_read_cpu(gendiskp, field, cpu) \ local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) -void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, - int rw); -void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, - int rw); - -void update_io_ticks(struct hd_struct *part, unsigned long now, bool end); - /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups); @@ -465,27 +455,11 @@ static inline void set_capacity(struct gendisk *disk, sector_t size) disk->part0.nr_sects = size; } -#define ADDPART_FLAG_NONE 0 -#define ADDPART_FLAG_RAID 1 -#define ADDPART_FLAG_WHOLEDISK 2 - -extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); -extern void blk_free_devt(dev_t devt); -extern void blk_invalidate_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); -extern char *disk_name (struct gendisk *hd, int partno, char *buf); int bdev_disk_changed(struct block_device *bdev, bool invalidate); int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev); -extern int disk_expand_part_tbl(struct gendisk *disk, int target); -extern struct hd_struct * __must_check add_partition(struct gendisk *disk, - int partno, sector_t start, - sector_t len, int flags, - struct partition_meta_info - *info); -extern void __delete_partition(struct percpu_ref *); -extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); extern struct gendisk *__alloc_disk_node(int minors, int node_id); @@ -517,100 +491,6 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) -static inline int hd_ref_init(struct hd_struct *part) -{ - if (percpu_ref_init(&part->ref, __delete_partition, 0, - GFP_KERNEL)) - return -ENOMEM; - return 0; -} - -static inline void hd_struct_get(struct hd_struct *part) -{ - percpu_ref_get(&part->ref); -} - -static inline int hd_struct_try_get(struct hd_struct *part) -{ - return percpu_ref_tryget_live(&part->ref); -} - -static inline void hd_struct_put(struct hd_struct *part) -{ - percpu_ref_put(&part->ref); -} - -static inline void hd_struct_kill(struct hd_struct *part) -{ - percpu_ref_kill(&part->ref); -} - -static inline void hd_free_part(struct hd_struct *part) -{ - free_part_stats(part); - kfree(part->info); - percpu_ref_exit(&part->ref); -} - -/* - * Any access of part->nr_sects which is not protected by partition - * bd_mutex or gendisk bdev bd_mutex, should be done using this - * accessor function. - * - * Code written along the lines of i_size_read() and i_size_write(). - * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption - * on. - */ -static inline sector_t part_nr_sects_read(struct hd_struct *part) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - sector_t nr_sects; - unsigned seq; - do { - seq = read_seqcount_begin(&part->nr_sects_seq); - nr_sects = part->nr_sects; - } while (read_seqcount_retry(&part->nr_sects_seq, seq)); - return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - sector_t nr_sects; - - preempt_disable(); - nr_sects = part->nr_sects; - preempt_enable(); - return nr_sects; -#else - return part->nr_sects; -#endif -} - -/* - * Should be called with mutex lock held (typically bd_mutex) of partition - * to provide mutual exlusion among writers otherwise seqcount might be - * left in wrong state leaving the readers spinning infinitely. - */ -static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - write_seqcount_begin(&part->nr_sects_seq); - part->nr_sects = size; - write_seqcount_end(&part->nr_sects_seq); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - preempt_disable(); - part->nr_sects = size; - preempt_enable(); -#else - part->nr_sects = size; -#endif -} - -#if defined(CONFIG_BLK_DEV_INTEGRITY) -extern void blk_integrity_add(struct gendisk *); -extern void blk_integrity_del(struct gendisk *); -#else /* CONFIG_BLK_DEV_INTEGRITY */ -static inline void blk_integrity_add(struct gendisk *disk) { } -static inline void blk_integrity_del(struct gendisk *disk) { } -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.3 From c6a564ffadc9105880329710164ee493f0de103c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Mar 2020 16:48:42 +0100 Subject: block: move the part_stat* helpers from genhd.h to a new header These macros are just used by a few files. Move them out of genhd.h, which is included everywhere into a new standalone header. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 108 ------------------------------------------- include/linux/part_stat.h | 115 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 108 deletions(-) create mode 100644 include/linux/part_stat.h (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 14354a6e89c2..927eed0be179 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -298,114 +298,6 @@ extern void disk_part_iter_init(struct disk_part_iter *piter, extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); -/* - * Macros to operate on percpu disk statistics: - * - * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters - * and should be called between disk_stat_lock() and - * disk_stat_unlock(). - * - * part_stat_read() can be called at any time. - * - * part_stat_{add|set_all}() and {init|free}_part_stats are for - * internal use only. - */ -#ifdef CONFIG_SMP -#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) -#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) - -#define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field) - -#define part_stat_get(part, field) \ - part_stat_get_cpu(part, field, smp_processor_id()) - -#define part_stat_read(part, field) \ -({ \ - typeof((part)->dkstats->field) res = 0; \ - unsigned int _cpu; \ - for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ - res; \ -}) - -static inline void part_stat_set_all(struct hd_struct *part, int value) -{ - int i; - - for_each_possible_cpu(i) - memset(per_cpu_ptr(part->dkstats, i), value, - sizeof(struct disk_stats)); -} - -static inline int init_part_stats(struct hd_struct *part) -{ - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; - return 1; -} - -static inline void free_part_stats(struct hd_struct *part) -{ - free_percpu(part->dkstats); -} - -#else /* !CONFIG_SMP */ -#define part_stat_lock() ({ rcu_read_lock(); 0; }) -#define part_stat_unlock() rcu_read_unlock() - -#define part_stat_get(part, field) ((part)->dkstats.field) -#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) -#define part_stat_read(part, field) part_stat_get(part, field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) -{ - memset(&part->dkstats, value, sizeof(struct disk_stats)); -} - -static inline int init_part_stats(struct hd_struct *part) -{ - return 1; -} - -static inline void free_part_stats(struct hd_struct *part) -{ -} - -#endif /* CONFIG_SMP */ - -#define part_stat_read_accum(part, field) \ - (part_stat_read(part, field[STAT_READ]) + \ - part_stat_read(part, field[STAT_WRITE]) + \ - part_stat_read(part, field[STAT_DISCARD])) - -#define __part_stat_add(part, field, addnd) \ - (part_stat_get(part, field) += (addnd)) - -#define part_stat_add(part, field, addnd) do { \ - __part_stat_add((part), field, addnd); \ - if ((part)->partno) \ - __part_stat_add(&part_to_disk((part))->part0, \ - field, addnd); \ -} while (0) - -#define part_stat_dec(gendiskp, field) \ - part_stat_add(gendiskp, field, -1) -#define part_stat_inc(gendiskp, field) \ - part_stat_add(gendiskp, field, 1) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) - -#define part_stat_local_dec(gendiskp, field) \ - local_dec(&(part_stat_get(gendiskp, field))) -#define part_stat_local_inc(gendiskp, field) \ - local_inc(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read(gendiskp, field) \ - local_read(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read_cpu(gendiskp, field, cpu) \ - local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) - /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups); diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h new file mode 100644 index 000000000000..ece607607a86 --- /dev/null +++ b/include/linux/part_stat.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PART_STAT_H +#define _LINUX_PART_STAT_H + +#include + +/* + * Macros to operate on percpu disk statistics: + * + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters + * and should be called between disk_stat_lock() and + * disk_stat_unlock(). + * + * part_stat_read() can be called at any time. + * + * part_stat_{add|set_all}() and {init|free}_part_stats are for + * internal use only. + */ +#ifdef CONFIG_SMP +#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) + +#define part_stat_get_cpu(part, field, cpu) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field) + +#define part_stat_get(part, field) \ + part_stat_get_cpu(part, field, smp_processor_id()) + +#define part_stat_read(part, field) \ +({ \ + typeof((part)->dkstats->field) res = 0; \ + unsigned int _cpu; \ + for_each_possible_cpu(_cpu) \ + res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ + res; \ +}) + +static inline void part_stat_set_all(struct hd_struct *part, int value) +{ + int i; + + for_each_possible_cpu(i) + memset(per_cpu_ptr(part->dkstats, i), value, + sizeof(struct disk_stats)); +} + +static inline int init_part_stats(struct hd_struct *part) +{ + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ + free_percpu(part->dkstats); +} + +#else /* !CONFIG_SMP */ +#define part_stat_lock() ({ rcu_read_lock(); 0; }) +#define part_stat_unlock() rcu_read_unlock() + +#define part_stat_get(part, field) ((part)->dkstats.field) +#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) +#define part_stat_read(part, field) part_stat_get(part, field) + +static inline void part_stat_set_all(struct hd_struct *part, int value) +{ + memset(&part->dkstats, value, sizeof(struct disk_stats)); +} + +static inline int init_part_stats(struct hd_struct *part) +{ + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ +} + +#endif /* CONFIG_SMP */ + +#define part_stat_read_accum(part, field) \ + (part_stat_read(part, field[STAT_READ]) + \ + part_stat_read(part, field[STAT_WRITE]) + \ + part_stat_read(part, field[STAT_DISCARD])) + +#define __part_stat_add(part, field, addnd) \ + (part_stat_get(part, field) += (addnd)) + +#define part_stat_add(part, field, addnd) do { \ + __part_stat_add((part), field, addnd); \ + if ((part)->partno) \ + __part_stat_add(&part_to_disk((part))->part0, \ + field, addnd); \ +} while (0) + +#define part_stat_dec(gendiskp, field) \ + part_stat_add(gendiskp, field, -1) +#define part_stat_inc(gendiskp, field) \ + part_stat_add(gendiskp, field, 1) +#define part_stat_sub(gendiskp, field, subnd) \ + part_stat_add(gendiskp, field, -subnd) + +#define part_stat_local_dec(gendiskp, field) \ + local_dec(&(part_stat_get(gendiskp, field))) +#define part_stat_local_inc(gendiskp, field) \ + local_inc(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read(gendiskp, field) \ + local_read(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read_cpu(gendiskp, field, cpu) \ + local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) + +#endif /* _LINUX_PART_STAT_H */ -- cgit v1.2.3 From 44d705b0370b1d581f46ff23e5d33e8b5ff8ec58 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Mar 2020 17:10:22 +0200 Subject: fanotify: report name info for FAN_DIR_MODIFY event Report event FAN_DIR_MODIFY with name in a variable length record similar to how fid's are reported. With name info reporting implemented, setting FAN_DIR_MODIFY in mark mask is now allowed. When events are reported with name, the reported fid identifies the directory and the name follows the fid. The info record type for this event info is FAN_EVENT_INFO_TYPE_DFID_NAME. For now, all reported events have at most one info record which is either FAN_EVENT_INFO_TYPE_FID or FAN_EVENT_INFO_TYPE_DFID_NAME (for FAN_DIR_MODIFY). Later on, events "on child" will report both records. There are several ways that an application can use this information: 1. When watching a single directory, the name is always relative to the watched directory, so application need to fstatat(2) the name relative to the watched directory. 2. When watching a set of directories, the application could keep a map of dirfd for all watched directories and hash the map by fid obtained with name_to_handle_at(2). When getting a name event, the fid in the event info could be used to lookup the base dirfd in the map and then call fstatat(2) with that dirfd. 3. When watching a filesystem (FAN_MARK_FILESYSTEM) or a large set of directories, the application could use open_by_handle_at(2) with the fid in event info to obtain dirfd for the directory where event happened and call fstatat(2) with this dirfd. The last option scales better for a large number of watched directories. The first two options may be available in the future also for non privileged fanotify watchers, because open_by_handle_at(2) requires the CAP_DAC_READ_SEARCH capability. Link: https://lore.kernel.org/r/20200319151022.31456-15-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index b79fa9bb7359..3049a6c06d9e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -47,7 +47,8 @@ * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. */ -#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ + FAN_DIR_MODIFY) /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ -- cgit v1.2.3 From 9ce3bf225e5a908756b90b8f7bbc38834427296b Mon Sep 17 00:00:00 2001 From: Clement Leger Date: Mon, 2 Mar 2020 10:38:55 +0100 Subject: remoteproc: Use size_t type for len in da_to_va With upcoming changes in elf loader for elf64 support, section size will be a u64. When used with da_to_va, this will potentially lead to overflow if using the current "int" type for len argument. Change da_to_va prototype to use a size_t for len and fix all users of this function. Reviewed-by: Bjorn Andersson Reviewed-by: Mathieu Poirier Signed-off-by: Clement Leger Link: https://lore.kernel.org/r/20200302093902.27849-2-cleger@kalray.eu Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 16ad66683ad0..89215798eaea 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -374,7 +374,7 @@ struct rproc_ops { int (*start)(struct rproc *rproc); int (*stop)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); - void * (*da_to_va)(struct rproc *rproc, u64 da, int len); + void * (*da_to_va)(struct rproc *rproc, u64 da, size_t len); int (*parse_fw)(struct rproc *rproc, const struct firmware *fw); int (*handle_rsc)(struct rproc *rproc, u32 rsc_type, void *rsc, int offset, int avail); -- cgit v1.2.3 From 096ee78669d2bc8fccc40117de8d4e838a0c80db Mon Sep 17 00:00:00 2001 From: Clement Leger Date: Mon, 2 Mar 2020 10:38:56 +0100 Subject: remoteproc: Use size_t instead of int for rproc_mem_entry len Now that rproc_da_to_va uses a size_t for length, use a size_t for len field of rproc_mem_entry. Function used to create such structures now takes a size_t instead of int to allow full size range to be handled. Reviewed-by: Bjorn Andersson Reviewed-by: Mathieu Poirier Signed-off-by: Clement Leger Link: https://lore.kernel.org/r/20200302093902.27849-3-cleger@kalray.eu Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 89215798eaea..bee559330204 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -329,7 +329,7 @@ struct rproc; struct rproc_mem_entry { void *va; dma_addr_t dma; - int len; + size_t len; u32 da; void *priv; char name[32]; @@ -599,13 +599,13 @@ void rproc_add_carveout(struct rproc *rproc, struct rproc_mem_entry *mem); struct rproc_mem_entry * rproc_mem_entry_init(struct device *dev, - void *va, dma_addr_t dma, int len, u32 da, + void *va, dma_addr_t dma, size_t len, u32 da, int (*alloc)(struct rproc *, struct rproc_mem_entry *), int (*release)(struct rproc *, struct rproc_mem_entry *), const char *name, ...); struct rproc_mem_entry * -rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, int len, +rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, size_t len, u32 da, const char *name, ...); int rproc_boot(struct rproc *rproc); -- cgit v1.2.3 From e4ae4b7d01699d0f3ea61bbef119f2d67e5455c0 Mon Sep 17 00:00:00 2001 From: Clement Leger Date: Mon, 2 Mar 2020 10:38:57 +0100 Subject: remoteproc: Use u64 type for boot_addr elf64 entry is defined as a u64. Since boot_addr is used to store the elf entry point, change boot_addr type to u64 to support both elf32 and elf64. In the same time, fix users that were using this variable. Reviewed-by: Bjorn Andersson Signed-off-by: Clement Leger Link: https://lore.kernel.org/r/20200302093902.27849-4-cleger@kalray.eu [bjorn: Fixes up return type of rproc_get_boot_addr()] Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index bee559330204..1683d6c386a6 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -382,7 +382,7 @@ struct rproc_ops { struct rproc *rproc, const struct firmware *fw); int (*load)(struct rproc *rproc, const struct firmware *fw); int (*sanity_check)(struct rproc *rproc, const struct firmware *fw); - u32 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw); + u64 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw); }; /** @@ -498,7 +498,7 @@ struct rproc { int num_traces; struct list_head carveouts; struct list_head mappings; - u32 bootaddr; + u64 bootaddr; struct list_head rvdevs; struct list_head subdevs; struct idr notifyids; -- cgit v1.2.3 From 8f4033507d856be9a7983921ab3d2a1d03b9a093 Mon Sep 17 00:00:00 2001 From: Clement Leger Date: Mon, 2 Mar 2020 10:39:02 +0100 Subject: remoteproc: Adapt coredump to generate correct elf type Now that remoteproc can load an elf64, coredump elf class should be the same as the loaded elf class. In order to do that, add a elf_class field to rproc with default values. If an elf is loaded successfully, this field will be updated with the loaded elf class. Then, the coredump core code has been modified to use the generic elf macro in order to create an elf file with correct class. Reviewed-by: Mathieu Poirier Reviewed-by: Bjorn Andersson Signed-off-by: Clement Leger Link: https://lore.kernel.org/r/20200302093902.27849-9-cleger@kalray.eu Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 1683d6c386a6..ed127b2d35ca 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -514,6 +514,7 @@ struct rproc { bool auto_boot; struct list_head dump_segments; int nb_vdev; + u8 elf_class; }; /** -- cgit v1.2.3 From dc5192c449368eed3385f4405670aa3ed21d6270 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 23 Mar 2020 22:29:02 -0700 Subject: remoteproc: Introduce "panic" callback in ops Introduce generic support for handling kernel panics in remoteproc drivers, in order to allow operations needed for aiding in post mortem system debugging, such as flushing caches etc. The function can return a number of milliseconds needed by the remote to "settle" and the core will wait the longest returned duration before returning from the panic handler. Reviewed-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200324052904.738594-3-bjorn.andersson@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index ed127b2d35ca..9c07d7958c53 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -369,6 +369,8 @@ enum rsc_handling_status { * expects to find it * @sanity_check: sanity check the fw image * @get_boot_addr: get boot address to entry point specified in firmware + * @panic: optional callback to react to system panic, core will delay + * panic at least the returned number of milliseconds */ struct rproc_ops { int (*start)(struct rproc *rproc); @@ -383,6 +385,7 @@ struct rproc_ops { int (*load)(struct rproc *rproc, const struct firmware *fw); int (*sanity_check)(struct rproc *rproc, const struct firmware *fw); u64 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw); + unsigned long (*panic)(struct rproc *rproc); }; /** -- cgit v1.2.3 From 1070f24d4ae90420db342fe54c1ed90ef1129bb5 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 24 Mar 2020 13:00:28 +0200 Subject: remoteproc/omap: Remove the platform_data header The platform data header for OMAP remoteproc is no longer used for anything post ti-sysc and DT conversion, so just remove it completely. Signed-off-by: Tero Kristo Acked-by: Suman Anna Reviewed-by: Andrew F. Davis Acked-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200324110035.29907-9-t-kristo@ti.com Signed-off-by: Bjorn Andersson --- include/linux/platform_data/remoteproc-omap.h | 51 --------------------------- 1 file changed, 51 deletions(-) delete mode 100644 include/linux/platform_data/remoteproc-omap.h (limited to 'include/linux') diff --git a/include/linux/platform_data/remoteproc-omap.h b/include/linux/platform_data/remoteproc-omap.h deleted file mode 100644 index 7e3a16097672..000000000000 --- a/include/linux/platform_data/remoteproc-omap.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Remote Processor - omap-specific bits - * - * Copyright (C) 2011 Texas Instruments, Inc. - * Copyright (C) 2011 Google, Inc. - */ - -#ifndef _PLAT_REMOTEPROC_H -#define _PLAT_REMOTEPROC_H - -struct rproc_ops; -struct platform_device; - -/* - * struct omap_rproc_pdata - omap remoteproc's platform data - * @name: the remoteproc's name - * @oh_name: omap hwmod device - * @oh_name_opt: optional, secondary omap hwmod device - * @firmware: name of firmware file to load - * @mbox_name: name of omap mailbox device to use with this rproc - * @ops: start/stop rproc handlers - * @device_enable: omap-specific handler for enabling a device - * @device_shutdown: omap-specific handler for shutting down a device - * @set_bootaddr: omap-specific handler for setting the rproc boot address - */ -struct omap_rproc_pdata { - const char *name; - const char *oh_name; - const char *oh_name_opt; - const char *firmware; - const char *mbox_name; - const struct rproc_ops *ops; - int (*device_enable)(struct platform_device *pdev); - int (*device_shutdown)(struct platform_device *pdev); - void (*set_bootaddr)(u32); -}; - -#if defined(CONFIG_OMAP_REMOTEPROC) || defined(CONFIG_OMAP_REMOTEPROC_MODULE) - -void __init omap_rproc_reserve_cma(void); - -#else - -static inline void __init omap_rproc_reserve_cma(void) -{ -} - -#endif - -#endif /* _PLAT_REMOTEPROC_H */ -- cgit v1.2.3 From 0774a964ef561b7170d8d1b1bfe6f88002b6d219 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 20 Mar 2020 13:55:40 -0700 Subject: KVM: Fix out of range accesses to memslots Reset the LRU slot if it becomes invalid when deleting a memslot to fix an out-of-bounds/use-after-free access when searching through memslots. Explicitly check for there being no used slots in search_memslots(), and in the caller of s390's approximation variant. Fixes: 36947254e5f9 ("KVM: Dynamically size memslot array based on number of used slots") Reported-by: Qian Cai Cc: Peter Xu Signed-off-by: Sean Christopherson Message-Id: <20200320205546.2396-2-sean.j.christopherson@intel.com> Acked-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 35bc52e187a2..b19dee4ed7d9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1032,6 +1032,9 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn) int slot = atomic_read(&slots->lru_slot); struct kvm_memory_slot *memslots = slots->memslots; + if (unlikely(!slots->used_slots)) + return NULL; + if (gfn >= memslots[slot].base_gfn && gfn < memslots[slot].base_gfn + memslots[slot].npages) return &memslots[slot]; -- cgit v1.2.3 From 55c2b8b9a383487f4f083f62d163fe3278fece1a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 16 Mar 2020 16:21:52 +0100 Subject: mmc: core: Re-work the code for eMMC sanitize The error path for sanitize operations that completes with -ETIMEDOUT, is tightly coupled with the internal request handling code of the core. More precisely, mmc_wait_for_req_done() checks for specific sanitize errors. This is not only inefficient as it affects all types of requests, but also hackish. Therefore, let's improve the behaviour by moving the error path out of the mmc core. To do that, retuning needs to be held while running the sanitize operation. Moreover, to avoid exporting unnecessary symbols to the mmc block module, let's move the code into the mmc_ops.c file. While updating the actual code, let's also take the opportunity to clean up some of the mess around it. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20200316152152.15122-1-ulf.hansson@linaro.org --- include/linux/mmc/core.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index b7ba8810a3b5..29aa50711626 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -107,9 +107,6 @@ struct mmc_command { */ unsigned int busy_timeout; /* busy detect timeout in ms */ - /* Set this flag only for blocking sanitize request */ - bool sanitize_busy; - struct mmc_data *data; /* data segment associated with cmd */ struct mmc_request *mrq; /* associated request */ }; -- cgit v1.2.3 From 3316ab2b45f6bf4797d8d65b22fda3cc13318890 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 24 Mar 2020 11:40:45 +0530 Subject: bus: mhi: core: Add support for reading MHI info from device The MHI register base has several registers used for getting the MHI specific information such as version, family, major, and minor numbers from the device. This information can be used by the controller drivers for usecases such as applying quirks for a specific revision etc... While at it, let's also rearrange the local variables in mhi_register_controller(). Suggested-by: Hemant Kumar Reviewed-by: Jeffrey Hugo Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200324061050.14845-3-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d83e7772681b..ad1996001965 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -310,6 +310,10 @@ struct mhi_controller_config { * @sw_ev_rings: Number of software event rings * @nr_irqs_req: Number of IRQs required to operate (optional) * @nr_irqs: Number of IRQ allocated by bus master (required) + * @family_number: MHI controller family number + * @device_number: MHI controller device number + * @major_version: MHI controller major revision number + * @minor_version: MHI controller minor revision number * @mhi_event: MHI event ring configurations table * @mhi_cmd: MHI command ring configurations table * @mhi_ctxt: MHI device context, shared memory between host and device @@ -348,6 +352,15 @@ struct mhi_controller_config { * Fields marked as (required) need to be populated by the controller driver * before calling mhi_register_controller(). For the fields marked as (optional) * they can be populated depending on the usecase. + * + * The following fields are present for the purpose of implementing any device + * specific quirks or customizations for specific MHI revisions used in device + * by the controller drivers. The MHI stack will just populate these fields + * during mhi_register_controller(): + * family_number + * device_number + * major_version + * minor_version */ struct mhi_controller { struct device *cntrl_dev; @@ -375,6 +388,10 @@ struct mhi_controller { u32 sw_ev_rings; u32 nr_irqs_req; u32 nr_irqs; + u32 family_number; + u32 device_number; + u32 major_version; + u32 minor_version; struct mhi_event *mhi_event; struct mhi_cmd *mhi_cmd; -- cgit v1.2.3 From d7242c4641fba521a1ea9dbccb11a40cf38cd912 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 18 Mar 2020 17:22:47 -0400 Subject: pNFS: Add a helper to allocate the array of buckets Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 94c77ed55ce1..e91c917c9c1c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1270,10 +1270,19 @@ struct pnfs_commit_bucket { struct nfs_writeverf direct_verf; }; +struct pnfs_commit_array { + struct list_head cinfo_list; + struct list_head lseg_list; + struct pnfs_layout_segment *lseg; + struct rcu_head rcu; + unsigned int nbuckets; + struct pnfs_commit_bucket buckets[]; +}; + struct pnfs_ds_commit_info { - int nwritten; - int ncommitting; - int nbuckets; + unsigned int nwritten; + unsigned int ncommitting; + unsigned int nbuckets; struct pnfs_commit_bucket *buckets; }; -- cgit v1.2.3 From 7c8978c0837d40c302f5e90d24c298d9ca9fc097 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 25 Mar 2020 12:34:06 +0100 Subject: driver core: platform: Initialize dma_parms for platform devices It's currently the platform driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common platform bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Cc: Suggested-by: Christoph Hellwig Tested-by: Ludovic Barre Reviewed-by: Linus Walleij Acked-by: Arnd Bergmann Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20200325113407.26996-2-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 041bfa412aa0..81900b3cbe37 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -25,6 +25,7 @@ struct platform_device { bool id_auto; struct device dev; u64 platform_dma_mask; + struct device_dma_parameters dma_parms; u32 num_resources; struct resource *resource; -- cgit v1.2.3 From 5caf6102e32ead7ed5d21b5309c1a4a7d70e6a9f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 25 Mar 2020 12:34:07 +0100 Subject: amba: Initialize dma_parms for amba devices It's currently the amba driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common amba bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Cc: Cc: Russell King Suggested-by: Christoph Hellwig Tested-by: Ludovic Barre Reviewed-by: Linus Walleij Acked-by: Arnd Bergmann Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20200325113407.26996-3-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/amba/bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 26f0ecf401ea..0bbfd647f5c6 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -65,6 +65,7 @@ struct amba_device { struct device dev; struct resource res; struct clk *pclk; + struct device_dma_parameters dma_parms; unsigned int periphid; unsigned int cid; struct amba_cs_uci_id uci; -- cgit v1.2.3 From 6f09eae3b5d974ef845e56690d6bc2b8f2a70acd Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:57:57 +0100 Subject: ata: expose ncq_enable_prio sysfs attribute only on NCQ capable hosts There is no point in exposing ncq_enable_prio sysfs attribute for devices on PATA and non-NCQ capable SATA hosts so: * remove dev_attr_ncq_prio_enable from ata_common_sdev_attrs[] * add ata_ncq_sdev_attrs[] * update ATA_NCQ_SHT() macro to use ata_ncq_sdev_attrs[] Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 710e09dae910..350fa584acde 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1334,6 +1334,7 @@ extern int ata_link_nr_enabled(struct ata_link *link); extern const struct ata_port_operations ata_base_port_ops; extern const struct ata_port_operations sata_port_ops; extern struct device_attribute *ata_common_sdev_attrs[]; +extern struct device_attribute *ata_ncq_sdev_attrs[]; /* * All sht initializers (BASE, PIO, BMDMA, NCQ) must be instantiated @@ -1341,7 +1342,7 @@ extern struct device_attribute *ata_common_sdev_attrs[]; * edge driver's module reference, otherwise the driver can be unloaded * even if the scsi_device is being accessed. */ -#define ATA_BASE_SHT(drv_name) \ +#define __ATA_BASE_SHT(drv_name) \ .module = THIS_MODULE, \ .name = drv_name, \ .ioctl = ata_scsi_ioctl, \ @@ -1355,11 +1356,15 @@ extern struct device_attribute *ata_common_sdev_attrs[]; .slave_configure = ata_scsi_slave_config, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ - .unlock_native_capacity = ata_scsi_unlock_native_capacity, \ + .unlock_native_capacity = ata_scsi_unlock_native_capacity + +#define ATA_BASE_SHT(drv_name) \ + __ATA_BASE_SHT(drv_name), \ .sdev_attrs = ata_common_sdev_attrs #define ATA_NCQ_SHT(drv_name) \ - ATA_BASE_SHT(drv_name), \ + __ATA_BASE_SHT(drv_name), \ + .sdev_attrs = ata_ncq_sdev_attrs, \ .change_queue_depth = ata_scsi_change_queue_depth /* -- cgit v1.2.3 From 8ba5a45c998cdbfb565fb7670782407c3e4a25ba Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:02 +0100 Subject: ata: optimize struct ata_force_param size Optimize struct ata_force_param size by: - using u8 for cbl and spd_limit fields - using u16 for lflags field Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 41064 573 40 41677 a2cd drivers/ata/libata-core.o after: 40654 573 40 41267 a133 drivers/ata/libata-core.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 350fa584acde..236e4c55be48 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -174,6 +174,7 @@ enum { ATA_DEV_NONE = 11, /* no device */ /* struct ata_link flags */ + /* NOTE: struct ata_force_param currently stores lflags in u16 */ ATA_LFLAG_NO_HRST = (1 << 1), /* avoid hardreset */ ATA_LFLAG_NO_SRST = (1 << 2), /* avoid softreset */ ATA_LFLAG_ASSUME_ATA = (1 << 3), /* assume ATA class */ -- cgit v1.2.3 From a9b2c120e34bcfe49f837830ee4bfbd2aad4b5c8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:07 +0100 Subject: ata: separate PATA timings code from libata-core.c Separate PATA timings code from libata-core.c: * add PATA_TIMINGS config option and make corresponding PATA host drivers (and ATA ACPI code) select it * move following PATA timings code to libata-pata-timings.c: - ata_timing_quantize() - ata_timing_merge() - ata_timing_find_mode() - ata_timing_compute() * group above functions together in * include libata-pata-timings.c in the build when PATA_TIMINGS config option is enabled * cover ata_timing_cycle2mode() with CONFIG_ATA_ACPI ifdef (it depends on code from libata-core.c and libata-pata-timings.c while its only user is ATA ACPI) Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 39688 573 40 40301 9d6d drivers/ata/libata-core.o after: 37820 572 40 38432 9620 drivers/ata/libata-core.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 236e4c55be48..500b709ed3de 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1205,12 +1205,6 @@ extern int ata_cable_unknown(struct ata_port *ap); /* Timing helpers */ extern unsigned int ata_pio_need_iordy(const struct ata_device *); -extern const struct ata_timing *ata_timing_find_mode(u8 xfer_mode); -extern int ata_timing_compute(struct ata_device *, unsigned short, - struct ata_timing *, int, int); -extern void ata_timing_merge(const struct ata_timing *, - const struct ata_timing *, struct ata_timing *, - unsigned int); extern u8 ata_timing_cycle2mode(unsigned int xfer_shift, int cycle); /* PCI */ @@ -1807,6 +1801,16 @@ static inline int ata_dma_enabled(struct ata_device *adev) return (adev->dma_mode == 0xFF ? 0 : 1); } +/************************************************************************** + * PATA timings - drivers/ata/libata-pata-timings.c + */ +extern const struct ata_timing *ata_timing_find_mode(u8 xfer_mode); +extern int ata_timing_compute(struct ata_device *, unsigned short, + struct ata_timing *, int, int); +extern void ata_timing_merge(const struct ata_timing *, + const struct ata_timing *, struct ata_timing *, + unsigned int); + /************************************************************************** * PMP - drivers/ata/libata-pmp.c */ -- cgit v1.2.3 From 7caa30ea8238aed4f9998d032bfca7a91f6c36cd Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:08 +0100 Subject: ata: add CONFIG_SATA_HOST=n version of ata_ncq_enabled() When CONFIG_SATA_HOST=n there are no NCQ capable host drivers built so it is safe to hardwire ata_ncq_enabled() to always return zero. Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 37820 572 40 38432 9620 drivers/ata/libata-core.o 21040 105 576 21721 54d9 drivers/ata/libata-scsi.o 17405 18 0 17423 440f drivers/ata/libata-eh.o after: 37582 572 40 38194 9532 drivers/ata/libata-core.o 20702 105 576 21383 5387 drivers/ata/libata-scsi.o 17353 18 0 17371 43db drivers/ata/libata-eh.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 500b709ed3de..661d76038684 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1633,6 +1633,8 @@ extern struct ata_device *ata_dev_next(struct ata_device *dev, */ static inline int ata_ncq_enabled(struct ata_device *dev) { + if (!IS_ENABLED(CONFIG_SATA_HOST)) + return 0; return (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ_OFF | ATA_DFLAG_NCQ)) == ATA_DFLAG_NCQ; } -- cgit v1.2.3 From 7fe183c773c42f9814cd361c45a0233f441bc4fc Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:11 +0100 Subject: ata: start separating SATA specific code from libata-core.c Start separating SATA specific code from libata-core.c: * move following functions to libata-sata.c: - ata_tf_to_fis() - ata_tf_from_fis() - sata_link_scr_lpm() - ata_slave_link_init() - sata_lpm_ignore_phy_events() * group above functions together in * include libata-sata.c in the build when CONFIG_SATA_HOST=y Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 37582 572 40 38194 9532 drivers/ata/libata-core.o after: 36762 572 40 37374 91fe drivers/ata/libata-core.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 661d76038684..b419d7412f71 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1082,8 +1082,6 @@ extern int sata_link_debounce(struct ata_link *link, const unsigned long *params, unsigned long deadline); extern int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); -extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, - bool spm_wakeup); extern int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); @@ -1094,7 +1092,6 @@ extern void ata_std_postreset(struct ata_link *link, unsigned int *classes); extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports); extern struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports); -extern int ata_slave_link_init(struct ata_port *ap); extern void ata_host_get(struct ata_host *host); extern void ata_host_put(struct ata_host *host); extern int ata_host_start(struct ata_host *host); @@ -1152,9 +1149,6 @@ extern void ata_msleep(struct ata_port *ap, unsigned int msecs); extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val, unsigned long interval, unsigned long timeout); extern int atapi_cmd_type(u8 opcode); -extern void ata_tf_to_fis(const struct ata_taskfile *tf, - u8 pmp, int is_cmd, u8 *fis); -extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf); extern unsigned long ata_pack_xfermask(unsigned long pio_mask, unsigned long mwdma_mask, unsigned long udma_mask); extern void ata_unpack_xfermask(unsigned long xfer_mask, @@ -1195,6 +1189,16 @@ extern struct ata_device *ata_dev_pair(struct ata_device *adev); extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap); extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q); + +/* + * SATA specific code - drivers/ata/libata-sata.c + */ +extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, + bool spm_wakeup); +extern int ata_slave_link_init(struct ata_port *ap); +extern void ata_tf_to_fis(const struct ata_taskfile *tf, + u8 pmp, int is_cmd, u8 *fis); +extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf); extern bool sata_lpm_ignore_phy_events(struct ata_link *link); extern int ata_cable_40wire(struct ata_port *ap); -- cgit v1.2.3 From 6eab1bc0eecb541f4c383a0823902dc8f5d99861 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:12 +0100 Subject: ata: move sata_scr_*() to libata-sata.c * move sata_scr_*() to libata-sata.c * add static inlines for CONFIG_SATA_HOST=n case Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 35642 572 40 36254 8d9e drivers/ata/libata-core.o 16607 18 0 16625 40f1 drivers/ata/libata-eh.o after: 32846 572 40 33458 82b2 drivers/ata/libata-core.o 16243 18 0 16261 3f85 drivers/ata/libata-eh.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index b419d7412f71..86703ce5a33e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1125,10 +1125,6 @@ extern void ata_sas_tport_delete(struct ata_port *ap); extern void ata_sas_port_stop(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); -extern int sata_scr_valid(struct ata_link *link); -extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); -extern int sata_scr_write(struct ata_link *link, int reg, u32 val); -extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern bool ata_link_online(struct ata_link *link); extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM @@ -1193,6 +1189,26 @@ extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port * /* * SATA specific code - drivers/ata/libata-sata.c */ +#ifdef CONFIG_SATA_HOST +extern int sata_scr_valid(struct ata_link *link); +extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); +extern int sata_scr_write(struct ata_link *link, int reg, u32 val); +extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); +#else +static inline int sata_scr_valid(struct ata_link *link) { return 0; } +static inline int sata_scr_read(struct ata_link *link, int reg, u32 *val) +{ + return -EOPNOTSUPP; +} +static inline int sata_scr_write(struct ata_link *link, int reg, u32 val) +{ + return -EOPNOTSUPP; +} +static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) +{ + return -EOPNOTSUPP; +} +#endif extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); -- cgit v1.2.3 From ab4117cf2470618ffd5af16fa7c363b81260d6e7 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:13 +0100 Subject: ata: move *sata_set_spd*() to libata-sata.c * move *sata_set_spd*() to libata-sata.c * add static inlines for CONFIG_SATA_HOST=n case Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 32842 572 40 33458 82ae drivers/ata/libata-core.o after: 32812 572 40 33428 8290 drivers/ata/libata-core.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 86703ce5a33e..f0817a8f1e3f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1074,7 +1074,6 @@ static inline int ata_port_is_dummy(struct ata_port *ap) return ap->ops == &ata_dummy_port_ops; } -extern int sata_set_spd(struct ata_link *link); extern int ata_std_prereset(struct ata_link *link, unsigned long deadline); extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, int (*check_ready)(struct ata_link *link)); @@ -1194,6 +1193,7 @@ extern int sata_scr_valid(struct ata_link *link); extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); +extern int sata_set_spd(struct ata_link *link); #else static inline int sata_scr_valid(struct ata_link *link) { return 0; } static inline int sata_scr_read(struct ata_link *link, int reg, u32 *val) @@ -1208,6 +1208,7 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) { return -EOPNOTSUPP; } +static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } #endif extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); -- cgit v1.2.3 From 9d3158f5cb11142d85c351fa0e0087ef95ac9cb8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:14 +0100 Subject: ata: move sata_link_{debounce,resume}() to libata-sata.c * move sata_link_{debounce,resume}() to libata-sata.c * add static inline for CONFIG_SATA_HOST=n case (only one, for sata_link_resume() is needed) Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 32816 572 40 33428 8294 drivers/ata/libata-core.o after: 32724 572 40 33336 8238 drivers/ata/libata-core.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index f0817a8f1e3f..b05538d06919 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1077,10 +1077,6 @@ static inline int ata_port_is_dummy(struct ata_port *ap) extern int ata_std_prereset(struct ata_link *link, unsigned long deadline); extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, int (*check_ready)(struct ata_link *link)); -extern int sata_link_debounce(struct ata_link *link, - const unsigned long *params, unsigned long deadline); -extern int sata_link_resume(struct ata_link *link, const unsigned long *params, - unsigned long deadline); extern int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); @@ -1194,6 +1190,8 @@ extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); +extern int sata_link_resume(struct ata_link *link, const unsigned long *params, + unsigned long deadline); #else static inline int sata_scr_valid(struct ata_link *link) { return 0; } static inline int sata_scr_read(struct ata_link *link, int reg, u32 *val) @@ -1209,7 +1207,15 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) return -EOPNOTSUPP; } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } +static inline int sata_link_resume(struct ata_link *link, + const unsigned long *params, + unsigned long deadline) +{ + return -EOPNOTSUPP; +} #endif +extern int sata_link_debounce(struct ata_link *link, + const unsigned long *params, unsigned long deadline); extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); -- cgit v1.2.3 From 78c97c80d76b0590fc6ff5e20f4b18f105aa4fae Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:15 +0100 Subject: ata: move sata_link_hardreset() to libata-sata.c * move sata_link_hardreset() to libata-sata.c * add static inline for CONFIG_SATA_HOST=n case * make sata_set_spd_needed() static Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 32724 572 40 33336 8238 drivers/ata/libata-core.o after: 32559 572 40 33171 8193 drivers/ata/libata-core.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index b05538d06919..981f73c02509 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1077,9 +1077,6 @@ static inline int ata_port_is_dummy(struct ata_port *ap) extern int ata_std_prereset(struct ata_link *link, unsigned long deadline); extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, int (*check_ready)(struct ata_link *link)); -extern int sata_link_hardreset(struct ata_link *link, - const unsigned long *timing, unsigned long deadline, - bool *online, int (*check_ready)(struct ata_link *)); extern int sata_std_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); extern void ata_std_postreset(struct ata_link *link, unsigned int *classes); @@ -1190,6 +1187,9 @@ extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); +extern int sata_link_hardreset(struct ata_link *link, + const unsigned long *timing, unsigned long deadline, + bool *online, int (*check_ready)(struct ata_link *)); extern int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); #else @@ -1207,6 +1207,16 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) return -EOPNOTSUPP; } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } +static inline int sata_link_hardreset(struct ata_link *link, + const unsigned long *timing, + unsigned long deadline, + bool *online, + int (*check_ready)(struct ata_link *)) +{ + if (online) + *online = false; + return -EOPNOTSUPP; +} static inline int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline) -- cgit v1.2.3 From 61a11986d33d01dbef745d49c0536961eb06d2f1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:16 +0100 Subject: ata: move ata_qc_complete_multiple() to libata-sata.c * move ata_qc_complete_multiple() to libata-sata.c Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 32559 572 40 33171 8193 drivers/ata/libata-core.o after: 32162 572 40 32774 8006 drivers/ata/libata-core.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 981f73c02509..08fec96a6a1e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1160,7 +1160,6 @@ extern void ata_id_c_string(const u16 *id, unsigned char *s, extern unsigned int ata_do_dev_read_id(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); extern void ata_qc_complete(struct ata_queued_cmd *qc); -extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active); extern u64 ata_qc_get_active(struct ata_port *ap); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd); extern int ata_std_bios_param(struct scsi_device *sdev, @@ -1232,6 +1231,7 @@ extern int ata_slave_link_init(struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf); +extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active); extern bool sata_lpm_ignore_phy_events(struct ata_link *link); extern int ata_cable_40wire(struct ata_port *ap); -- cgit v1.2.3 From 2b384ede7107a528c65ec826e045b572bcb2aa0b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:17 +0100 Subject: ata: move sata_deb_timing_*() to libata-sata.c * move sata_deb_timing_*() to libata-sata.c * add static inline for sata_ehc_deb_timing() for CONFIG_SATA_HOST=n case Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 32158 572 40 32770 8002 drivers/ata/libata-core.o after: 32015 572 40 32627 7f73 drivers/ata/libata-core.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 08fec96a6a1e..90c929b5df3d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1019,10 +1019,6 @@ struct ata_timing { /* * Core layer - drivers/ata/libata-core.c */ -extern const unsigned long sata_deb_timing_normal[]; -extern const unsigned long sata_deb_timing_hotplug[]; -extern const unsigned long sata_deb_timing_long[]; - extern struct ata_port_operations ata_dummy_port_ops; extern const struct ata_port_info ata_dummy_port_info; @@ -1060,15 +1056,6 @@ static inline int is_multi_taskfile(struct ata_taskfile *tf) (tf->command == ATA_CMD_WRITE_MULTI_FUA_EXT); } -static inline const unsigned long * -sata_ehc_deb_timing(struct ata_eh_context *ehc) -{ - if (ehc->i.flags & ATA_EHI_HOTPLUGGED) - return sata_deb_timing_hotplug; - else - return sata_deb_timing_normal; -} - static inline int ata_port_is_dummy(struct ata_port *ap) { return ap->ops == &ata_dummy_port_ops; @@ -1181,6 +1168,19 @@ extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port * * SATA specific code - drivers/ata/libata-sata.c */ #ifdef CONFIG_SATA_HOST +extern const unsigned long sata_deb_timing_normal[]; +extern const unsigned long sata_deb_timing_hotplug[]; +extern const unsigned long sata_deb_timing_long[]; + +static inline const unsigned long * +sata_ehc_deb_timing(struct ata_eh_context *ehc) +{ + if (ehc->i.flags & ATA_EHI_HOTPLUGGED) + return sata_deb_timing_hotplug; + else + return sata_deb_timing_normal; +} + extern int sata_scr_valid(struct ata_link *link); extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); @@ -1192,6 +1192,11 @@ extern int sata_link_hardreset(struct ata_link *link, extern int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); #else +static inline const unsigned long * +sata_ehc_deb_timing(struct ata_eh_context *ehc) +{ + return NULL; +} static inline int sata_scr_valid(struct ata_link *link) { return 0; } static inline int sata_scr_read(struct ata_link *link, int reg, u32 *val) { -- cgit v1.2.3 From ec811a94c5bb6916d01c02dc99c8cecfa59cbb85 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:18 +0100 Subject: ata: start separating SATA specific code from libata-scsi.c Start separating SATA specific code from libata-scsi.c: * un-static ata_scsi_find_dev() * move following code to libata-sata.c: - SATA only sysfs device attributes handling - __ata_change_queue_depth() - ata_scsi_change_queue_depth() * cover with CONFIG_SATA_HOST ifdef SATA only sysfs device attributes handling code and ATA_SHT_NCQ() macro in Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 20702 105 576 21383 5387 drivers/ata/libata-scsi.o after: 19137 23 576 19736 4d18 drivers/ata/libata-scsi.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 90c929b5df3d..ae74fd048a32 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -530,12 +530,14 @@ typedef int (*ata_reset_fn_t)(struct ata_link *link, unsigned int *classes, unsigned long deadline); typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes); -extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_unload_heads; +#ifdef CONFIG_SATA_HOST +extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_ncq_prio_enable; extern struct device_attribute dev_attr_em_message_type; extern struct device_attribute dev_attr_em_message; extern struct device_attribute dev_attr_sw_activity; +#endif enum sw_activity { OFF, @@ -1371,7 +1373,6 @@ extern int ata_link_nr_enabled(struct ata_link *link); extern const struct ata_port_operations ata_base_port_ops; extern const struct ata_port_operations sata_port_ops; extern struct device_attribute *ata_common_sdev_attrs[]; -extern struct device_attribute *ata_ncq_sdev_attrs[]; /* * All sht initializers (BASE, PIO, BMDMA, NCQ) must be instantiated @@ -1399,10 +1400,14 @@ extern struct device_attribute *ata_ncq_sdev_attrs[]; __ATA_BASE_SHT(drv_name), \ .sdev_attrs = ata_common_sdev_attrs +#ifdef CONFIG_SATA_HOST +extern struct device_attribute *ata_ncq_sdev_attrs[]; + #define ATA_NCQ_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ .sdev_attrs = ata_ncq_sdev_attrs, \ .change_queue_depth = ata_scsi_change_queue_depth +#endif /* * PMP helpers -- cgit v1.2.3 From 15964ff72832ee489ae9e31c4e1a924e80e05dcb Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:19 +0100 Subject: ata: move ata_sas_*() to libata-sata.c * un-inline: - ata_scsi_dump_cdb() - __ata_scsi_queuecmd() * un-static: - ata_scsi_sdev_config() - ata_scsi_dev_config() - ata_scsi_dump_cdb() - __ata_scsi_queuecmd() * move ata_sas_*() to libata-sata.c: * add static inlines for CONFIG_SATA_HOST=n case for ata_sas_{allocate,free}_tag() Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 19137 23 576 19736 4d18 drivers/ata/libata-scsi.o after: 18330 23 576 18929 49f1 drivers/ata/libata-scsi.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index ae74fd048a32..da899f18a3e9 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1094,18 +1094,6 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, unsigned int cmd, void __user *arg); -extern void ata_sas_port_destroy(struct ata_port *); -extern struct ata_port *ata_sas_port_alloc(struct ata_host *, - struct ata_port_info *, struct Scsi_Host *); -extern void ata_sas_async_probe(struct ata_port *ap); -extern int ata_sas_sync_probe(struct ata_port *ap); -extern int ata_sas_port_init(struct ata_port *); -extern int ata_sas_port_start(struct ata_port *ap); -extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); -extern void ata_sas_tport_delete(struct ata_port *ap); -extern void ata_sas_port_stop(struct ata_port *ap); -extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); -extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern bool ata_link_online(struct ata_link *link); extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM @@ -1235,6 +1223,18 @@ extern int sata_link_debounce(struct ata_link *link, extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); +extern void ata_sas_port_destroy(struct ata_port *); +extern struct ata_port *ata_sas_port_alloc(struct ata_host *, + struct ata_port_info *, struct Scsi_Host *); +extern void ata_sas_async_probe(struct ata_port *ap); +extern int ata_sas_sync_probe(struct ata_port *ap); +extern int ata_sas_port_init(struct ata_port *); +extern int ata_sas_port_start(struct ata_port *ap); +extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); +extern void ata_sas_tport_delete(struct ata_port *ap); +extern void ata_sas_port_stop(struct ata_port *ap); +extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); +extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf); -- cgit v1.2.3 From a695de27fca5cefce0d1d93bab681e35605fee55 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:20 +0100 Subject: ata: start separating SATA specific code from libata-eh.c Start separating SATA specific code from libata-eh.c: * move sata_async_notification() to libata-sata.c: Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 16243 18 0 16261 3f85 drivers/ata/libata-eh.o after: 16164 18 0 16182 3f36 drivers/ata/libata-eh.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index da899f18a3e9..9c7ca659dc94 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1240,6 +1240,7 @@ extern void ata_tf_to_fis(const struct ata_taskfile *tf, extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf); extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active); extern bool sata_lpm_ignore_phy_events(struct ata_link *link); +extern int sata_async_notification(struct ata_port *ap); extern int ata_cable_40wire(struct ata_port *ap); extern int ata_cable_80wire(struct ata_port *ap); @@ -1332,7 +1333,6 @@ extern void ata_port_wait_eh(struct ata_port *ap); extern int ata_link_abort(struct ata_link *link); extern int ata_port_abort(struct ata_port *ap); extern int ata_port_freeze(struct ata_port *ap); -extern int sata_async_notification(struct ata_port *ap); extern void ata_eh_freeze_port(struct ata_port *ap); extern void ata_eh_thaw_port(struct ata_port *ap); -- cgit v1.2.3 From a0ccd2511b6f70394b30f8290da8bfc723d6bc07 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 26 Mar 2020 16:58:21 +0100 Subject: ata: move ata_eh_analyze_ncq_error() & co. to libata-sata.c * move ata_eh_analyze_ncq_error() and ata_eh_read_log_10h() to libata-sata.c * add static inline for ata_eh_analyze_ncq_error() for CONFIG_SATA_HOST=n case (link->sactive is non-zero only if NCQ commands are actually queued so empty function body is sufficient) Code size savings on m68k arch using (modified) atari_defconfig: text data bss dec hex filename before: 16164 18 0 16182 3f36 drivers/ata/libata-eh.o after: 15446 18 0 15464 3c68 drivers/ata/libata-eh.o Reviewed-by: Christoph Hellwig Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- include/linux/libata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 9c7ca659dc94..cffa4714bfa8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1181,6 +1181,7 @@ extern int sata_link_hardreset(struct ata_link *link, bool *online, int (*check_ready)(struct ata_link *)); extern int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); +extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else static inline const unsigned long * sata_ehc_deb_timing(struct ata_eh_context *ehc) @@ -1217,6 +1218,7 @@ static inline int sata_link_resume(struct ata_link *link, { return -EOPNOTSUPP; } +static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, const unsigned long *params, unsigned long deadline); @@ -1339,7 +1341,6 @@ extern void ata_eh_thaw_port(struct ata_port *ap); extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); -extern void ata_eh_analyze_ncq_error(struct ata_link *link); extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, -- cgit v1.2.3 From ddfaed17a779ddb728e5534a87596950842d6b04 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 14:53:06 +0100 Subject: mm/hmm: don't provide a stub for hmm_range_fault() All callers of hmm_range_fault depend on CONFIG_HMM_MIRROR, so don't bother with a stub. Link: https://lore.kernel.org/r/20200316135310.899364-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Zi Yan Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index ddf9f7144c43..c102e359b59d 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -225,17 +225,10 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, /* Don't fault in missing PTEs, just snapshot the current state. */ #define HMM_FAULT_SNAPSHOT (1 << 1) -#ifdef CONFIG_HMM_MIRROR /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ long hmm_range_fault(struct hmm_range *range, unsigned int flags); -#else -static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags) -{ - return -EOPNOTSUPP; -} -#endif /* * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range -- cgit v1.2.3 From 96268163f9c9443e7f73a202253f68566f93dc79 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 14:53:07 +0100 Subject: mm/hmm: remove the unused HMM_FAULT_ALLOW_RETRY flag The HMM_FAULT_ALLOW_RETRY isn't used anywhere in the tree. Remove it and the weird -EAGAIN handling where handle_mm_fault() drops the mmap_sem. Link: https://lore.kernel.org/r/20200316135310.899364-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index c102e359b59d..4bf8d6997b12 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -217,11 +217,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, range->flags[HMM_PFN_VALID]; } -/* - * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. - */ -#define HMM_FAULT_ALLOW_RETRY (1 << 0) - /* Don't fault in missing PTEs, just snapshot the current state. */ #define HMM_FAULT_SNAPSHOT (1 << 1) -- cgit v1.2.3 From f894ddd5ff01d3bb48faf4dc533536bf5269c17a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:13 +0100 Subject: memremap: add an owner field to struct dev_pagemap Add a new opaque owner field to struct dev_pagemap, which will allow the hmm and migrate_vma code to identify who owns ZONE_DEVICE memory, and refuse to work on mappings not owned by the calling entity. Link: https://lore.kernel.org/r/20200316193216.920734-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Bharata B Rao Signed-off-by: Jason Gunthorpe --- include/linux/memremap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 6fefb09af7c3..60d97e8fd3c0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -103,6 +103,9 @@ struct dev_pagemap_ops { * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table + * @owner: an opaque pointer identifying the entity that manages this + * instance. Used by various helpers to make sure that no + * foreign ZONE_DEVICE memory is accessed. */ struct dev_pagemap { struct vmem_altmap altmap; @@ -113,6 +116,7 @@ struct dev_pagemap { enum memory_type type; unsigned int flags; const struct dev_pagemap_ops *ops; + void *owner; }; static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) -- cgit v1.2.3 From 800bb1c8dc80bb4121446b56813067f3ea44edee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:14 +0100 Subject: mm: handle multiple owners of device private pages in migrate_vma Add a new src_owner field to struct migrate_vma. If the field is set, only device private pages with page->pgmap->owner equal to that field are migrated. If the field is not set only "normal" pages are migrated. Fixes: df6ad69838fc ("mm/device-public-memory: device memory cache coherent with CPU") Link: https://lore.kernel.org/r/20200316193216.920734-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Bharata B Rao Signed-off-by: Jason Gunthorpe --- include/linux/migrate.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 72120061b7d4..3e546cbf03dd 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -196,6 +196,14 @@ struct migrate_vma { unsigned long npages; unsigned long start; unsigned long end; + + /* + * Set to the owner value also stored in page->pgmap->owner for + * migrating out of device private memory. If set only device + * private pages with this owner are migrated. If not set + * device private pages are not migrated at all. + */ + void *src_owner; }; int migrate_vma_setup(struct migrate_vma *args); -- cgit v1.2.3 From 17ffdc482982af92bddb59692af1c5e1de23d184 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:15 +0100 Subject: mm: simplify device private page handling in hmm_range_fault Remove the HMM_PFN_DEVICE_PRIVATE flag, no driver has ever set this flag on input, and the only place that uses it on output can be trivially changed to use is_device_private_page(). This removes the ability to request that device_private pages are faulted back into system memory. Link: https://lore.kernel.org/r/20200316193216.920734-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 4bf8d6997b12..5e6034f105c3 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -74,7 +74,6 @@ * Flags: * HMM_PFN_VALID: pfn is valid. It has, at least, read permission. * HMM_PFN_WRITE: CPU page table has write permission set - * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE) * * The driver provides a flags array for mapping page protections to device * PTE bits. If the driver valid bit for an entry is bit 3, @@ -86,7 +85,6 @@ enum hmm_pfn_flag_e { HMM_PFN_VALID = 0, HMM_PFN_WRITE, - HMM_PFN_DEVICE_PRIVATE, HMM_PFN_FLAG_MAX }; -- cgit v1.2.3 From 08ddddda667b3b7aaac10641418283f78118c5cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:16 +0100 Subject: mm/hmm: check the device private page owner in hmm_range_fault() hmm_range_fault() will succeed for any kind of device private memory, even if it doesn't belong to the calling entity. While nouveau has some crude checks for that, they are broken because they assume nouveau is the only user of device private memory. Fix this by passing in an expected pgmap owner in the hmm_range_fault structure. If a device_private page is found and doesn't match the owner then it is treated as an non-present and non-faultable page. This prevents a bug in amdgpu, where it doesn't know how to handle device_private pages, but hmm_range_fault would return them anyhow. Fixes: 4ef589dc9b10 ("mm/hmm/devmem: device memory hotplug using ZONE_DEVICE") Link: https://lore.kernel.org/r/20200316193216.920734-5-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 5e6034f105c3..bb6be4428633 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -132,6 +132,7 @@ enum hmm_pfn_value_e { * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT) * @valid: pfns array did not change since it has been fill by an HMM function + * @dev_private_owner: owner of device private pages */ struct hmm_range { struct mmu_interval_notifier *notifier; @@ -144,6 +145,7 @@ struct hmm_range { uint64_t default_flags; uint64_t pfn_flags_mask; uint8_t pfn_shift; + void *dev_private_owner; }; /* -- cgit v1.2.3 From 119cd59fcfbe70fb3fcab4e64cd232bcc3807585 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Feb 2020 19:54:56 -0500 Subject: x86: get rid of put_user_try in __setup_rt_frame() (both 32bit and 64bit) Straightforward, except for save_altstack_ex() stuck in those. Replace that thing with an analogue that would use unsafe_put_user() instead of put_user_ex() (called compat_save_altstack()) and be done with that. Signed-off-by: Al Viro --- include/linux/signal.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 1a5f88316b08..05bacd2ab135 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -444,12 +444,12 @@ void signals_init(void); int restore_altstack(const stack_t __user *); int __save_altstack(stack_t __user *, unsigned long); -#define save_altstack_ex(uss, sp) do { \ +#define unsafe_save_altstack(uss, sp, label) do { \ stack_t __user *__uss = uss; \ struct task_struct *t = current; \ - put_user_ex((void __user *)t->sas_ss_sp, &__uss->ss_sp); \ - put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ - put_user_ex(t->sas_ss_size, &__uss->ss_size); \ + unsafe_put_user((void __user *)t->sas_ss_sp, &__uss->ss_sp, label); \ + unsafe_put_user(t->sas_ss_flags, &__uss->ss_flags, label); \ + unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \ if (t->sas_ss_flags & SS_AUTODISARM) \ sas_ss_reset(t); \ } while (0); -- cgit v1.2.3 From 5908220b2b3d6918f88cd645a39e1dcb84d1c5d9 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 25 Mar 2020 15:52:30 +0300 Subject: net: introduce the MACSEC netdev feature This patch introduce a new netdev feature, which will be used by drivers to state they can perform MACsec transformations in hardware. The patchset was gathered by Mark, macsec functinality itself was implemented by Dmitry, Mark and Pavel Belous. Signed-off-by: Antoine Tenart Signed-off-by: Mark Starovoytov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 34d050bb1ae6..9d53c5ad272c 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -83,6 +83,8 @@ enum { NETIF_F_HW_TLS_RECORD_BIT, /* Offload TLS record */ NETIF_F_GRO_FRAGLIST_BIT, /* Fraglist GRO */ + NETIF_F_HW_MACSEC_BIT, /* Offload MACsec operations */ + /* * Add your fresh new feature above and remember to update * netdev_features_strings[] in net/core/ethtool.c and maybe @@ -154,6 +156,7 @@ enum { #define NETIF_F_HW_TLS_RX __NETIF_F(HW_TLS_RX) #define NETIF_F_GRO_FRAGLIST __NETIF_F(GRO_FRAGLIST) #define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST) +#define NETIF_F_HW_MACSEC __NETIF_F(HW_MACSEC) /* Finds the next feature with the highest number of the range of start till 0. */ -- cgit v1.2.3 From 30e9bb8472f4454d0544020574bb03d96ffa0e52 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 25 Mar 2020 15:52:31 +0300 Subject: net: add a reference to MACsec ops in net_device This patch adds a reference to MACsec ops to the net_device structure, allowing net device drivers to implement offloading operations for MACsec. Signed-off-by: Antoine Tenart Signed-off-by: Mark Starovoytov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 654808bfad83..b521500b244d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -53,6 +53,8 @@ struct netpoll_info; struct device; struct phy_device; struct dsa_port; +struct macsec_context; +struct macsec_ops; struct sfp_bus; /* 802.11 specific */ @@ -1819,6 +1821,8 @@ enum netdev_priv_flags { * that follow this device when it is moved * to another network namespace. * + * @macsec_ops: MACsec offloading ops + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2113,6 +2117,11 @@ struct net_device { unsigned wol_enabled:1; struct list_head net_notifier_list; + +#if IS_ENABLED(CONFIG_MACSEC) + /* MACsec management functions */ + const struct macsec_ops *macsec_ops; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 4d9cf7df8d355e519adb8b2f8759c84e1e633070 Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 16 Feb 2020 17:32:06 -0600 Subject: mfd: Add support for Azoteq IQS620A/621/622/624/625 This patch adds core support for the Azoteq IQS620A, IQS621, IQS622, IQS624 and IQS625 multi-function sensors. Signed-off-by: Jeff LaBundy Signed-off-by: Lee Jones --- include/linux/mfd/iqs62x.h | 139 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 include/linux/mfd/iqs62x.h (limited to 'include/linux') diff --git a/include/linux/mfd/iqs62x.h b/include/linux/mfd/iqs62x.h new file mode 100644 index 000000000000..043d3b6de9ec --- /dev/null +++ b/include/linux/mfd/iqs62x.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Azoteq IQS620A/621/622/624/625 Multi-Function Sensors + * + * Copyright (C) 2019 Jeff LaBundy + */ + +#ifndef __LINUX_MFD_IQS62X_H +#define __LINUX_MFD_IQS62X_H + +#define IQS620_PROD_NUM 0x41 +#define IQS621_PROD_NUM 0x46 +#define IQS622_PROD_NUM 0x42 +#define IQS624_PROD_NUM 0x43 +#define IQS625_PROD_NUM 0x4E + +#define IQS621_ALS_FLAGS 0x16 +#define IQS622_ALS_FLAGS 0x14 + +#define IQS624_HALL_UI 0x70 +#define IQS624_HALL_UI_WHL_EVENT BIT(4) +#define IQS624_HALL_UI_INT_EVENT BIT(3) +#define IQS624_HALL_UI_AUTO_CAL BIT(2) + +#define IQS624_INTERVAL_DIV 0x7D + +#define IQS620_GLBL_EVENT_MASK 0xD7 +#define IQS620_GLBL_EVENT_MASK_PMU BIT(6) + +#define IQS62X_NUM_KEYS 16 +#define IQS62X_NUM_EVENTS (IQS62X_NUM_KEYS + 5) + +#define IQS62X_EVENT_SIZE 10 + +enum iqs62x_ui_sel { + IQS62X_UI_PROX, + IQS62X_UI_SAR1, +}; + +enum iqs62x_event_reg { + IQS62X_EVENT_NONE, + IQS62X_EVENT_SYS, + IQS62X_EVENT_PROX, + IQS62X_EVENT_HYST, + IQS62X_EVENT_HALL, + IQS62X_EVENT_ALS, + IQS62X_EVENT_IR, + IQS62X_EVENT_WHEEL, + IQS62X_EVENT_INTER, + IQS62X_EVENT_UI_LO, + IQS62X_EVENT_UI_HI, +}; + +enum iqs62x_event_flag { + /* keys */ + IQS62X_EVENT_PROX_CH0_T, + IQS62X_EVENT_PROX_CH0_P, + IQS62X_EVENT_PROX_CH1_T, + IQS62X_EVENT_PROX_CH1_P, + IQS62X_EVENT_PROX_CH2_T, + IQS62X_EVENT_PROX_CH2_P, + IQS62X_EVENT_HYST_POS_T, + IQS62X_EVENT_HYST_POS_P, + IQS62X_EVENT_HYST_NEG_T, + IQS62X_EVENT_HYST_NEG_P, + IQS62X_EVENT_SAR1_ACT, + IQS62X_EVENT_SAR1_QRD, + IQS62X_EVENT_SAR1_MOVE, + IQS62X_EVENT_SAR1_HALT, + IQS62X_EVENT_WHEEL_UP, + IQS62X_EVENT_WHEEL_DN, + + /* switches */ + IQS62X_EVENT_HALL_N_T, + IQS62X_EVENT_HALL_N_P, + IQS62X_EVENT_HALL_S_T, + IQS62X_EVENT_HALL_S_P, + + /* everything else */ + IQS62X_EVENT_SYS_RESET, +}; + +struct iqs62x_event_data { + u16 ui_data; + u8 als_flags; + u8 ir_flags; + u8 interval; +}; + +struct iqs62x_event_desc { + enum iqs62x_event_reg reg; + u8 mask; + u8 val; +}; + +struct iqs62x_dev_desc { + const char *dev_name; + const struct mfd_cell *sub_devs; + int num_sub_devs; + + u8 prod_num; + u8 sw_num; + const u8 *cal_regs; + int num_cal_regs; + + u8 prox_mask; + u8 sar_mask; + u8 hall_mask; + u8 hyst_mask; + u8 temp_mask; + u8 als_mask; + u8 ir_mask; + + u8 prox_settings; + u8 als_flags; + u8 hall_flags; + u8 hyst_shift; + + u8 interval; + u8 interval_div; + + u8 clk_div; + const char *fw_name; + const enum iqs62x_event_reg (*event_regs)[IQS62X_EVENT_SIZE]; +}; + +struct iqs62x_core { + const struct iqs62x_dev_desc *dev_desc; + struct i2c_client *client; + struct regmap *regmap; + struct blocking_notifier_head nh; + struct list_head fw_blk_head; + struct completion fw_done; + enum iqs62x_ui_sel ui_sel; +}; + +extern const struct iqs62x_event_desc iqs62x_events[IQS62X_NUM_EVENTS]; + +#endif /* __LINUX_MFD_IQS62X_H */ -- cgit v1.2.3 From 0c81604516afc0f3aedbb4dcf8215df7e5c7ef32 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 20 Mar 2020 09:11:00 +0100 Subject: mfd: rn5t618: Add IRQ support This adds support for IRQ handling in the RC5T619 which is required for properly implementing subdevices like RTC. For now only definitions for the variant RC5T619 are included. Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones --- include/linux/mfd/rn5t618.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rn5t618.h b/include/linux/mfd/rn5t618.h index d62ef48060b5..739571656f2b 100644 --- a/include/linux/mfd/rn5t618.h +++ b/include/linux/mfd/rn5t618.h @@ -242,9 +242,24 @@ enum { RC5T619, }; +/* RN5T618 IRQ definitions */ +enum { + RN5T618_IRQ_SYS = 0, + RN5T618_IRQ_DCDC, + RN5T618_IRQ_RTC, + RN5T618_IRQ_ADC, + RN5T618_IRQ_GPIO, + RN5T618_IRQ_CHG, + RN5T618_NR_IRQS, +}; + struct rn5t618 { struct regmap *regmap; + struct device *dev; long variant; + + int irq; + struct regmap_irq_chip_data *irq_data; }; #endif /* __LINUX_MFD_RN5T618_H */ -- cgit v1.2.3 From 11027ce6f1d2a20af16b0eae3d21d3ab78089262 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 20 Mar 2020 09:11:01 +0100 Subject: mfd: rn5t618: Add RTC related registers Defines for some RTC related registers were missing, also they were not included in the volatile register list Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones --- include/linux/mfd/rn5t618.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rn5t618.h b/include/linux/mfd/rn5t618.h index 739571656f2b..fba0df13d9a8 100644 --- a/include/linux/mfd/rn5t618.h +++ b/include/linux/mfd/rn5t618.h @@ -139,6 +139,17 @@ #define RN5T618_INTPOL 0x9c #define RN5T618_INTEN 0x9d #define RN5T618_INTMON 0x9e + +#define RN5T618_RTC_SECONDS 0xA0 +#define RN5T618_RTC_MDAY 0xA4 +#define RN5T618_RTC_MONTH 0xA5 +#define RN5T618_RTC_YEAR 0xA6 +#define RN5T618_RTC_ADJUST 0xA7 +#define RN5T618_RTC_ALARM_Y_SEC 0xA8 +#define RN5T618_RTC_DAL_MONTH 0xAC +#define RN5T618_RTC_CTRL1 0xAE +#define RN5T618_RTC_CTRL2 0xAF + #define RN5T618_PREVINDAC 0xb0 #define RN5T618_BATDAC 0xb1 #define RN5T618_CHGCTL1 0xb3 -- cgit v1.2.3 From 0008d0c3b1ab03b046b04b7bd9d70df1e2fffbfc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Mar 2020 16:08:26 +0100 Subject: iommu: Define dev_iommu_fwspec_get() for !CONFIG_IOMMU_API There are users outside of the IOMMU code that need to call that function. Define it for !CONFIG_IOMMU_API too so that compilation does not break. Reported-by: kbuild test robot Signed-off-by: Joerg Roedel Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20200326150841.10083-2-joro@8bytes.org --- include/linux/iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4d1ba76c9a64..505163e9702a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1073,6 +1073,10 @@ static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, return -ENODEV; } +static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) +{ + return NULL; +} #endif /* CONFIG_IOMMU_API */ #ifdef CONFIG_IOMMU_DEBUGFS -- cgit v1.2.3 From 045a70426067d6a22e3e5745b55efc18fa75aabf Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Mar 2020 16:08:30 +0100 Subject: iommu: Rename struct iommu_param to dev_iommu The term dev_iommu aligns better with other existing structures and their accessor functions. Signed-off-by: Joerg Roedel Tested-by: Will Deacon # arm-smmu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Greg Kroah-Hartman Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200326150841.10083-6-joro@8bytes.org --- include/linux/device.h | 6 +++--- include/linux/iommu.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 0cd7c647c16c..af621f9fe85b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -44,7 +44,7 @@ struct iommu_ops; struct iommu_group; struct iommu_fwspec; struct dev_pin_info; -struct iommu_param; +struct dev_iommu; /** * struct subsys_interface - interfaces to device functions @@ -514,7 +514,7 @@ struct dev_links_info { * device (i.e. the bus driver that discovered the device). * @iommu_group: IOMMU group the device belongs to. * @iommu_fwspec: IOMMU-specific properties supplied by firmware. - * @iommu_param: Per device generic IOMMU runtime data + * @iommu: Per device generic IOMMU runtime data * * @offline_disabled: If set, the device is permanently online. * @offline: Set after successful invocation of bus type's .offline(). @@ -614,7 +614,7 @@ struct device { void (*release)(struct device *dev); struct iommu_group *iommu_group; struct iommu_fwspec *iommu_fwspec; - struct iommu_param *iommu_param; + struct dev_iommu *iommu; bool offline_disabled:1; bool offline:1; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 505163e9702a..843baaa65f10 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -365,7 +365,7 @@ struct iommu_fault_param { }; /** - * struct iommu_param - collection of per-device IOMMU data + * struct dev_iommu - Collection of per-device IOMMU data * * @fault_param: IOMMU detected device fault reporting data * @@ -373,7 +373,7 @@ struct iommu_fault_param { * struct iommu_group *iommu_group; * struct iommu_fwspec *iommu_fwspec; */ -struct iommu_param { +struct dev_iommu { struct mutex lock; struct iommu_fault_param *fault_param; }; -- cgit v1.2.3 From 72acd9df18f12420001f901493c54b7364f34d60 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Mar 2020 16:08:31 +0100 Subject: iommu: Move iommu_fwspec to struct dev_iommu Move the iommu_fwspec pointer in struct device into struct dev_iommu. This is a step in the effort to reduce the iommu related pointers in struct device to one. Signed-off-by: Joerg Roedel Tested-by: Will Deacon # arm-smmu Reviewed-by: Jean-Philippe Brucker Reviewed-by: Greg Kroah-Hartman Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200326150841.10083-7-joro@8bytes.org --- include/linux/device.h | 3 --- include/linux/iommu.h | 12 ++++++++---- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index af621f9fe85b..9610d0accd88 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -42,7 +42,6 @@ struct device_node; struct fwnode_handle; struct iommu_ops; struct iommu_group; -struct iommu_fwspec; struct dev_pin_info; struct dev_iommu; @@ -513,7 +512,6 @@ struct dev_links_info { * gone away. This should be set by the allocator of the * device (i.e. the bus driver that discovered the device). * @iommu_group: IOMMU group the device belongs to. - * @iommu_fwspec: IOMMU-specific properties supplied by firmware. * @iommu: Per device generic IOMMU runtime data * * @offline_disabled: If set, the device is permanently online. @@ -613,7 +611,6 @@ struct device { void (*release)(struct device *dev); struct iommu_group *iommu_group; - struct iommu_fwspec *iommu_fwspec; struct dev_iommu *iommu; bool offline_disabled:1; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 843baaa65f10..d031ddc0596b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -368,14 +368,15 @@ struct iommu_fault_param { * struct dev_iommu - Collection of per-device IOMMU data * * @fault_param: IOMMU detected device fault reporting data + * @fwspec: IOMMU fwspec data * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; - * struct iommu_fwspec *iommu_fwspec; */ struct dev_iommu { struct mutex lock; - struct iommu_fault_param *fault_param; + struct iommu_fault_param *fault_param; + struct iommu_fwspec *fwspec; }; int iommu_device_register(struct iommu_device *iommu); @@ -614,13 +615,16 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { - return dev->iommu_fwspec; + if (dev->iommu) + return dev->iommu->fwspec; + else + return NULL; } static inline void dev_iommu_fwspec_set(struct device *dev, struct iommu_fwspec *fwspec) { - dev->iommu_fwspec = fwspec; + dev->iommu->fwspec = fwspec; } int iommu_probe_device(struct device *dev); -- cgit v1.2.3 From f9867f416ee721141e1664810516b8ebc2563cdd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Mar 2020 16:08:33 +0100 Subject: iommu: Introduce accessors for iommu private data Add dev_iommu_priv_get/set() functions to access per-device iommu private data. This makes it easier to move the pointer to a different location. Signed-off-by: Joerg Roedel Tested-by: Will Deacon # arm-smmu Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20200326150841.10083-9-joro@8bytes.org --- include/linux/iommu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d031ddc0596b..49e3173260b3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -627,6 +627,16 @@ static inline void dev_iommu_fwspec_set(struct device *dev, dev->iommu->fwspec = fwspec; } +static inline void *dev_iommu_priv_get(struct device *dev) +{ + return dev->iommu->fwspec->iommu_priv; +} + +static inline void dev_iommu_priv_set(struct device *dev, void *priv) +{ + dev->iommu->fwspec->iommu_priv = priv; +} + int iommu_probe_device(struct device *dev); void iommu_release_device(struct device *dev); -- cgit v1.2.3 From 986d5ecc56999800a5d112a70e88522d9212aefd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Mar 2020 16:08:41 +0100 Subject: iommu: Move fwspec->iommu_priv to struct dev_iommu Move the pointer for iommu private data from struct iommu_fwspec to struct dev_iommu. Signed-off-by: Joerg Roedel Tested-by: Will Deacon # arm-smmu Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20200326150841.10083-17-joro@8bytes.org --- include/linux/iommu.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 49e3173260b3..7ef8b0bda695 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -369,6 +369,7 @@ struct iommu_fault_param { * * @fault_param: IOMMU detected device fault reporting data * @fwspec: IOMMU fwspec data + * @priv: IOMMU Driver private data * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -377,6 +378,7 @@ struct dev_iommu { struct mutex lock; struct iommu_fault_param *fault_param; struct iommu_fwspec *fwspec; + void *priv; }; int iommu_device_register(struct iommu_device *iommu); @@ -589,7 +591,6 @@ struct iommu_group *fsl_mc_device_group(struct device *dev); struct iommu_fwspec { const struct iommu_ops *ops; struct fwnode_handle *iommu_fwnode; - void *iommu_priv; u32 flags; u32 num_pasid_bits; unsigned int num_ids; @@ -629,12 +630,12 @@ static inline void dev_iommu_fwspec_set(struct device *dev, static inline void *dev_iommu_priv_get(struct device *dev) { - return dev->iommu->fwspec->iommu_priv; + return dev->iommu->priv; } static inline void dev_iommu_priv_set(struct device *dev, void *priv) { - dev->iommu->fwspec->iommu_priv = priv; + dev->iommu->priv = priv; } int iommu_probe_device(struct device *dev); -- cgit v1.2.3 From 860f89e6182479149bb6c27f5f44989b0628a176 Mon Sep 17 00:00:00 2001 From: Benjamin Thiel Date: Thu, 26 Mar 2020 14:50:41 +0100 Subject: x86/efi: Add a prototype for efi_arch_mem_reserve() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... in order to fix a -Wmissing-ptototypes warning: arch/x86/platform/efi/quirks.c:245:13: warning: no previous prototype for ‘efi_arch_mem_reserve’ [-Wmissing-prototypes] \ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) Signed-off-by: Benjamin Thiel Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200326135041.3264-1-b.thiel@posteo.de --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7efd7072cca5..e4b28ae1ba61 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1703,4 +1703,6 @@ struct linux_efi_memreserve { void efi_pci_disable_bridge_busmaster(void); +void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size); + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From 6546b19f95acc986807de981402bbac6b3a94b0f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:29 +0900 Subject: perf/core: Add PERF_SAMPLE_CGROUP feature The PERF_SAMPLE_CGROUP bit is to save (perf_event) cgroup information in the sample. It will add a 64-bit id to identify current cgroup and it's the file handle in the cgroup file system. Userspace should use this information with PERF_RECORD_CGROUP event to match which cgroup it belongs. I put it before PERF_SAMPLE_AUX for simplicity since it just needs a 64-bit word. But if we want bigger samples, I can work on that direction too. Committer testing: $ pahole perf_sample_data | grep -w cgroup -B5 -A5 /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */ struct perf_regs regs_intr; /* 312 16 */ /* --- cacheline 5 boundary (320 bytes) was 8 bytes ago --- */ u64 stack_user_size; /* 328 8 */ u64 phys_addr; /* 336 8 */ u64 cgroup; /* 344 8 */ /* size: 384, cachelines: 6, members: 22 */ /* padding: 32 */ }; $ Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Johannes Weiner Cc: Mark Rutland Cc: Zefan Li Link: http://lore.kernel.org/lkml/20200325124536.2800725-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8768a39b5258..9c3e7619c929 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1020,6 +1020,7 @@ struct perf_sample_data { u64 stack_user_size; u64 phys_addr; + u64 cgroup; } ____cacheline_aligned; /* default value for data source */ -- cgit v1.2.3 From 348e114bbd4dce430eae70f01a04c8fc259b4cf1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2020 09:07:17 +0100 Subject: block: move the ->devnode callback to struct block_device_operations There really isn't any good reason to stash a method directly into struct gendisk. Move it together with the other block device operations. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/genhd.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53a1325efbc3..e8defd718d62 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1697,6 +1697,7 @@ struct block_device_operations { void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); + char *(*devnode)(struct gendisk *disk, umode_t *mode); struct module *owner; const struct pr_ops *pr_ops; }; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 927eed0be179..85b9e253cd39 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -191,7 +191,6 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[DISK_NAME_LEN]; /* name of major driver */ - char *(*devnode)(struct gendisk *gd, umode_t *mode); unsigned short events; /* supported events */ unsigned short event_flags; /* flags related to event processing */ -- cgit v1.2.3 From 2152862298fbfd237d37c231dfca8ae8f3ed0e48 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 24 Mar 2020 08:01:42 +0200 Subject: IB/mlx5: Limit the scope of struct mlx5_bfreg_info to mlx5_ib struct mlx5_bfreg_info is used by mlx5_ib only but is exposed to both RDMA and netdev parts of mlx5 driver. Move that struct to mlx5_ib namespace, clean vertical space alignment and convert lib_uar_4k from bool to bitfield. Link: https://lore.kernel.org/r/20200324060143.1569116-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1de78f001d26..a30d834fdf7e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -213,23 +213,6 @@ enum mlx5_port_status { MLX5_PORT_DOWN = 2, }; -struct mlx5_bfreg_info { - u32 *sys_pages; - int num_low_latency_bfregs; - unsigned int *count; - - /* - * protect bfreg allocation data structs - */ - struct mutex lock; - u32 ver; - bool lib_uar_4k; - u32 num_sys_pages; - u32 num_static_sys_pages; - u32 total_num_bfregs; - u32 num_dyn_bfregs; -}; - struct mlx5_cmd_first { __be32 data[4]; }; -- cgit v1.2.3 From 9a81ef42b238b28829a46ecf13c7aacb79b9b3ac Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 27 Mar 2020 11:53:09 -0400 Subject: SUNRPC/cache: don't allow invalid entries to be flushed Trond points out in commit 277f27e2f277 ("SUNRPC/cache: Allow garbage collection of invalid cache entries") that we allow invalid cache entries to persist indefinitely. That fix, however, reintroduces the problem fixed by Kinglong Mee's commit d6fc8821c2d2 ("SUNRPC/Cache: Always treat the invalid cache as unexpired"), where an invalid cache entry is immediately removed by a flush before mountd responds to it. The result is that the server thread that should be waiting for mountd to fill in that entry instead gets an -ETIMEDOUT return from cache_check(). Symptoms are the server becoming unresponsive after a restart, reproduceable by running pynfs 4.1 test REBT5. Instead, take a compromise approach: allow invalid cache entries to be removed after they expire, but not to be removed by a cache flush. Fixes: 277f27e2f277 ("SUNRPC/cache: Allow garbage collection ... ") Signed-off-by: J. Bruce Fields Signed-off-by: Chuck Lever --- include/linux/sunrpc/cache.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 532cdbda43da..10891b70fc7b 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -209,8 +209,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time >= h->last_refresh); + if (h->expiry_time < seconds_since_boot()) + return true; + if (!test_bit(CACHE_VALID, &h->flags)) + return false; + return detail->flush_time >= h->last_refresh; } extern int cache_check(struct cache_detail *detail, -- cgit v1.2.3 From 2f227bb99934a4faa6dfe2cda2594bce8897a323 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2020 09:30:08 +0100 Subject: block: add a blk_mq_init_queue_data helper This allows a driver to pass a queuedata member before ->init_hctx is called. null_blk currently open codes this logic, but I'd rather have it in the core to ease future maintainance. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 31344d5f83e2..f389d7c724bd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -412,6 +412,8 @@ enum { << BLK_MQ_F_ALLOC_POLICY_START_BIT) struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, + void *queuedata); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q, bool elevator_init); -- cgit v1.2.3 From 3d745ea5b095a3985129e162900b7e6c22518a9d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2020 09:30:11 +0100 Subject: block: simplify queue allocation Current make_request based drivers use either blk_alloc_queue_node or blk_alloc_queue to allocate a queue, and then set up the make_request_fn function pointer and a few parameters using the blk_queue_make_request helper. Simplify this by passing the make_request pointer to blk_alloc_queue, and while at it merge the _node variant into the main helper by always passing a node_id, and remove the superfluous gfp_mask parameter. A lower-level __blk_alloc_queue is kept for the blk-mq case. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e8defd718d62..3f27ff08483e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1063,7 +1063,6 @@ extern void blk_abort_request(struct request *); * Access functions for manipulating queue properties */ extern void blk_cleanup_queue(struct request_queue *); -extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); @@ -1140,8 +1139,7 @@ extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); -struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id); +struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); -- cgit v1.2.3 From 24670b1a31661815777c2e88b94c162e47ea43fc Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Tue, 24 Mar 2020 08:14:24 +0200 Subject: net/mlx5: Add support for RDMA TX steering Add new RDMA TX flow steering namespace. Flow steering rules in this namespace are used to filter transmitted RDMA traffic. Link: https://lore.kernel.org/r/20200324061425.1570190-2-leon@kernel.org Signed-off-by: Michael Guralnik Reviewed-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/device.h | 6 ++++++ include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0e62c3db45e5..2b90097a6cf9 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1211,6 +1211,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_RDMA_RX_MAX(mdev, cap) \ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_rdma.cap) +#define MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_rdma.cap) + +#define MLX5_CAP_FLOWTABLE_RDMA_TX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_rdma.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4cae16016b2b..44c9fe792fc4 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -77,6 +77,7 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_EGRESS, MLX5_FLOW_NAMESPACE_RDMA_RX, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL, + MLX5_FLOW_NAMESPACE_RDMA_TX, }; enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9b8ff4e57002..600366ef2d84 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -705,7 +705,7 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit; - u8 reserved_at_a00[0x200]; + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_rdma; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; -- cgit v1.2.3 From 130879f1ee0e25b0391b8c78b3baac6fe41f4d38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Mar 2020 18:48:37 +0100 Subject: block: move bio_map_* to blk-map.c The bio_map_* helpers are just the low-level helpers for the blk_rq_map_* APIs. Move them together for better logical grouping, as no there isn't much overlap with other code in bio.c. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a430e9c1c2d2..c1c0f9ea4e63 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -441,14 +441,6 @@ void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_release_pages(struct bio *bio, bool mark_dirty); -struct rq_map_data; -extern struct bio *bio_map_user_iov(struct request_queue *, - struct iov_iter *, gfp_t); -extern void bio_unmap_user(struct bio *); -extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, - gfp_t); -extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, - gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); @@ -463,12 +455,6 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_list_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); - -extern struct bio *bio_copy_user_iov(struct request_queue *, - struct rq_map_data *, - struct iov_iter *, - gfp_t); -extern int bio_uncopy_user(struct bio *); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); void bio_truncate(struct bio *bio, unsigned new_size); void guard_bio_eod(struct bio *bio); -- cgit v1.2.3 From 02694e86356dcf72d39329e52630234ad687e206 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 25 Mar 2020 10:49:54 -0700 Subject: block: add a zone condition debug helper Add a helper to stringify the zone conditions. We use this helper in the next patch to track zone conditions in tracepoints. Reviewed-by: Damien Le Moal Signed-off-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f629d40c645c..a5acf17e7d76 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -952,6 +952,10 @@ static inline unsigned int blk_rq_stats_sectors(const struct request *rq) } #ifdef CONFIG_BLK_DEV_ZONED + +/* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */ +const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); + static inline unsigned int blk_rq_zone_no(struct request *rq) { return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); -- cgit v1.2.3 From c21e7168848d4ff4158120dbd4464f0d5cfb1456 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Mar 2020 13:36:36 -0400 Subject: NFSv4/pnfs: Support a list of commit arrays in struct pnfs_ds_commit_info When we have multiple layout segments with different lists of mirrored data, we need to track the commits on a per layout segment basis. This patch adds a list to support this tracking in struct pnfs_ds_commit_info. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e91c917c9c1c..9946787eda72 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1280,6 +1280,7 @@ struct pnfs_commit_array { }; struct pnfs_ds_commit_info { + struct list_head commits; unsigned int nwritten; unsigned int ncommitting; unsigned int nbuckets; -- cgit v1.2.3 From a9901899b649dc80ef75c14d6d78059cae14def7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 20 Mar 2020 16:04:06 -0400 Subject: pNFS: Add infrastructure for cleaning up per-layout commit structures Ensure that both the file and flexfiles layout types clean up when freeing the layout segments. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9946787eda72..33be2ee2a248 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1275,6 +1275,7 @@ struct pnfs_commit_array { struct list_head lseg_list; struct pnfs_layout_segment *lseg; struct rcu_head rcu; + refcount_t refcount; unsigned int nbuckets; struct pnfs_commit_bucket buckets[]; }; -- cgit v1.2.3 From 0aa647b7369dd29de0789c321111b2e4668c46b2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 21 Mar 2020 09:50:05 -0400 Subject: NFS: Remove bucket array from struct pnfs_ds_commit_info Remove the unused bucket array in struct pnfs_ds_commit_info. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 33be2ee2a248..2903597ec88c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1284,8 +1284,6 @@ struct pnfs_ds_commit_info { struct list_head commits; unsigned int nwritten; unsigned int ncommitting; - unsigned int nbuckets; - struct pnfs_commit_bucket *buckets; }; struct nfs41_state_protection { @@ -1396,22 +1394,11 @@ struct nfs41_free_stateid_res { unsigned int status; }; -static inline void -nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) -{ - kfree(cinfo->buckets); -} - #else struct pnfs_ds_commit_info { }; -static inline void -nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) -{ -} - #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 -- cgit v1.2.3 From 9c455a8c1e146dac3a6d1405fe6a7096177b9546 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 21 Mar 2020 11:13:05 -0400 Subject: NFS/pNFS: Clean up pNFS commit operations Move the pNFS commit related operations into a separate structure that can be carried by the pnfs_ds_commit_info. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2903597ec88c..adbbeae9ce5b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1284,6 +1284,7 @@ struct pnfs_ds_commit_info { struct list_head commits; unsigned int nwritten; unsigned int ncommitting; + const struct pnfs_commit_ops *ops; }; struct nfs41_state_protection { -- cgit v1.2.3 From c84bea59449aaa699a0600a50f59d441cc1d4501 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 22 Mar 2020 14:47:38 -0400 Subject: NFS/pNFS: Simplify bucket layout segment reference counting Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index adbbeae9ce5b..7bbb1f6fc1b1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1265,8 +1265,7 @@ struct nfstime4 { struct pnfs_commit_bucket { struct list_head written; struct list_head committing; - struct pnfs_layout_segment *wlseg; - struct pnfs_layout_segment *clseg; + struct pnfs_layout_segment *lseg; struct nfs_writeverf direct_verf; }; -- cgit v1.2.3 From c9b7a4a72ff64e67b7e877a99fd652230dc26058 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 17 Mar 2020 17:32:32 -0400 Subject: ring-buffer/tracing: Have iterator acknowledge dropped events Have the ring_buffer_iterator set a flag if events were dropped as it were to go and peek at the next event. Have the trace file display this fact if it happened with a "LOST EVENTS" message. Link: http://lkml.kernel.org/r/20200317213417.045858900@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 0ae603b79b0e..c76b2f3b3ac4 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -138,6 +138,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts); void ring_buffer_iter_advance(struct ring_buffer_iter *iter); void ring_buffer_iter_reset(struct ring_buffer_iter *iter); int ring_buffer_iter_empty(struct ring_buffer_iter *iter); +bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter); unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu); -- cgit v1.2.3 From 8ced32ffadc857eaa45d62c0c5a34cf6f37168ea Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Mar 2020 14:56:50 +0100 Subject: gpiolib: Introduce gpiod_set_config() The GPIO Aggregator will need a method to forward a .set_config() call to its parent gpiochip. This requires obtaining the gpio_chip and offset for a given gpio_desc. While gpiod_to_chip() is public, gpio_chip_hwgpio() is not, so there is currently no method to obtain the needed GPIO offset parameter. Hence introduce a public gpiod_set_config() helper, which invokes the .set_config() callback through a gpio_desc pointer, like is done for most other gpio_chip callbacks. Rewrite the existing gpiod_set_debounce() helper as a wrapper around gpiod_set_config(), to avoid duplication. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200324135653.6676-5-geert+renesas@glider.be Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 0a72fccf60ff..901aab89d025 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -157,6 +157,7 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_array *array_info, unsigned long *value_bitmap); +int gpiod_set_config(struct gpio_desc *desc, unsigned long config); int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce); int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); void gpiod_toggle_active_low(struct gpio_desc *desc); @@ -473,6 +474,13 @@ static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, return 0; } +static inline int gpiod_set_config(struct gpio_desc *desc, unsigned long config) +{ + /* GPIO can never have been requested */ + WARN_ON(desc); + return -ENOSYS; +} + static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) { /* GPIO can never have been requested */ -- cgit v1.2.3 From 16efafa31bc1fd7c8646dccbf30eeef3ad495d5a Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 27 Mar 2020 15:05:32 +0530 Subject: PCI: Add new PCI_VPD_RO_KEYWORD_SERIALNO macro This patch adds a new macro for serial number keyword. Acked-by: Bjorn Helgaas Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index fc54b8922e66..a048fba311d2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2184,6 +2184,7 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask); #define PCI_VPD_INFO_FLD_HDR_SIZE 3 #define PCI_VPD_RO_KEYWORD_PARTNO "PN" +#define PCI_VPD_RO_KEYWORD_SERIALNO "SN" #define PCI_VPD_RO_KEYWORD_MFR_ID "MN" #define PCI_VPD_RO_KEYWORD_VENDOR0 "V0" #define PCI_VPD_RO_KEYWORD_CHKSUM "RV" -- cgit v1.2.3 From ab41ca3455a208392ce95f4086d5708dc37bff86 Mon Sep 17 00:00:00 2001 From: Murali Krishna Policharla Date: Fri, 27 Mar 2020 21:55:40 +0200 Subject: net: phy: bcm7xx: add jumbo frame configuration to PHY The BCM7XX PHY family requires special configuration to pass jumbo frames. Do that during initial PHY setup. Signed-off-by: Murali Krishna Policharla Reviewed-by: Scott Branden Signed-off-by: Vladimir Oltean Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index b475e7f20d28..6462c5447872 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -79,6 +79,7 @@ #define MII_BCM54XX_ECR 0x10 /* BCM54xx extended control register */ #define MII_BCM54XX_ECR_IM 0x1000 /* Interrupt mask */ #define MII_BCM54XX_ECR_IF 0x0800 /* Interrupt force */ +#define MII_BCM54XX_ECR_FIFOE 0x0001 /* FIFO elasticity */ #define MII_BCM54XX_ESR 0x11 /* BCM54xx extended status register */ #define MII_BCM54XX_ESR_IS 0x1000 /* Interrupt status */ @@ -119,6 +120,7 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x00 #define MII_BCM54XX_AUXCTL_ACTL_TX_6DB 0x0400 #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 +#define MII_BCM54XX_AUXCTL_ACTL_EXT_PKT_LEN 0x4000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x07 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN 0x0010 -- cgit v1.2.3 From f970b977e068aa54e6eaf916a964a0abaf028afe Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 27 Mar 2020 17:00:15 -0300 Subject: mm/hmm: remove unused code and tidy comments Delete several functions that are never called, fix some desync between comments and structure content, toss the now out of date top of file header, and move one function only used by hmm.c into hmm.c Link: https://lore.kernel.org/r/20200327200021.29372-4-jgg@ziepe.ca Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 104 +--------------------------------------------------- 1 file changed, 2 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index bb6be4428633..daee6508a3f6 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -3,58 +3,8 @@ * Copyright 2013 Red Hat Inc. * * Authors: Jérôme Glisse - */ -/* - * Heterogeneous Memory Management (HMM) - * - * See Documentation/vm/hmm.rst for reasons and overview of what HMM is and it - * is for. Here we focus on the HMM API description, with some explanation of - * the underlying implementation. - * - * Short description: HMM provides a set of helpers to share a virtual address - * space between CPU and a device, so that the device can access any valid - * address of the process (while still obeying memory protection). HMM also - * provides helpers to migrate process memory to device memory, and back. Each - * set of functionality (address space mirroring, and migration to and from - * device memory) can be used independently of the other. - * - * - * HMM address space mirroring API: - * - * Use HMM address space mirroring if you want to mirror a range of the CPU - * page tables of a process into a device page table. Here, "mirror" means "keep - * synchronized". Prerequisites: the device must provide the ability to write- - * protect its page tables (at PAGE_SIZE granularity), and must be able to - * recover from the resulting potential page faults. * - * HMM guarantees that at any point in time, a given virtual address points to - * either the same memory in both CPU and device page tables (that is: CPU and - * device page tables each point to the same pages), or that one page table (CPU - * or device) points to no entry, while the other still points to the old page - * for the address. The latter case happens when the CPU page table update - * happens first, and then the update is mirrored over to the device page table. - * This does not cause any issue, because the CPU page table cannot start - * pointing to a new page until the device page table is invalidated. - * - * HMM uses mmu_notifiers to monitor the CPU page tables, and forwards any - * updates to each device driver that has registered a mirror. It also provides - * some API calls to help with taking a snapshot of the CPU page table, and to - * synchronize with any updates that might happen concurrently. - * - * - * HMM migration to and from device memory: - * - * HMM provides a set of helpers to hotplug device memory as ZONE_DEVICE, with - * a new MEMORY_DEVICE_PRIVATE type. This provides a struct page for each page - * of the device memory, and allows the device driver to manage its memory - * using those struct pages. Having struct pages for device memory makes - * migration easier. Because that memory is not addressable by the CPU it must - * never be pinned to the device; in other words, any CPU page fault can always - * cause the device memory to be migrated (copied/moved) back to regular memory. - * - * A new migrate helper (migrate_vma()) has been added (see mm/migrate.c) that - * allows use of a device DMA engine to perform the copy operation between - * regular system memory and device memory. + * See Documentation/vm/hmm.rst for reasons and overview of what HMM is. */ #ifndef LINUX_HMM_H #define LINUX_HMM_H @@ -120,9 +70,6 @@ enum hmm_pfn_value_e { * * @notifier: a mmu_interval_notifier that includes the start/end * @notifier_seq: result of mmu_interval_read_begin() - * @hmm: the core HMM structure this range is active against - * @vma: the vm area struct for the range - * @list: all range lock are on a list * @start: range virtual start address (inclusive) * @end: range virtual end address (exclusive) * @pfns: array of pfns (big enough for the range) @@ -130,8 +77,7 @@ enum hmm_pfn_value_e { * @values: pfn value for some special case (none, special, error, ...) * @default_flags: default flags for the range (write, read, ... see hmm doc) * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter - * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT) - * @valid: pfns array did not change since it has been fill by an HMM function + * @pfn_shift: pfn shift value (should be <= PAGE_SHIFT) * @dev_private_owner: owner of device private pages */ struct hmm_range { @@ -171,52 +117,6 @@ static inline struct page *hmm_device_entry_to_page(const struct hmm_range *rang return pfn_to_page(entry >> range->pfn_shift); } -/* - * hmm_device_entry_to_pfn() - return pfn value store in a device entry - * @range: range use to decode device entry value - * @entry: device entry to extract pfn from - * Return: pfn value if device entry is valid, -1UL otherwise - */ -static inline unsigned long -hmm_device_entry_to_pfn(const struct hmm_range *range, uint64_t pfn) -{ - if (pfn == range->values[HMM_PFN_NONE]) - return -1UL; - if (pfn == range->values[HMM_PFN_ERROR]) - return -1UL; - if (pfn == range->values[HMM_PFN_SPECIAL]) - return -1UL; - if (!(pfn & range->flags[HMM_PFN_VALID])) - return -1UL; - return (pfn >> range->pfn_shift); -} - -/* - * hmm_device_entry_from_page() - create a valid device entry for a page - * @range: range use to encode HMM pfn value - * @page: page for which to create the device entry - * Return: valid device entry for the page - */ -static inline uint64_t hmm_device_entry_from_page(const struct hmm_range *range, - struct page *page) -{ - return (page_to_pfn(page) << range->pfn_shift) | - range->flags[HMM_PFN_VALID]; -} - -/* - * hmm_device_entry_from_pfn() - create a valid device entry value from pfn - * @range: range use to encode HMM pfn value - * @pfn: pfn value for which to create the device entry - * Return: valid device entry for the pfn - */ -static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, - unsigned long pfn) -{ - return (pfn << range->pfn_shift) | - range->flags[HMM_PFN_VALID]; -} - /* Don't fault in missing PTEs, just snapshot the current state. */ #define HMM_FAULT_SNAPSHOT (1 << 1) -- cgit v1.2.3 From 6bfef2f9194519ca23dee405a9f4db461a7a7826 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 27 Mar 2020 17:00:16 -0300 Subject: mm/hmm: remove HMM_FAULT_SNAPSHOT Now that flags are handled on a fine-grained per-page basis this global flag is redundant and has a confusing overlap with the pfn_flags_mask and default_flags. Normalize the HMM_FAULT_SNAPSHOT behavior into one place. Callers needing the SNAPSHOT behavior should set a pfn_flags_mask and default_flags that always results in a cleared HMM_PFN_VALID. Then no pages will be faulted, and HMM_FAULT_SNAPSHOT is not a special flow that overrides the masking mechanism. As this is the last flag, also remove the flags argument. If future flags are needed they can be part of the struct hmm_range function arguments. Link: https://lore.kernel.org/r/20200327200021.29372-5-jgg@ziepe.ca Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index daee6508a3f6..7475051100c7 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -117,13 +117,10 @@ static inline struct page *hmm_device_entry_to_page(const struct hmm_range *rang return pfn_to_page(entry >> range->pfn_shift); } -/* Don't fault in missing PTEs, just snapshot the current state. */ -#define HMM_FAULT_SNAPSHOT (1 << 1) - /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ -long hmm_range_fault(struct hmm_range *range, unsigned int flags); +long hmm_range_fault(struct hmm_range *range); /* * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range -- cgit v1.2.3 From f318903c0bf42448b4c884732df2bbb0ef7a2284 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 27 Mar 2020 16:58:52 +0100 Subject: bpf: Add netns cookie and enable it for bpf cgroup hooks In Cilium we're mainly using BPF cgroup hooks today in order to implement kube-proxy free Kubernetes service translation for ClusterIP, NodePort (*), ExternalIP, and LoadBalancer as well as HostPort mapping [0] for all traffic between Cilium managed nodes. While this works in its current shape and avoids packet-level NAT for inter Cilium managed node traffic, there is one major limitation we're facing today, that is, lack of netns awareness. In Kubernetes, the concept of Pods (which hold one or multiple containers) has been built around network namespaces, so while we can use the global scope of attaching to root BPF cgroup hooks also to our advantage (e.g. for exposing NodePort ports on loopback addresses), we also have the need to differentiate between initial network namespaces and non-initial one. For example, ExternalIP services mandate that non-local service IPs are not to be translated from the host (initial) network namespace as one example. Right now, we have an ugly work-around in place where non-local service IPs for ExternalIP services are not xlated from connect() and friends BPF hooks but instead via less efficient packet-level NAT on the veth tc ingress hook for Pod traffic. On top of determining whether we're in initial or non-initial network namespace we also have a need for a socket-cookie like mechanism for network namespaces scope. Socket cookies have the nice property that they can be combined as part of the key structure e.g. for BPF LRU maps without having to worry that the cookie could be recycled. We are planning to use this for our sessionAffinity implementation for services. Therefore, add a new bpf_get_netns_cookie() helper which would resolve both use cases at once: bpf_get_netns_cookie(NULL) would provide the cookie for the initial network namespace while passing the context instead of NULL would provide the cookie from the application's network namespace. We're using a hole, so no size increase; the assignment happens only once. Therefore this allows for a comparison on initial namespace as well as regular cookie usage as we have today with socket cookies. We could later on enable this helper for other program types as well as we would see need. (*) Both externalTrafficPolicy={Local|Cluster} types [0] https://github.com/cilium/cilium/blob/master/bpf/bpf_sock.c Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/c47d2346982693a9cf9da0e12690453aded4c788.1585323121.git.daniel@iogearbox.net --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bdb981c204fa..78046c570596 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -233,6 +233,7 @@ enum bpf_arg_type { ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ + ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */ ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ -- cgit v1.2.3 From 0f09abd105da6c37713d2b253730a86cb45e127a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 27 Mar 2020 16:58:54 +0100 Subject: bpf: Enable bpf cgroup hooks to retrieve cgroup v2 and ancestor id Enable the bpf_get_current_cgroup_id() helper for connect(), sendmsg(), recvmsg() and bind-related hooks in order to retrieve the cgroup v2 context which can then be used as part of the key for BPF map lookups, for example. Given these hooks operate in process context 'current' is always valid and pointing to the app that is performing mentioned syscalls if it's subject to a v2 cgroup. Also with same motivation of commit 7723628101aa ("bpf: Introduce bpf_skb_ancestor_cgroup_id helper") enable retrieval of ancestor from current so the cgroup id can be used for policy lookups which can then forbid connect() / bind(), for example. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/d2a7ef42530ad299e3cbb245e6c12374b72145ef.1585323121.git.daniel@iogearbox.net --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 78046c570596..372708eeaecd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1501,6 +1501,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; +extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; -- cgit v1.2.3 From e98eac6ff1b45e4e73f2e6031b37c256ccb5d36b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 27 Mar 2020 12:06:44 +0100 Subject: cpu/hotplug: Ignore pm_wakeup_pending() for disable_nonboot_cpus() A recent change to freeze_secondary_cpus() which added an early abort if a wakeup is pending missed the fact that the function is also invoked for shutdown, reboot and kexec via disable_nonboot_cpus(). In case of disable_nonboot_cpus() the wakeup event needs to be ignored as the purpose is to terminate the currently running kernel. Add a 'suspend' argument which is only set when the freeze is in context of a suspend operation. If not set then an eventually pending wakeup event is ignored. Fixes: a66d955e910a ("cpu/hotplug: Abort disabling secondary CPUs if wakeup is pending") Reported-by: Boqun Feng Signed-off-by: Thomas Gleixner Cc: Pavankumar Kondeti Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/874kuaxdiz.fsf@nanos.tec.linutronix.de --- include/linux/cpu.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 9ead281157d3..beaed2dc269e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -144,12 +144,18 @@ static inline void get_online_cpus(void) { cpus_read_lock(); } static inline void put_online_cpus(void) { cpus_read_unlock(); } #ifdef CONFIG_PM_SLEEP_SMP -extern int freeze_secondary_cpus(int primary); +int __freeze_secondary_cpus(int primary, bool suspend); +static inline int freeze_secondary_cpus(int primary) +{ + return __freeze_secondary_cpus(primary, true); +} + static inline int disable_nonboot_cpus(void) { - return freeze_secondary_cpus(0); + return __freeze_secondary_cpus(0, false); } -extern void enable_nonboot_cpus(void); + +void enable_nonboot_cpus(void); static inline int suspend_disable_secondary_cpus(void) { -- cgit v1.2.3 From f1e67e355c2aafeddf1eac31335709236996d2fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Nov 2019 14:28:24 +0100 Subject: fs/buffer: Make BH_Uptodate_Lock bit_spin_lock a regular spinlock_t Bit spinlocks are problematic if PREEMPT_RT is enabled, because they disable preemption, which is undesired for latency reasons and breaks when regular spinlocks are taken within the bit_spinlock locked region because regular spinlocks are converted to 'sleeping spinlocks' on RT. PREEMPT_RT replaced the bit spinlocks with regular spinlocks to avoid this problem. The replacement was done conditionaly at compile time, but Christoph requested to do an unconditional conversion. Jan suggested to move the spinlock into a existing padding hole which avoids a size increase of struct buffer_head on production kernels. As a benefit the lock gains lockdep coverage. [ bigeasy: Remove the wrapper and use always spinlock_t and move it into the padding hole ] Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Jan Kara Cc: Christoph Hellwig Link: https://lkml.kernel.org/r/20191118132824.rclhrbujqh4b4g4d@linutronix.de --- include/linux/buffer_head.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 7b73ef7f902d..e0b020eaf32e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -22,9 +22,6 @@ enum bh_state_bits { BH_Dirty, /* Is dirty */ BH_Lock, /* Is locked */ BH_Req, /* Has been submitted for I/O */ - BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise - * IO completion of other buffers in the page - */ BH_Mapped, /* Has a disk mapping */ BH_New, /* Disk mapping was newly created by get_block */ @@ -76,6 +73,9 @@ struct buffer_head { struct address_space *b_assoc_map; /* mapping this buffer is associated with */ atomic_t b_count; /* users using this buffer_head */ + spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to + * serialise IO completion of other + * buffers in the page */ }; /* -- cgit v1.2.3 From be06c1b42eea749547d2f0248dc0a7c1153f67b9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 23 Mar 2020 17:26:01 -0700 Subject: PCI/DPC: Move DPC data into struct pci_dev We only need 25 bits of data for DPC, so I don't think it's worth the complexity of allocating and keeping track of the struct dpc_dev separately from the pci_dev. Move that data into the struct pci_dev. Link: https://lore.kernel.org/r/98323eaa18080adbe5bb30846862f09f8722d4b3.1585000084.git.sathyanarayanan.kuppuswamy@linux.intel.com Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 3840a541a9de..a0b7e7a53741 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -444,6 +444,11 @@ struct pci_dev { const struct attribute_group **msi_irq_groups; #endif struct pci_vpd *vpd; +#ifdef CONFIG_PCIE_DPC + u16 dpc_cap; + unsigned int dpc_rp_extensions:1; + u8 dpc_rp_log_size; +#endif #ifdef CONFIG_PCI_ATS union { struct pci_sriov *sriov; /* PF: SR-IOV info */ -- cgit v1.2.3 From ac1c8e35a3262d04cc81b07fac6480a3539e3b0f Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Mon, 23 Mar 2020 17:26:07 -0700 Subject: PCI/DPC: Add Error Disconnect Recover (EDR) support Error Disconnect Recover (EDR) is a feature that allows ACPI firmware to notify OSPM that a device has been disconnected due to an error condition (ACPI v6.3, sec 5.6.6). OSPM advertises its support for EDR on PCI devices via _OSC (see [1], sec 4.5.1, table 4-4). The OSPM EDR notify handler should invalidate software state associated with disconnected devices and may attempt to recover them. OSPM communicates the status of recovery to the firmware via _OST (sec 6.3.5.2). For PCIe, firmware may use Downstream Port Containment (DPC) to support EDR. Per [1], sec 4.5.1, table 4-6, even if firmware has retained control of DPC, OSPM may read/write DPC control and status registers during the EDR notification processing window, i.e., from the time it receives an EDR notification until it clears the DPC Trigger Status. Note that per [1], sec 4.5.1 and 4.5.2.4, 1. If the OS supports EDR, it should advertise that to firmware by setting OSC_PCI_EDR_SUPPORT in _OSC Support. 2. If the OS sets OSC_PCI_EXPRESS_DPC_CONTROL in _OSC Control to request control of the DPC capability, it must also set OSC_PCI_EDR_SUPPORT in _OSC Support. Add an EDR notify handler to attempt recovery. [1] Downstream Port Containment Related Enhancements ECN, Jan 28, 2019, affecting PCI Firmware Specification, Rev. 3.2 https://members.pcisig.com/wg/PCI-SIG/document/12888 [bhelgaas: squash add/enable patches into one] Link: https://lore.kernel.org/r/90f91fe6d25c13f9d2255d2ce97ca15be307e1bb.1585000084.git.sathyanarayanan.kuppuswamy@linux.intel.com Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Bjorn Helgaas Cc: "Rafael J. Wysocki" Cc: Len Brown --- include/linux/acpi.h | 6 ++++-- include/linux/pci-acpi.h | 8 ++++++++ include/linux/pci.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0f24d701fbdc..b7d3caf6f205 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -530,8 +530,9 @@ extern bool osc_pc_lpi_support_confirmed; #define OSC_PCI_CLOCK_PM_SUPPORT 0x00000004 #define OSC_PCI_SEGMENT_GROUPS_SUPPORT 0x00000008 #define OSC_PCI_MSI_SUPPORT 0x00000010 +#define OSC_PCI_EDR_SUPPORT 0x00000080 #define OSC_PCI_HPX_TYPE_3_SUPPORT 0x00000100 -#define OSC_PCI_SUPPORT_MASKS 0x0000011f +#define OSC_PCI_SUPPORT_MASKS 0x0000019f /* PCI Host Bridge _OSC: Capabilities DWORD 3: Control Field */ #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL 0x00000001 @@ -540,7 +541,8 @@ extern bool osc_pc_lpi_support_confirmed; #define OSC_PCI_EXPRESS_AER_CONTROL 0x00000008 #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL 0x00000010 #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 -#define OSC_PCI_CONTROL_MASKS 0x0000003f +#define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 +#define OSC_PCI_CONTROL_MASKS 0x000000bf #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 62b7fdcc661c..2d155bfb8fbf 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -112,6 +112,14 @@ extern const guid_t pci_acpi_dsm_guid; #define RESET_DELAY_DSM 0x08 #define FUNCTION_DELAY_DSM 0x09 +#ifdef CONFIG_PCIE_EDR +void pci_acpi_add_edr_notifier(struct pci_dev *pdev); +void pci_acpi_remove_edr_notifier(struct pci_dev *pdev); +#else +static inline void pci_acpi_add_edr_notifier(struct pci_dev *pdev) { } +static inline void pci_acpi_remove_edr_notifier(struct pci_dev *pdev) { } +#endif /* CONFIG_PCIE_EDR */ + #else /* CONFIG_ACPI */ static inline void acpi_pci_add_bus(struct pci_bus *bus) { } static inline void acpi_pci_remove_bus(struct pci_bus *bus) { } diff --git a/include/linux/pci.h b/include/linux/pci.h index a0b7e7a53741..7ed7c088c952 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -515,6 +515,7 @@ struct pci_host_bridge { unsigned int native_shpc_hotplug:1; /* OS may use SHPC hotplug */ unsigned int native_pme:1; /* OS may use PCIe PME */ unsigned int native_ltr:1; /* OS may use PCIe LTR */ + unsigned int native_dpc:1; /* OS may use PCIe DPC */ unsigned int preserve_config:1; /* Preserve FW resource setup */ /* Resource alignment requirements */ -- cgit v1.2.3 From 894020fdd88c1e9a74c60b67c0f19f1c7696ba2f Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Mon, 23 Mar 2020 17:26:08 -0700 Subject: PCI/AER: Rationalize error status register clearing The AER interfaces to clear error status registers were a confusing mess: - pci_cleanup_aer_uncorrect_error_status() cleared non-fatal errors from the Uncorrectable Error Status register. - pci_aer_clear_fatal_status() cleared fatal errors from the Uncorrectable Error Status register. - pci_cleanup_aer_error_status_regs() cleared the Root Error Status register (for Root Ports), the Uncorrectable Error Status register, and the Correctable Error Status register. Rename them to make them consistent: From To ---------------------------------------- ------------------------------- pci_cleanup_aer_uncorrect_error_status() pci_aer_clear_nonfatal_status() pci_aer_clear_fatal_status() pci_aer_clear_fatal_status() pci_cleanup_aer_error_status_regs() pci_aer_clear_status() Since pci_cleanup_aer_error_status_regs() (renamed to pci_aer_clear_status()) is only used within drivers/pci/, move the declaration from to drivers/pci/pci.h. [bhelgaas: commit log, add renames] Link: https://lore.kernel.org/r/d1310a75dc3d28f7e8da4e99c45fbd3e60fe238e.1585000084.git.sathyanarayanan.kuppuswamy@linux.intel.com Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Bjorn Helgaas --- include/linux/aer.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index fa19e01f418a..97f64ba1b34a 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -44,8 +44,7 @@ struct aer_capability_regs { /* PCIe port driver needs this function to enable AER */ int pci_enable_pcie_error_reporting(struct pci_dev *dev); int pci_disable_pcie_error_reporting(struct pci_dev *dev); -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev); +int pci_aer_clear_nonfatal_status(struct pci_dev *dev); void pci_save_aer_state(struct pci_dev *dev); void pci_restore_aer_state(struct pci_dev *dev); #else @@ -57,11 +56,7 @@ static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev) { return -EINVAL; } -static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) -{ - return -EINVAL; -} -static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } -- cgit v1.2.3 From cee416a347440628762db2257ff921ccf9f66923 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:32 -0700 Subject: platform/chrome: cros_ec_sensorhub: Add the number of sensors in sensorhub To better manage resources, store the number of sensors reported by the EC. Signed-off-by: Gwendal Grignou Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_sensorhub.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_sensorhub.h b/include/linux/platform_data/cros_ec_sensorhub.h index bef7ffc7fce1..7e46a47fd642 100644 --- a/include/linux/platform_data/cros_ec_sensorhub.h +++ b/include/linux/platform_data/cros_ec_sensorhub.h @@ -22,9 +22,11 @@ struct cros_ec_sensor_platform { * struct cros_ec_sensorhub - Sensor Hub device data. * * @ec: Embedded Controller where the hub is located. + * @sensor_num: Number of MEMS sensors present in the EC. */ struct cros_ec_sensorhub { struct cros_ec_dev *ec; + int sensor_num; }; #endif /* __LINUX_PLATFORM_DATA_CROS_EC_SENSORHUB_H */ -- cgit v1.2.3 From 145d59baff5944b71551ac518d7fd7d377a9c820 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:33 -0700 Subject: platform/chrome: cros_ec_sensorhub: Add FIFO support cros_ec_sensorhub registers a listener and query motion sense FIFO, spread to iio sensors registers. To test, we can use libiio: iiod& iio_readdev -u ip:localhost -T 10000 -s 25 -b 16 cros-ec-gyro | od -x Signed-off-by: Gwendal Grignou Reviewed-by: Jonathan Cameron Acked-by: Andy Shevchenko Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_sensorhub.h | 76 +++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_sensorhub.h b/include/linux/platform_data/cros_ec_sensorhub.h index 7e46a47fd642..b0950814f820 100644 --- a/include/linux/platform_data/cros_ec_sensorhub.h +++ b/include/linux/platform_data/cros_ec_sensorhub.h @@ -8,8 +8,13 @@ #ifndef __LINUX_PLATFORM_DATA_CROS_EC_SENSORHUB_H #define __LINUX_PLATFORM_DATA_CROS_EC_SENSORHUB_H +#include +#include +#include #include +struct iio_dev; + /** * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information. * @sensor_num: Id of the sensor, as reported by the EC. @@ -18,15 +23,86 @@ struct cros_ec_sensor_platform { u8 sensor_num; }; +/** + * typedef cros_ec_sensorhub_push_data_cb_t - Callback function to send datum + * to specific sensors. + * + * @indio_dev: The IIO device that will process the sample. + * @data: Vector array of the ring sample. + * @timestamp: Timestamp in host timespace when the sample was acquired by + * the EC. + */ +typedef int (*cros_ec_sensorhub_push_data_cb_t)(struct iio_dev *indio_dev, + s16 *data, + s64 timestamp); + +struct cros_ec_sensorhub_sensor_push_data { + struct iio_dev *indio_dev; + cros_ec_sensorhub_push_data_cb_t push_data_cb; +}; + +enum { + CROS_EC_SENSOR_LAST_TS, + CROS_EC_SENSOR_NEW_TS, + CROS_EC_SENSOR_ALL_TS +}; + +struct cros_ec_sensors_ring_sample { + u8 sensor_id; + u8 flag; + s16 vector[3]; + s64 timestamp; +} __packed; + /** * struct cros_ec_sensorhub - Sensor Hub device data. * + * @dev: Device object, mostly used for logging. * @ec: Embedded Controller where the hub is located. * @sensor_num: Number of MEMS sensors present in the EC. + * @msg: Structure to send FIFO requests. + * @params: Pointer to parameters in msg. + * @resp: Pointer to responses in msg. + * @cmd_lock : Lock for sending msg. + * @notifier: Notifier to kick the FIFO interrupt. + * @ring: Preprocessed ring to store events. + * @fifo_timestamp: array for event timestamp and spreading. + * @fifo_info: copy of FIFO information coming from the EC. + * @fifo_size: size of the ring. + * @push_data: array of callback to send datums to iio sensor object. */ struct cros_ec_sensorhub { + struct device *dev; struct cros_ec_dev *ec; int sensor_num; + + struct cros_ec_command *msg; + struct ec_params_motion_sense *params; + struct ec_response_motion_sense *resp; + struct mutex cmd_lock; /* Lock for protecting msg structure. */ + + struct notifier_block notifier; + + struct cros_ec_sensors_ring_sample *ring; + + ktime_t fifo_timestamp[CROS_EC_SENSOR_ALL_TS]; + struct ec_response_motion_sense_fifo_info *fifo_info; + int fifo_size; + + struct cros_ec_sensorhub_sensor_push_data *push_data; }; +int cros_ec_sensorhub_register_push_data(struct cros_ec_sensorhub *sensorhub, + u8 sensor_num, + struct iio_dev *indio_dev, + cros_ec_sensorhub_push_data_cb_t cb); + +void cros_ec_sensorhub_unregister_push_data(struct cros_ec_sensorhub *sensorhub, + u8 sensor_num); + +int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub); +void cros_ec_sensorhub_ring_remove(void *arg); +int cros_ec_sensorhub_ring_fifo_enable(struct cros_ec_sensorhub *sensorhub, + bool on); + #endif /* __LINUX_PLATFORM_DATA_CROS_EC_SENSORHUB_H */ -- cgit v1.2.3 From 93fe48a585905675719835f8269258736de0948f Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:35 -0700 Subject: platform/chrome: cros_ec_sensorhub: Add median filter Events are timestamped in EC time space, their timestamps need to be converted in host time space. The assumption is the time delta between when the interrupt is sent by the EC and when it is receive by the host is a [small] constant. This is not always true, even with hard-wired interrupt. To mitigate worst offenders, add a median filter to weed out bigger than expected delays. Signed-off-by: Gwendal Grignou Acked-by: Jonathan Cameron Acked-by: Lee Jones Acked-by: Andy Shevchenko Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_sensorhub.h | 93 +++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_sensorhub.h b/include/linux/platform_data/cros_ec_sensorhub.h index b0950814f820..c588be843f61 100644 --- a/include/linux/platform_data/cros_ec_sensorhub.h +++ b/include/linux/platform_data/cros_ec_sensorhub.h @@ -54,7 +54,64 @@ struct cros_ec_sensors_ring_sample { s64 timestamp; } __packed; +/* State used for cros_ec_ring_fix_overflow */ +struct cros_ec_sensors_ec_overflow_state { + s64 offset; + s64 last; +}; + +/* Length of the filter, how long to remember entries for */ +#define CROS_EC_SENSORHUB_TS_HISTORY_SIZE 64 + /** + * struct cros_ec_sensors_ts_filter_state - Timestamp filetr state. + * + * @x_offset: x is EC interrupt time. x_offset its last value. + * @y_offset: y is the difference between AP and EC time, y_offset its last + * value. + * @x_history: The past history of x, relative to x_offset. + * @y_history: The past history of y, relative to y_offset. + * @m_history: rate between y and x. + * @history_len: Amount of valid historic data in the arrays. + * @temp_buf: Temporary buffer used when updating the filter. + * @median_m: median value of m_history + * @median_error: final error to apply to AP interrupt timestamp to get the + * "true timestamp" the event occurred. + */ +struct cros_ec_sensors_ts_filter_state { + s64 x_offset, y_offset; + s64 x_history[CROS_EC_SENSORHUB_TS_HISTORY_SIZE]; + s64 y_history[CROS_EC_SENSORHUB_TS_HISTORY_SIZE]; + s64 m_history[CROS_EC_SENSORHUB_TS_HISTORY_SIZE]; + int history_len; + + s64 temp_buf[CROS_EC_SENSORHUB_TS_HISTORY_SIZE]; + + s64 median_m; + s64 median_error; +}; + +/* struct cros_ec_sensors_ts_batch_state - State of batch of a single sensor. + * + * Use to store information to batch data using median fileter information. + * + * @penul_ts: last but one batch timestamp (penultimate timestamp). + * Used for timestamp spreading calculations + * when a batch shows up. + * @penul_len: last but one batch length. + * @last_ts: Last batch timestam. + * @last_len: Last batch length. + * @newest_sensor_event: Last sensor timestamp. + */ +struct cros_ec_sensors_ts_batch_state { + s64 penul_ts; + int penul_len; + s64 last_ts; + int last_len; + s64 newest_sensor_event; +}; + +/* * struct cros_ec_sensorhub - Sensor Hub device data. * * @dev: Device object, mostly used for logging. @@ -66,10 +123,26 @@ struct cros_ec_sensors_ring_sample { * @cmd_lock : Lock for sending msg. * @notifier: Notifier to kick the FIFO interrupt. * @ring: Preprocessed ring to store events. - * @fifo_timestamp: array for event timestamp and spreading. - * @fifo_info: copy of FIFO information coming from the EC. - * @fifo_size: size of the ring. - * @push_data: array of callback to send datums to iio sensor object. + * @fifo_timestamp: Array for event timestamp and spreading. + * @fifo_info: Copy of FIFO information coming from the EC. + * @fifo_size: Size of the ring. + * @batch_state: Per sensor information of the last batches received. + * @overflow_a: For handling timestamp overflow for a time (sensor events) + * @overflow_b: For handling timestamp overflow for b time (ec interrupts) + * @filter: Medium fileter structure. + * @tight_timestamps: Set to truen when EC support tight timestamping: + * The timestamps reported from the EC have low jitter. + * Timestamps also come before every sample. Set either + * by feature bits coming from the EC or userspace. + * @future_timestamp_count: Statistics used to compute shaved time. + * This occurs when timestamp interpolation from EC + * time to AP time accidentally puts timestamps in + * the future. These timestamps are clamped to + * `now` and these count/total_ns maintain the + * statistics for how much time was removed in a + * given period. + * @future_timestamp_total_ns: Total amount of time shaved. + * @push_data: Array of callback to send datums to iio sensor object. */ struct cros_ec_sensorhub { struct device *dev; @@ -89,6 +162,18 @@ struct cros_ec_sensorhub { struct ec_response_motion_sense_fifo_info *fifo_info; int fifo_size; + struct cros_ec_sensors_ts_batch_state *batch_state; + + struct cros_ec_sensors_ec_overflow_state overflow_a; + struct cros_ec_sensors_ec_overflow_state overflow_b; + + struct cros_ec_sensors_ts_filter_state filter; + + int tight_timestamps; + + s32 future_timestamp_count; + s64 future_timestamp_total_ns; + struct cros_ec_sensorhub_sensor_push_data *push_data; }; -- cgit v1.2.3 From d9452adcc5b485ab1b50352d9356cde75ae6ac0e Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:36 -0700 Subject: iio: cros_ec: Move function description to .c file To prevent comment rot, move function description to cros_ec_sensors_core.c. Signed-off-by: Gwendal Grignou Acked-by: Jonathan Cameron Signed-off-by: Enric Balletbo i Serra --- include/linux/iio/common/cros_ec_sensors_core.h | 80 ------------------------- 1 file changed, 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index bb331e6356a9..0af918978f97 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -79,95 +79,25 @@ struct cros_ec_sensors_core_state { int frequencies[3]; }; -/** - * cros_ec_sensors_read_lpc() - retrieve data from EC shared memory - * @indio_dev: pointer to IIO device - * @scan_mask: bitmap of the sensor indices to scan - * @data: location to store data - * - * This is the safe function for reading the EC data. It guarantees that the - * data sampled was not modified by the EC while being read. - * - * Return: 0 on success, -errno on failure. - */ int cros_ec_sensors_read_lpc(struct iio_dev *indio_dev, unsigned long scan_mask, s16 *data); -/** - * cros_ec_sensors_read_cmd() - retrieve data using the EC command protocol - * @indio_dev: pointer to IIO device - * @scan_mask: bitmap of the sensor indices to scan - * @data: location to store data - * - * Return: 0 on success, -errno on failure. - */ int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, unsigned long scan_mask, s16 *data); struct platform_device; -/** - * cros_ec_sensors_core_init() - basic initialization of the core structure - * @pdev: platform device created for the sensors - * @indio_dev: iio device structure of the device - * @physical_device: true if the device refers to a physical device - * - * Return: 0 on success, -errno on failure. - */ int cros_ec_sensors_core_init(struct platform_device *pdev, struct iio_dev *indio_dev, bool physical_device); -/** - * cros_ec_sensors_capture() - the trigger handler function - * @irq: the interrupt number. - * @p: a pointer to the poll function. - * - * On a trigger event occurring, if the pollfunc is attached then this - * handler is called as a threaded interrupt (and hence may sleep). It - * is responsible for grabbing data from the device and pushing it into - * the associated buffer. - * - * Return: IRQ_HANDLED - */ irqreturn_t cros_ec_sensors_capture(int irq, void *p); -/** - * cros_ec_motion_send_host_cmd() - send motion sense host command - * @st: pointer to state information for device - * @opt_length: optional length to reduce the response size, useful on the data - * path. Otherwise, the maximal allowed response size is used - * - * When called, the sub-command is assumed to be set in param->cmd. - * - * Return: 0 on success, -errno on failure. - */ int cros_ec_motion_send_host_cmd(struct cros_ec_sensors_core_state *st, u16 opt_length); -/** - * cros_ec_sensors_core_read() - function to request a value from the sensor - * @st: pointer to state information for device - * @chan: channel specification structure table - * @val: will contain one element making up the returned value - * @val2: will contain another element making up the returned value - * @mask: specifies which values to be requested - * - * Return: the type of value returned by the device - */ int cros_ec_sensors_core_read(struct cros_ec_sensors_core_state *st, struct iio_chan_spec const *chan, int *val, int *val2, long mask); -/** - * cros_ec_sensors_core_read_avail() - get available values - * @indio_dev: pointer to state information for device - * @chan: channel specification structure table - * @vals: list of available values - * @type: type of data returned - * @length: number of data returned in the array - * @mask: specifies which values to be requested - * - * Return: an error code, IIO_AVAIL_RANGE or IIO_AVAIL_LIST - */ int cros_ec_sensors_core_read_avail(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, const int **vals, @@ -175,16 +105,6 @@ int cros_ec_sensors_core_read_avail(struct iio_dev *indio_dev, int *length, long mask); -/** - * cros_ec_sensors_core_write() - function to write a value to the sensor - * @st: pointer to state information for device - * @chan: channel specification structure table - * @val: first part of value to write - * @val2: second part of value to write - * @mask: specifies which values to write - * - * Return: the type of value returned by the device - */ int cros_ec_sensors_core_write(struct cros_ec_sensors_core_state *st, struct iio_chan_spec const *chan, int val, int val2, long mask); -- cgit v1.2.3 From 69f0793eb60dacd153388974bbaaa1d3184d171d Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:37 -0700 Subject: iio: expose iio_device_set_clock Some IIO devices may want to override the default (realtime) to another clock source by default. It can beneficial when timestamps coming from the hardware or underlying drivers are already in that format. It can always be overridden by attribute current_timestamp_clock. Signed-off-by: Gwendal Grignou Reviewed-by: Jonathan Cameron Signed-off-by: Enric Balletbo i Serra --- include/linux/iio/iio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 862ce0019eba..b18f34a8901f 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -627,6 +627,8 @@ static inline clockid_t iio_device_get_clock(const struct iio_dev *indio_dev) return indio_dev->clock_id; } +int iio_device_set_clock(struct iio_dev *indio_dev, clockid_t clock_id); + /** * dev_to_iio_dev() - Get IIO device struct from a device struct * @dev: The device embedded in the IIO device -- cgit v1.2.3 From aa984f1ba4a477c8ea39d2fa975a4f8de8a126e9 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:38 -0700 Subject: iio: cros_ec: Register to cros_ec_sensorhub when EC supports FIFO When EC supports FIFO, each IIO device registers a callback, to put samples in the buffer when they arrives from the FIFO. When no FIFO, the user space app needs to call trigger_new, or better register a high precision timer. Signed-off-by: Gwendal Grignou Reviewed-by: Jonathan Cameron Signed-off-by: Enric Balletbo i Serra --- include/linux/iio/common/cros_ec_sensors_core.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index 0af918978f97..b8f573ca9dcc 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -12,6 +12,7 @@ #include #include #include +#include enum { CROS_EC_SENSOR_X, @@ -32,6 +33,8 @@ enum { /* Minimum sampling period to use when device is suspending */ #define CROS_EC_MIN_SUSPEND_SAMPLING_FREQUENCY 1000 /* 1 second */ +typedef irqreturn_t (*cros_ec_sensors_capture_t)(int irq, void *p); + /** * struct cros_ec_sensors_core_state - state data for EC sensors IIO driver * @ec: cros EC device structure @@ -87,9 +90,14 @@ int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, unsigned long scan_mask, struct platform_device; int cros_ec_sensors_core_init(struct platform_device *pdev, - struct iio_dev *indio_dev, bool physical_device); + struct iio_dev *indio_dev, bool physical_device, + cros_ec_sensors_capture_t trigger_capture, + cros_ec_sensorhub_push_data_cb_t push_data); irqreturn_t cros_ec_sensors_capture(int irq, void *p); +int cros_ec_sensors_push_data(struct iio_dev *indio_dev, + s16 *data, + s64 timestamp); int cros_ec_motion_send_host_cmd(struct cros_ec_sensors_core_state *st, u16 opt_length); -- cgit v1.2.3 From 2861be4ca9125ee1b7c49895948ca4236449a7fe Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:39 -0700 Subject: iio: cros_ec: Remove pm function Since cros_ec_sensorhub is shutting down the FIFO when the device suspends, no need to slow down the EC sampling period rate. It was necesseary to do that before command CMD_FIFO_INT_ENABLE was introduced, but now all supported chromebooks have it. Signed-off-by: Gwendal Grignou Acked-by: Jonathan Cameron Acked-by: Lee Jones Signed-off-by: Enric Balletbo i Serra --- include/linux/iio/common/cros_ec_sensors_core.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index b8f573ca9dcc..96ea4551945e 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -30,9 +30,6 @@ enum { */ #define CROS_EC_SAMPLE_SIZE (sizeof(s64) * 2) -/* Minimum sampling period to use when device is suspending */ -#define CROS_EC_MIN_SUSPEND_SAMPLING_FREQUENCY 1000 /* 1 second */ - typedef irqreturn_t (*cros_ec_sensors_capture_t)(int irq, void *p); /** @@ -117,8 +114,6 @@ int cros_ec_sensors_core_write(struct cros_ec_sensors_core_state *st, struct iio_chan_spec const *chan, int val, int val2, long mask); -extern const struct dev_pm_ops cros_ec_sensors_pm_ops; - /* List of extended channel specification for all sensors */ extern const struct iio_chan_spec_ext_info cros_ec_sensors_ext_info[]; -- cgit v1.2.3 From 6562793b55c58b6b1dcb9cd581c7905afc25e89f Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:40 -0700 Subject: iio: cros_ec: Expose hwfifo_timeout Expose EC minimal interrupt period through buffer/hwfifo_timeout: - Maximal timeout is limited to 65s. - When timeout for all sensors is set to 0, EC will not send events, even if the sensor sampling rate is greater than 0. Rename frequency to sampling_frequency to match IIO ABI. Signed-off-by: Gwendal Grignou Reviewed-by: Jonathan Cameron Signed-off-by: Enric Balletbo i Serra --- include/linux/iio/common/cros_ec_sensors_core.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index 96ea4551945e..5b0acc14c891 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -50,7 +50,6 @@ typedef irqreturn_t (*cros_ec_sensors_capture_t)(int irq, void *p); * the timestamp. The timestamp is always last and * is always 8-byte aligned. * @read_ec_sensors_data: function used for accessing sensors values - * @cuur_sampl_freq: current sampling period */ struct cros_ec_sensors_core_state { struct cros_ec_device *ec; @@ -73,8 +72,6 @@ struct cros_ec_sensors_core_state { int (*read_ec_sensors_data)(struct iio_dev *indio_dev, unsigned long scan_mask, s16 *data); - int curr_sampl_freq; - /* Table of known available frequencies : 0, Min and Max in mHz */ int frequencies[3]; }; @@ -116,5 +113,6 @@ int cros_ec_sensors_core_write(struct cros_ec_sensors_core_state *st, /* List of extended channel specification for all sensors */ extern const struct iio_chan_spec_ext_info cros_ec_sensors_ext_info[]; +extern const struct attribute *cros_ec_sensor_fifo_attributes[]; #endif /* __CROS_EC_SENSORS_CORE_H */ -- cgit v1.2.3 From cb87556068146de5c9933397706d3bde88b4a14d Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:41 -0700 Subject: iio: cros_ec: Report hwfifo_watermark_max Report the maximum amount of sample the EC can hold. This is not tunable, but can be useful for application to find out the maximum amount of time it can sleep when hwfifo_timeout is set to a large number. Signed-off-by: Gwendal Grignou Reviewed-by: Jonathan Cameron Signed-off-by: Enric Balletbo i Serra --- include/linux/iio/common/cros_ec_sensors_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index 5b0acc14c891..bc26ae2e3272 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -50,6 +50,7 @@ typedef irqreturn_t (*cros_ec_sensors_capture_t)(int irq, void *p); * the timestamp. The timestamp is always last and * is always 8-byte aligned. * @read_ec_sensors_data: function used for accessing sensors values + * @fifo_max_event_count: Size of the EC sensor FIFO */ struct cros_ec_sensors_core_state { struct cros_ec_device *ec; @@ -72,6 +73,8 @@ struct cros_ec_sensors_core_state { int (*read_ec_sensors_data)(struct iio_dev *indio_dev, unsigned long scan_mask, s16 *data); + u32 fifo_max_event_count; + /* Table of known available frequencies : 0, Min and Max in mHz */ int frequencies[3]; }; -- cgit v1.2.3 From 92234c8f15c8d96ad7e52afdc5994cba6be68eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 25 Mar 2020 18:23:26 +0100 Subject: xdp: Support specifying expected existing program when attaching XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While it is currently possible for userspace to specify that an existing XDP program should not be replaced when attaching to an interface, there is no mechanism to safely replace a specific XDP program with another. This patch adds a new netlink attribute, IFLA_XDP_EXPECTED_FD, which can be set along with IFLA_XDP_FD. If set, the kernel will check that the program currently loaded on the interface matches the expected one, and fail the operation if it does not. This corresponds to a 'cmpxchg' memory operation. Setting the new attribute with a negative value means that no program is expected to be attached, which corresponds to setting the UPDATE_IF_NOEXIST flag. A new companion flag, XDP_FLAGS_REPLACE, is also added to explicitly request checking of the EXPECTED_FD attribute. This is needed for userspace to discover whether the kernel supports the new attribute. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/158515700640.92963.3551295145441017022.stgit@toke.dk --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 654808bfad83..b503d468f0df 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3768,7 +3768,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, u32 flags); + int fd, int expected_fd, u32 flags); u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, enum bpf_netdev_command cmd); int xdp_umem_query(struct net_device *dev, u16 queue_id); -- cgit v1.2.3 From 3375590623e4a132b19a8740512f4deb95728933 Mon Sep 17 00:00:00 2001 From: Raymond Pang Date: Fri, 27 Mar 2020 17:11:46 +0800 Subject: PCI: Add Zhaoxin Vendor ID Add Zhaoxin Vendor ID to pci_ids.h Link: https://lore.kernel.org/r/20200327091148.5190-2-RaymondPang-oc@zhaoxin.com Signed-off-by: Raymond Pang Signed-off-by: Bjorn Helgaas --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 352c0d708720..6693cf561cd1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2583,6 +2583,8 @@ #define PCI_VENDOR_ID_AMAZON 0x1d0f +#define PCI_VENDOR_ID_ZHAOXIN 0x1d17 + #define PCI_VENDOR_ID_HYGON 0x1d94 #define PCI_VENDOR_ID_HXT 0x1dbf -- cgit v1.2.3 From d3ec10aa95819bff18a0d936b18884c7816d0914 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Mar 2020 21:11:24 -0400 Subject: KEYS: Don't write out to userspace while holding key semaphore A lockdep circular locking dependency report was seen when running a keyutils test: [12537.027242] ====================================================== [12537.059309] WARNING: possible circular locking dependency detected [12537.088148] 4.18.0-147.7.1.el8_1.x86_64+debug #1 Tainted: G OE --------- - - [12537.125253] ------------------------------------------------------ [12537.153189] keyctl/25598 is trying to acquire lock: [12537.175087] 000000007c39f96c (&mm->mmap_sem){++++}, at: __might_fault+0xc4/0x1b0 [12537.208365] [12537.208365] but task is already holding lock: [12537.234507] 000000003de5b58d (&type->lock_class){++++}, at: keyctl_read_key+0x15a/0x220 [12537.270476] [12537.270476] which lock already depends on the new lock. [12537.270476] [12537.307209] [12537.307209] the existing dependency chain (in reverse order) is: [12537.340754] [12537.340754] -> #3 (&type->lock_class){++++}: [12537.367434] down_write+0x4d/0x110 [12537.385202] __key_link_begin+0x87/0x280 [12537.405232] request_key_and_link+0x483/0xf70 [12537.427221] request_key+0x3c/0x80 [12537.444839] dns_query+0x1db/0x5a5 [dns_resolver] [12537.468445] dns_resolve_server_name_to_ip+0x1e1/0x4d0 [cifs] [12537.496731] cifs_reconnect+0xe04/0x2500 [cifs] [12537.519418] cifs_readv_from_socket+0x461/0x690 [cifs] [12537.546263] cifs_read_from_socket+0xa0/0xe0 [cifs] [12537.573551] cifs_demultiplex_thread+0x311/0x2db0 [cifs] [12537.601045] kthread+0x30c/0x3d0 [12537.617906] ret_from_fork+0x3a/0x50 [12537.636225] [12537.636225] -> #2 (root_key_user.cons_lock){+.+.}: [12537.664525] __mutex_lock+0x105/0x11f0 [12537.683734] request_key_and_link+0x35a/0xf70 [12537.705640] request_key+0x3c/0x80 [12537.723304] dns_query+0x1db/0x5a5 [dns_resolver] [12537.746773] dns_resolve_server_name_to_ip+0x1e1/0x4d0 [cifs] [12537.775607] cifs_reconnect+0xe04/0x2500 [cifs] [12537.798322] cifs_readv_from_socket+0x461/0x690 [cifs] [12537.823369] cifs_read_from_socket+0xa0/0xe0 [cifs] [12537.847262] cifs_demultiplex_thread+0x311/0x2db0 [cifs] [12537.873477] kthread+0x30c/0x3d0 [12537.890281] ret_from_fork+0x3a/0x50 [12537.908649] [12537.908649] -> #1 (&tcp_ses->srv_mutex){+.+.}: [12537.935225] __mutex_lock+0x105/0x11f0 [12537.954450] cifs_call_async+0x102/0x7f0 [cifs] [12537.977250] smb2_async_readv+0x6c3/0xc90 [cifs] [12538.000659] cifs_readpages+0x120a/0x1e50 [cifs] [12538.023920] read_pages+0xf5/0x560 [12538.041583] __do_page_cache_readahead+0x41d/0x4b0 [12538.067047] ondemand_readahead+0x44c/0xc10 [12538.092069] filemap_fault+0xec1/0x1830 [12538.111637] __do_fault+0x82/0x260 [12538.129216] do_fault+0x419/0xfb0 [12538.146390] __handle_mm_fault+0x862/0xdf0 [12538.167408] handle_mm_fault+0x154/0x550 [12538.187401] __do_page_fault+0x42f/0xa60 [12538.207395] do_page_fault+0x38/0x5e0 [12538.225777] page_fault+0x1e/0x30 [12538.243010] [12538.243010] -> #0 (&mm->mmap_sem){++++}: [12538.267875] lock_acquire+0x14c/0x420 [12538.286848] __might_fault+0x119/0x1b0 [12538.306006] keyring_read_iterator+0x7e/0x170 [12538.327936] assoc_array_subtree_iterate+0x97/0x280 [12538.352154] keyring_read+0xe9/0x110 [12538.370558] keyctl_read_key+0x1b9/0x220 [12538.391470] do_syscall_64+0xa5/0x4b0 [12538.410511] entry_SYSCALL_64_after_hwframe+0x6a/0xdf [12538.435535] [12538.435535] other info that might help us debug this: [12538.435535] [12538.472829] Chain exists of: [12538.472829] &mm->mmap_sem --> root_key_user.cons_lock --> &type->lock_class [12538.472829] [12538.524820] Possible unsafe locking scenario: [12538.524820] [12538.551431] CPU0 CPU1 [12538.572654] ---- ---- [12538.595865] lock(&type->lock_class); [12538.613737] lock(root_key_user.cons_lock); [12538.644234] lock(&type->lock_class); [12538.672410] lock(&mm->mmap_sem); [12538.687758] [12538.687758] *** DEADLOCK *** [12538.687758] [12538.714455] 1 lock held by keyctl/25598: [12538.732097] #0: 000000003de5b58d (&type->lock_class){++++}, at: keyctl_read_key+0x15a/0x220 [12538.770573] [12538.770573] stack backtrace: [12538.790136] CPU: 2 PID: 25598 Comm: keyctl Kdump: loaded Tainted: G [12538.844855] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 12/27/2015 [12538.881963] Call Trace: [12538.892897] dump_stack+0x9a/0xf0 [12538.907908] print_circular_bug.isra.25.cold.50+0x1bc/0x279 [12538.932891] ? save_trace+0xd6/0x250 [12538.948979] check_prev_add.constprop.32+0xc36/0x14f0 [12538.971643] ? keyring_compare_object+0x104/0x190 [12538.992738] ? check_usage+0x550/0x550 [12539.009845] ? sched_clock+0x5/0x10 [12539.025484] ? sched_clock_cpu+0x18/0x1e0 [12539.043555] __lock_acquire+0x1f12/0x38d0 [12539.061551] ? trace_hardirqs_on+0x10/0x10 [12539.080554] lock_acquire+0x14c/0x420 [12539.100330] ? __might_fault+0xc4/0x1b0 [12539.119079] __might_fault+0x119/0x1b0 [12539.135869] ? __might_fault+0xc4/0x1b0 [12539.153234] keyring_read_iterator+0x7e/0x170 [12539.172787] ? keyring_read+0x110/0x110 [12539.190059] assoc_array_subtree_iterate+0x97/0x280 [12539.211526] keyring_read+0xe9/0x110 [12539.227561] ? keyring_gc_check_iterator+0xc0/0xc0 [12539.249076] keyctl_read_key+0x1b9/0x220 [12539.266660] do_syscall_64+0xa5/0x4b0 [12539.283091] entry_SYSCALL_64_after_hwframe+0x6a/0xdf One way to prevent this deadlock scenario from happening is to not allow writing to userspace while holding the key semaphore. Instead, an internal buffer is allocated for getting the keys out from the read method first before copying them out to userspace without holding the lock. That requires taking out the __user modifier from all the relevant read methods as well as additional changes to not use any userspace write helpers. That is, 1) The put_user() call is replaced by a direct copy. 2) The copy_to_user() call is replaced by memcpy(). 3) All the fault handling code is removed. Compiling on a x86-64 system, the size of the rxrpc_read() function is reduced from 3795 bytes to 2384 bytes with this patch. Fixes: ^1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Jarkko Sakkinen Signed-off-by: Waiman Long Signed-off-by: David Howells --- include/linux/key-type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 4ded94bcf274..2ab2d6d6aeab 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -127,7 +127,7 @@ struct key_type { * much is copied into the buffer * - shouldn't do the copy if the buffer is NULL */ - long (*read)(const struct key *key, char __user *buffer, size_t buflen); + long (*read)(const struct key *key, char *buffer, size_t buflen); /* handle request_key() for this type instead of invoking * /sbin/request-key (optional) -- cgit v1.2.3 From fc611f47f2188ade2b48ff6902d5cce8baac0c58 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 29 Mar 2020 01:43:49 +0100 Subject: bpf: Introduce BPF_PROG_TYPE_LSM Introduce types and configs for bpf programs that can be attached to LSM hooks. The programs can be enabled by the config option CONFIG_BPF_LSM. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Reviewed-by: Brendan Jackman Reviewed-by: Florent Revest Reviewed-by: Thomas Garnier Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Acked-by: James Morris Link: https://lore.kernel.org/bpf/20200329004356.27286-2-kpsingh@chromium.org --- include/linux/bpf.h | 3 +++ include/linux/bpf_types.h | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 372708eeaecd..3bde59a8453b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1515,6 +1515,9 @@ extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; +const struct bpf_func_proto *bpf_tracing_func_proto( + enum bpf_func_id func_id, const struct bpf_prog *prog); + /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index c81d4ece79a4..ba0c2d56f8a3 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -70,6 +70,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops, void *, void *) BPF_PROG_TYPE(BPF_PROG_TYPE_EXT, bpf_extension, void *, void *) +#ifdef CONFIG_BPF_LSM +BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm, + void *, void *) +#endif /* CONFIG_BPF_LSM */ #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) -- cgit v1.2.3 From 98e828a0650f348be85728c69875260cf78069e6 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 29 Mar 2020 01:43:50 +0100 Subject: security: Refactor declaration of LSM hooks The information about the different types of LSM hooks is scattered in two locations i.e. union security_list_options and struct security_hook_heads. Rather than duplicating this information even further for BPF_PROG_TYPE_LSM, define all the hooks with the LSM_HOOK macro in lsm_hook_defs.h which is then used to generate all the data structures required by the LSM framework. The LSM hooks are defined as: LSM_HOOK(, , , args...) with acccessible in security.c as: LSM_RET_DEFAULT() Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Reviewed-by: Brendan Jackman Reviewed-by: Florent Revest Reviewed-by: Kees Cook Reviewed-by: Casey Schaufler Acked-by: James Morris Link: https://lore.kernel.org/bpf/20200329004356.27286-3-kpsingh@chromium.org --- include/linux/lsm_hook_defs.h | 381 +++++++++++++++++++++++++ include/linux/lsm_hooks.h | 628 +----------------------------------------- 2 files changed, 393 insertions(+), 616 deletions(-) create mode 100644 include/linux/lsm_hook_defs.h (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h new file mode 100644 index 000000000000..9cd4455528e5 --- /dev/null +++ b/include/linux/lsm_hook_defs.h @@ -0,0 +1,381 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Linux Security Module Hook declarations. + * + * Copyright (C) 2001 WireX Communications, Inc + * Copyright (C) 2001 Greg Kroah-Hartman + * Copyright (C) 2001 Networks Associates Technology, Inc + * Copyright (C) 2001 James Morris + * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group) + * Copyright (C) 2015 Intel Corporation. + * Copyright (C) 2015 Casey Schaufler + * Copyright (C) 2016 Mellanox Techonologies + * Copyright (C) 2020 Google LLC. + */ + +/* + * The macro LSM_HOOK is used to define the data structures required by the + * the LSM framework using the pattern: + * + * LSM_HOOK(, , , args...) + * + * struct security_hook_heads { + * #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; + * #include + * #undef LSM_HOOK + * }; + */ +LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr) +LSM_HOOK(int, 0, binder_transaction, struct task_struct *from, + struct task_struct *to) +LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from, + struct task_struct *to) +LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from, + struct task_struct *to, struct file *file) +LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, + unsigned int mode) +LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) +LSM_HOOK(int, 0, capget, struct task_struct *target, kernel_cap_t *effective, + kernel_cap_t *inheritable, kernel_cap_t *permitted) +LSM_HOOK(int, 0, capset, struct cred *new, const struct cred *old, + const kernel_cap_t *effective, const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) +LSM_HOOK(int, 0, capable, const struct cred *cred, struct user_namespace *ns, + int cap, unsigned int opts) +LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, struct super_block *sb) +LSM_HOOK(int, 0, quota_on, struct dentry *dentry) +LSM_HOOK(int, 0, syslog, int type) +LSM_HOOK(int, 0, settime, const struct timespec64 *ts, + const struct timezone *tz) +LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 0, bprm_set_creds, struct linux_binprm *bprm) +LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) +LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, + struct fs_context *src_sc) +LSM_HOOK(int, 0, fs_context_parse_param, struct fs_context *fc, + struct fs_parameter *param) +LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) +LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) +LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) +LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) +LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts) +LSM_HOOK(int, 0, sb_kern_mount, struct super_block *sb) +LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb) +LSM_HOOK(int, 0, sb_statfs, struct dentry *dentry) +LSM_HOOK(int, 0, sb_mount, const char *dev_name, const struct path *path, + const char *type, unsigned long flags, void *data) +LSM_HOOK(int, 0, sb_umount, struct vfsmount *mnt, int flags) +LSM_HOOK(int, 0, sb_pivotroot, const struct path *old_path, + const struct path *new_path) +LSM_HOOK(int, 0, sb_set_mnt_opts, struct super_block *sb, void *mnt_opts, + unsigned long kern_flags, unsigned long *set_kern_flags) +LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb, + struct super_block *newsb, unsigned long kern_flags, + unsigned long *set_kern_flags) +LSM_HOOK(int, 0, sb_add_mnt_opt, const char *option, const char *val, + int len, void **mnt_opts) +LSM_HOOK(int, 0, move_mount, const struct path *from_path, + const struct path *to_path) +LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, + int mode, const struct qstr *name, void **ctx, u32 *ctxlen) +LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, + struct qstr *name, const struct cred *old, struct cred *new) + +#ifdef CONFIG_SECURITY_PATH +LSM_HOOK(int, 0, path_unlink, const struct path *dir, struct dentry *dentry) +LSM_HOOK(int, 0, path_mkdir, const struct path *dir, struct dentry *dentry, + umode_t mode) +LSM_HOOK(int, 0, path_rmdir, const struct path *dir, struct dentry *dentry) +LSM_HOOK(int, 0, path_mknod, const struct path *dir, struct dentry *dentry, + umode_t mode, unsigned int dev) +LSM_HOOK(int, 0, path_truncate, const struct path *path) +LSM_HOOK(int, 0, path_symlink, const struct path *dir, struct dentry *dentry, + const char *old_name) +LSM_HOOK(int, 0, path_link, struct dentry *old_dentry, + const struct path *new_dir, struct dentry *new_dentry) +LSM_HOOK(int, 0, path_rename, const struct path *old_dir, + struct dentry *old_dentry, const struct path *new_dir, + struct dentry *new_dentry) +LSM_HOOK(int, 0, path_chmod, const struct path *path, umode_t mode) +LSM_HOOK(int, 0, path_chown, const struct path *path, kuid_t uid, kgid_t gid) +LSM_HOOK(int, 0, path_chroot, const struct path *path) +#endif /* CONFIG_SECURITY_PATH */ + +/* Needed for inode based security check */ +LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask, + unsigned int obj_type) +LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) +LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) +LSM_HOOK(int, 0, inode_init_security, struct inode *inode, + struct inode *dir, const struct qstr *qstr, const char **name, + void **value, size_t *len) +LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry, + umode_t mode) +LSM_HOOK(int, 0, inode_link, struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +LSM_HOOK(int, 0, inode_unlink, struct inode *dir, struct dentry *dentry) +LSM_HOOK(int, 0, inode_symlink, struct inode *dir, struct dentry *dentry, + const char *old_name) +LSM_HOOK(int, 0, inode_mkdir, struct inode *dir, struct dentry *dentry, + umode_t mode) +LSM_HOOK(int, 0, inode_rmdir, struct inode *dir, struct dentry *dentry) +LSM_HOOK(int, 0, inode_mknod, struct inode *dir, struct dentry *dentry, + umode_t mode, dev_t dev) +LSM_HOOK(int, 0, inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +LSM_HOOK(int, 0, inode_readlink, struct dentry *dentry) +LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode, + bool rcu) +LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask) +LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr) +LSM_HOOK(int, 0, inode_getattr, const struct path *path) +LSM_HOOK(int, 0, inode_setxattr, struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +LSM_HOOK(void, LSM_RET_VOID, inode_post_setxattr, struct dentry *dentry, + const char *name, const void *value, size_t size, int flags) +LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) +LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) +LSM_HOOK(int, 0, inode_removexattr, struct dentry *dentry, const char *name) +LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) +LSM_HOOK(int, 0, inode_killpriv, struct dentry *dentry) +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct inode *inode, + const char *name, void **buffer, bool alloc) +LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode, + const char *name, const void *value, size_t size, int flags) +LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, + size_t buffer_size) +LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) +LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) +LSM_HOOK(int, 0, inode_copy_up_xattr, const char *name) +LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, + struct kernfs_node *kn) +LSM_HOOK(int, 0, file_permission, struct file *file, int mask) +LSM_HOOK(int, 0, file_alloc_security, struct file *file) +LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) +LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, + unsigned long arg) +LSM_HOOK(int, 0, mmap_addr, unsigned long addr) +LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) +LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma, + unsigned long reqprot, unsigned long prot) +LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd) +LSM_HOOK(int, 0, file_fcntl, struct file *file, unsigned int cmd, + unsigned long arg) +LSM_HOOK(void, LSM_RET_VOID, file_set_fowner, struct file *file) +LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk, + struct fown_struct *fown, int sig) +LSM_HOOK(int, 0, file_receive, struct file *file) +LSM_HOOK(int, 0, file_open, struct file *file) +LSM_HOOK(int, 0, task_alloc, struct task_struct *task, + unsigned long clone_flags) +LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task) +LSM_HOOK(int, 0, cred_alloc_blank, struct cred *cred, gfp_t gfp) +LSM_HOOK(void, LSM_RET_VOID, cred_free, struct cred *cred) +LSM_HOOK(int, 0, cred_prepare, struct cred *new, const struct cred *old, + gfp_t gfp) +LSM_HOOK(void, LSM_RET_VOID, cred_transfer, struct cred *new, + const struct cred *old) +LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid) +LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid) +LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode) +LSM_HOOK(int, 0, kernel_module_request, char *kmod_name) +LSM_HOOK(int, 0, kernel_load_data, enum kernel_load_data_id id) +LSM_HOOK(int, 0, kernel_read_file, struct file *file, + enum kernel_read_file_id id) +LSM_HOOK(int, 0, kernel_post_read_file, struct file *file, char *buf, + loff_t size, enum kernel_read_file_id id) +LSM_HOOK(int, 0, task_fix_setuid, struct cred *new, const struct cred *old, + int flags) +LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) +LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) +LSM_HOOK(int, 0, task_getsid, struct task_struct *p) +LSM_HOOK(void, LSM_RET_VOID, task_getsecid, struct task_struct *p, u32 *secid) +LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice) +LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio) +LSM_HOOK(int, 0, task_getioprio, struct task_struct *p) +LSM_HOOK(int, 0, task_prlimit, const struct cred *cred, + const struct cred *tcred, unsigned int flags) +LSM_HOOK(int, 0, task_setrlimit, struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim) +LSM_HOOK(int, 0, task_setscheduler, struct task_struct *p) +LSM_HOOK(int, 0, task_getscheduler, struct task_struct *p) +LSM_HOOK(int, 0, task_movememory, struct task_struct *p) +LSM_HOOK(int, 0, task_kill, struct task_struct *p, struct kernel_siginfo *info, + int sig, const struct cred *cred) +LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) +LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, + struct inode *inode) +LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) +LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, + u32 *secid) +LSM_HOOK(int, 0, msg_msg_alloc_security, struct msg_msg *msg) +LSM_HOOK(void, LSM_RET_VOID, msg_msg_free_security, struct msg_msg *msg) +LSM_HOOK(int, 0, msg_queue_alloc_security, struct kern_ipc_perm *perm) +LSM_HOOK(void, LSM_RET_VOID, msg_queue_free_security, + struct kern_ipc_perm *perm) +LSM_HOOK(int, 0, msg_queue_associate, struct kern_ipc_perm *perm, int msqflg) +LSM_HOOK(int, 0, msg_queue_msgctl, struct kern_ipc_perm *perm, int cmd) +LSM_HOOK(int, 0, msg_queue_msgsnd, struct kern_ipc_perm *perm, + struct msg_msg *msg, int msqflg) +LSM_HOOK(int, 0, msg_queue_msgrcv, struct kern_ipc_perm *perm, + struct msg_msg *msg, struct task_struct *target, long type, int mode) +LSM_HOOK(int, 0, shm_alloc_security, struct kern_ipc_perm *perm) +LSM_HOOK(void, LSM_RET_VOID, shm_free_security, struct kern_ipc_perm *perm) +LSM_HOOK(int, 0, shm_associate, struct kern_ipc_perm *perm, int shmflg) +LSM_HOOK(int, 0, shm_shmctl, struct kern_ipc_perm *perm, int cmd) +LSM_HOOK(int, 0, shm_shmat, struct kern_ipc_perm *perm, char __user *shmaddr, + int shmflg) +LSM_HOOK(int, 0, sem_alloc_security, struct kern_ipc_perm *perm) +LSM_HOOK(void, LSM_RET_VOID, sem_free_security, struct kern_ipc_perm *perm) +LSM_HOOK(int, 0, sem_associate, struct kern_ipc_perm *perm, int semflg) +LSM_HOOK(int, 0, sem_semctl, struct kern_ipc_perm *perm, int cmd) +LSM_HOOK(int, 0, sem_semop, struct kern_ipc_perm *perm, struct sembuf *sops, + unsigned nsops, int alter) +LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb) +LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry, + struct inode *inode) +LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, char *name, + char **value) +LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) +LSM_HOOK(int, 0, ismaclabel, const char *name) +LSM_HOOK(int, 0, secid_to_secctx, u32 secid, char **secdata, + u32 *seclen) +LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) +LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) +LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) +LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) +LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, + u32 *ctxlen) + +#ifdef CONFIG_SECURITY_NETWORK +LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other, + struct sock *newsk) +LSM_HOOK(int, 0, unix_may_send, struct socket *sock, struct socket *other) +LSM_HOOK(int, 0, socket_create, int family, int type, int protocol, int kern) +LSM_HOOK(int, 0, socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +LSM_HOOK(int, 0, socket_socketpair, struct socket *socka, struct socket *sockb) +LSM_HOOK(int, 0, socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) +LSM_HOOK(int, 0, socket_connect, struct socket *sock, struct sockaddr *address, + int addrlen) +LSM_HOOK(int, 0, socket_listen, struct socket *sock, int backlog) +LSM_HOOK(int, 0, socket_accept, struct socket *sock, struct socket *newsock) +LSM_HOOK(int, 0, socket_sendmsg, struct socket *sock, struct msghdr *msg, + int size) +LSM_HOOK(int, 0, socket_recvmsg, struct socket *sock, struct msghdr *msg, + int size, int flags) +LSM_HOOK(int, 0, socket_getsockname, struct socket *sock) +LSM_HOOK(int, 0, socket_getpeername, struct socket *sock) +LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) +LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) +LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) +LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) +LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, + char __user *optval, int __user *optlen, unsigned len) +LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, + struct sk_buff *skb, u32 *secid) +LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) +LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) +LSM_HOOK(void, LSM_RET_VOID, sk_clone_security, const struct sock *sk, + struct sock *newsk) +LSM_HOOK(void, LSM_RET_VOID, sk_getsecid, struct sock *sk, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, sock_graft, struct sock *sk, struct socket *parent) +LSM_HOOK(int, 0, inet_conn_request, struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +LSM_HOOK(void, LSM_RET_VOID, inet_csk_clone, struct sock *newsk, + const struct request_sock *req) +LSM_HOOK(void, LSM_RET_VOID, inet_conn_established, struct sock *sk, + struct sk_buff *skb) +LSM_HOOK(int, 0, secmark_relabel_packet, u32 secid) +LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) +LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) +LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, + struct flowi *fl) +LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) +LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) +LSM_HOOK(int, 0, tun_dev_create, void) +LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) +LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) +LSM_HOOK(int, 0, tun_dev_open, void *security) +LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_endpoint *ep, + struct sk_buff *skb) +LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, + struct sockaddr *address, int addrlen) +LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_endpoint *ep, + struct sock *sk, struct sock *newsk) +#endif /* CONFIG_SECURITY_NETWORK */ + +#ifdef CONFIG_SECURITY_INFINIBAND +LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey) +LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name, + u8 port_num) +LSM_HOOK(int, 0, ib_alloc_security, void **sec) +LSM_HOOK(void, LSM_RET_VOID, ib_free_security, void *sec) +#endif /* CONFIG_SECURITY_INFINIBAND */ + +#ifdef CONFIG_SECURITY_NETWORK_XFRM +LSM_HOOK(int, 0, xfrm_policy_alloc_security, struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp) +LSM_HOOK(int, 0, xfrm_policy_clone_security, struct xfrm_sec_ctx *old_ctx, + struct xfrm_sec_ctx **new_ctx) +LSM_HOOK(void, LSM_RET_VOID, xfrm_policy_free_security, + struct xfrm_sec_ctx *ctx) +LSM_HOOK(int, 0, xfrm_policy_delete_security, struct xfrm_sec_ctx *ctx) +LSM_HOOK(int, 0, xfrm_state_alloc, struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +LSM_HOOK(int, 0, xfrm_state_alloc_acquire, struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) +LSM_HOOK(void, LSM_RET_VOID, xfrm_state_free_security, struct xfrm_state *x) +LSM_HOOK(int, 0, xfrm_state_delete_security, struct xfrm_state *x) +LSM_HOOK(int, 0, xfrm_policy_lookup, struct xfrm_sec_ctx *ctx, u32 fl_secid, + u8 dir) +LSM_HOOK(int, 1, xfrm_state_pol_flow_match, struct xfrm_state *x, + struct xfrm_policy *xp, const struct flowi *fl) +LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, + int ckall) +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ + +/* key management security hooks */ +#ifdef CONFIG_KEYS +LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, + unsigned long flags) +LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) +LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, + unsigned perm) +LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **_buffer) +#endif /* CONFIG_KEYS */ + +#ifdef CONFIG_AUDIT +LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, + void **lsmrule) +LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) +LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule) +LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) +#endif /* CONFIG_AUDIT */ + +#ifdef CONFIG_BPF_SYSCALL +LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) +LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) +LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) +LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) +LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) +#endif /* CONFIG_BPF_SYSCALL */ + +LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) + +#ifdef CONFIG_PERF_EVENTS +LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) +LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) +LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) +LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) +LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) +#endif /* CONFIG_PERF_EVENTS */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 20d8cf194fb7..c09623b32489 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1456,625 +1456,15 @@ * @what: kernel feature being accessed */ union security_list_options { - int (*binder_set_context_mgr)(struct task_struct *mgr); - int (*binder_transaction)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_binder)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_file)(struct task_struct *from, - struct task_struct *to, - struct file *file); - - int (*ptrace_access_check)(struct task_struct *child, - unsigned int mode); - int (*ptrace_traceme)(struct task_struct *parent); - int (*capget)(struct task_struct *target, kernel_cap_t *effective, - kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset)(struct cred *new, const struct cred *old, - const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); - int (*capable)(const struct cred *cred, - struct user_namespace *ns, - int cap, - unsigned int opts); - int (*quotactl)(int cmds, int type, int id, struct super_block *sb); - int (*quota_on)(struct dentry *dentry); - int (*syslog)(int type); - int (*settime)(const struct timespec64 *ts, const struct timezone *tz); - int (*vm_enough_memory)(struct mm_struct *mm, long pages); - - int (*bprm_set_creds)(struct linux_binprm *bprm); - int (*bprm_check_security)(struct linux_binprm *bprm); - void (*bprm_committing_creds)(struct linux_binprm *bprm); - void (*bprm_committed_creds)(struct linux_binprm *bprm); - - int (*fs_context_dup)(struct fs_context *fc, struct fs_context *src_sc); - int (*fs_context_parse_param)(struct fs_context *fc, struct fs_parameter *param); - - int (*sb_alloc_security)(struct super_block *sb); - void (*sb_free_security)(struct super_block *sb); - void (*sb_free_mnt_opts)(void *mnt_opts); - int (*sb_eat_lsm_opts)(char *orig, void **mnt_opts); - int (*sb_remount)(struct super_block *sb, void *mnt_opts); - int (*sb_kern_mount)(struct super_block *sb); - int (*sb_show_options)(struct seq_file *m, struct super_block *sb); - int (*sb_statfs)(struct dentry *dentry); - int (*sb_mount)(const char *dev_name, const struct path *path, - const char *type, unsigned long flags, void *data); - int (*sb_umount)(struct vfsmount *mnt, int flags); - int (*sb_pivotroot)(const struct path *old_path, const struct path *new_path); - int (*sb_set_mnt_opts)(struct super_block *sb, - void *mnt_opts, - unsigned long kern_flags, - unsigned long *set_kern_flags); - int (*sb_clone_mnt_opts)(const struct super_block *oldsb, - struct super_block *newsb, - unsigned long kern_flags, - unsigned long *set_kern_flags); - int (*sb_add_mnt_opt)(const char *option, const char *val, int len, - void **mnt_opts); - int (*move_mount)(const struct path *from_path, const struct path *to_path); - int (*dentry_init_security)(struct dentry *dentry, int mode, - const struct qstr *name, void **ctx, - u32 *ctxlen); - int (*dentry_create_files_as)(struct dentry *dentry, int mode, - struct qstr *name, - const struct cred *old, - struct cred *new); - - -#ifdef CONFIG_SECURITY_PATH - int (*path_unlink)(const struct path *dir, struct dentry *dentry); - int (*path_mkdir)(const struct path *dir, struct dentry *dentry, - umode_t mode); - int (*path_rmdir)(const struct path *dir, struct dentry *dentry); - int (*path_mknod)(const struct path *dir, struct dentry *dentry, - umode_t mode, unsigned int dev); - int (*path_truncate)(const struct path *path); - int (*path_symlink)(const struct path *dir, struct dentry *dentry, - const char *old_name); - int (*path_link)(struct dentry *old_dentry, const struct path *new_dir, - struct dentry *new_dentry); - int (*path_rename)(const struct path *old_dir, struct dentry *old_dentry, - const struct path *new_dir, - struct dentry *new_dentry); - int (*path_chmod)(const struct path *path, umode_t mode); - int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); - int (*path_chroot)(const struct path *path); -#endif - /* Needed for inode based security check */ - int (*path_notify)(const struct path *path, u64 mask, - unsigned int obj_type); - int (*inode_alloc_security)(struct inode *inode); - void (*inode_free_security)(struct inode *inode); - int (*inode_init_security)(struct inode *inode, struct inode *dir, - const struct qstr *qstr, - const char **name, void **value, - size_t *len); - int (*inode_create)(struct inode *dir, struct dentry *dentry, - umode_t mode); - int (*inode_link)(struct dentry *old_dentry, struct inode *dir, - struct dentry *new_dentry); - int (*inode_unlink)(struct inode *dir, struct dentry *dentry); - int (*inode_symlink)(struct inode *dir, struct dentry *dentry, - const char *old_name); - int (*inode_mkdir)(struct inode *dir, struct dentry *dentry, - umode_t mode); - int (*inode_rmdir)(struct inode *dir, struct dentry *dentry); - int (*inode_mknod)(struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t dev); - int (*inode_rename)(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, - struct dentry *new_dentry); - int (*inode_readlink)(struct dentry *dentry); - int (*inode_follow_link)(struct dentry *dentry, struct inode *inode, - bool rcu); - int (*inode_permission)(struct inode *inode, int mask); - int (*inode_setattr)(struct dentry *dentry, struct iattr *attr); - int (*inode_getattr)(const struct path *path); - int (*inode_setxattr)(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); - void (*inode_post_setxattr)(struct dentry *dentry, const char *name, - const void *value, size_t size, - int flags); - int (*inode_getxattr)(struct dentry *dentry, const char *name); - int (*inode_listxattr)(struct dentry *dentry); - int (*inode_removexattr)(struct dentry *dentry, const char *name); - int (*inode_need_killpriv)(struct dentry *dentry); - int (*inode_killpriv)(struct dentry *dentry); - int (*inode_getsecurity)(struct inode *inode, const char *name, - void **buffer, bool alloc); - int (*inode_setsecurity)(struct inode *inode, const char *name, - const void *value, size_t size, - int flags); - int (*inode_listsecurity)(struct inode *inode, char *buffer, - size_t buffer_size); - void (*inode_getsecid)(struct inode *inode, u32 *secid); - int (*inode_copy_up)(struct dentry *src, struct cred **new); - int (*inode_copy_up_xattr)(const char *name); - - int (*kernfs_init_security)(struct kernfs_node *kn_dir, - struct kernfs_node *kn); - - int (*file_permission)(struct file *file, int mask); - int (*file_alloc_security)(struct file *file); - void (*file_free_security)(struct file *file); - int (*file_ioctl)(struct file *file, unsigned int cmd, - unsigned long arg); - int (*mmap_addr)(unsigned long addr); - int (*mmap_file)(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); - int (*file_mprotect)(struct vm_area_struct *vma, unsigned long reqprot, - unsigned long prot); - int (*file_lock)(struct file *file, unsigned int cmd); - int (*file_fcntl)(struct file *file, unsigned int cmd, - unsigned long arg); - void (*file_set_fowner)(struct file *file); - int (*file_send_sigiotask)(struct task_struct *tsk, - struct fown_struct *fown, int sig); - int (*file_receive)(struct file *file); - int (*file_open)(struct file *file); - - int (*task_alloc)(struct task_struct *task, unsigned long clone_flags); - void (*task_free)(struct task_struct *task); - int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp); - void (*cred_free)(struct cred *cred); - int (*cred_prepare)(struct cred *new, const struct cred *old, - gfp_t gfp); - void (*cred_transfer)(struct cred *new, const struct cred *old); - void (*cred_getsecid)(const struct cred *c, u32 *secid); - int (*kernel_act_as)(struct cred *new, u32 secid); - int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_module_request)(char *kmod_name); - int (*kernel_load_data)(enum kernel_load_data_id id); - int (*kernel_read_file)(struct file *file, enum kernel_read_file_id id); - int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size, - enum kernel_read_file_id id); - int (*task_fix_setuid)(struct cred *new, const struct cred *old, - int flags); - int (*task_setpgid)(struct task_struct *p, pid_t pgid); - int (*task_getpgid)(struct task_struct *p); - int (*task_getsid)(struct task_struct *p); - void (*task_getsecid)(struct task_struct *p, u32 *secid); - int (*task_setnice)(struct task_struct *p, int nice); - int (*task_setioprio)(struct task_struct *p, int ioprio); - int (*task_getioprio)(struct task_struct *p); - int (*task_prlimit)(const struct cred *cred, const struct cred *tcred, - unsigned int flags); - int (*task_setrlimit)(struct task_struct *p, unsigned int resource, - struct rlimit *new_rlim); - int (*task_setscheduler)(struct task_struct *p); - int (*task_getscheduler)(struct task_struct *p); - int (*task_movememory)(struct task_struct *p); - int (*task_kill)(struct task_struct *p, struct kernel_siginfo *info, - int sig, const struct cred *cred); - int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); - void (*task_to_inode)(struct task_struct *p, struct inode *inode); - - int (*ipc_permission)(struct kern_ipc_perm *ipcp, short flag); - void (*ipc_getsecid)(struct kern_ipc_perm *ipcp, u32 *secid); - - int (*msg_msg_alloc_security)(struct msg_msg *msg); - void (*msg_msg_free_security)(struct msg_msg *msg); - - int (*msg_queue_alloc_security)(struct kern_ipc_perm *perm); - void (*msg_queue_free_security)(struct kern_ipc_perm *perm); - int (*msg_queue_associate)(struct kern_ipc_perm *perm, int msqflg); - int (*msg_queue_msgctl)(struct kern_ipc_perm *perm, int cmd); - int (*msg_queue_msgsnd)(struct kern_ipc_perm *perm, struct msg_msg *msg, - int msqflg); - int (*msg_queue_msgrcv)(struct kern_ipc_perm *perm, struct msg_msg *msg, - struct task_struct *target, long type, - int mode); - - int (*shm_alloc_security)(struct kern_ipc_perm *perm); - void (*shm_free_security)(struct kern_ipc_perm *perm); - int (*shm_associate)(struct kern_ipc_perm *perm, int shmflg); - int (*shm_shmctl)(struct kern_ipc_perm *perm, int cmd); - int (*shm_shmat)(struct kern_ipc_perm *perm, char __user *shmaddr, - int shmflg); - - int (*sem_alloc_security)(struct kern_ipc_perm *perm); - void (*sem_free_security)(struct kern_ipc_perm *perm); - int (*sem_associate)(struct kern_ipc_perm *perm, int semflg); - int (*sem_semctl)(struct kern_ipc_perm *perm, int cmd); - int (*sem_semop)(struct kern_ipc_perm *perm, struct sembuf *sops, - unsigned nsops, int alter); - - int (*netlink_send)(struct sock *sk, struct sk_buff *skb); - - void (*d_instantiate)(struct dentry *dentry, struct inode *inode); - - int (*getprocattr)(struct task_struct *p, char *name, char **value); - int (*setprocattr)(const char *name, void *value, size_t size); - int (*ismaclabel)(const char *name); - int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen); - int (*secctx_to_secid)(const char *secdata, u32 seclen, u32 *secid); - void (*release_secctx)(char *secdata, u32 seclen); - - void (*inode_invalidate_secctx)(struct inode *inode); - int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen); - int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen); - int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen); - -#ifdef CONFIG_SECURITY_NETWORK - int (*unix_stream_connect)(struct sock *sock, struct sock *other, - struct sock *newsk); - int (*unix_may_send)(struct socket *sock, struct socket *other); - - int (*socket_create)(int family, int type, int protocol, int kern); - int (*socket_post_create)(struct socket *sock, int family, int type, - int protocol, int kern); - int (*socket_socketpair)(struct socket *socka, struct socket *sockb); - int (*socket_bind)(struct socket *sock, struct sockaddr *address, - int addrlen); - int (*socket_connect)(struct socket *sock, struct sockaddr *address, - int addrlen); - int (*socket_listen)(struct socket *sock, int backlog); - int (*socket_accept)(struct socket *sock, struct socket *newsock); - int (*socket_sendmsg)(struct socket *sock, struct msghdr *msg, - int size); - int (*socket_recvmsg)(struct socket *sock, struct msghdr *msg, - int size, int flags); - int (*socket_getsockname)(struct socket *sock); - int (*socket_getpeername)(struct socket *sock); - int (*socket_getsockopt)(struct socket *sock, int level, int optname); - int (*socket_setsockopt)(struct socket *sock, int level, int optname); - int (*socket_shutdown)(struct socket *sock, int how); - int (*socket_sock_rcv_skb)(struct sock *sk, struct sk_buff *skb); - int (*socket_getpeersec_stream)(struct socket *sock, - char __user *optval, - int __user *optlen, unsigned len); - int (*socket_getpeersec_dgram)(struct socket *sock, - struct sk_buff *skb, u32 *secid); - int (*sk_alloc_security)(struct sock *sk, int family, gfp_t priority); - void (*sk_free_security)(struct sock *sk); - void (*sk_clone_security)(const struct sock *sk, struct sock *newsk); - void (*sk_getsecid)(struct sock *sk, u32 *secid); - void (*sock_graft)(struct sock *sk, struct socket *parent); - int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, - struct request_sock *req); - void (*inet_csk_clone)(struct sock *newsk, - const struct request_sock *req); - void (*inet_conn_established)(struct sock *sk, struct sk_buff *skb); - int (*secmark_relabel_packet)(u32 secid); - void (*secmark_refcount_inc)(void); - void (*secmark_refcount_dec)(void); - void (*req_classify_flow)(const struct request_sock *req, - struct flowi *fl); - int (*tun_dev_alloc_security)(void **security); - void (*tun_dev_free_security)(void *security); - int (*tun_dev_create)(void); - int (*tun_dev_attach_queue)(void *security); - int (*tun_dev_attach)(struct sock *sk, void *security); - int (*tun_dev_open)(void *security); - int (*sctp_assoc_request)(struct sctp_endpoint *ep, - struct sk_buff *skb); - int (*sctp_bind_connect)(struct sock *sk, int optname, - struct sockaddr *address, int addrlen); - void (*sctp_sk_clone)(struct sctp_endpoint *ep, struct sock *sk, - struct sock *newsk); -#endif /* CONFIG_SECURITY_NETWORK */ - -#ifdef CONFIG_SECURITY_INFINIBAND - int (*ib_pkey_access)(void *sec, u64 subnet_prefix, u16 pkey); - int (*ib_endport_manage_subnet)(void *sec, const char *dev_name, - u8 port_num); - int (*ib_alloc_security)(void **sec); - void (*ib_free_security)(void *sec); -#endif /* CONFIG_SECURITY_INFINIBAND */ - -#ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security)(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx, - gfp_t gfp); - int (*xfrm_policy_clone_security)(struct xfrm_sec_ctx *old_ctx, - struct xfrm_sec_ctx **new_ctx); - void (*xfrm_policy_free_security)(struct xfrm_sec_ctx *ctx); - int (*xfrm_policy_delete_security)(struct xfrm_sec_ctx *ctx); - int (*xfrm_state_alloc)(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx); - int (*xfrm_state_alloc_acquire)(struct xfrm_state *x, - struct xfrm_sec_ctx *polsec, - u32 secid); - void (*xfrm_state_free_security)(struct xfrm_state *x); - int (*xfrm_state_delete_security)(struct xfrm_state *x); - int (*xfrm_policy_lookup)(struct xfrm_sec_ctx *ctx, u32 fl_secid, - u8 dir); - int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, - struct xfrm_policy *xp, - const struct flowi *fl); - int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ - - /* key management security hooks */ -#ifdef CONFIG_KEYS - int (*key_alloc)(struct key *key, const struct cred *cred, - unsigned long flags); - void (*key_free)(struct key *key); - int (*key_permission)(key_ref_t key_ref, const struct cred *cred, - unsigned perm); - int (*key_getsecurity)(struct key *key, char **_buffer); -#endif /* CONFIG_KEYS */ - -#ifdef CONFIG_AUDIT - int (*audit_rule_init)(u32 field, u32 op, char *rulestr, - void **lsmrule); - int (*audit_rule_known)(struct audit_krule *krule); - int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule); - void (*audit_rule_free)(void *lsmrule); -#endif /* CONFIG_AUDIT */ - -#ifdef CONFIG_BPF_SYSCALL - int (*bpf)(int cmd, union bpf_attr *attr, - unsigned int size); - int (*bpf_map)(struct bpf_map *map, fmode_t fmode); - int (*bpf_prog)(struct bpf_prog *prog); - int (*bpf_map_alloc_security)(struct bpf_map *map); - void (*bpf_map_free_security)(struct bpf_map *map); - int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); - void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); -#endif /* CONFIG_BPF_SYSCALL */ - int (*locked_down)(enum lockdown_reason what); -#ifdef CONFIG_PERF_EVENTS - int (*perf_event_open)(struct perf_event_attr *attr, int type); - int (*perf_event_alloc)(struct perf_event *event); - void (*perf_event_free)(struct perf_event *event); - int (*perf_event_read)(struct perf_event *event); - int (*perf_event_write)(struct perf_event *event); - -#endif + #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); + #include "lsm_hook_defs.h" + #undef LSM_HOOK }; struct security_hook_heads { - struct hlist_head binder_set_context_mgr; - struct hlist_head binder_transaction; - struct hlist_head binder_transfer_binder; - struct hlist_head binder_transfer_file; - struct hlist_head ptrace_access_check; - struct hlist_head ptrace_traceme; - struct hlist_head capget; - struct hlist_head capset; - struct hlist_head capable; - struct hlist_head quotactl; - struct hlist_head quota_on; - struct hlist_head syslog; - struct hlist_head settime; - struct hlist_head vm_enough_memory; - struct hlist_head bprm_set_creds; - struct hlist_head bprm_check_security; - struct hlist_head bprm_committing_creds; - struct hlist_head bprm_committed_creds; - struct hlist_head fs_context_dup; - struct hlist_head fs_context_parse_param; - struct hlist_head sb_alloc_security; - struct hlist_head sb_free_security; - struct hlist_head sb_free_mnt_opts; - struct hlist_head sb_eat_lsm_opts; - struct hlist_head sb_remount; - struct hlist_head sb_kern_mount; - struct hlist_head sb_show_options; - struct hlist_head sb_statfs; - struct hlist_head sb_mount; - struct hlist_head sb_umount; - struct hlist_head sb_pivotroot; - struct hlist_head sb_set_mnt_opts; - struct hlist_head sb_clone_mnt_opts; - struct hlist_head sb_add_mnt_opt; - struct hlist_head move_mount; - struct hlist_head dentry_init_security; - struct hlist_head dentry_create_files_as; -#ifdef CONFIG_SECURITY_PATH - struct hlist_head path_unlink; - struct hlist_head path_mkdir; - struct hlist_head path_rmdir; - struct hlist_head path_mknod; - struct hlist_head path_truncate; - struct hlist_head path_symlink; - struct hlist_head path_link; - struct hlist_head path_rename; - struct hlist_head path_chmod; - struct hlist_head path_chown; - struct hlist_head path_chroot; -#endif - /* Needed for inode based modules as well */ - struct hlist_head path_notify; - struct hlist_head inode_alloc_security; - struct hlist_head inode_free_security; - struct hlist_head inode_init_security; - struct hlist_head inode_create; - struct hlist_head inode_link; - struct hlist_head inode_unlink; - struct hlist_head inode_symlink; - struct hlist_head inode_mkdir; - struct hlist_head inode_rmdir; - struct hlist_head inode_mknod; - struct hlist_head inode_rename; - struct hlist_head inode_readlink; - struct hlist_head inode_follow_link; - struct hlist_head inode_permission; - struct hlist_head inode_setattr; - struct hlist_head inode_getattr; - struct hlist_head inode_setxattr; - struct hlist_head inode_post_setxattr; - struct hlist_head inode_getxattr; - struct hlist_head inode_listxattr; - struct hlist_head inode_removexattr; - struct hlist_head inode_need_killpriv; - struct hlist_head inode_killpriv; - struct hlist_head inode_getsecurity; - struct hlist_head inode_setsecurity; - struct hlist_head inode_listsecurity; - struct hlist_head inode_getsecid; - struct hlist_head inode_copy_up; - struct hlist_head inode_copy_up_xattr; - struct hlist_head kernfs_init_security; - struct hlist_head file_permission; - struct hlist_head file_alloc_security; - struct hlist_head file_free_security; - struct hlist_head file_ioctl; - struct hlist_head mmap_addr; - struct hlist_head mmap_file; - struct hlist_head file_mprotect; - struct hlist_head file_lock; - struct hlist_head file_fcntl; - struct hlist_head file_set_fowner; - struct hlist_head file_send_sigiotask; - struct hlist_head file_receive; - struct hlist_head file_open; - struct hlist_head task_alloc; - struct hlist_head task_free; - struct hlist_head cred_alloc_blank; - struct hlist_head cred_free; - struct hlist_head cred_prepare; - struct hlist_head cred_transfer; - struct hlist_head cred_getsecid; - struct hlist_head kernel_act_as; - struct hlist_head kernel_create_files_as; - struct hlist_head kernel_load_data; - struct hlist_head kernel_read_file; - struct hlist_head kernel_post_read_file; - struct hlist_head kernel_module_request; - struct hlist_head task_fix_setuid; - struct hlist_head task_setpgid; - struct hlist_head task_getpgid; - struct hlist_head task_getsid; - struct hlist_head task_getsecid; - struct hlist_head task_setnice; - struct hlist_head task_setioprio; - struct hlist_head task_getioprio; - struct hlist_head task_prlimit; - struct hlist_head task_setrlimit; - struct hlist_head task_setscheduler; - struct hlist_head task_getscheduler; - struct hlist_head task_movememory; - struct hlist_head task_kill; - struct hlist_head task_prctl; - struct hlist_head task_to_inode; - struct hlist_head ipc_permission; - struct hlist_head ipc_getsecid; - struct hlist_head msg_msg_alloc_security; - struct hlist_head msg_msg_free_security; - struct hlist_head msg_queue_alloc_security; - struct hlist_head msg_queue_free_security; - struct hlist_head msg_queue_associate; - struct hlist_head msg_queue_msgctl; - struct hlist_head msg_queue_msgsnd; - struct hlist_head msg_queue_msgrcv; - struct hlist_head shm_alloc_security; - struct hlist_head shm_free_security; - struct hlist_head shm_associate; - struct hlist_head shm_shmctl; - struct hlist_head shm_shmat; - struct hlist_head sem_alloc_security; - struct hlist_head sem_free_security; - struct hlist_head sem_associate; - struct hlist_head sem_semctl; - struct hlist_head sem_semop; - struct hlist_head netlink_send; - struct hlist_head d_instantiate; - struct hlist_head getprocattr; - struct hlist_head setprocattr; - struct hlist_head ismaclabel; - struct hlist_head secid_to_secctx; - struct hlist_head secctx_to_secid; - struct hlist_head release_secctx; - struct hlist_head inode_invalidate_secctx; - struct hlist_head inode_notifysecctx; - struct hlist_head inode_setsecctx; - struct hlist_head inode_getsecctx; -#ifdef CONFIG_SECURITY_NETWORK - struct hlist_head unix_stream_connect; - struct hlist_head unix_may_send; - struct hlist_head socket_create; - struct hlist_head socket_post_create; - struct hlist_head socket_socketpair; - struct hlist_head socket_bind; - struct hlist_head socket_connect; - struct hlist_head socket_listen; - struct hlist_head socket_accept; - struct hlist_head socket_sendmsg; - struct hlist_head socket_recvmsg; - struct hlist_head socket_getsockname; - struct hlist_head socket_getpeername; - struct hlist_head socket_getsockopt; - struct hlist_head socket_setsockopt; - struct hlist_head socket_shutdown; - struct hlist_head socket_sock_rcv_skb; - struct hlist_head socket_getpeersec_stream; - struct hlist_head socket_getpeersec_dgram; - struct hlist_head sk_alloc_security; - struct hlist_head sk_free_security; - struct hlist_head sk_clone_security; - struct hlist_head sk_getsecid; - struct hlist_head sock_graft; - struct hlist_head inet_conn_request; - struct hlist_head inet_csk_clone; - struct hlist_head inet_conn_established; - struct hlist_head secmark_relabel_packet; - struct hlist_head secmark_refcount_inc; - struct hlist_head secmark_refcount_dec; - struct hlist_head req_classify_flow; - struct hlist_head tun_dev_alloc_security; - struct hlist_head tun_dev_free_security; - struct hlist_head tun_dev_create; - struct hlist_head tun_dev_attach_queue; - struct hlist_head tun_dev_attach; - struct hlist_head tun_dev_open; - struct hlist_head sctp_assoc_request; - struct hlist_head sctp_bind_connect; - struct hlist_head sctp_sk_clone; -#endif /* CONFIG_SECURITY_NETWORK */ -#ifdef CONFIG_SECURITY_INFINIBAND - struct hlist_head ib_pkey_access; - struct hlist_head ib_endport_manage_subnet; - struct hlist_head ib_alloc_security; - struct hlist_head ib_free_security; -#endif /* CONFIG_SECURITY_INFINIBAND */ -#ifdef CONFIG_SECURITY_NETWORK_XFRM - struct hlist_head xfrm_policy_alloc_security; - struct hlist_head xfrm_policy_clone_security; - struct hlist_head xfrm_policy_free_security; - struct hlist_head xfrm_policy_delete_security; - struct hlist_head xfrm_state_alloc; - struct hlist_head xfrm_state_alloc_acquire; - struct hlist_head xfrm_state_free_security; - struct hlist_head xfrm_state_delete_security; - struct hlist_head xfrm_policy_lookup; - struct hlist_head xfrm_state_pol_flow_match; - struct hlist_head xfrm_decode_session; -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ -#ifdef CONFIG_KEYS - struct hlist_head key_alloc; - struct hlist_head key_free; - struct hlist_head key_permission; - struct hlist_head key_getsecurity; -#endif /* CONFIG_KEYS */ -#ifdef CONFIG_AUDIT - struct hlist_head audit_rule_init; - struct hlist_head audit_rule_known; - struct hlist_head audit_rule_match; - struct hlist_head audit_rule_free; -#endif /* CONFIG_AUDIT */ -#ifdef CONFIG_BPF_SYSCALL - struct hlist_head bpf; - struct hlist_head bpf_map; - struct hlist_head bpf_prog; - struct hlist_head bpf_map_alloc_security; - struct hlist_head bpf_map_free_security; - struct hlist_head bpf_prog_alloc_security; - struct hlist_head bpf_prog_free_security; -#endif /* CONFIG_BPF_SYSCALL */ - struct hlist_head locked_down; -#ifdef CONFIG_PERF_EVENTS - struct hlist_head perf_event_open; - struct hlist_head perf_event_alloc; - struct hlist_head perf_event_free; - struct hlist_head perf_event_read; - struct hlist_head perf_event_write; -#endif + #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; + #include "lsm_hook_defs.h" + #undef LSM_HOOK } __randomize_layout; /* @@ -2100,6 +1490,12 @@ struct lsm_blob_sizes { int lbs_task; }; +/* + * LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void + * LSM hooks (in include/linux/lsm_hook_defs.h). + */ +#define LSM_RET_VOID ((void) 0) + /* * Initializing a security_hook_list structure takes * up a lot of space in a source file. This macro takes -- cgit v1.2.3 From 9d3fdea789c8fab51381c2d609932fabe94c0517 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 29 Mar 2020 01:43:51 +0100 Subject: bpf: lsm: Provide attachment points for BPF LSM programs When CONFIG_BPF_LSM is enabled, nop functions, bpf_lsm_, are generated for each LSM hook. These functions are initialized as LSM hooks in a subsequent patch. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Reviewed-by: Brendan Jackman Reviewed-by: Florent Revest Reviewed-by: Kees Cook Acked-by: Yonghong Song Acked-by: James Morris Link: https://lore.kernel.org/bpf/20200329004356.27286-4-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/bpf_lsm.h (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h new file mode 100644 index 000000000000..83b96895829f --- /dev/null +++ b/include/linux/bpf_lsm.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2020 Google LLC. + */ + +#ifndef _LINUX_BPF_LSM_H +#define _LINUX_BPF_LSM_H + +#include +#include + +#ifdef CONFIG_BPF_LSM + +#define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + RET bpf_lsm_##NAME(__VA_ARGS__); +#include +#undef LSM_HOOK + +#endif /* CONFIG_BPF_LSM */ + +#endif /* _LINUX_BPF_LSM_H */ -- cgit v1.2.3 From 9e4e01dfd3254c7f04f24b7c6b29596bc12332f3 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 29 Mar 2020 01:43:52 +0100 Subject: bpf: lsm: Implement attach, detach and execution JITed BPF programs are dynamically attached to the LSM hooks using BPF trampolines. The trampoline prologue generates code to handle conversion of the signature of the hook to the appropriate BPF context. The allocated trampoline programs are attached to the nop functions initialized as LSM hooks. BPF_PROG_TYPE_LSM programs must have a GPL compatible license and and need CAP_SYS_ADMIN (required for loading eBPF programs). Upon attachment: * A BPF fexit trampoline is used for LSM hooks with a void return type. * A BPF fmod_ret trampoline is used for LSM hooks which return an int. The attached programs can override the return value of the bpf LSM hook to indicate a MAC Policy decision. Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Reviewed-by: Brendan Jackman Reviewed-by: Florent Revest Acked-by: Andrii Nakryiko Acked-by: James Morris Link: https://lore.kernel.org/bpf/20200329004356.27286-5-kpsingh@chromium.org --- include/linux/bpf_lsm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 83b96895829f..af74712af585 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -17,6 +17,17 @@ #include #undef LSM_HOOK +int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, + const struct bpf_prog *prog); + +#else /* !CONFIG_BPF_LSM */ + +static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, + const struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ -- cgit v1.2.3 From 0544cb75bd7df357c1758b760ecc7709125c139a Mon Sep 17 00:00:00 2001 From: Andrei Botila Date: Thu, 19 Mar 2020 09:12:31 -0700 Subject: bus: fsl-mc: add api to retrieve mc version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new api that returns Management Complex firmware version and make the required structure public. The api's first user will be the caam driver for setting prediction resistance bits. Signed-off-by: Andrei Botila Acked-by: Laurentiu Tudor Reviewed-by: Horia Geantă Cc: Chris Healy Cc: Lucas Stach Cc: Horia Geantă Cc: Herbert Xu Cc: Iuliana Prodan Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-imx@nxp.com Signed-off-by: Herbert Xu --- include/linux/fsl/mc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 54d9436600c7..2b5f8366dbe1 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -381,6 +381,22 @@ int __must_check __fsl_mc_driver_register(struct fsl_mc_driver *fsl_mc_driver, void fsl_mc_driver_unregister(struct fsl_mc_driver *driver); +/** + * struct fsl_mc_version + * @major: Major version number: incremented on API compatibility changes + * @minor: Minor version number: incremented on API additions (that are + * backward compatible); reset when major version is incremented + * @revision: Internal revision number: incremented on implementation changes + * and/or bug fixes that have no impact on API + */ +struct fsl_mc_version { + u32 major; + u32 minor; + u32 revision; +}; + +struct fsl_mc_version *fsl_mc_get_version(void); + int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev, u16 mc_io_flags, struct fsl_mc_io **new_mc_io); -- cgit v1.2.3 From a08e7fd9123d85dfdf8d1dc61dbe321c8359d25f Mon Sep 17 00:00:00 2001 From: Cambda Zhu Date: Thu, 26 Mar 2020 15:33:14 +0800 Subject: net: Fix typo of SKB_SGO_CB_OFFSET The SKB_SGO_CB_OFFSET should be SKB_GSO_CB_OFFSET which means the offset of the GSO in skb cb. This patch fixes the typo. Fixes: 9207f9d45b0a ("net: preserve IP control block during GSO segmentation") Signed-off-by: Cambda Zhu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e47895082b4b..28b1a2b4459e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4389,8 +4389,8 @@ struct skb_gso_cb { __wsum csum; __u16 csum_start; }; -#define SKB_SGO_CB_OFFSET 32 -#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_SGO_CB_OFFSET)) +#define SKB_GSO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET)) static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) { -- cgit v1.2.3 From 3df523ab582c52f745f9a73b9ebf9368ede555ac Mon Sep 17 00:00:00 2001 From: Peter Krystad Date: Fri, 27 Mar 2020 14:48:37 -0700 Subject: mptcp: Add ADD_ADDR handling Add handling for sending and receiving the ADD_ADDR, ADD_ADDR6, and RM_ADDR suboptions. Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Peter Krystad Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/linux/tcp.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3dc964010fef..1225db308957 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -86,9 +86,13 @@ struct mptcp_options_received { u64 data_seq; u32 subflow_seq; u16 data_len; - u8 mp_capable : 1, + u16 mp_capable : 1, mp_join : 1, - dss : 1; + dss : 1, + add_addr : 1, + rm_addr : 1, + family : 4, + echo : 1; u8 use_map:1, dsn64:1, data_fin:1, @@ -96,6 +100,16 @@ struct mptcp_options_received { ack64:1, mpc_map:1, __unused:2; + u8 addr_id; + u8 rm_id; + union { + struct in_addr addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + struct in6_addr addr6; +#endif + }; + u64 ahmac; + u16 port; }; #endif @@ -131,6 +145,8 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) #if IS_ENABLED(CONFIG_MPTCP) rx_opt->mptcp.mp_capable = 0; rx_opt->mptcp.mp_join = 0; + rx_opt->mptcp.add_addr = 0; + rx_opt->mptcp.rm_addr = 0; rx_opt->mptcp.dss = 0; #endif } -- cgit v1.2.3 From f296234c98a8fcec94eec80304a873f635d350ea Mon Sep 17 00:00:00 2001 From: Peter Krystad Date: Fri, 27 Mar 2020 14:48:39 -0700 Subject: mptcp: Add handling of incoming MP_JOIN requests Process the MP_JOIN option in a SYN packet with the same flow as MP_CAPABLE but when the third ACK is received add the subflow to the MPTCP socket subflow list instead of adding it to the TCP socket accept queue. The subflow is added at the end of the subflow list so it will not interfere with the existing subflows operation and no data is expected to be transmitted on it. Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Peter Krystad Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/linux/tcp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1225db308957..421c99c12291 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -92,7 +92,13 @@ struct mptcp_options_received { add_addr : 1, rm_addr : 1, family : 4, - echo : 1; + echo : 1, + backup : 1; + u32 token; + u32 nonce; + u64 thmac; + u8 hmac[20]; + u8 join_id; u8 use_map:1, dsn64:1, data_fin:1, -- cgit v1.2.3 From 8610c7c6e3bd647ff98d21c8bc0580e77bc2f8b3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 27 Mar 2020 18:00:20 -0400 Subject: net: ipv6: add support for rpl sr exthdr This patch adds rpl source routing receive handling. Everything works only if sysconf "rpl_seg_enabled" and source routing is enabled. Mostly the same behaviour as IPv6 segmentation routing. To handle compression and uncompression a rpl.c file is created which contains the necessary functionality. The receive handling will also care about IPv6 encapsulated so far it's specified as possible nexthdr in RFC 6554. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ea7c7906591e..2cb445a8fc9e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -74,6 +74,7 @@ struct ipv6_devconf { __u32 addr_gen_mode; __s32 disable_policy; __s32 ndisc_tclass; + __s32 rpl_seg_enabled; struct ctl_table_header *sysctl_header; }; -- cgit v1.2.3 From 317a0ebe53f46527aed912f7c3df963cd9a41536 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 27 Mar 2020 15:34:42 -0700 Subject: iio: cros_ec: Use Hertz as unit for sampling frequency To be compliant with other sensors, set and get sensor sampling frequency in Hz, not mHz. Fixes: ae7b02ad2f32 ("iio: common: cros_ec_sensors: Expose cros_ec_sensors frequency range via iio sysfs") Signed-off-by: Gwendal Grignou Acked-by: Jonathan Cameron Signed-off-by: Enric Balletbo i Serra --- include/linux/iio/common/cros_ec_sensors_core.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index bc26ae2e3272..7bc961defa87 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -51,6 +51,8 @@ typedef irqreturn_t (*cros_ec_sensors_capture_t)(int irq, void *p); * is always 8-byte aligned. * @read_ec_sensors_data: function used for accessing sensors values * @fifo_max_event_count: Size of the EC sensor FIFO + * @frequencies: Table of known available frequencies: + * 0, Min and Max in mHz */ struct cros_ec_sensors_core_state { struct cros_ec_device *ec; @@ -74,9 +76,7 @@ struct cros_ec_sensors_core_state { unsigned long scan_mask, s16 *data); u32 fifo_max_event_count; - - /* Table of known available frequencies : 0, Min and Max in mHz */ - int frequencies[3]; + int frequencies[6]; }; int cros_ec_sensors_read_lpc(struct iio_dev *indio_dev, unsigned long scan_mask, -- cgit v1.2.3 From 7a52cbccee8df0edfee30b81fdbb7d4f9d27ffd5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Sun, 12 Jan 2020 01:55:03 +0000 Subject: mfd: rk808: Reduce shutdown duplication Rather than having 3 almost-identical functions plus the machinery to keep track of them, it's far simpler to just dynamically select the appropriate register field per variant. Signed-off-by: Robin Murphy Signed-off-by: Lee Jones --- include/linux/mfd/rk808.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index a59bf323f713..b038653fa87e 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -620,7 +620,6 @@ struct rk808 { long variant; const struct regmap_config *regmap_cfg; const struct regmap_irq_chip *regmap_irq_chip; - void (*pm_pwroff_fn)(void); void (*pm_pwroff_prep_fn)(void); }; #endif /* __LINUX_REGULATOR_RK808_H */ -- cgit v1.2.3 From 42679765faf286259b16acf284eb52d68877ff32 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Sun, 12 Jan 2020 01:55:04 +0000 Subject: mfd: rk808: Convert RK805 to shutdown/suspend hooks RK805 has the same kind of dual-role sleep/shutdown pin as RK809/RK817, so it makes little sense for the driver to have to have two completely different mechanisms to handle essentially the same thing. Move RK805 over to the shutdown/suspend flow to clean things up. Signed-off-by: Robin Murphy Signed-off-by: Lee Jones --- include/linux/mfd/rk808.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index b038653fa87e..e07f6e61cd38 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -620,6 +620,5 @@ struct rk808 { long variant; const struct regmap_config *regmap_cfg; const struct regmap_irq_chip *regmap_irq_chip; - void (*pm_pwroff_prep_fn)(void); }; #endif /* __LINUX_REGULATOR_RK808_H */ -- cgit v1.2.3 From 2a7e7274f3d43d2a072cab25c0035dc994903bb9 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 17 Feb 2020 10:26:16 +0800 Subject: mfd: sc27xx: Add USB charger type detection support The Spreadtrum SC27XX series PMICs supply the USB charger type detection function, and related registers are located on the PMIC global registers region, thus we implement and export this function in the MFD driver for users to get the USB charger type. Signed-off-by: Baolin Wang Signed-off-by: Lee Jones --- include/linux/mfd/sc27xx-pmic.h | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 include/linux/mfd/sc27xx-pmic.h (limited to 'include/linux') diff --git a/include/linux/mfd/sc27xx-pmic.h b/include/linux/mfd/sc27xx-pmic.h new file mode 100644 index 000000000000..57e45c0b3ae2 --- /dev/null +++ b/include/linux/mfd/sc27xx-pmic.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_MFD_SC27XX_PMIC_H +#define __LINUX_MFD_SC27XX_PMIC_H + +extern enum usb_charger_type sprd_pmic_detect_charger_type(struct device *dev); + +#endif /* __LINUX_MFD_SC27XX_PMIC_H */ -- cgit v1.2.3 From 072eaf3c0f0fd2bd8f53799c8dee3ab907db1242 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 28 Jan 2020 20:12:22 +0100 Subject: libceph: drop CEPH_DEFINE_SHOW_FUNC Although CEPH_DEFINE_SHOW_FUNC is much older, it now duplicates DEFINE_SHOW_ATTRIBUTE from linux/seq_file.h. Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- include/linux/ceph/debugfs.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h index cf5e840eec71..8b3a1a7a953a 100644 --- a/include/linux/ceph/debugfs.h +++ b/include/linux/ceph/debugfs.h @@ -2,22 +2,8 @@ #ifndef _FS_CEPH_DEBUGFS_H #define _FS_CEPH_DEBUGFS_H -#include #include -#define CEPH_DEFINE_SHOW_FUNC(name) \ -static int name##_open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, name, inode->i_private); \ -} \ - \ -static const struct file_operations name##_fops = { \ - .open = name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -}; - /* debugfs.c */ extern void ceph_debugfs_init(void); extern void ceph_debugfs_cleanup(void); -- cgit v1.2.3 From 5107d7d505cb32fc5e74b792bce14b03f5beac7f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 29 Jan 2020 03:27:07 -0500 Subject: ceph: move ceph_osdc_{read,write}pages to ceph.ko Since these helpers are only used by ceph.ko, move them there and rename them with _sync_ qualifiers. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 5a62dbd3f4c2..9d9f745b98a1 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -509,23 +509,6 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, struct page *req_page, size_t req_len, struct page **resp_pages, size_t *resp_len); -extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, - struct ceph_vino vino, - struct ceph_file_layout *layout, - u64 off, u64 *plen, - u32 truncate_seq, u64 truncate_size, - struct page **pages, int nr_pages, - int page_align); - -extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, - struct ceph_vino vino, - struct ceph_file_layout *layout, - struct ceph_snap_context *sc, - u64 off, u64 len, - u32 truncate_seq, u64 truncate_size, - struct timespec64 *mtime, - struct page **pages, int nr_pages); - int ceph_osdc_copy_from(struct ceph_osd_client *osdc, u64 src_snapid, u64 src_version, struct ceph_object_id *src_oid, -- cgit v1.2.3 From 058daab79d6b597a20fd49b5e445b1b2929c2c1c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Feb 2020 18:38:37 -0500 Subject: ceph: move to a dedicated slabcache for mds requests On my machine (x86_64) this struct is 952 bytes, which gets rounded up to 1024 by kmalloc. Move this to a dedicated slabcache, so we can allocate them without the extra 72 bytes of overhead per. Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index ec73ebc4827d..525b7c3f1c81 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -272,6 +272,7 @@ extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; extern struct kmem_cache *ceph_dir_file_cachep; +extern struct kmem_cache *ceph_mds_request_cachep; /* ceph_common.c */ extern bool libceph_compatible(void *data); -- cgit v1.2.3 From 3bb48b4142bbf72045af5ebe72e65ccff6d02680 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 2 Dec 2019 13:47:57 -0500 Subject: ceph: add flag to designate that a request is asynchronous ...and ensure that such requests are never queued. The MDS has need to know that a request is asynchronous so add flags and proper infrastructure for that. Also, delegated inode numbers and directory caps are associated with the session, so ensure that async requests are always transmitted on the first attempt and are never queued to wait for session reestablishment. If it does end up looking like we'll need to queue the request, then have it return -EJUKEBOX so the caller can reattempt with a synchronous request. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index cb21c5cf12c3..9f747a1b8788 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -444,8 +444,9 @@ union ceph_mds_request_args { } __attribute__ ((packed)) lookupino; } __attribute__ ((packed)); -#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ -#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ +#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ +#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ +#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */ struct ceph_mds_request_head { __le64 oldest_client_tid; -- cgit v1.2.3 From f5e17aed3accb406f51ae528d657c275efc1edfc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Feb 2020 14:12:32 -0500 Subject: ceph: track primary dentry link Newer versions of the MDS will flag a dentry as "primary". In later patches, we'll need to consult this info, so track it in di->flags. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 9f747a1b8788..94cc4b047987 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -531,6 +531,9 @@ struct ceph_mds_reply_lease { __le32 seq; } __attribute__ ((packed)); +#define CEPH_LEASE_VALID (1 | 2) /* old and new bit values */ +#define CEPH_LEASE_PRIMARY_LINK 4 /* primary linkage */ + struct ceph_mds_reply_dirfrag { __le32 frag; /* fragment */ __le32 auth; /* auth mds, if this is a delegation point */ -- cgit v1.2.3 From a25949b99003b7e6c2604a3fc8b8d62385508477 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Feb 2020 14:12:45 -0500 Subject: ceph: cap tracking for async directory operations Track and correctly handle directory caps for asynchronous operations. Add aliases for Frc caps that we now designate at Dcu caps (when dealing with directories). Unlike file caps, we don't reclaim these when the session goes away, and instead preemptively release them. In-flight async dirops are instead handled during reconnect phase. The client needs to re-do a synchronous operation in order to re-get directory caps. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 94cc4b047987..91d09cf37649 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -663,6 +663,12 @@ int ceph_flags_to_mode(int flags); #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ CEPH_LOCK_IXATTR) +/* cap masks async dir operations */ +#define CEPH_CAP_DIR_CREATE CEPH_CAP_FILE_CACHE +#define CEPH_CAP_DIR_UNLINK CEPH_CAP_FILE_RD +#define CEPH_CAP_ANY_DIR_OPS (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | \ + CEPH_CAP_FILE_WREXTEND | CEPH_CAP_FILE_LAZYIO) + int ceph_caps_for_mode(int mode); enum { -- cgit v1.2.3 From 9a8d03ca2e2c334d08ee91a3e07dcce31a02fdc6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Nov 2019 12:06:14 -0500 Subject: ceph: attempt to do async create when possible With the Octopus release, the MDS will hand out directory create caps. If we have Fxc caps on the directory, and complete directory information or a known negative dentry, then we can return without waiting on the reply, allowing the open() call to return very quickly to userland. We use the normal ceph_fill_inode() routine to fill in the inode, so we have to gin up some reply inode information with what we'd expect the newly-created inode to have. The client assumes that it has a full set of caps on the new inode, and that the MDS will revoke them when there is conflicting access. This functionality is gated on the wsync/nowsync mount options. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 91d09cf37649..e035c5194005 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -659,6 +659,9 @@ int ceph_flags_to_mode(int flags); #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \ CEPH_CAP_PIN) +#define CEPH_CAP_ALL_FILE (CEPH_CAP_PIN | CEPH_CAP_ANY_SHARED | \ + CEPH_CAP_AUTH_EXCL | CEPH_CAP_XATTR_EXCL | \ + CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR) #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ CEPH_LOCK_IXATTR) -- cgit v1.2.3 From 719a2514e9bf313c3627078926d56bc2a8b290d1 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 5 Mar 2020 20:21:00 +0800 Subject: ceph: consider inode's last read/write when calculating wanted caps Add i_last_rd and i_last_wr to ceph_inode_info. These fields are used to track the last time the client acquired read/write caps for the inode. If there is no read/write on an inode for 'caps_wanted_delay_max' seconds, __ceph_caps_file_wanted() does not request caps for read/write even there are open files. Call __ceph_touch_fmode() for dir operations. __ceph_caps_file_wanted() calculates dir's wanted caps according to last dir read/modification. If there is recent dir read, dir inode wants CEPH_CAP_ANY_SHARED caps. If there is recent dir modification, also wants CEPH_CAP_FILE_EXCL. Readdir is a special case. Dir inode wants CEPH_CAP_FILE_EXCL after readdir, as with that, modifications do not need to release CEPH_CAP_FILE_SHARED or invalidate all dentry leases issued by readdir. Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index e035c5194005..ebf5ba62b772 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -568,6 +568,7 @@ struct ceph_filelock { #define CEPH_FILE_MODE_RDWR 3 /* RD | WR */ #define CEPH_FILE_MODE_LAZY 4 /* lazy io */ #define CEPH_FILE_MODE_BITS 4 +#define CEPH_FILE_MODE_MASK ((1 << CEPH_FILE_MODE_BITS) - 1) int ceph_flags_to_mode(int flags); -- cgit v1.2.3 From 72e0ef0e5f067fd991f702f0b2635d911d0cf208 Mon Sep 17 00:00:00 2001 From: Mikel Rychliski Date: Wed, 18 Mar 2020 22:16:23 -0400 Subject: PCI: Use ioremap(), not phys_to_virt() for platform ROM On some EFI systems, the video BIOS is provided by the EFI firmware. The boot stub code stores the physical address of the ROM image in pdev->rom. Currently we attempt to access this pointer using phys_to_virt(), which doesn't work with CONFIG_HIGHMEM. On these systems, attempting to load the radeon module on a x86_32 kernel can result in the following: BUG: unable to handle page fault for address: 3e8ed03c #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page *pde = 00000000 Oops: 0000 [#1] PREEMPT SMP CPU: 0 PID: 317 Comm: systemd-udevd Not tainted 5.6.0-rc3-next-20200228 #2 Hardware name: Apple Computer, Inc. MacPro1,1/Mac-F4208DC8, BIOS MP11.88Z.005C.B08.0707021221 07/02/07 EIP: radeon_get_bios+0x5ed/0xe50 [radeon] Code: 00 00 84 c0 0f 85 12 fd ff ff c7 87 64 01 00 00 00 00 00 00 8b 47 08 8b 55 b0 e8 1e 83 e1 d6 85 c0 74 1a 8b 55 c0 85 d2 74 13 <80> 38 55 75 0e 80 78 01 aa 0f 84 a4 03 00 00 8d 74 26 00 68 dc 06 EAX: 3e8ed03c EBX: 00000000 ECX: 3e8ed03c EDX: 00010000 ESI: 00040000 EDI: eec04000 EBP: eef3fc60 ESP: eef3fbe0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010206 CR0: 80050033 CR2: 3e8ed03c CR3: 2ec77000 CR4: 000006d0 Call Trace: r520_init+0x26/0x240 [radeon] radeon_device_init+0x533/0xa50 [radeon] radeon_driver_load_kms+0x80/0x220 [radeon] drm_dev_register+0xa7/0x180 [drm] radeon_pci_probe+0x10f/0x1a0 [radeon] pci_device_probe+0xd4/0x140 Fix the issue by updating all drivers which can access a platform provided ROM. Instead of calling the helper function pci_platform_rom() which uses phys_to_virt(), call ioremap() directly on the pdev->rom. radeon_read_platform_bios() previously directly accessed an __iomem pointer. Avoid this by calling memcpy_fromio() instead of kmemdup(). pci_platform_rom() now has no remaining callers, so remove it. Link: https://lore.kernel.org/r/20200319021623.5426-1-mikel@mikelr.com Signed-off-by: Mikel Rychliski Signed-off-by: Bjorn Helgaas Acked-by: Alex Deucher --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 3840a541a9de..7268dcf1f23e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1214,7 +1214,6 @@ int pci_enable_rom(struct pci_dev *pdev); void pci_disable_rom(struct pci_dev *pdev); void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); -void __iomem __must_check *pci_platform_rom(struct pci_dev *pdev, size_t *size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); -- cgit v1.2.3 From 2c8d5a2dc1e335d895dea45164f5d4d850b134da Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Sat, 14 Mar 2020 19:43:55 +0000 Subject: PCI: Add support for root bus sizing In certain cases we should be able to enumerate IO and MEM ranges of all PCI devices installed in the system, and then set respective host bridge apertures basing on calculated size and alignment. Particularly when firmware is broken and fails to assign bridge windows properly, like on Alpha UP1500 platform. Actually, almost everything is already in place, and required changes are minimal: - add "size_windows" flag to struct pci_host_bridge: when set, it instructs __pci_bus_size_bridges() to continue with the root bus; - in the __pci_bus_size_bridges() path: add checks for bus->self, as it can legitimately be null for the root bus. Link: https://lore.kernel.org/r/20200314194355.GA12510@mail.rc.ru Tested-by: Matt Turner Signed-off-by: Ivan Kokshaysky Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7268dcf1f23e..15734731ad87 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -511,6 +511,7 @@ struct pci_host_bridge { unsigned int native_pme:1; /* OS may use PCIe PME */ unsigned int native_ltr:1; /* OS may use PCIe LTR */ unsigned int preserve_config:1; /* Preserve FW resource setup */ + unsigned int size_windows:1; /* Enable root bus sizing */ /* Resource alignment requirements */ resource_size_t (*align_resource)(struct pci_dev *dev, -- cgit v1.2.3 From 3ad1f3a33286dc67d595f6fab3a3a9e583bc738a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 10 Feb 2020 22:35:18 +0100 Subject: pwm: Implement some checks for lowlevel drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some expectations which the callbacks provided by lowlevel drivers should fulfill. Implement checks that help driver authors to get these semantics right. As these have some overhead the checks can be disabled using a Kconfig setting. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 0ef808d925bb..2635b2a55090 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -71,7 +71,8 @@ struct pwm_state { * @chip: PWM chip providing this PWM device * @chip_data: chip-private data associated with the PWM device * @args: PWM arguments - * @state: curent PWM channel state + * @state: last applied state + * @last: last implemented state (for PWM_DEBUG) */ struct pwm_device { const char *label; @@ -83,6 +84,7 @@ struct pwm_device { struct pwm_args args; struct pwm_state state; + struct pwm_state last; }; /** -- cgit v1.2.3 From 54091b5f195b45a9a7d394008c06d2b9646ab126 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 12 Mar 2020 09:52:06 +0530 Subject: pwm: omap-dmtimer: Drop unused header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pwm_omap_dmtimer.h is used only: - to typedef struct omap_dm_timer to pwm_omap_dmtimer - for macro PWM_OMAP_DMTIMER_TRIGGER_OVERFLOW_AND_COMPARE Rest of the file is pretty mush unsed. So reuse omap_dm_timer and OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE in pwm-omap-dmtimer.c and delete the header file. Acked-by: Tony Lindgren Signed-off-by: Lokesh Vutla Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/platform_data/pwm_omap_dmtimer.h | 90 -------------------------- 1 file changed, 90 deletions(-) delete mode 100644 include/linux/platform_data/pwm_omap_dmtimer.h (limited to 'include/linux') diff --git a/include/linux/platform_data/pwm_omap_dmtimer.h b/include/linux/platform_data/pwm_omap_dmtimer.h deleted file mode 100644 index e7d521e48855..000000000000 --- a/include/linux/platform_data/pwm_omap_dmtimer.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * include/linux/platform_data/pwm_omap_dmtimer.h - * - * OMAP Dual-Mode Timer PWM platform data - * - * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/ - * Tarun Kanti DebBarma - * Thara Gopinath - * - * Platform device conversion and hwmod support. - * - * Copyright (C) 2005 Nokia Corporation - * Author: Lauri Leukkunen - * PWM and clock framework support by Timo Teras. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __PWM_OMAP_DMTIMER_PDATA_H -#define __PWM_OMAP_DMTIMER_PDATA_H - -/* clock sources */ -#define PWM_OMAP_DMTIMER_SRC_SYS_CLK 0x00 -#define PWM_OMAP_DMTIMER_SRC_32_KHZ 0x01 -#define PWM_OMAP_DMTIMER_SRC_EXT_CLK 0x02 - -/* timer interrupt enable bits */ -#define PWM_OMAP_DMTIMER_INT_CAPTURE (1 << 2) -#define PWM_OMAP_DMTIMER_INT_OVERFLOW (1 << 1) -#define PWM_OMAP_DMTIMER_INT_MATCH (1 << 0) - -/* trigger types */ -#define PWM_OMAP_DMTIMER_TRIGGER_NONE 0x00 -#define PWM_OMAP_DMTIMER_TRIGGER_OVERFLOW 0x01 -#define PWM_OMAP_DMTIMER_TRIGGER_OVERFLOW_AND_COMPARE 0x02 - -struct omap_dm_timer; -typedef struct omap_dm_timer pwm_omap_dmtimer; - -struct pwm_omap_dmtimer_pdata { - pwm_omap_dmtimer *(*request_by_node)(struct device_node *np); - pwm_omap_dmtimer *(*request_specific)(int timer_id); - pwm_omap_dmtimer *(*request)(void); - - int (*free)(pwm_omap_dmtimer *timer); - - void (*enable)(pwm_omap_dmtimer *timer); - void (*disable)(pwm_omap_dmtimer *timer); - - int (*get_irq)(pwm_omap_dmtimer *timer); - int (*set_int_enable)(pwm_omap_dmtimer *timer, unsigned int value); - int (*set_int_disable)(pwm_omap_dmtimer *timer, u32 mask); - - struct clk *(*get_fclk)(pwm_omap_dmtimer *timer); - - int (*start)(pwm_omap_dmtimer *timer); - int (*stop)(pwm_omap_dmtimer *timer); - int (*set_source)(pwm_omap_dmtimer *timer, int source); - - int (*set_load)(pwm_omap_dmtimer *timer, int autoreload, - unsigned int value); - int (*set_match)(pwm_omap_dmtimer *timer, int enable, - unsigned int match); - int (*set_pwm)(pwm_omap_dmtimer *timer, int def_on, - int toggle, int trigger); - int (*set_prescaler)(pwm_omap_dmtimer *timer, int prescaler); - - unsigned int (*read_counter)(pwm_omap_dmtimer *timer); - int (*write_counter)(pwm_omap_dmtimer *timer, unsigned int value); - unsigned int (*read_status)(pwm_omap_dmtimer *timer); - int (*write_status)(pwm_omap_dmtimer *timer, unsigned int value); -}; - -#endif /* __PWM_OMAP_DMTIMER_PDATA_H */ -- cgit v1.2.3 From 62582a7ee78364c6106d09d5e0f1dc7f564be887 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 29 Mar 2020 07:55:10 -0700 Subject: ptp: Avoid deadlocks in the programmable pin code. The PTP Hardware Clock (PHC) subsystem offers an API for configuring programmable pins. User space sets or gets the settings using ioctls, and drivers verify dialed settings via a callback. Drivers may also query pin settings by calling the ptp_find_pin() method. Although the core subsystem protects concurrent access to the pin settings, the implementation places illogical restrictions on how drivers may call ptp_find_pin(). When enabling an auxiliary function via the .enable(on=1) callback, drivers may invoke the pin finding method, but when disabling with .enable(on=0) drivers are not permitted to do so. With the exception of the mv88e6xxx, all of the PHC drivers do respect this restriction, but still the locking pattern is both confusing and unnecessary. This patch changes the locking implementation to allow PHC drivers to freely call ptp_find_pin() from their .enable() and .verify() callbacks. V2 ChangeLog: - fixed spelling in the kernel doc - add Vladimir's tested by tag Signed-off-by: Richard Cochran Reported-by: Yangbo Lu Tested-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index c64a1ef87240..121a7eda4593 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -223,6 +223,12 @@ extern s32 scaled_ppm_to_ppb(long ppm); /** * ptp_find_pin() - obtain the pin index of a given auxiliary function * + * The caller must hold ptp_clock::pincfg_mux. Drivers do not have + * access to that mutex as ptp_clock is an opaque type. However, the + * core code acquires the mutex before invoking the driver's + * ptp_clock_info::enable() callback, and so drivers may call this + * function from that context. + * * @ptp: The clock obtained from ptp_clock_register(). * @func: One of the ptp_pin_function enumerated values. * @chan: The particular functional channel to find. @@ -233,6 +239,19 @@ extern s32 scaled_ppm_to_ppb(long ppm); int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan); +/** + * ptp_find_pin_unlocked() - wrapper for ptp_find_pin() + * + * This function acquires the ptp_clock::pincfg_mux mutex before + * invoking ptp_find_pin(). Instead of using this function, drivers + * should most likely call ptp_find_pin() directly from their + * ptp_clock_info::enable() method. + * + */ + +int ptp_find_pin_unlocked(struct ptp_clock *ptp, + enum ptp_pin_function func, unsigned int chan); + /** * ptp_schedule_worker() - schedule ptp auxiliary work * -- cgit v1.2.3 From 8063f761cd7c17fc1d0018728936e0c33a25388a Mon Sep 17 00:00:00 2001 From: Yuval Basson Date: Sun, 29 Mar 2020 20:32:49 +0300 Subject: qed: Fix use after free in qed_chain_free The qed_chain data structure was modified in commit 1a4a69751f4d ("qed: Chain support for external PBL") to support receiving an external pbl (due to iWARP FW requirements). The pages pointed to by the pbl are allocated in qed_chain_alloc and their virtual address are stored in an virtual addresses array to enable accessing and freeing the data. The physical addresses however weren't stored and were accessed directly from the external-pbl during free. Destroy-qp flow, leads to freeing the external pbl before the chain is freed, when the chain is freed it tries accessing the already freed external pbl, leading to a use-after-free. Therefore we need to store the physical addresses in additional to the virtual addresses in a new data structure. Fixes: 1a4a69751f4d ("qed: Chain support for external PBL") Signed-off-by: Michal Kalderon Signed-off-by: Yuval Bason Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 2dd0a9ed5b36..733fad7dfbed 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -97,6 +97,11 @@ struct qed_chain_u32 { u32 cons_idx; }; +struct addr_tbl_entry { + void *virt_addr; + dma_addr_t dma_map; +}; + struct qed_chain { /* fastpath portion of the chain - required for commands such * as produce / consume. @@ -107,10 +112,11 @@ struct qed_chain { /* Fastpath portions of the PBL [if exists] */ struct { - /* Table for keeping the virtual addresses of the chain pages, - * respectively to the physical addresses in the pbl table. + /* Table for keeping the virtual and physical addresses of the + * chain pages, respectively to the physical addresses + * in the pbl table. */ - void **pp_virt_addr_tbl; + struct addr_tbl_entry *pp_addr_tbl; union { struct qed_chain_pbl_u16 u16; @@ -287,7 +293,7 @@ qed_chain_advance_page(struct qed_chain *p_chain, *(u32 *)page_to_inc = 0; page_index = *(u32 *)page_to_inc; } - *p_next_elem = p_chain->pbl.pp_virt_addr_tbl[page_index]; + *p_next_elem = p_chain->pbl.pp_addr_tbl[page_index].virt_addr; } } @@ -537,7 +543,7 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain, p_chain->pbl_sp.p_phys_table = 0; p_chain->pbl_sp.p_virt_table = NULL; - p_chain->pbl.pp_virt_addr_tbl = NULL; + p_chain->pbl.pp_addr_tbl = NULL; } /** @@ -575,11 +581,11 @@ static inline void qed_chain_init_mem(struct qed_chain *p_chain, static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, void *p_virt_pbl, dma_addr_t p_phys_pbl, - void **pp_virt_addr_tbl) + struct addr_tbl_entry *pp_addr_tbl) { p_chain->pbl_sp.p_phys_table = p_phys_pbl; p_chain->pbl_sp.p_virt_table = p_virt_pbl; - p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl; + p_chain->pbl.pp_addr_tbl = pp_addr_tbl; } /** @@ -644,7 +650,7 @@ static inline void *qed_chain_get_last_elem(struct qed_chain *p_chain) break; case QED_CHAIN_MODE_PBL: last_page_idx = p_chain->page_cnt - 1; - p_virt_addr = p_chain->pbl.pp_virt_addr_tbl[last_page_idx]; + p_virt_addr = p_chain->pbl.pp_addr_tbl[last_page_idx].virt_addr; break; } /* p_virt_addr points at this stage to the last page of the chain */ @@ -716,7 +722,7 @@ static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain) page_cnt = qed_chain_get_page_cnt(p_chain); for (i = 0; i < page_cnt; i++) - memset(p_chain->pbl.pp_virt_addr_tbl[i], 0, + memset(p_chain->pbl.pp_addr_tbl[i].virt_addr, 0, QED_CHAIN_PAGE_SIZE); } -- cgit v1.2.3 From 0bd274060a0f49d974b7e88fa87b6e1c1f496a48 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 30 Mar 2020 18:44:44 +0100 Subject: net: phylink: change phylink_mii_c22_pcs_set_advertisement() prototype Change phylink_mii_c22_pcs_set_advertisement() to take only the PHY interface and advertisement mask, rather than the full phylink state. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 8fa6df3b881b..6f6ecf3e0be1 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -320,7 +320,8 @@ void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, struct phylink_link_state *state); int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs, - const struct phylink_link_state *state); + phy_interface_t interface, + const unsigned long *advertising); void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs, -- cgit v1.2.3 From e7765d634aaa9dd5db3cb59155269ef6c18d4592 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 30 Mar 2020 18:44:50 +0100 Subject: net: phylink: rename 'ops' to 'mac_ops' Rename the bland 'ops' member of struct phylink to be a more descriptive 'mac_ops' - this is necessary as we're about to introduce another set of operations. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 6f6ecf3e0be1..90c907eaae15 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -272,7 +272,7 @@ void mac_link_up(struct phylink_config *config, struct phy_device *phy, struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, - const struct phylink_mac_ops *ops); + const struct phylink_mac_ops *mac_ops); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); -- cgit v1.2.3 From 4c0d6d3a7a81fcd2dcb4abf15fe2e13074cf8619 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 30 Mar 2020 18:44:55 +0100 Subject: net: phylink: add separate pcs operations structure Add a separate set of PCS operations, which MAC drivers can use to couple phylink with their associated MAC PCS layer. The PCS operations include: - pcs_get_state() - reads the link up/down, resolved speed, duplex and pause from the PCS. - pcs_config() - configures the PCS for the specified mode, PHY interface type, and setting the advertisement. - pcs_an_restart() - restarts 802.3 in-band negotiation with the link partner - pcs_link_up() - informs the PCS that link has come up, and the parameters of the link. Link parameters are used to program the PCS for fixed speed and non-inband modes. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 90c907eaae15..3f8d37ec5503 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -270,9 +270,97 @@ void mac_link_up(struct phylink_config *config, struct phy_device *phy, int speed, int duplex, bool tx_pause, bool rx_pause); #endif +/** + * struct phylink_pcs_ops - MAC PCS operations structure. + * @pcs_get_state: read the current MAC PCS link state from the hardware. + * @pcs_config: configure the MAC PCS for the selected mode and state. + * @pcs_an_restart: restart 802.3z BaseX autonegotiation. + * @pcs_link_up: program the PCS for the resolved link configuration + * (where necessary). + */ +struct phylink_pcs_ops { + void (*pcs_get_state)(struct phylink_config *config, + struct phylink_link_state *state); + int (*pcs_config)(struct phylink_config *config, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising); + void (*pcs_an_restart)(struct phylink_config *config); + void (*pcs_link_up)(struct phylink_config *config, unsigned int mode, + phy_interface_t interface, int speed, int duplex); +}; + +#if 0 /* For kernel-doc purposes only. */ +/** + * pcs_get_state() - Read the current inband link state from the hardware + * @config: a pointer to a &struct phylink_config. + * @state: a pointer to a &struct phylink_link_state. + * + * Read the current inband link state from the MAC PCS, reporting the + * current speed in @state->speed, duplex mode in @state->duplex, pause + * mode in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits, + * negotiation completion state in @state->an_complete, and link up state + * in @state->link. If possible, @state->lp_advertising should also be + * populated. + * + * When present, this overrides mac_pcs_get_state() in &struct + * phylink_mac_ops. + */ +void pcs_get_state(struct phylink_config *config, + struct phylink_link_state *state); + +/** + * pcs_config() - Configure the PCS mode and advertisement + * @config: a pointer to a &struct phylink_config. + * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. + * @interface: interface mode to be used + * @advertising: adertisement ethtool link mode mask + * + * Configure the PCS for the operating mode, the interface mode, and set + * the advertisement mask. + * + * When operating in %MLO_AN_INBAND, inband should always be enabled, + * otherwise inband should be disabled. + * + * For SGMII, there is no advertisement from the MAC side, the PCS should + * be programmed to acknowledge the inband word from the PHY. + * + * For 1000BASE-X, the advertisement should be programmed into the PCS. + * + * For most 10GBASE-R, there is no advertisement. + */ +int (*pcs_config)(struct phylink_config *config, unsigned int mode, + phy_interface_t interface, const unsigned long *advertising); + +/** + * pcs_an_restart() - restart 802.3z BaseX autonegotiation + * @config: a pointer to a &struct phylink_config. + * + * When PCS ops are present, this overrides mac_an_restart() in &struct + * phylink_mac_ops. + */ +void (*pcs_an_restart)(struct phylink_config *config); + +/** + * pcs_link_up() - program the PCS for the resolved link configuration + * @config: a pointer to a &struct phylink_config. + * @mode: link autonegotiation mode + * @interface: link &typedef phy_interface_t mode + * @speed: link speed + * @duplex: link duplex + * + * This call will be made just before mac_link_up() to inform the PCS of + * the resolved link parameters. For example, a PCS operating in SGMII + * mode without in-band AN needs to be manually configured for the link + * and duplex setting. Otherwise, this should be a no-op. + */ +void (*pcs_link_up)(struct phylink_config *config, unsigned int mode, + phy_interface_t interface, int speed, int duplex); +#endif + struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); +void phylink_add_pcs(struct phylink *, const struct phylink_pcs_ops *ops); void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); -- cgit v1.2.3 From 3f50f132d8400e129fc9eb68b5020167ef80a244 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 30 Mar 2020 14:36:39 -0700 Subject: bpf: Verifier, do explicit ALU32 bounds tracking It is not possible for the current verifier to track ALU32 and JMP ops correctly. This can result in the verifier aborting with errors even though the program should be verifiable. BPF codes that hit this can work around it by changin int variables to 64-bit types, marking variables volatile, etc. But this is all very ugly so it would be better to avoid these tricks. But, the main reason to address this now is do_refine_retval_range() was assuming return values could not be negative. Once we fixed this code that was previously working will no longer work. See do_refine_retval_range() patch for details. And we don't want to suddenly cause programs that used to work to fail. The simplest example code snippet that illustrates the problem is likely this, 53: w8 = w0 // r8 <- [0, S32_MAX], // w8 <- [-S32_MIN, X] 54: w8 64-bit 2. MOV ALU64 - copy 64-bit -> 32-bit 3. op ALU32 - zext 32-bit -> 64-bit 4. op ALU64 - n/a 5. jmp ALU32 - 64-bit: var32_off | upper_32_bits(var64_off) 6. jmp ALU64 - 32-bit: (>> (<< var64_off)) Details for each case, For "MOV ALU32" BPF arch zero extends so we simply copy the bounds from 32-bit into 64-bit ensuring we truncate var_off and 64-bit bounds correctly. See zext_32_to_64. For "MOV ALU64" copy all bounds including 32-bit into new register. If the src register had 32-bit bounds the dst register will as well. For "op ALU32" zero extend 32-bit into 64-bit the same as move, see zext_32_to_64. For "op ALU64" calculate both 32-bit and 64-bit bounds no merging is done here. Except we have a special case. When RSH or ARSH is done we can't simply ignore shifting bits from 64-bit reg into the 32-bit subreg. So currently just push bounds from 64-bit into 32-bit. This will be correct in the sense that they will represent a valid state of the register. However we could lose some accuracy if an ARSH is following a jmp32 operation. We can handle this special case in a follow up series. For "jmp ALU32" mark 64-bit reg unknown and recalculate 64-bit bounds from tnum by setting var_off to ((<<(>>var_off)) | var32_off). We special case if 64-bit bounds has zero'd upper 32bits at which point we can simply copy 32-bit bounds into 64-bit register. This catches a common compiler trick where upper 32-bits are zeroed and then 32-bit ops are used followed by a 64-bit compare or 64-bit op on a pointer. See __reg_combine_64_into_32(). For "jmp ALU64" cast the bounds of the 64bit to their 32-bit counterpart. For example s32_min_value = (s32)reg->smin_value. For tnum use only the lower 32bits via, (>>(< Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/158560419880.10843.11448220440809118343.stgit@john-Precision-5820-Tower --- include/linux/bpf_verifier.h | 4 ++++ include/linux/limits.h | 1 + include/linux/tnum.h | 12 ++++++++++++ 3 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5406e6e96585..6abd5a778fcd 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -123,6 +123,10 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + s32 s32_min_value; /* minimum possible (s32)value */ + s32 s32_max_value; /* maximum possible (s32)value */ + u32 u32_min_value; /* minimum possible (u32)value */ + u32 u32_max_value; /* maximum possible (u32)value */ /* parentage chain for liveness checking */ struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like diff --git a/include/linux/limits.h b/include/linux/limits.h index 76afcd24ff8c..0d3de82dd354 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -27,6 +27,7 @@ #define S16_MAX ((s16)(U16_MAX >> 1)) #define S16_MIN ((s16)(-S16_MAX - 1)) #define U32_MAX ((u32)~0U) +#define U32_MIN ((u32)0) #define S32_MAX ((s32)(U32_MAX >> 1)) #define S32_MIN ((s32)(-S32_MAX - 1)) #define U64_MAX ((u64)~0ULL) diff --git a/include/linux/tnum.h b/include/linux/tnum.h index ea627d1ab7e3..498dbcedb451 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -86,4 +86,16 @@ int tnum_strn(char *str, size_t size, struct tnum a); /* Format a tnum as tristate binary expansion */ int tnum_sbin(char *str, size_t size, struct tnum a); +/* Returns the 32-bit subreg */ +struct tnum tnum_subreg(struct tnum a); +/* Returns the tnum with the lower 32-bit subreg cleared */ +struct tnum tnum_clear_subreg(struct tnum a); +/* Returns the tnum with the lower 32-bit subreg set to value */ +struct tnum tnum_const_subreg(struct tnum a, u32 value); +/* Returns true if 32-bit subreg @a is a known constant*/ +static inline bool tnum_subreg_is_const(struct tnum a) +{ + return !(tnum_subreg(a)).mask; +} + #endif /* _LINUX_TNUM_H */ -- cgit v1.2.3 From 779df6a5480f1307d51b66ea72352be592265cad Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 3 Mar 2020 17:58:37 -0500 Subject: NFS: Ensure security label is set for root inode When using NFSv4.2, the security label for the root inode should be set via a call to nfs_setsecurity() during the mount process, otherwise the inode will appear as unlabeled for up to acdirmin seconds. Currently the label for the root inode is allocated, retrieved, and freed entirely witin nfs4_proc_get_root(). Add a field for the label to the nfs_fattr struct, and allocate & free the label in nfs_get_root(), where we also add a call to nfs_setsecurity(). Note that for the call to nfs_setsecurity() to succeed, it's necessary to also move the logic calling security_sb_{set,clone}_security() from nfs_get_tree_common() down into nfs_get_root()... otherwise the SBLABEL_MNT flag will not be set in the super_block's security flags and nfs_setsecurity() will silently fail. Reported-by: Richard Haines Signed-off-by: Scott Mayhew Acked-by: Stephen Smalley Tested-by: Stephen Smalley [PM: fixed 80-char line width problems] Signed-off-by: Paul Moore --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 94c77ed55ce1..6838c149f335 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -75,6 +75,7 @@ struct nfs_fattr { struct nfs4_string *owner_name; struct nfs4_string *group_name; struct nfs4_threshold *mdsthreshold; /* pNFS threshold hints */ + struct nfs4_label *label; }; #define NFS_ATTR_FATTR_TYPE (1U << 0) -- cgit v1.2.3 From af6eea57437a830293eab56246b6025cc7d46ee7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 29 Mar 2020 19:59:58 -0700 Subject: bpf: Implement bpf_link-based cgroup BPF program attachment Implement new sub-command to attach cgroup BPF programs and return FD-based bpf_link back on success. bpf_link, once attached to cgroup, cannot be replaced, except by owner having its FD. Cgroup bpf_link supports only BPF_F_ALLOW_MULTI semantics. Both link-based and prog-based BPF_F_ALLOW_MULTI attachments can be freely intermixed. To prevent bpf_cgroup_link from keeping cgroup alive past the point when no BPF program can be executed, implement auto-detachment of link. When cgroup_bpf_release() is called, all attached bpf_links are forced to release cgroup refcounts, but they leave bpf_link otherwise active and allocated, as well as still owning underlying bpf_prog. This is because user-space might still have FDs open and active, so bpf_link as a user-referenced object can't be freed yet. Once last active FD is closed, bpf_link will be freed and underlying bpf_prog refcount will be dropped. But cgroup refcount won't be touched, because cgroup is released already. The inherent race between bpf_cgroup_link release (from closing last FD) and cgroup_bpf_release() is resolved by both operations taking cgroup_mutex. So the only additional check required is when bpf_cgroup_link attempts to detach itself from cgroup. At that time we need to check whether there is still cgroup associated with that link. And if not, exit with success, because bpf_cgroup_link was already successfully detached. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20200330030001.2312810-2-andriin@fb.com --- include/linux/bpf-cgroup.h | 29 ++++++++++++++++++++++++----- include/linux/bpf.h | 10 +++++++++- 2 files changed, 33 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a7cd5c7a2509..d2d969669564 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -51,9 +51,18 @@ struct bpf_cgroup_storage { struct rcu_head rcu; }; +struct bpf_cgroup_link { + struct bpf_link link; + struct cgroup *cgroup; + enum bpf_attach_type type; +}; + +extern const struct bpf_link_ops bpf_cgroup_link_lops; + struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; + struct bpf_cgroup_link *link; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; }; @@ -84,20 +93,23 @@ struct cgroup_bpf { int cgroup_bpf_inherit(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp); -int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_prog *replace_prog, +int __cgroup_bpf_attach(struct cgroup *cgrp, + struct bpf_prog *prog, struct bpf_prog *replace_prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ -int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_prog *replace_prog, enum bpf_attach_type type, +int cgroup_bpf_attach(struct cgroup *cgrp, + struct bpf_prog *prog, struct bpf_prog *replace_prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + enum bpf_attach_type type); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -332,6 +344,7 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); +int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); #else @@ -354,6 +367,12 @@ static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, return -EINVAL; } +static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EINVAL; +} + static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3bde59a8453b..56254d880293 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1082,15 +1082,23 @@ extern int sysctl_unprivileged_bpf_disabled; int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); -struct bpf_link; +struct bpf_link { + atomic64_t refcnt; + const struct bpf_link_ops *ops; + struct bpf_prog *prog; + struct work_struct work; +}; struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); + }; void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, struct bpf_prog *prog); +void bpf_link_cleanup(struct bpf_link *link, struct file *link_file, + int link_fd); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -- cgit v1.2.3 From 0c991ebc8c69d29b7fc44db17075c5aa5253e2ab Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 29 Mar 2020 19:59:59 -0700 Subject: bpf: Implement bpf_prog replacement for an active bpf_cgroup_link Add new operation (LINK_UPDATE), which allows to replace active bpf_prog from under given bpf_link. Currently this is only supported for bpf_cgroup_link, but will be extended to other kinds of bpf_links in follow-up patches. For bpf_cgroup_link, implemented functionality matches existing semantics for direct bpf_prog attachment (including BPF_F_REPLACE flag). User can either unconditionally set new bpf_prog regardless of which bpf_prog is currently active under given bpf_link, or, optionally, can specify expected active bpf_prog. If active bpf_prog doesn't match expected one, no changes are performed, old bpf_link stays intact and attached, operation returns a failure. cgroup_bpf_replace() operation is resolving race between auto-detachment and bpf_prog update in the same fashion as it's done for bpf_link detachment, except in this case update has no way of succeeding because of target cgroup marked as dying. So in this case error is returned. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200330030001.2312810-3-andriin@fb.com --- include/linux/bpf-cgroup.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d2d969669564..c11b413d5b1a 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -100,6 +100,8 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_cgroup_link *link, enum bpf_attach_type type); +int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, + struct bpf_prog *new_prog); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -110,6 +112,8 @@ int cgroup_bpf_attach(struct cgroup *cgrp, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); +int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog, + struct bpf_prog *new_prog); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -350,6 +354,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, #else struct bpf_prog; +struct bpf_link; struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} @@ -373,6 +378,13 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, return -EINVAL; } +static inline int cgroup_bpf_replace(struct bpf_link *link, + struct bpf_prog *old_prog, + struct bpf_prog *new_prog) +{ + return -EINVAL; +} + static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { -- cgit v1.2.3 From b990408537388e9174b642ad36cdef6c47c64d3a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 21 Mar 2020 13:25:55 -0700 Subject: KVM: Pass kvm_init()'s opaque param to additional arch funcs Pass @opaque to kvm_arch_hardware_setup() and kvm_arch_check_processor_compat() to allow architecture specific code to reference @opaque without having to stash it away in a temporary global variable. This will enable x86 to separate its vendor specific callback ops, which are passed via @opaque, into "init" and "runtime" ops without having to stash away the "init" ops. No functional change intended. Reviewed-by: Cornelia Huck Tested-by: Cornelia Huck #s390 Acked-by: Marc Zyngier Signed-off-by: Sean Christopherson Message-Id: <20200321202603.19355-2-sean.j.christopherson@intel.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f6a1905da9bf..6d58beb65454 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -886,9 +886,9 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); -int kvm_arch_hardware_setup(void); +int kvm_arch_hardware_setup(void *opaque); void kvm_arch_hardware_unsetup(void); -int kvm_arch_check_processor_compat(void); +int kvm_arch_check_processor_compat(void *opaque); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From a0b66a73785ccc8fedbff00383ffe814df9f63c7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 29 Mar 2020 16:04:05 +0200 Subject: gpio: Rename variable in core APIs There is struct gpio *gc, *chip and *gpiochip, and yes I am responsible for some of the inconsistencies. I want this to be just gc everywhere for minimizing cognitive resistance when reading the code: more compact function signatures and less clutter. Purely syntactic changes intended. No semantic effects. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20200329140405.52276-1-linus.walleij@linaro.org Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 138 ++++++++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index ed65e00ee977..b8fc92c177eb 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -87,7 +87,7 @@ struct gpio_irq_chip { * @need_valid_mask to make these GPIO lines unavailable for * translation. */ - int (*child_to_parent_hwirq)(struct gpio_chip *chip, + int (*child_to_parent_hwirq)(struct gpio_chip *gc, unsigned int child_hwirq, unsigned int child_type, unsigned int *parent_hwirq, @@ -102,7 +102,7 @@ struct gpio_irq_chip { * variant named &gpiochip_populate_parent_fwspec_fourcell is also * available. */ - void *(*populate_parent_alloc_arg)(struct gpio_chip *chip, + void *(*populate_parent_alloc_arg)(struct gpio_chip *gc, unsigned int parent_hwirq, unsigned int parent_type); @@ -114,7 +114,7 @@ struct gpio_irq_chip { * callback. If this is not specified, then a default callback will be * provided that returns the line offset. */ - unsigned int (*child_offset_to_irq)(struct gpio_chip *chip, + unsigned int (*child_offset_to_irq)(struct gpio_chip *gc, unsigned int pin); /** @@ -209,7 +209,7 @@ struct gpio_irq_chip { * a particular driver wants to clear IRQ related registers * in order to avoid undesired events. */ - int (*init_hw)(struct gpio_chip *chip); + int (*init_hw)(struct gpio_chip *gc); /** * @init_valid_mask: optional routine to initialize @valid_mask, to be @@ -220,7 +220,7 @@ struct gpio_irq_chip { * then directly set some bits to "0" if they cannot be used for * interrupts. */ - void (*init_valid_mask)(struct gpio_chip *chip, + void (*init_valid_mask)(struct gpio_chip *gc, unsigned long *valid_mask, unsigned int ngpios); @@ -348,40 +348,40 @@ struct gpio_chip { struct device *parent; struct module *owner; - int (*request)(struct gpio_chip *chip, + int (*request)(struct gpio_chip *gc, unsigned offset); - void (*free)(struct gpio_chip *chip, + void (*free)(struct gpio_chip *gc, unsigned offset); - int (*get_direction)(struct gpio_chip *chip, + int (*get_direction)(struct gpio_chip *gc, unsigned offset); - int (*direction_input)(struct gpio_chip *chip, + int (*direction_input)(struct gpio_chip *gc, unsigned offset); - int (*direction_output)(struct gpio_chip *chip, + int (*direction_output)(struct gpio_chip *gc, unsigned offset, int value); - int (*get)(struct gpio_chip *chip, + int (*get)(struct gpio_chip *gc, unsigned offset); - int (*get_multiple)(struct gpio_chip *chip, + int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - void (*set)(struct gpio_chip *chip, + void (*set)(struct gpio_chip *gc, unsigned offset, int value); - void (*set_multiple)(struct gpio_chip *chip, + void (*set_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - int (*set_config)(struct gpio_chip *chip, + int (*set_config)(struct gpio_chip *gc, unsigned offset, unsigned long config); - int (*to_irq)(struct gpio_chip *chip, + int (*to_irq)(struct gpio_chip *gc, unsigned offset); void (*dbg_show)(struct seq_file *s, - struct gpio_chip *chip); + struct gpio_chip *gc); - int (*init_valid_mask)(struct gpio_chip *chip, + int (*init_valid_mask)(struct gpio_chip *gc, unsigned long *valid_mask, unsigned int ngpios); - int (*add_pin_ranges)(struct gpio_chip *chip); + int (*add_pin_ranges)(struct gpio_chip *gc); int base; u16 ngpio; @@ -458,11 +458,11 @@ struct gpio_chip { #endif /* CONFIG_OF_GPIO */ }; -extern const char *gpiochip_is_requested(struct gpio_chip *chip, +extern const char *gpiochip_is_requested(struct gpio_chip *gc, unsigned offset); /* add/remove chips */ -extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, +extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key); @@ -490,43 +490,43 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, * Otherwise it returns zero as a success code. */ #ifdef CONFIG_LOCKDEP -#define gpiochip_add_data(chip, data) ({ \ +#define gpiochip_add_data(gc, data) ({ \ static struct lock_class_key lock_key; \ static struct lock_class_key request_key; \ - gpiochip_add_data_with_key(chip, data, &lock_key, \ + gpiochip_add_data_with_key(gc, data, &lock_key, \ &request_key); \ }) #else -#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL) +#define gpiochip_add_data(gc, data) gpiochip_add_data_with_key(gc, data, NULL, NULL) #endif /* CONFIG_LOCKDEP */ -static inline int gpiochip_add(struct gpio_chip *chip) +static inline int gpiochip_add(struct gpio_chip *gc) { - return gpiochip_add_data(chip, NULL); + return gpiochip_add_data(gc, NULL); } -extern void gpiochip_remove(struct gpio_chip *chip); -extern int devm_gpiochip_add_data(struct device *dev, struct gpio_chip *chip, +extern void gpiochip_remove(struct gpio_chip *gc); +extern int devm_gpiochip_add_data(struct device *dev, struct gpio_chip *gc, void *data); extern struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *chip, void *data)); + int (*match)(struct gpio_chip *gc, void *data)); -bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset); -int gpiochip_reqres_irq(struct gpio_chip *chip, unsigned int offset); -void gpiochip_relres_irq(struct gpio_chip *chip, unsigned int offset); -void gpiochip_disable_irq(struct gpio_chip *chip, unsigned int offset); -void gpiochip_enable_irq(struct gpio_chip *chip, unsigned int offset); +bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); +int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); +void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); +void gpiochip_disable_irq(struct gpio_chip *gc, unsigned int offset); +void gpiochip_enable_irq(struct gpio_chip *gc, unsigned int offset); /* Line status inquiry for drivers */ -bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset); -bool gpiochip_line_is_open_source(struct gpio_chip *chip, unsigned int offset); +bool gpiochip_line_is_open_drain(struct gpio_chip *gc, unsigned int offset); +bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset); /* Sleep persistence inquiry for drivers */ -bool gpiochip_line_is_persistent(struct gpio_chip *chip, unsigned int offset); -bool gpiochip_line_is_valid(const struct gpio_chip *chip, unsigned int offset); +bool gpiochip_line_is_persistent(struct gpio_chip *gc, unsigned int offset); +bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset); /* get driver data */ -void *gpiochip_get_data(struct gpio_chip *chip); +void *gpiochip_get_data(struct gpio_chip *gc); struct bgpio_pdata { const char *label; @@ -536,23 +536,23 @@ struct bgpio_pdata { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *chip, +void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, unsigned int parent_hwirq, unsigned int parent_type); -void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip, +void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, unsigned int parent_hwirq, unsigned int parent_type); #else -static inline void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *chip, +static inline void *gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, unsigned int parent_hwirq, unsigned int parent_type) { return NULL; } -static inline void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip, +static inline void *gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, unsigned int parent_hwirq, unsigned int parent_type) { @@ -583,11 +583,11 @@ int gpiochip_irq_domain_activate(struct irq_domain *domain, void gpiochip_irq_domain_deactivate(struct irq_domain *domain, struct irq_data *data); -void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip, +void gpiochip_set_nested_irqchip(struct gpio_chip *gc, struct irq_chip *irqchip, unsigned int parent_irq); -int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, +int gpiochip_irqchip_add_key(struct gpio_chip *gc, struct irq_chip *irqchip, unsigned int first_irq, irq_flow_handler_t handler, @@ -596,7 +596,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, struct lock_class_key *lock_key, struct lock_class_key *request_key); -bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gpiochip, +bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, unsigned int offset); #ifdef CONFIG_LOCKDEP @@ -607,7 +607,7 @@ bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gpiochip, * boilerplate static inlines provides such a key for each * unique instance. */ -static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, +static inline int gpiochip_irqchip_add(struct gpio_chip *gc, struct irq_chip *irqchip, unsigned int first_irq, irq_flow_handler_t handler, @@ -616,12 +616,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, static struct lock_class_key lock_key; static struct lock_class_key request_key; - return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + return gpiochip_irqchip_add_key(gc, irqchip, first_irq, handler, type, false, &lock_key, &request_key); } -static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, +static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gc, struct irq_chip *irqchip, unsigned int first_irq, irq_flow_handler_t handler, @@ -631,35 +631,35 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, static struct lock_class_key lock_key; static struct lock_class_key request_key; - return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + return gpiochip_irqchip_add_key(gc, irqchip, first_irq, handler, type, true, &lock_key, &request_key); } #else /* ! CONFIG_LOCKDEP */ -static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, +static inline int gpiochip_irqchip_add(struct gpio_chip *gc, struct irq_chip *irqchip, unsigned int first_irq, irq_flow_handler_t handler, unsigned int type) { - return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + return gpiochip_irqchip_add_key(gc, irqchip, first_irq, handler, type, false, NULL, NULL); } -static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, +static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gc, struct irq_chip *irqchip, unsigned int first_irq, irq_flow_handler_t handler, unsigned int type) { - return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + return gpiochip_irqchip_add_key(gc, irqchip, first_irq, handler, type, true, NULL, NULL); } #endif /* CONFIG_LOCKDEP */ -int gpiochip_generic_request(struct gpio_chip *chip, unsigned offset); -void gpiochip_generic_free(struct gpio_chip *chip, unsigned offset); -int gpiochip_generic_config(struct gpio_chip *chip, unsigned offset, +int gpiochip_generic_request(struct gpio_chip *gc, unsigned offset); +void gpiochip_generic_free(struct gpio_chip *gc, unsigned offset); +int gpiochip_generic_config(struct gpio_chip *gc, unsigned offset, unsigned long config); /** @@ -676,25 +676,25 @@ struct gpio_pin_range { #ifdef CONFIG_PINCTRL -int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, +int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, unsigned int npins); -int gpiochip_add_pingroup_range(struct gpio_chip *chip, +int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group); -void gpiochip_remove_pin_ranges(struct gpio_chip *chip); +void gpiochip_remove_pin_ranges(struct gpio_chip *gc); #else /* ! CONFIG_PINCTRL */ static inline int -gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, +gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, unsigned int npins) { return 0; } static inline int -gpiochip_add_pingroup_range(struct gpio_chip *chip, +gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group) { @@ -702,27 +702,27 @@ gpiochip_add_pingroup_range(struct gpio_chip *chip, } static inline void -gpiochip_remove_pin_ranges(struct gpio_chip *chip) +gpiochip_remove_pin_ranges(struct gpio_chip *gc) { } #endif /* CONFIG_PINCTRL */ -struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, +struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, unsigned int hwnum, const char *label, enum gpio_lookup_flags lflags, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); -void devprop_gpiochip_set_names(struct gpio_chip *chip, +void devprop_gpiochip_set_names(struct gpio_chip *gc, const struct fwnode_handle *fwnode); #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ -int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset); -void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); +int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); +void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); @@ -736,14 +736,14 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) return ERR_PTR(-ENODEV); } -static inline int gpiochip_lock_as_irq(struct gpio_chip *chip, +static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { WARN_ON(1); return -EINVAL; } -static inline void gpiochip_unlock_as_irq(struct gpio_chip *chip, +static inline void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset) { WARN_ON(1); -- cgit v1.2.3 From e45ee71ae101bd271c3cd951cf66341dc8f504a0 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 30 Mar 2020 11:58:01 +0200 Subject: pinctrl: Define of_pinctrl_get() dummy for !PINCTRL Currently, the of_pinctrl_get() dummy is only defined for !OF, which can still cause build failures on configurations with OF enabled but PINCTRL disabled. Make sure to define the dummy if either OF or PINCTRL are not enabled. Reported-by: Stephen Rothwell Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20200330095801.2421589-1-thierry.reding@gmail.com Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 7ce23450a1cb..2aef59df93d7 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -186,7 +186,7 @@ extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev, const char *pin_group, const unsigned **pins, unsigned *num_pins); -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_PINCTRL) extern struct pinctrl_dev *of_pinctrl_get(struct device_node *np); #else static inline -- cgit v1.2.3 From a9d68cbd4f8834d126ebdd3097a1dee1c5973fdf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Apr 2020 08:03:28 +0200 Subject: Revert "amba: Initialize dma_parms for amba devices" This reverts commit 5caf6102e32ead7ed5d21b5309c1a4a7d70e6a9f. It still needs some more work and that will happen for the next release cycle, not this one. Cc: Cc: Russell King Cc: Christoph Hellwig Cc: Ludovic Barre Cc: Linus Walleij Cc: Arnd Bergmann Cc: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- include/linux/amba/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 0bbfd647f5c6..26f0ecf401ea 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -65,7 +65,6 @@ struct amba_device { struct device dev; struct resource res; struct clk *pclk; - struct device_dma_parameters dma_parms; unsigned int periphid; unsigned int cid; struct amba_cs_uci_id uci; -- cgit v1.2.3 From 885a64715fd81e6af6d94a038556e0b2e6deb19c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Apr 2020 08:06:54 +0200 Subject: Revert "driver core: platform: Initialize dma_parms for platform devices" This reverts commit 7c8978c0837d40c302f5e90d24c298d9ca9fc097, a new version will come in the next release cycle. Cc: Cc: Russell King Cc: Christoph Hellwig Cc: Ludovic Barre Cc: Linus Walleij Cc: Arnd Bergmann Cc: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 81900b3cbe37..041bfa412aa0 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -25,7 +25,6 @@ struct platform_device { bool id_auto; struct device dev; u64 platform_dma_mask; - struct device_dma_parameters dma_parms; u32 num_resources; struct resource *resource; -- cgit v1.2.3 From 73d20564e0dcae003e0d79977f044d5e57496304 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 31 Mar 2020 22:18:49 +0200 Subject: hrtimer: Don't dereference the hrtimer pointer after the callback A hrtimer can be released in its callback, but lockdep_hrtimer_exit() dereferences the pointer after the callback returns, i.e. a potential use after free. Retrieve the context in which the hrtimer expires before the callback is invoked and use it in lockdep_hrtimer_exit(). Fixes: 40db173965c0 ("lockdep: Add hrtimer context tracing bits") Reported-by: syzbot+62c155c276e580cfb606@syzkaller.appspotmail.com Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200331201849.fkp2siy3vcdqvqlz@linutronix.de --- include/linux/irqflags.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index ceca42de4438..61a9ced3aa50 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -58,16 +58,21 @@ do { \ } while (0) # define lockdep_hrtimer_enter(__hrtimer) \ - do { \ - if (!__hrtimer->is_hard) \ - current->irq_config = 1; \ - } while (0) - -# define lockdep_hrtimer_exit(__hrtimer) \ - do { \ - if (!__hrtimer->is_hard) \ +({ \ + bool __expires_hardirq = true; \ + \ + if (!__hrtimer->is_hard) { \ + current->irq_config = 1; \ + __expires_hardirq = false; \ + } \ + __expires_hardirq; \ +}) + +# define lockdep_hrtimer_exit(__expires_hardirq) \ + do { \ + if (!__expires_hardirq) \ current->irq_config = 0; \ - } while (0) + } while (0) # define lockdep_posixtimer_enter() \ do { \ @@ -102,8 +107,8 @@ do { \ # define lockdep_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) -# define lockdep_hrtimer_enter(__hrtimer) do { } while (0) -# define lockdep_hrtimer_exit(__hrtimer) do { } while (0) +# define lockdep_hrtimer_enter(__hrtimer) false +# define lockdep_hrtimer_exit(__context) do { } while (0) # define lockdep_posixtimer_enter() do { } while (0) # define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) -- cgit v1.2.3 From 0bbe30668d89ec8a309f28ced6d092c90fb23e8c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 26 Mar 2020 22:01:19 +0800 Subject: vhost: factor out IOTLB This patch factors out IOTLB into a dedicated module in order to be reused by other modules like vringh. User may choose to enable the automatic retiring by specifying VHOST_IOTLB_FLAG_RETIRE flag to fit for the case of vhost device IOTLB implementation. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200326140125.19794-4-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vhost_iotlb.h | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/vhost_iotlb.h (limited to 'include/linux') diff --git a/include/linux/vhost_iotlb.h b/include/linux/vhost_iotlb.h new file mode 100644 index 000000000000..6b09b786a762 --- /dev/null +++ b/include/linux/vhost_iotlb.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VHOST_IOTLB_H +#define _LINUX_VHOST_IOTLB_H + +#include + +struct vhost_iotlb_map { + struct rb_node rb; + struct list_head link; + u64 start; + u64 last; + u64 size; + u64 addr; +#define VHOST_MAP_RO 0x1 +#define VHOST_MAP_WO 0x2 +#define VHOST_MAP_RW 0x3 + u32 perm; + u32 flags_padding; + u64 __subtree_last; +}; + +#define VHOST_IOTLB_FLAG_RETIRE 0x1 + +struct vhost_iotlb { + struct rb_root_cached root; + struct list_head list; + unsigned int limit; + unsigned int nmaps; + unsigned int flags; +}; + +int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last, + u64 addr, unsigned int perm); +void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last); + +struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags); +void vhost_iotlb_free(struct vhost_iotlb *iotlb); +void vhost_iotlb_reset(struct vhost_iotlb *iotlb); + +struct vhost_iotlb_map * +vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last); +struct vhost_iotlb_map * +vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last); + +void vhost_iotlb_map_free(struct vhost_iotlb *iotlb, + struct vhost_iotlb_map *map); +#endif -- cgit v1.2.3 From 9ad9c49cfe970b053bb0ef323b682dd1b4d4f8a0 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 26 Mar 2020 22:01:20 +0800 Subject: vringh: IOTLB support This patch implements the third memory accessor for vringh besides current kernel and userspace accessors. This idea is to allow vringh to do the address translation through an IOTLB which is implemented via vhost_map interval tree. Users should setup and IOVA to PA mapping in this IOTLB. This allows us to: - Use vringh to access virtqueues with vIOMMU - Use vringh to implement software virtqueues for vDPA devices Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200326140125.19794-5-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vringh.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index d237087eb257..bd0503ca6f8f 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include /* virtio_ring with information needed for host access. */ @@ -39,6 +41,9 @@ struct vringh { /* The vring (note: it may contain user pointers!) */ struct vring vring; + /* IOTLB for this vring */ + struct vhost_iotlb *iotlb; + /* The function to call to notify the guest about added buffers */ void (*notify)(struct vringh *); }; @@ -248,4 +253,35 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) { return __cpu_to_virtio64(vringh_is_little_endian(vrh), val); } + +void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb); + +int vringh_init_iotlb(struct vringh *vrh, u64 features, + unsigned int num, bool weak_barriers, + struct vring_desc *desc, + struct vring_avail *avail, + struct vring_used *used); + +int vringh_getdesc_iotlb(struct vringh *vrh, + struct vringh_kiov *riov, + struct vringh_kiov *wiov, + u16 *head, + gfp_t gfp); + +ssize_t vringh_iov_pull_iotlb(struct vringh *vrh, + struct vringh_kiov *riov, + void *dst, size_t len); +ssize_t vringh_iov_push_iotlb(struct vringh *vrh, + struct vringh_kiov *wiov, + const void *src, size_t len); + +void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num); + +int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len); + +bool vringh_notify_enable_iotlb(struct vringh *vrh); +void vringh_notify_disable_iotlb(struct vringh *vrh); + +int vringh_need_notify_iotlb(struct vringh *vrh); + #endif /* _LINUX_VRINGH_H */ -- cgit v1.2.3 From 961e9c84077f6c8579d7a628cbe94a675cb67ae4 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 26 Mar 2020 22:01:21 +0800 Subject: vDPA: introduce vDPA bus vDPA device is a device that uses a datapath which complies with the virtio specifications with vendor specific control path. vDPA devices can be both physically located on the hardware or emulated by software. vDPA hardware devices are usually implemented through PCIE with the following types: - PF (Physical Function) - A single Physical Function - VF (Virtual Function) - Device that supports single root I/O virtualization (SR-IOV). Its Virtual Function (VF) represents a virtualized instance of the device that can be assigned to different partitions - ADI (Assignable Device Interface) and its equivalents - With technologies such as Intel Scalable IOV, a virtual device (VDEV) composed by host OS utilizing one or more ADIs. Or its equivalent like SF (Sub function) from Mellanox. >From a driver's perspective, depends on how and where the DMA translation is done, vDPA devices are split into two types: - Platform specific DMA translation - From the driver's perspective, the device can be used on a platform where device access to data in memory is limited and/or translated. An example is a PCIE vDPA whose DMA request was tagged via a bus (e.g PCIE) specific way. DMA translation and protection are done at PCIE bus IOMMU level. - Device specific DMA translation - The device implements DMA isolation and protection through its own logic. An example is a vDPA device which uses on-chip IOMMU. To hide the differences and complexity of the above types for a vDPA device/IOMMU options and in order to present a generic virtio device to the upper layer, a device agnostic framework is required. This patch introduces a software vDPA bus which abstracts the common attributes of vDPA device, vDPA bus driver and the communication method (vdpa_config_ops) between the vDPA device abstraction and the vDPA bus driver. This allows multiple types of drivers to be used for vDPA device like the virtio_vdpa and vhost_vdpa driver to operate on the bus and allow vDPA device could be used by either kernel virtio driver or userspace vhost drivers as: virtio drivers vhost drivers | | [virtio bus] [vhost uAPI] | | virtio device vhost device virtio_vdpa drv vhost_vdpa drv \ / [vDPA bus] | vDPA device hardware drv | [hardware bus] | vDPA hardware With the abstraction of vDPA bus and vDPA bus operations, the difference and complexity of the under layer hardware is hidden from upper layer. The vDPA bus drivers on top can use a unified vdpa_config_ops to control different types of vDPA device. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200326140125.19794-6-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 253 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 include/linux/vdpa.h (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h new file mode 100644 index 000000000000..733acfb7ef84 --- /dev/null +++ b/include/linux/vdpa.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VDPA_H +#define _LINUX_VDPA_H + +#include +#include +#include +#include + +/** + * vDPA callback definition. + * @callback: interrupt callback function + * @private: the data passed to the callback function + */ +struct vdpa_callback { + irqreturn_t (*callback)(void *data); + void *private; +}; + +/** + * vDPA device - representation of a vDPA device + * @dev: underlying device + * @dma_dev: the actual device that is performing DMA + * @config: the configuration ops for this device. + * @index: device index + */ +struct vdpa_device { + struct device dev; + struct device *dma_dev; + const struct vdpa_config_ops *config; + unsigned int index; +}; + +/** + * vDPA_config_ops - operations for configuring a vDPA device. + * Note: vDPA device drivers are required to implement all of the + * operations unless it is mentioned to be optional in the following + * list. + * + * @set_vq_address: Set the address of virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * @desc_area: address of desc area + * @driver_area: address of driver area + * @device_area: address of device area + * Returns integer: success (0) or error (< 0) + * @set_vq_num: Set the size of virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * @num: the size of virtqueue + * @kick_vq: Kick the virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * @set_vq_cb: Set the interrupt callback function for + * a virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * @cb: virtio-vdev interrupt callback structure + * @set_vq_ready: Set ready status for a virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * @ready: ready (true) not ready(false) + * @get_vq_ready: Get ready status for a virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * Returns boolean: ready (true) or not (false) + * @set_vq_state: Set the state for a virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * @state: virtqueue state (last_avail_idx) + * Returns integer: success (0) or error (< 0) + * @get_vq_state: Get the state for a virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * Returns virtqueue state (last_avail_idx) + * @get_vq_align: Get the virtqueue align requirement + * for the device + * @vdev: vdpa device + * Returns virtqueue algin requirement + * @get_features: Get virtio features supported by the device + * @vdev: vdpa device + * Returns the virtio features support by the + * device + * @set_features: Set virtio features supported by the driver + * @vdev: vdpa device + * @features: feature support by the driver + * Returns integer: success (0) or error (< 0) + * @set_config_cb: Set the config interrupt callback + * @vdev: vdpa device + * @cb: virtio-vdev interrupt callback structure + * @get_vq_num_max: Get the max size of virtqueue + * @vdev: vdpa device + * Returns u16: max size of virtqueue + * @get_device_id: Get virtio device id + * @vdev: vdpa device + * Returns u32: virtio device id + * @get_vendor_id: Get id for the vendor that provides this device + * @vdev: vdpa device + * Returns u32: virtio vendor id + * @get_status: Get the device status + * @vdev: vdpa device + * Returns u8: virtio device status + * @set_status: Set the device status + * @vdev: vdpa device + * @status: virtio device status + * @get_config: Read from device specific configuration space + * @vdev: vdpa device + * @offset: offset from the beginning of + * configuration space + * @buf: buffer used to read to + * @len: the length to read from + * configuration space + * @set_config: Write to device specific configuration space + * @vdev: vdpa device + * @offset: offset from the beginning of + * configuration space + * @buf: buffer used to write from + * @len: the length to write to + * configuration space + * @get_generation: Get device config generation (optional) + * @vdev: vdpa device + * Returns u32: device generation + * @set_map: Set device memory mapping (optional) + * Needed for device that using device + * specific DMA translation (on-chip IOMMU) + * @vdev: vdpa device + * @iotlb: vhost memory mapping to be + * used by the vDPA + * Returns integer: success (0) or error (< 0) + * @dma_map: Map an area of PA to IOVA (optional) + * Needed for device that using device + * specific DMA translation (on-chip IOMMU) + * and preferring incremental map. + * @vdev: vdpa device + * @iova: iova to be mapped + * @size: size of the area + * @pa: physical address for the map + * @perm: device access permission (VHOST_MAP_XX) + * Returns integer: success (0) or error (< 0) + * @dma_unmap: Unmap an area of IOVA (optional but + * must be implemented with dma_map) + * Needed for device that using device + * specific DMA translation (on-chip IOMMU) + * and preferring incremental unmap. + * @vdev: vdpa device + * @iova: iova to be unmapped + * @size: size of the area + * Returns integer: success (0) or error (< 0) + * @free: Free resources that belongs to vDPA (optional) + * @vdev: vdpa device + */ +struct vdpa_config_ops { + /* Virtqueue ops */ + int (*set_vq_address)(struct vdpa_device *vdev, + u16 idx, u64 desc_area, u64 driver_area, + u64 device_area); + void (*set_vq_num)(struct vdpa_device *vdev, u16 idx, u32 num); + void (*kick_vq)(struct vdpa_device *vdev, u16 idx); + void (*set_vq_cb)(struct vdpa_device *vdev, u16 idx, + struct vdpa_callback *cb); + void (*set_vq_ready)(struct vdpa_device *vdev, u16 idx, bool ready); + bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx); + int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, u64 state); + u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx); + + /* Device ops */ + u16 (*get_vq_align)(struct vdpa_device *vdev); + u64 (*get_features)(struct vdpa_device *vdev); + int (*set_features)(struct vdpa_device *vdev, u64 features); + void (*set_config_cb)(struct vdpa_device *vdev, + struct vdpa_callback *cb); + u16 (*get_vq_num_max)(struct vdpa_device *vdev); + u32 (*get_device_id)(struct vdpa_device *vdev); + u32 (*get_vendor_id)(struct vdpa_device *vdev); + u8 (*get_status)(struct vdpa_device *vdev); + void (*set_status)(struct vdpa_device *vdev, u8 status); + void (*get_config)(struct vdpa_device *vdev, unsigned int offset, + void *buf, unsigned int len); + void (*set_config)(struct vdpa_device *vdev, unsigned int offset, + const void *buf, unsigned int len); + u32 (*get_generation)(struct vdpa_device *vdev); + + /* DMA ops */ + int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); + int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size, + u64 pa, u32 perm); + int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size); + + /* Free device resources */ + void (*free)(struct vdpa_device *vdev); +}; + +struct vdpa_device *__vdpa_alloc_device(struct device *parent, + const struct vdpa_config_ops *config, + size_t size); + +#define vdpa_alloc_device(dev_struct, member, parent, config) \ + container_of(__vdpa_alloc_device( \ + parent, config, \ + sizeof(dev_struct) + \ + BUILD_BUG_ON_ZERO(offsetof( \ + dev_struct, member))), \ + dev_struct, member) + +int vdpa_register_device(struct vdpa_device *vdev); +void vdpa_unregister_device(struct vdpa_device *vdev); + +/** + * vdpa_driver - operations for a vDPA driver + * @driver: underlying device driver + * @probe: the function to call when a device is found. Returns 0 or -errno. + * @remove: the function to call when a device is removed. + */ +struct vdpa_driver { + struct device_driver driver; + int (*probe)(struct vdpa_device *vdev); + void (*remove)(struct vdpa_device *vdev); +}; + +#define vdpa_register_driver(drv) \ + __vdpa_register_driver(drv, THIS_MODULE) +int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner); +void vdpa_unregister_driver(struct vdpa_driver *drv); + +#define module_vdpa_driver(__vdpa_driver) \ + module_driver(__vdpa_driver, vdpa_register_driver, \ + vdpa_unregister_driver) + +static inline struct vdpa_driver *drv_to_vdpa(struct device_driver *driver) +{ + return container_of(driver, struct vdpa_driver, driver); +} + +static inline struct vdpa_device *dev_to_vdpa(struct device *_dev) +{ + return container_of(_dev, struct vdpa_device, dev); +} + +static inline void *vdpa_get_drvdata(const struct vdpa_device *vdev) +{ + return dev_get_drvdata(&vdev->dev); +} + +static inline void vdpa_set_drvdata(struct vdpa_device *vdev, void *data) +{ + dev_set_drvdata(&vdev->dev, data); +} + +static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) +{ + return vdev->dma_dev; +} +#endif /* _LINUX_VDPA_H */ -- cgit v1.2.3 From d1e7fd6462ca9fc76650fbe6ca800e35b24267da Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Mar 2020 19:01:04 -0500 Subject: signal: Extend exec_id to 64bits Replace the 32bit exec_id with a 64bit exec_id to make it impossible to wrap the exec_id counter. With care an attacker can cause exec_id wrap and send arbitrary signals to a newly exec'd parent. This bypasses the signal sending checks if the parent changes their credentials during exec. The severity of this problem can been seen that in my limited testing of a 32bit exec_id it can take as little as 19s to exec 65536 times. Which means that it can take as little as 14 days to wrap a 32bit exec_id. Adam Zabrocki has succeeded wrapping the self_exe_id in 7 days. Even my slower timing is in the uptime of a typical server. Which means self_exec_id is simply a speed bump today, and if exec gets noticably faster self_exec_id won't even be a speed bump. Extending self_exec_id to 64bits introduces a problem on 32bit architectures where reading self_exec_id is no longer atomic and can take two read instructions. Which means that is is possible to hit a window where the read value of exec_id does not match the written value. So with very lucky timing after this change this still remains expoiltable. I have updated the update of exec_id on exec to use WRITE_ONCE and the read of exec_id in do_notify_parent to use READ_ONCE to make it clear that there is no locking between these two locations. Link: https://lore.kernel.org/kernel-hardening/20200324215049.GA3710@pi3.com.pl Fixes: 2.3.23pre2 Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 04278493bf15..0323e4f0982a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -939,8 +939,8 @@ struct task_struct { struct seccomp seccomp; /* Thread group tracking: */ - u32 parent_exec_id; - u32 self_exec_id; + u64 parent_exec_id; + u64 self_exec_id; /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ spinlock_t alloc_lock; -- cgit v1.2.3 From 08ca8b21f760c0ed5034a5c122092eec22ccf8f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Apr 2020 13:04:49 -0400 Subject: NFS: Fix races nfs_page_group_destroy() vs nfs_destroy_unlinked_subrequests() When a subrequest is being detached from the subgroup, we want to ensure that it is not holding the group lock, or in the process of waiting for the group lock. Fixes: 5b2b5187fa85 ("NFS: Fix nfs_page_group_destroy() and nfs_lock_and_join_requests() race cases") Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 0bbd587fac6a..7e9419d74b86 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -142,6 +142,8 @@ extern void nfs_unlock_and_release_request(struct nfs_page *); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern int nfs_page_set_headlock(struct nfs_page *req); +extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); /* -- cgit v1.2.3 From a62f8e3bd836bf1abde1648a45e14afd050dbd23 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Mar 2020 11:12:16 -0400 Subject: NFS: Clean up nfs_lock_and_join_requests() Clean up nfs_lock_and_join_requests() to simplify the calculation of the range covered by the page group, taking into account the presence of mirrors. Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 7e9419d74b86..dd205bc6bc58 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -139,6 +139,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); +extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3 From e00ed89d7bd59c4ae49d6aeeee567187b1357a4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Mar 2020 12:40:47 -0400 Subject: NFS: Refactor nfs_lock_and_join_requests() Refactor nfs_lock_and_join_requests() in order to separate out the subrequest merging into its own function nfs_lock_and_join_group() that can be used by O_DIRECT. Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index dd205bc6bc58..99198c039bd6 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -139,6 +139,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); +extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); -- cgit v1.2.3 From ed5d588fe47feef290f271022820e255d8371561 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Mar 2020 20:57:49 -0400 Subject: NFS: Try to join page groups before an O_DIRECT retransmission If we have to retransmit requests, try to join their page groups first. Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 99198c039bd6..c32c15216da3 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -141,6 +141,7 @@ extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); +extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3 From d866dbf6178713e37d2fec2870af00b345684e1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jul 2019 10:37:22 -0700 Subject: blkcg: rename blkcg->cgwb_refcnt to ->online_pin and always use it blkcg->cgwb_refcnt is used to delay blkcg offlining so that blkgs don't get offlined while there are active cgwbs on them. However, it ends up making offlining unordered sometimes causing parents to be offlined before children. To fix it, we want child blkcgs to pin the parents' online states turning the refcnt into a more generic online pinning mechanism. In prepartion, * blkcg->cgwb_refcnt -> blkcg->online_pin * blkcg_cgwb_get/put() -> blkcg_pin/unpin_online() * Take them out of CONFIG_CGROUP_WRITEBACK Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index e4a6949fd171..7fb7caa55a3d 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -46,6 +46,7 @@ struct blkcg_gq; struct blkcg { struct cgroup_subsys_state css; spinlock_t lock; + refcount_t online_pin; struct radix_tree_root blkg_tree; struct blkcg_gq __rcu *blkg_hint; @@ -56,7 +57,6 @@ struct blkcg { struct list_head all_blkcgs_node; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; - refcount_t cgwb_refcnt; #endif }; @@ -412,47 +412,34 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) extern void blkcg_destroy_blkgs(struct blkcg *blkcg); -#ifdef CONFIG_CGROUP_WRITEBACK - /** - * blkcg_cgwb_get - get a reference for blkcg->cgwb_list + * blkcg_pin_online - pin online state * @blkcg: blkcg of interest * - * This is used to track the number of active wb's related to a blkcg. + * While pinned, a blkcg is kept online. This is primarily used to + * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline + * while an associated cgwb is still active. */ -static inline void blkcg_cgwb_get(struct blkcg *blkcg) +static inline void blkcg_pin_online(struct blkcg *blkcg) { - refcount_inc(&blkcg->cgwb_refcnt); + refcount_inc(&blkcg->online_pin); } /** - * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list + * blkcg_unpin_online - unpin online state * @blkcg: blkcg of interest * - * This is used to track the number of active wb's related to a blkcg. - * When this count goes to zero, all active wb has finished so the + * This is primarily used to impedance-match blkg and cgwb lifetimes so + * that blkg doesn't go offline while an associated cgwb is still active. + * When this count goes to zero, all active cgwbs have finished so the * blkcg can continue destruction by calling blkcg_destroy_blkgs(). - * This work may occur in cgwb_release_workfn() on the cgwb_release - * workqueue. */ -static inline void blkcg_cgwb_put(struct blkcg *blkcg) +static inline void blkcg_unpin_online(struct blkcg *blkcg) { - if (refcount_dec_and_test(&blkcg->cgwb_refcnt)) + if (refcount_dec_and_test(&blkcg->online_pin)) blkcg_destroy_blkgs(blkcg); } -#else - -static inline void blkcg_cgwb_get(struct blkcg *blkcg) { } - -static inline void blkcg_cgwb_put(struct blkcg *blkcg) -{ - /* wb isn't being accounted, so trigger destruction right away */ - blkcg_destroy_blkgs(blkcg); -} - -#endif - /** * blkg_path - format cgroup path of blkg * @blkg: blkg of interest -- cgit v1.2.3 From 4308a434e5e08c78676aa66bc626ef78cbef0883 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jul 2019 10:37:55 -0700 Subject: blkcg: don't offline parent blkcg first blkcg->cgwb_refcnt is used to delay blkcg offlining so that blkgs don't get offlined while there are active cgwbs on them. However, it ends up making offlining unordered sometimes causing parents to be offlined before children. Let's fix this by making child blkcgs pin the parents' online states. Note that pin/unpin names are chosen over get/put intentionally because css uses get/put online for something different. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 7fb7caa55a3d..35f8ffe92b70 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -436,8 +436,12 @@ static inline void blkcg_pin_online(struct blkcg *blkcg) */ static inline void blkcg_unpin_online(struct blkcg *blkcg) { - if (refcount_dec_and_test(&blkcg->online_pin)) + do { + if (!refcount_dec_and_test(&blkcg->online_pin)) + break; blkcg_destroy_blkgs(blkcg); + blkcg = blkcg_parent(blkcg); + } while (blkcg); } /** -- cgit v1.2.3 From 98c985d7da8dfc8a9aa0ee8a560852dd97b1cf7d Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 1 Apr 2020 21:02:28 -0700 Subject: kthread: mark timer used by delayed kthread works as IRQ safe The timer used by delayed kthread works are IRQ safe because the used kthread_delayed_work_timer_fn() is IRQ safe. It is properly marked when initialized by KTHREAD_DELAYED_WORK_INIT(). But TIMER_IRQSAFE flag is missing when initialized by kthread_init_delayed_work(). The missing flag might trigger invalid warning from del_timer_sync() when kthread_mod_delayed_work() is called with interrupts disabled. This patch is result of a discussion about using the API, see https://lkml.kernel.org/r/cfa886ad-e3b7-c0d2-3ff8-58d94170eab5@ti.com Reported-by: Grygorii Strashko Signed-off-by: Petr Mladek Signed-off-by: Andrew Morton Tested-by: Grygorii Strashko Acked-by: Tejun Heo Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20200217120709.1974-1-pmladek@suse.com Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 0f9da966934e..8bbcaad7ef0f 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -165,7 +165,8 @@ extern void __kthread_init_worker(struct kthread_worker *worker, do { \ kthread_init_work(&(dwork)->work, (fn)); \ timer_setup(&(dwork)->timer, \ - kthread_delayed_work_timer_fn, 0); \ + kthread_delayed_work_timer_fn, \ + TIMER_IRQSAFE); \ } while (0) int kthread_worker_fn(void *worker_ptr); -- cgit v1.2.3 From 667c790169e2b9ebd33188c9285d30ee96844a02 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 1 Apr 2020 21:04:30 -0700 Subject: revert "topology: add support for node_to_mem_node() to determine the fallback node" This reverts commit ad2c8144418c6a81cefe65379fd47bbe8344cef2. The function node_to_mem_node() was introduced by that commit for use in SLUB on systems with memoryless nodes, but it turned out to be unreliable on some architectures/configurations and a simpler solution exists than fixing it up. Thus commit 0715e6c516f1 ("mm, slub: prevent kmalloc_node crashes and memory leaks") removed the only user of node_to_mem_node() and we can revert the commit that introduced the function. Signed-off-by: Vlastimil Babka Signed-off-by: Andrew Morton Reviewed-by: Srikar Dronamraju Cc: Joonsoo Kim Cc: Bharata B Rao Cc: Christopher Lameter Cc: David Rientjes Cc: Kirill Tkhai Cc: Mel Gorman Cc: Michael Ellerman Cc: Michal Hocko Cc: Nathan Lynch Cc: Pekka Enberg Cc: PUVICHAKRAVARTHY RAMACHANDRAN Cc: Sachin Sant Link: http://lkml.kernel.org/r/20200320115533.9604-2-vbabka@suse.cz Signed-off-by: Linus Torvalds --- include/linux/topology.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index eb2fe6edd73c..608fa4aadf0e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -130,20 +130,11 @@ static inline int numa_node_id(void) * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem(). */ DECLARE_PER_CPU(int, _numa_mem_); -extern int _node_numa_mem_[MAX_NUMNODES]; #ifndef set_numa_mem static inline void set_numa_mem(int node) { this_cpu_write(_numa_mem_, node); - _node_numa_mem_[numa_node_id()] = node; -} -#endif - -#ifndef node_to_mem_node -static inline int node_to_mem_node(int node) -{ - return _node_numa_mem_[node]; } #endif @@ -166,7 +157,6 @@ static inline int cpu_to_mem(int cpu) static inline void set_cpu_numa_mem(int cpu, int node) { per_cpu(_numa_mem_, cpu) = node; - _node_numa_mem_[cpu_to_node(cpu)] = node; } #endif @@ -180,13 +170,6 @@ static inline int numa_mem_id(void) } #endif -#ifndef node_to_mem_node -static inline int node_to_mem_node(int node) -{ - return node; -} -#endif - #ifndef cpu_to_mem static inline int cpu_to_mem(int cpu) { -- cgit v1.2.3 From ec84821507be44480e831d6f3d89df9e2b13728e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 1 Apr 2020 21:04:57 -0700 Subject: include/linux/pagemap.h: rename arguments to find_subpage This isn't just a random struct page, it's known to be a head page, and calling it head makes the function better self-documenting. The pgoff_t is less confusing if it's named index instead of offset. Also add a couple of comments to explain why we're doing various things. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: Christoph Hellwig Acked-by: Kirill A. Shutemov Acked-by: Pankaj Gupta Cc: Aneesh Kumar K.V Link: http://lkml.kernel.org/r/20200318140253.6141-3-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ccb14b6a16b5..5b0e6343229d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -333,14 +333,19 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } -static inline struct page *find_subpage(struct page *page, pgoff_t offset) +/* + * Given the page we found in the page cache, return the page corresponding + * to this index in the file + */ +static inline struct page *find_subpage(struct page *head, pgoff_t index) { - if (PageHuge(page)) - return page; + /* HugeTLBfs wants the head page regardless */ + if (PageHuge(head)) + return head; - VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(PageTail(head), head); - return page + (offset & (compound_nr(page) - 1)); + return head + (index & (compound_nr(head) - 1)); } struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); -- cgit v1.2.3 From 566d774a1187245c65a9d4be778930e676df3c07 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Wed, 1 Apr 2020 21:05:18 -0700 Subject: mm: introduce page_ref_sub_return() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An upcoming patch requires subtracting a large chunk of refcounts from a page, and checking what the resulting refcount is. This is a little different than the usual "check for zero refcount" that many of the page ref functions already do. However, it is similar to a few other routines that (like this one) are generally useful for things such as 1-based refcounting. Add page_ref_sub_return(), that subtracts a chunk of refcounts atomically, and returns an atomic snapshot of the result. Signed-off-by: John Hubbard Signed-off-by: Andrew Morton Reviewed-by: Jan Kara Acked-by: Kirill A. Shutemov Cc: Ira Weiny Cc: Jérôme Glisse Cc: "Matthew Wilcox (Oracle)" Cc: Al Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Michal Hocko Cc: Mike Kravetz Cc: Shuah Khan Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/20200211001536.1027652-4-jhubbard@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/page_ref.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 14d14beb1f7f..d27701199a4d 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -102,6 +102,15 @@ static inline void page_ref_sub(struct page *page, int nr) __page_ref_mod(page, -nr); } +static inline int page_ref_sub_return(struct page *page, int nr) +{ + int ret = atomic_sub_return(nr, &page->_refcount); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return)) + __page_ref_mod_and_return(page, -nr, ret); + return ret; +} + static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_refcount); -- cgit v1.2.3 From 3faa52c03f440d1b9ddef18c4f189f4790d52d7e Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Wed, 1 Apr 2020 21:05:29 -0700 Subject: mm/gup: track FOLL_PIN pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracking of pages that were pinned via FOLL_PIN. This tracking is implemented via overloading of page->_refcount: pins are added by adding GUP_PIN_COUNTING_BIAS (1024) to the refcount. This provides a fuzzy indication of pinning, and it can have false positives (and that's OK). Please see the pre-existing Documentation/core-api/pin_user_pages.rst for details. As mentioned in pin_user_pages.rst, callers who effectively set FOLL_PIN (typically via pin_user_pages*()) are required to ultimately free such pages via unpin_user_page(). Please also note the limitation, discussed in pin_user_pages.rst under the "TODO: for 1GB and larger huge pages" section. (That limitation will be removed in a following patch.) The effect of a FOLL_PIN flag is similar to that of FOLL_GET, and may be thought of as "FOLL_GET for DIO and/or RDMA use". Pages that have been pinned via FOLL_PIN are identifiable via a new function call: bool page_maybe_dma_pinned(struct page *page); What to do in response to encountering such a page, is left to later patchsets. There is discussion about this in [1], [2], [3], and [4]. This also changes a BUG_ON(), to a WARN_ON(), in follow_page_mask(). [1] Some slow progress on get_user_pages() (Apr 2, 2019): https://lwn.net/Articles/784574/ [2] DMA and get_user_pages() (LPC: Dec 12, 2018): https://lwn.net/Articles/774411/ [3] The trouble with get_user_pages() (Apr 30, 2018): https://lwn.net/Articles/753027/ [4] LWN kernel index: get_user_pages(): https://lwn.net/Kernel/Index/#Memory_management-get_user_pages [jhubbard@nvidia.com: add kerneldoc] Link: http://lkml.kernel.org/r/20200307021157.235726-1-jhubbard@nvidia.com [imbrenda@linux.ibm.com: if pin fails, we need to unpin, a simple put_page will not be enough] Link: http://lkml.kernel.org/r/20200306132537.783769-2-imbrenda@linux.ibm.com [akpm@linux-foundation.org: fix put_compound_head defined but not used] Suggested-by: Jan Kara Suggested-by: Jérôme Glisse Signed-off-by: John Hubbard Signed-off-by: Claudio Imbrenda Signed-off-by: Andrew Morton Reviewed-by: Jan Kara Acked-by: Kirill A. Shutemov Cc: Ira Weiny Cc: "Matthew Wilcox (Oracle)" Cc: Al Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Michal Hocko Cc: Mike Kravetz Cc: Shuah Khan Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/20200211001536.1027652-7-jhubbard@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 82 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c54fb96cb1e6..10be09c8227e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1001,6 +1001,8 @@ static inline void get_page(struct page *page) page_ref_inc(page); } +bool __must_check try_grab_page(struct page *page, unsigned int flags); + static inline __must_check bool try_get_page(struct page *page) { page = compound_head(page); @@ -1029,29 +1031,79 @@ static inline void put_page(struct page *page) __put_page(page); } -/** - * unpin_user_page() - release a gup-pinned page - * @page: pointer to page to be released +/* + * GUP_PIN_COUNTING_BIAS, and the associated functions that use it, overload + * the page's refcount so that two separate items are tracked: the original page + * reference count, and also a new count of how many pin_user_pages() calls were + * made against the page. ("gup-pinned" is another term for the latter). + * + * With this scheme, pin_user_pages() becomes special: such pages are marked as + * distinct from normal pages. As such, the unpin_user_page() call (and its + * variants) must be used in order to release gup-pinned pages. + * + * Choice of value: + * + * By making GUP_PIN_COUNTING_BIAS a power of two, debugging of page reference + * counts with respect to pin_user_pages() and unpin_user_page() becomes + * simpler, due to the fact that adding an even power of two to the page + * refcount has the effect of using only the upper N bits, for the code that + * counts up using the bias value. This means that the lower bits are left for + * the exclusive use of the original code that increments and decrements by one + * (or at least, by much smaller values than the bias value). * - * Pages that were pinned via pin_user_pages*() must be released via either - * unpin_user_page(), or one of the unpin_user_pages*() routines. This is so - * that eventually such pages can be separately tracked and uniquely handled. In - * particular, interactions with RDMA and filesystems need special handling. + * Of course, once the lower bits overflow into the upper bits (and this is + * OK, because subtraction recovers the original values), then visual inspection + * no longer suffices to directly view the separate counts. However, for normal + * applications that don't have huge page reference counts, this won't be an + * issue. * - * unpin_user_page() and put_page() are not interchangeable, despite this early - * implementation that makes them look the same. unpin_user_page() calls must - * be perfectly matched up with pin*() calls. + * Locking: the lockless algorithm described in page_cache_get_speculative() + * and page_cache_gup_pin_speculative() provides safe operation for + * get_user_pages and page_mkclean and other calls that race to set up page + * table entries. */ -static inline void unpin_user_page(struct page *page) -{ - put_page(page); -} +#define GUP_PIN_COUNTING_BIAS (1U << 10) +void unpin_user_page(struct page *page); void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty); - void unpin_user_pages(struct page **pages, unsigned long npages); +/** + * page_maybe_dma_pinned() - report if a page is pinned for DMA. + * + * This function checks if a page has been pinned via a call to + * pin_user_pages*(). + * + * For non-huge pages, the return value is partially fuzzy: false is not fuzzy, + * because it means "definitely not pinned for DMA", but true means "probably + * pinned for DMA, but possibly a false positive due to having at least + * GUP_PIN_COUNTING_BIAS worth of normal page references". + * + * False positives are OK, because: a) it's unlikely for a page to get that many + * refcounts, and b) all the callers of this routine are expected to be able to + * deal gracefully with a false positive. + * + * For more information, please see Documentation/vm/pin_user_pages.rst. + * + * @page: pointer to page to be queried. + * @Return: True, if it is likely that the page has been "dma-pinned". + * False, if the page is definitely not dma-pinned. + */ +static inline bool page_maybe_dma_pinned(struct page *page) +{ + /* + * page_ref_count() is signed. If that refcount overflows, then + * page_ref_count() returns a negative value, and callers will avoid + * further incrementing the refcount. + * + * Here, for that overflow case, use the signed bit to count a little + * bit higher via unsigned math, and thus still get an accurate result. + */ + return ((unsigned int)page_ref_count(compound_head(page))) >= + GUP_PIN_COUNTING_BIAS; +} + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif -- cgit v1.2.3 From 47e29d32afba11b13efb51f03154a8cf22fb4360 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Wed, 1 Apr 2020 21:05:33 -0700 Subject: mm/gup: page->hpage_pinned_refcount: exact pin counts for huge pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For huge pages (and in fact, any compound page), the GUP_PIN_COUNTING_BIAS scheme tends to overflow too easily, each tail page increments the head page->_refcount by GUP_PIN_COUNTING_BIAS (1024). That limits the number of huge pages that can be pinned. This patch removes that limitation, by using an exact form of pin counting for compound pages of order > 1. The "order > 1" is required because this approach uses the 3rd struct page in the compound page, and order 1 compound pages only have two pages, so that won't work there. A new struct page field, hpage_pinned_refcount, has been added, replacing a padding field in the union (so no new space is used). This enhancement also has a useful side effect: huge pages and compound pages (of order > 1) do not suffer from the "potential false positives" problem that is discussed in the page_dma_pinned() comment block. That is because these compound pages have extra space for tracking things, so they get exact pin counts instead of overloading page->_refcount. Documentation/core-api/pin_user_pages.rst is updated accordingly. Suggested-by: Jan Kara Signed-off-by: John Hubbard Signed-off-by: Andrew Morton Reviewed-by: Jan Kara Acked-by: Kirill A. Shutemov Cc: Ira Weiny Cc: Jérôme Glisse Cc: "Matthew Wilcox (Oracle)" Cc: Al Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Michal Hocko Cc: Mike Kravetz Cc: Shuah Khan Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/20200211001536.1027652-8-jhubbard@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 26 ++++++++++++++++++++++++++ include/linux/mm_types.h | 7 ++++++- 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 10be09c8227e..6a426f8fd1e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -770,6 +770,24 @@ static inline unsigned int compound_order(struct page *page) return page[1].compound_order; } +static inline bool hpage_pincount_available(struct page *page) +{ + /* + * Can the page->hpage_pinned_refcount field be used? That field is in + * the 3rd page of the compound page, so the smallest (2-page) compound + * pages cannot support it. + */ + page = compound_head(page); + return PageCompound(page) && compound_order(page) > 1; +} + +static inline int compound_pincount(struct page *page) +{ + VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); + page = compound_head(page); + return atomic_read(compound_pincount_ptr(page)); +} + static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; @@ -1084,6 +1102,11 @@ void unpin_user_pages(struct page **pages, unsigned long npages); * refcounts, and b) all the callers of this routine are expected to be able to * deal gracefully with a false positive. * + * For huge pages, the result will be exactly correct. That's because we have + * more tracking data available: the 3rd struct page in the compound page is + * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS + * scheme). + * * For more information, please see Documentation/vm/pin_user_pages.rst. * * @page: pointer to page to be queried. @@ -1092,6 +1115,9 @@ void unpin_user_pages(struct page **pages, unsigned long npages); */ static inline bool page_maybe_dma_pinned(struct page *page) { + if (hpage_pincount_available(page)) + return compound_pincount(page) > 0; + /* * page_ref_count() is signed. If that refcount overflows, then * page_ref_count() returns a negative value, and callers will avoid diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c28911c3afa8..dd555e6d23f3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -137,7 +137,7 @@ struct page { }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ - unsigned long _compound_pad_2; + atomic_t hpage_pinned_refcount; /* For both global and memcg */ struct list_head deferred_list; }; @@ -226,6 +226,11 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page) return &page[1].compound_mapcount; } +static inline atomic_t *compound_pincount_ptr(struct page *page) +{ + return &page[2].hpage_pinned_refcount; +} + /* * Used for sizing the vmemmap region on some architectures */ -- cgit v1.2.3 From 1970dc6f5226416957ad0cc70ab47386ed3195a6 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Wed, 1 Apr 2020 21:05:37 -0700 Subject: mm/gup: /proc/vmstat: pin_user_pages (FOLL_PIN) reporting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that pages are "DMA-pinned" via pin_user_page*(), and unpinned via unpin_user_pages*(), we need some visibility into whether all of this is working correctly. Add two new fields to /proc/vmstat: nr_foll_pin_acquired nr_foll_pin_released These are documented in Documentation/core-api/pin_user_pages.rst. They represent the number of pages (since boot time) that have been pinned ("nr_foll_pin_acquired") and unpinned ("nr_foll_pin_released"), via pin_user_pages*() and unpin_user_pages*(). In the absence of long-running DMA or RDMA operations that hold pages pinned, the above two fields will normally be equal to each other. Also: update Documentation/core-api/pin_user_pages.rst, to remove an earlier (now confirmed untrue) claim about a performance problem with /proc/vmstat. Also: update Documentation/core-api/pin_user_pages.rst to rename the new /proc/vmstat entries, to the names listed here. Signed-off-by: John Hubbard Signed-off-by: Andrew Morton Reviewed-by: Jan Kara Acked-by: Kirill A. Shutemov Cc: Ira Weiny Cc: Jérôme Glisse Cc: "Matthew Wilcox (Oracle)" Cc: Al Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Michal Hocko Cc: Mike Kravetz Cc: Shuah Khan Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/20200211001536.1027652-9-jhubbard@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 462f6873905a..4bca42eeb439 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -243,6 +243,8 @@ enum node_stat_item { NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ + NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ + NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ NR_VM_NODE_STAT_ITEMS }; -- cgit v1.2.3 From f28d43636d6f940e60abef4f0131119836c8ebd4 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Wed, 1 Apr 2020 21:05:56 -0700 Subject: mm/gup/writeback: add callbacks for inaccessible pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the introduction of protected KVM guests on s390 there is now a concept of inaccessible pages. These pages need to be made accessible before the host can access them. While cpu accesses will trigger a fault that can be resolved, I/O accesses will just fail. We need to add a callback into architecture code for places that will do I/O, namely when writeback is started or when a page reference is taken. This is not only to enable paging, file backing etc, it is also necessary to protect the host against a malicious user space. For example a bad QEMU could simply start direct I/O on such protected memory. We do not want userspace to be able to trigger I/O errors and thus the logic is "whenever somebody accesses that page (gup) or does I/O, make sure that this page can be accessed". When the guest tries to access that page we will wait in the page fault handler for writeback to have finished and for the page_ref to be the expected value. On s390x the function is not supposed to fail, so it is ok to use a WARN_ON on failure. If we ever need some more finegrained handling we can tackle this when we know the details. Signed-off-by: Claudio Imbrenda Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Christian Borntraeger Reviewed-by: John Hubbard Acked-by: Will Deacon Cc: Jan Kara Cc: Matthew Wilcox Cc: Ira Weiny Cc: Jérôme Glisse Cc: Al Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Michal Hocko Cc: Mike Kravetz Cc: Shuah Khan Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/20200306132537.783769-3-imbrenda@linux.ibm.com Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e5b817cb86e7..be2754841369 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -485,6 +485,12 @@ static inline void arch_free_page(struct page *page, int order) { } #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif +#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE +static inline int arch_make_page_accessible(struct page *page) +{ + return 0; +} +#endif struct page * __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, -- cgit v1.2.3 From 1eb6234e52f0cbb87f59c328687127866d57941a Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 1 Apr 2020 21:06:20 -0700 Subject: mm: swap: make page_evictable() inline When backporting commit 9c4e6b1a7027 ("mm, mlock, vmscan: no more skipping pagevecs") to our 4.9 kernel, our test bench noticed around 10% down with a couple of vm-scalability's test cases (lru-file-readonce, lru-file-readtwice and lru-file-mmap-read). I didn't see that much down on my VM (32c-64g-2nodes). It might be caused by the test configuration, which is 32c-256g with NUMA disabled and the tests were run in root memcg, so the tests actually stress only one inactive and active lru. It sounds not very usual in mordern production environment. That commit did two major changes: 1. Call page_evictable() 2. Use smp_mb to force the PG_lru set visible It looks they contribute the most overhead. The page_evictable() is a function which does function prologue and epilogue, and that was used by page reclaim path only. However, lru add is a very hot path, so it sounds better to make it inline. However, it calls page_mapping() which is not inlined either, but the disassemble shows it doesn't do push and pop operations and it sounds not very straightforward to inline it. Other than this, it sounds smp_mb() is not necessary for x86 since SetPageLRU is atomic which enforces memory barrier already, replace it with smp_mb__after_atomic() in the following patch. With the two fixes applied, the tests can get back around 5% on that test bench and get back normal on my VM. Since the test bench configuration is not that usual and I also saw around 6% up on the latest upstream, so it sounds good enough IMHO. The below is test data (lru-file-readtwice throughput) against the v5.6-rc4: mainline w/ inline fix 150MB 154MB With this patch the throughput gets 2.67% up. The data with using smp_mb__after_atomic() is showed in the following patch. Shakeel Butt did the below test: On a real machine with limiting the 'dd' on a single node and reading 100 GiB sparse file (less than a single node). Just ran a single instance to not cause the lru lock contention. The cmdline used is "dd if=file-100GiB of=/dev/null bs=4k". Ran the cmd 10 times with drop_caches in between and measured the time it took. Without patch: 56.64143 +- 0.672 sec With patches: 56.10 +- 0.21 sec [akpm@linux-foundation.org: move page_evictable() to internal.h] Fixes: 9c4e6b1a7027 ("mm, mlock, vmscan: no more skipping pagevecs") Signed-off-by: Yang Shi Signed-off-by: Andrew Morton Tested-by: Shakeel Butt Reviewed-by: Shakeel Butt Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Link: http://lkml.kernel.org/r/1584500541-46817-1-git-send-email-yang.shi@linux.alibaba.com Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 6 ++---- include/linux/swap.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 5b0e6343229d..b82eabf0268e 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -70,11 +70,9 @@ static inline void mapping_clear_unevictable(struct address_space *mapping) clear_bit(AS_UNEVICTABLE, &mapping->flags); } -static inline int mapping_unevictable(struct address_space *mapping) +static inline bool mapping_unevictable(struct address_space *mapping) { - if (mapping) - return test_bit(AS_UNEVICTABLE, &mapping->flags); - return !!mapping; + return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags); } static inline void mapping_set_exiting(struct address_space *mapping) diff --git a/include/linux/swap.h b/include/linux/swap.h index 1e99f7ac1d7e..b835d8dbea0e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -374,7 +374,6 @@ extern int sysctl_min_slab_ratio; #define node_reclaim_mode 0 #endif -extern int page_evictable(struct page *page); extern void check_move_unevictable_pages(struct pagevec *pvec); extern int kswapd_run(int nid); -- cgit v1.2.3 From 10eaec2f63b6b4b9e3d2efbdb95789579aa8f64e Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 1 Apr 2020 21:06:39 -0700 Subject: mm: kmem: cleanup (__)memcg_kmem_charge_memcg() arguments Patch series "mm: memcg: kmem API cleanup", v2. This patchset aims to clean up the kernel memory charging API. It doesn't bring any functional changes, just removes unused arguments, renames some functions and fixes some comments. Currently it's not obvious which functions are most basic (memcg_kmem_(un)charge_memcg()) and which are based on them (memcg_kmem_(un)charge()). The patchset renames these functions and removes unused arguments: TL;DR: was: memcg_kmem_charge_memcg(page, gfp, order, memcg) memcg_kmem_uncharge_memcg(memcg, nr_pages) memcg_kmem_charge(page, gfp, order) memcg_kmem_uncharge(page, order) now: memcg_kmem_charge(memcg, gfp, nr_pages) memcg_kmem_uncharge(memcg, nr_pages) memcg_kmem_charge_page(page, gfp, order) memcg_kmem_uncharge_page(page, order) This patch (of 6): The first argument of memcg_kmem_charge_memcg() and __memcg_kmem_charge_memcg() is the page pointer and it's not used. Let's drop it. Memcg pointer is passed as the last argument. Move it to the first place for consistency with other memcg functions, e.g. __memcg_kmem_uncharge_memcg() or try_charge(). Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20200109202659.752357-2-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e9ba01336d4e..f1542d6558c2 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1369,8 +1369,7 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep); #ifdef CONFIG_MEMCG_KMEM int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge(struct page *page, int order); -int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, - struct mem_cgroup *memcg); +int __memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, int order); void __memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg, unsigned int nr_pages); @@ -1407,11 +1406,11 @@ static inline void memcg_kmem_uncharge(struct page *page, int order) __memcg_kmem_uncharge(page, order); } -static inline int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, - int order, struct mem_cgroup *memcg) +static inline int memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, + int order) { if (memcg_kmem_enabled()) - return __memcg_kmem_charge_memcg(page, gfp, order, memcg); + return __memcg_kmem_charge_memcg(memcg, gfp, order); return 0; } -- cgit v1.2.3 From 50591183fa86c4436f6408c6b702af5e4de7524d Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 1 Apr 2020 21:06:43 -0700 Subject: mm: kmem: cleanup memcg_kmem_uncharge_memcg() arguments Drop the unused page argument and put the memcg pointer at the first place. This make the function consistent with its peers: __memcg_kmem_uncharge_memcg(), memcg_kmem_charge_memcg(), etc. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20200109202659.752357-3-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f1542d6558c2..13b5d70b8b0e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1414,8 +1414,8 @@ static inline int memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, return 0; } -static inline void memcg_kmem_uncharge_memcg(struct page *page, int order, - struct mem_cgroup *memcg) +static inline void memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg, + int order) { if (memcg_kmem_enabled()) __memcg_kmem_uncharge_memcg(memcg, 1 << order); -- cgit v1.2.3 From f4b00eab5004e823f28a268580ae4ed16df9fabf Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 1 Apr 2020 21:06:46 -0700 Subject: mm: kmem: rename memcg_kmem_(un)charge() into memcg_kmem_(un)charge_page() Rename (__)memcg_kmem_(un)charge() into (__)memcg_kmem_(un)charge_page() to better reflect what they are actually doing: 1) call __memcg_kmem_(un)charge_memcg() to actually charge or uncharge the current memcg 2) set or clear the PageKmemcg flag Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20200109202659.752357-4-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 13b5d70b8b0e..4bc97ae50f3b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1367,8 +1367,8 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep); void memcg_kmem_put_cache(struct kmem_cache *cachep); #ifdef CONFIG_MEMCG_KMEM -int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); -void __memcg_kmem_uncharge(struct page *page, int order); +int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); +void __memcg_kmem_uncharge_page(struct page *page, int order); int __memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, int order); void __memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg, unsigned int nr_pages); @@ -1393,17 +1393,18 @@ static inline bool memcg_kmem_enabled(void) return static_branch_unlikely(&memcg_kmem_enabled_key); } -static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) +static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, + int order) { if (memcg_kmem_enabled()) - return __memcg_kmem_charge(page, gfp, order); + return __memcg_kmem_charge_page(page, gfp, order); return 0; } -static inline void memcg_kmem_uncharge(struct page *page, int order) +static inline void memcg_kmem_uncharge_page(struct page *page, int order) { if (memcg_kmem_enabled()) - __memcg_kmem_uncharge(page, order); + __memcg_kmem_uncharge_page(page, order); } static inline int memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, @@ -1435,21 +1436,23 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p); #else -static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) +static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, + int order) { return 0; } -static inline void memcg_kmem_uncharge(struct page *page, int order) +static inline void memcg_kmem_uncharge_page(struct page *page, int order) { } -static inline int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order) +static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, + int order) { return 0; } -static inline void __memcg_kmem_uncharge(struct page *page, int order) +static inline void __memcg_kmem_uncharge_page(struct page *page, int order) { } -- cgit v1.2.3 From 92d0510c3585970fb26af27f7fd3ba58321523ac Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 1 Apr 2020 21:06:49 -0700 Subject: mm: kmem: switch to nr_pages in (__)memcg_kmem_charge_memcg() These functions are charging the given number of kernel pages to the given memory cgroup. The number doesn't have to be a power of two. Let's make them to take the unsigned int nr_pages as an argument instead of the page order. It makes them look consistent with the corresponding uncharge functions and functions like: mem_cgroup_charge_skmem(memcg, nr_pages). Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20200109202659.752357-5-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4bc97ae50f3b..b819a64c2ceb 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1369,7 +1369,8 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep); #ifdef CONFIG_MEMCG_KMEM int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order); -int __memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, int order); +int __memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, + unsigned int nr_pages); void __memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg, unsigned int nr_pages); @@ -1408,18 +1409,18 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order) } static inline int memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, - int order) + unsigned int nr_pages) { if (memcg_kmem_enabled()) - return __memcg_kmem_charge_memcg(memcg, gfp, order); + return __memcg_kmem_charge_memcg(memcg, gfp, nr_pages); return 0; } static inline void memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg, - int order) + unsigned int nr_pages) { if (memcg_kmem_enabled()) - __memcg_kmem_uncharge_memcg(memcg, 1 << order); + __memcg_kmem_uncharge_memcg(memcg, nr_pages); } /* -- cgit v1.2.3 From 4b13f64de25686583db3e359b1b8e59049278b50 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 1 Apr 2020 21:06:56 -0700 Subject: mm: kmem: rename (__)memcg_kmem_(un)charge_memcg() to __memcg_kmem_(un)charge() Drop the _memcg suffix from (__)memcg_kmem_(un)charge functions. It's shorter and more obvious. These are the most basic functions which are just (un)charging the given cgroup with the given amount of pages. Also fix up the corresponding comments. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20200109202659.752357-7-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b819a64c2ceb..1b4150ff64be 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1367,12 +1367,11 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep); void memcg_kmem_put_cache(struct kmem_cache *cachep); #ifdef CONFIG_MEMCG_KMEM +int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp, + unsigned int nr_pages); +void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages); int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order); -int __memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, - unsigned int nr_pages); -void __memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg, - unsigned int nr_pages); extern struct static_key_false memcg_kmem_enabled_key; extern struct workqueue_struct *memcg_kmem_cache_wq; @@ -1408,19 +1407,19 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order) __memcg_kmem_uncharge_page(page, order); } -static inline int memcg_kmem_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp, - unsigned int nr_pages) +static inline int memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp, + unsigned int nr_pages) { if (memcg_kmem_enabled()) - return __memcg_kmem_charge_memcg(memcg, gfp, nr_pages); + return __memcg_kmem_charge(memcg, gfp, nr_pages); return 0; } -static inline void memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg, - unsigned int nr_pages) +static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg, + unsigned int nr_pages) { if (memcg_kmem_enabled()) - __memcg_kmem_uncharge_memcg(memcg, nr_pages); + __memcg_kmem_uncharge(memcg, nr_pages); } /* -- cgit v1.2.3 From 8a931f801340c2be10552c7b5622d5f4852f3a36 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 1 Apr 2020 21:07:07 -0700 Subject: mm: memcontrol: recursive memory.low protection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, the effective protection of any given cgroup is capped by its own explicit memory.low setting, regardless of what the parent says. The reasons for this are mostly historical and ease of implementation: to make delegation of memory.low safe, effective protection is the min() of all memory.low up the tree. Unfortunately, this limitation makes it impossible to protect an entire subtree from another without forcing the user to make explicit protection allocations all the way to the leaf cgroups - something that is highly undesirable in real life scenarios. Consider memory in a data center host. At the cgroup top level, we have a distinction between system management software and the actual workload the system is executing. Both branches are further subdivided into individual services, job components etc. We want to protect the workload as a whole from the system management software, but that doesn't mean we want to protect and prioritize individual workload wrt each other. Their memory demand can vary over time, and we'd want the VM to simply cache the hottest data within the workload subtree. Yet, the current memory.low limitations force us to allocate a fixed amount of protection to each workload component in order to get protection from system management software in general. This results in very inefficient resource distribution. Another concern with mandating downward allocation is that, as the complexity of the cgroup tree grows, it gets harder for the lower levels to be informed about decisions made at the host-level. Consider a container inside a namespace that in turn creates its own nested tree of cgroups to run multiple workloads. It'd be extremely difficult to configure memory.low parameters in those leaf cgroups that on one hand balance pressure among siblings as the container desires, while also reflecting the host-level protection from e.g. rpm upgrades, that lie beyond one or more delegation and namespacing points in the tree. It's highly unusual from a cgroup interface POV that nested levels have to be aware of and reflect decisions made at higher levels for them to be effective. To enable such use cases and scale configurability for complex trees, this patch implements a resource inheritance model for memory that is similar to how the CPU and the IO controller implement work-conserving resource allocations: a share of a resource allocated to a subree always applies to the entire subtree recursively, while allowing, but not mandating, children to further specify distribution rules. That means that if protection is explicitly allocated among siblings, those configured shares are being followed during page reclaim just like they are now. However, if the memory.low set at a higher level is not fully claimed by the children in that subtree, the "floating" remainder is applied to each cgroup in the tree in proportion to its size. Since reclaim pressure is applied in proportion to size as well, each child in that tree gets the same boost, and the effect is neutral among siblings - with respect to each other, they behave as if no memory control was enabled at all, and the VM simply balances the memory demands optimally within the subtree. But collectively those cgroups enjoy a boost over the cgroups in neighboring trees. E.g. a leaf cgroup with a memory.low setting of 0 no longer means that it's not getting a share of the hierarchically assigned resource, just that it doesn't claim a fixed amount of it to protect from its siblings. This allows us to recursively protect one subtree (workload) from another (system management), while letting subgroups compete freely among each other - without having to assign fixed shares to each leaf, and without nested groups having to echo higher-level settings. The floating protection composes naturally with fixed protection. Consider the following example tree: A A: low = 2G / \ A1: low = 1G A1 A2 A2: low = 0G As outside pressure is applied to this tree, A1 will enjoy a fixed protection from A2 of 1G, but the remaining, unclaimed 1G from A is split evenly among A1 and A2, coming out to 1.5G and 0.5G. There is a slight risk of regressing theoretical setups where the top-level cgroups don't know about the true budgeting and set bogusly high "bypass" values that are meaningfully allocated down the tree. Such setups would rely on unclaimed protection to be discarded, and distributing it would change the intended behavior. Be safe and hide the new behavior behind a mount option, 'memory_recursiveprot'. Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Acked-by: Tejun Heo Acked-by: Roman Gushchin Acked-by: Chris Down Cc: Michal Hocko Cc: Michal Koutný Link: http://lkml.kernel.org/r/20200227195606.46212-4-hannes@cmpxchg.org Signed-off-by: Linus Torvalds --- include/linux/cgroup-defs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 63097cb243cb..e1fafed22db1 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -94,6 +94,11 @@ enum { * Enable legacy local memory.events. */ CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5), + + /* + * Enable recursive subtree protection + */ + CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6), }; /* cftype->flags */ -- cgit v1.2.3 From b44437723cbcb5acd64ed25a4938b95fbb9bfccb Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 1 Apr 2020 21:07:45 -0700 Subject: mm/vma: move VM_NO_KHUGEPAGED into generic header Patch series "mm/vma: some more minor changes", v2. The motivation here is to consolidate VMA flags and helpers in generic memory header and reduce code duplication when ever applicable. If there are other possible similar instances which might be missing here, please do let me me know. I will be happy to incorporate them. This patch (of 3): Move VM_NO_KHUGEPAGED into generic header (include/linux/mm.h). This just makes sure that no VMA flag is scattered in individual function files any longer. While at this, fix an old comment which is no longer valid. This should not cause any functional change. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Ingo Molnar Cc: Michael Ellerman Cc: Paul Mackerras Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1582782965-3274-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6a426f8fd1e9..18583629d424 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -356,10 +356,12 @@ extern unsigned int kobjsize(const void *objp); /* * Special vmas that are non-mergable, non-mlock()able. - * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) +/* This mask prevents VMA from being scanned with khugepaged */ +#define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB) + /* This mask defines which mm->def_flags a process can inherit its parent */ #define VM_INIT_DEF_MASK VM_NOHUGEPAGE -- cgit v1.2.3 From 7969f2264f92871b9879796f08b455f737373718 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 1 Apr 2020 21:07:49 -0700 Subject: mm/vma: make vma_is_foreign() available for general use Idea of a foreign VMA with respect to the present context is very generic. But currently there are two identical definitions for this in powerpc and x86 platforms. Lets consolidate those redundant definitions while making vma_is_foreign() available for general use later. This should not cause any functional change. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Paul Mackerras Cc: Michael Ellerman Cc: Thomas Gleixner Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1582782965-3274-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 18583629d424..083e66f80709 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -27,6 +27,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -543,6 +544,16 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) return !vma->vm_ops; } +static inline bool vma_is_foreign(struct vm_area_struct *vma) +{ + if (!current->mm) + return true; + + if (current->mm != vma->vm_mm) + return true; + + return false; +} #ifdef CONFIG_SHMEM /* * The vma_is_shmem is not inline because it is used only by slow -- cgit v1.2.3 From 222100eed264ba9d640af9977bffb6fcb6f11ea3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 1 Apr 2020 21:07:52 -0700 Subject: mm/vma: make is_vma_temporary_stack() available for general use Currently the declaration and definition for is_vma_temporary_stack() are scattered. Lets make is_vma_temporary_stack() helper available for general use and also drop the declaration from (include/linux/huge_mm.h) which is no longer required. While at this, rename this as vma_is_temporary_stack() in line with existing helpers. This should not cause any functional change. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Ingo Molnar Cc: Michael Ellerman Cc: Paul Mackerras Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1582782965-3274-4-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 4 +--- include/linux/mm.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 5aca3d1bdb32..31c39f4a5518 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -87,8 +87,6 @@ extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) -extern bool is_vma_temporary_stack(struct vm_area_struct *vma); - extern unsigned long transparent_hugepage_flags; /* @@ -100,7 +98,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) if (vma->vm_flags & VM_NOHUGEPAGE) return false; - if (is_vma_temporary_stack(vma)) + if (vma_is_temporary_stack(vma)) return false; if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) diff --git a/include/linux/mm.h b/include/linux/mm.h index 083e66f80709..12d194ec6422 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -544,6 +544,20 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) return !vma->vm_ops; } +static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) +{ + int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); + + if (!maybe_stack) + return false; + + if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == + VM_STACK_INCOMPLETE_SETUP) + return true; + + return false; +} + static inline bool vma_is_foreign(struct vm_area_struct *vma) { if (!current->mm) -- cgit v1.2.3 From 767e5ee54ed75a1a89d92c872d82f3fe72c15650 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 1 Apr 2020 21:07:55 -0700 Subject: mm: add pagemap.h to the fine documentation The documentation currently does not include the deathless prose written to describe functions in pagemap.h because it's not included in any rst file. Fix up the mismatches between parameter names and the documentation and add the file to mm-api. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: Zi Yan Reviewed-by: John Hubbard Cc: Jonathan Corbet Link: http://lkml.kernel.org/r/20200221220045.24989-1-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b82eabf0268e..f56282491a48 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -33,8 +33,8 @@ enum mapping_flags { /** * mapping_set_error - record a writeback error in the address_space - * @mapping - the mapping in which an error should be set - * @error - the error to set in the mapping + * @mapping: the mapping in which an error should be set + * @error: the error to set in the mapping * * When writeback fails in some way, we must record that error so that * userspace can be informed when fsync and the like are called. We endeavor @@ -303,9 +303,9 @@ static inline struct page *find_lock_page(struct address_space *mapping, * atomic allocation! */ static inline struct page *find_or_create_page(struct address_space *mapping, - pgoff_t offset, gfp_t gfp_mask) + pgoff_t index, gfp_t gfp_mask) { - return pagecache_get_page(mapping, offset, + return pagecache_get_page(mapping, index, FGP_LOCK|FGP_ACCESSED|FGP_CREAT, gfp_mask); } -- cgit v1.2.3 From 4ef873226ceb9c7bf11a922caddc5698a24bcfaf Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 1 Apr 2020 21:08:06 -0700 Subject: mm: introduce fault_signal_pending() For most architectures, we've got a quick path to detect fatal signal after a handle_mm_fault(). Introduce a helper for that quick path. It cleans the current codes a bit so we don't need to duplicate the same check across archs. More importantly, this will be an unified place that we handle the signal immediately right after an interrupted page fault, so it'll be much easier for us if we want to change the behavior of handling signals later on for all the archs. Note that currently only part of the archs are using this new helper, because some archs have their own way to handle signals. In the follow up patches, we'll try to apply this helper to all the rest of archs. Another note is that the "regs" parameter in the new helper is not used yet. It'll be used very soon. Now we kept it in this patch only to avoid touching all the archs again in the follow up patches. [peterx@redhat.com: fix sparse warnings] Link: http://lkml.kernel.org/r/20200311145921.GD479302@xz-x1 Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Tested-by: Brian Geffon Cc: Andrea Arcangeli Cc: Bobby Powers Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Jerome Glisse Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Matthew Wilcox Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Pavel Emelyanov Link: http://lkml.kernel.org/r/20200220155353.8676-4-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/sched/signal.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 88050259c466..7e7271374799 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -10,6 +10,8 @@ #include #include #include +#include +#include /* * Types defining task->signal and task->sighand and APIs using them: @@ -369,6 +371,19 @@ static inline int signal_pending_state(long state, struct task_struct *p) return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } +/* + * This should only be used in fault handlers to decide whether we + * should stop the current fault routine to handle the signals + * instead, especially with the case where we've got interrupted with + * a VM_FAULT_RETRY. + */ +static inline bool fault_signal_pending(vm_fault_t fault_flags, + struct pt_regs *regs) +{ + return unlikely((fault_flags & VM_FAULT_RETRY) && + fatal_signal_pending(current)); +} + /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. -- cgit v1.2.3 From 8b9a65fd282c1d2e5b8ba8d8afaf652cde27b5e7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 1 Apr 2020 21:08:29 -0700 Subject: mm: return faster for non-fatal signals in user mode faults The idea comes from the upstream discussion between Linus and Andrea: https://lore.kernel.org/lkml/20171102193644.GB22686@redhat.com/ A summary to the issue: there was a special path in handle_userfault() in the past that we'll return a VM_FAULT_NOPAGE when we detected non-fatal signals when waiting for userfault handling. We did that by reacquiring the mmap_sem before returning. However that brings a risk in that the vmas might have changed when we retake the mmap_sem and even we could be holding an invalid vma structure. This patch is a preparation of removing that special path by allowing the page fault to return even faster if we were interrupted by a non-fatal signal during a user-mode page fault handling routine. Suggested-by: Linus Torvalds Suggested-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Tested-by: Brian Geffon Cc: Bobby Powers Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Jerome Glisse Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Matthew Wilcox Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Pavel Emelyanov Link: http://lkml.kernel.org/r/20200220160230.9598-1-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/sched/signal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7e7271374799..65ea429daaa2 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -381,7 +381,8 @@ static inline bool fault_signal_pending(vm_fault_t fault_flags, struct pt_regs *regs) { return unlikely((fault_flags & VM_FAULT_RETRY) && - fatal_signal_pending(current)); + (fatal_signal_pending(current) || + (user_mode(regs) && signal_pending(current)))); } /* -- cgit v1.2.3 From dde1607248328cdb7570e3a252e8fb76b3411d66 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 1 Apr 2020 21:08:37 -0700 Subject: mm: introduce FAULT_FLAG_DEFAULT Although there're tons of arch-specific page fault handlers, most of them are still sharing the same initial value of the page fault flags. Say, merely all of the page fault handlers would allow the fault to be retried, and they also allow the fault to respond to SIGKILL. Let's define a default value for the fault flags to replace those initial page fault flags that were copied over. With this, it'll be far easier to introduce new fault flag that can be used by all the architectures instead of touching all the archs. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Tested-by: Brian Geffon Reviewed-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Bobby Powers Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Jerome Glisse Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Matthew Wilcox Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Pavel Emelyanov Link: http://lkml.kernel.org/r/20200220160238.9694-1-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 12d194ec6422..60f1192ee06e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -391,6 +391,13 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ +/* + * The default fault flags that should be used by most of the + * arch-specific page fault handlers. + */ +#define FAULT_FLAG_DEFAULT (FAULT_FLAG_ALLOW_RETRY | \ + FAULT_FLAG_KILLABLE) + #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ -- cgit v1.2.3 From c270a7eedcf278304e05ebd2c96807487c97db61 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 1 Apr 2020 21:08:41 -0700 Subject: mm: introduce FAULT_FLAG_INTERRUPTIBLE handle_userfaultfd() is currently the only one place in the kernel page fault procedures that can respond to non-fatal userspace signals. It was trying to detect such an allowance by checking against USER & KILLABLE flags, which was "un-official". In this patch, we introduced a new flag (FAULT_FLAG_INTERRUPTIBLE) to show that the fault handler allows the fault procedure to respond even to non-fatal signals. Meanwhile, add this new flag to the default fault flags so that all the page fault handlers can benefit from the new flag. With that, replacing the userfault check to this one. Since the line is getting even longer, clean up the fault flags a bit too to ease TTY users. Although we've got a new flag and applied it, we shouldn't have any functional change with this patch so far. Suggested-by: Linus Torvalds Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Tested-by: Brian Geffon Reviewed-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Bobby Powers Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Jerome Glisse Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Matthew Wilcox Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Pavel Emelyanov Link: http://lkml.kernel.org/r/20200220195348.16302-1-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 60f1192ee06e..7eeabc37ec87 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -381,22 +381,38 @@ extern unsigned int kobjsize(const void *objp); */ extern pgprot_t protection_map[16]; -#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ -#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */ -#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */ -#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */ -#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */ -#define FAULT_FLAG_TRIED 0x20 /* Second try */ -#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ -#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ -#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ +/** + * Fault flag definitions. + * + * @FAULT_FLAG_WRITE: Fault was a write fault. + * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. + * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. + * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_sem and wait when retrying. + * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. + * @FAULT_FLAG_TRIED: The fault has been tried once. + * @FAULT_FLAG_USER: The fault originated in userspace. + * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. + * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. + * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. + */ +#define FAULT_FLAG_WRITE 0x01 +#define FAULT_FLAG_MKWRITE 0x02 +#define FAULT_FLAG_ALLOW_RETRY 0x04 +#define FAULT_FLAG_RETRY_NOWAIT 0x08 +#define FAULT_FLAG_KILLABLE 0x10 +#define FAULT_FLAG_TRIED 0x20 +#define FAULT_FLAG_USER 0x40 +#define FAULT_FLAG_REMOTE 0x80 +#define FAULT_FLAG_INSTRUCTION 0x100 +#define FAULT_FLAG_INTERRUPTIBLE 0x200 /* * The default fault flags that should be used by most of the * arch-specific page fault handlers. */ #define FAULT_FLAG_DEFAULT (FAULT_FLAG_ALLOW_RETRY | \ - FAULT_FLAG_KILLABLE) + FAULT_FLAG_KILLABLE | \ + FAULT_FLAG_INTERRUPTIBLE) #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ @@ -407,7 +423,8 @@ extern pgprot_t protection_map[16]; { FAULT_FLAG_TRIED, "TRIED" }, \ { FAULT_FLAG_USER, "USER" }, \ { FAULT_FLAG_REMOTE, "REMOTE" }, \ - { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" } + { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ + { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } /* * vm_fault is filled by the the pagefault handler and passed to the vma's -- cgit v1.2.3 From 4064b982706375025628094e51d11cf1a958a5d3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 1 Apr 2020 21:08:45 -0700 Subject: mm: allow VM_FAULT_RETRY for multiple times The idea comes from a discussion between Linus and Andrea [1]. Before this patch we only allow a page fault to retry once. We achieved this by clearing the FAULT_FLAG_ALLOW_RETRY flag when doing handle_mm_fault() the second time. This was majorly used to avoid unexpected starvation of the system by looping over forever to handle the page fault on a single page. However that should hardly happen, and after all for each code path to return a VM_FAULT_RETRY we'll first wait for a condition (during which time we should possibly yield the cpu) to happen before VM_FAULT_RETRY is really returned. This patch removes the restriction by keeping the FAULT_FLAG_ALLOW_RETRY flag when we receive VM_FAULT_RETRY. It means that the page fault handler now can retry the page fault for multiple times if necessary without the need to generate another page fault event. Meanwhile we still keep the FAULT_FLAG_TRIED flag so page fault handler can still identify whether a page fault is the first attempt or not. Then we'll have these combinations of fault flags (only considering ALLOW_RETRY flag and TRIED flag): - ALLOW_RETRY and !TRIED: this means the page fault allows to retry, and this is the first try - ALLOW_RETRY and TRIED: this means the page fault allows to retry, and this is not the first try - !ALLOW_RETRY and !TRIED: this means the page fault does not allow to retry at all - !ALLOW_RETRY and TRIED: this is forbidden and should never be used In existing code we have multiple places that has taken special care of the first condition above by checking against (fault_flags & FAULT_FLAG_ALLOW_RETRY). This patch introduces a simple helper to detect the first retry of a page fault by checking against both (fault_flags & FAULT_FLAG_ALLOW_RETRY) and !(fault_flag & FAULT_FLAG_TRIED) because now even the 2nd try will have the ALLOW_RETRY set, then use that helper in all existing special paths. One example is in __lock_page_or_retry(), now we'll drop the mmap_sem only in the first attempt of page fault and we'll keep it in follow up retries, so old locking behavior will be retained. This will be a nice enhancement for current code [2] at the same time a supporting material for the future userfaultfd-writeprotect work, since in that work there will always be an explicit userfault writeprotect retry for protected pages, and if that cannot resolve the page fault (e.g., when userfaultfd-writeprotect is used in conjunction with swapped pages) then we'll possibly need a 3rd retry of the page fault. It might also benefit other potential users who will have similar requirement like userfault write-protection. GUP code is not touched yet and will be covered in follow up patch. Please read the thread below for more information. [1] https://lore.kernel.org/lkml/20171102193644.GB22686@redhat.com/ [2] https://lore.kernel.org/lkml/20181230154648.GB9832@redhat.com/ Suggested-by: Linus Torvalds Suggested-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Tested-by: Brian Geffon Cc: Bobby Powers Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Jerome Glisse Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Matthew Wilcox Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Pavel Emelyanov Link: http://lkml.kernel.org/r/20200220160246.9790-1-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7eeabc37ec87..e8e1afab713f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -394,6 +394,25 @@ extern pgprot_t protection_map[16]; * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. + * + * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify + * whether we would allow page faults to retry by specifying these two + * fault flags correctly. Currently there can be three legal combinations: + * + * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and + * this is the first try + * + * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and + * we've already tried at least once + * + * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry + * + * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never + * be used. Note that page faults can be allowed to retry for multiple times, + * in which case we'll have an initial fault with flags (a) then later on + * continuous faults with flags (b). We should always try to detect pending + * signals before a retry to make sure the continuous page faults can still be + * interrupted if necessary. */ #define FAULT_FLAG_WRITE 0x01 #define FAULT_FLAG_MKWRITE 0x02 @@ -414,6 +433,24 @@ extern pgprot_t protection_map[16]; FAULT_FLAG_KILLABLE | \ FAULT_FLAG_INTERRUPTIBLE) +/** + * fault_flag_allow_retry_first - check ALLOW_RETRY the first time + * + * This is mostly used for places where we want to try to avoid taking + * the mmap_sem for too long a time when waiting for another condition + * to change, in which case we can try to be polite to release the + * mmap_sem in the first round to avoid potential starvation of other + * processes that would also want the mmap_sem. + * + * Return: true if the page fault allows retry and this is the first + * attempt of the fault handling; false otherwise. + */ +static inline bool fault_flag_allow_retry_first(unsigned int flags) +{ + return (flags & FAULT_FLAG_ALLOW_RETRY) && + (!(flags & FAULT_FLAG_TRIED)); +} + #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ -- cgit v1.2.3 From baceaf1c8b99080ae5274d7262df8b09fa981762 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Wed, 1 Apr 2020 21:09:10 -0700 Subject: mmap: remove inline of vm_unmapped_area Patch series "mm: mmap: add mmap trace point", v3. Create mmap trace file and add trace point of vm_unmapped_area(). This patch (of 2): In preparation for next patch remove inline of vm_unmapped_area and move code to mmap.c. There is no logical change. Also remove unmapped_area[_topdown] out of mm.h, there is no code calling to them. Signed-off-by: Jaewon Kim Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Cc: Matthew Wilcox (Oracle) Cc: Michel Lespinasse Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20200320055823.27089-2-jaewon31.kim@samsung.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index e8e1afab713f..937bf719c329 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2530,26 +2530,7 @@ struct vm_unmapped_area_info { unsigned long align_offset; }; -extern unsigned long unmapped_area(struct vm_unmapped_area_info *info); -extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info); - -/* - * Search for an unmapped address range. - * - * We are looking for a range that: - * - does not intersect with any VMA; - * - is contained within the [low_limit, high_limit) interval; - * - is at least the desired size. - * - satisfies (begin_addr & align_mask) == (align_offset & align_mask) - */ -static inline unsigned long -vm_unmapped_area(struct vm_unmapped_area_info *info) -{ - if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) - return unmapped_area_topdown(info); - else - return unmapped_area(info); -} +extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info); /* truncate.c */ extern void truncate_inode_pages(struct address_space *, loff_t); -- cgit v1.2.3 From e03d1f78341e8a16f6cb5be5dfcd37ddc31a6839 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Wed, 1 Apr 2020 21:09:27 -0700 Subject: mm/sparse: rename pfn_present() to pfn_in_present_section() After introducing mem sub section concept, pfn_present() loses its literal meaning, and will not be necessary a truth on partial populated mem section. Since all of the callers use it to judge an absent section, it is better to rename pfn_present() as pfn_in_present_section(). Signed-off-by: Pingfan Liu Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: Michael Ellerman [powerpc] Cc: Dan Williams Cc: Michal Hocko Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Leonardo Bras Cc: Nathan Fontenot Cc: Nathan Lynch Link: http://lkml.kernel.org/r/1581919110-29575-1-git-send-email-kernelfans@gmail.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4bca42eeb439..e84d448988b6 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1374,7 +1374,7 @@ static inline int pfn_valid(unsigned long pfn) } #endif -static inline int pfn_present(unsigned long pfn) +static inline int pfn_in_present_section(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; @@ -1411,7 +1411,7 @@ void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) -#define pfn_present pfn_valid +#define pfn_in_present_section pfn_valid #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ -- cgit v1.2.3 From 8cceeff48f23eede76de995df08cf665182ec8fb Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Wed, 1 Apr 2020 21:09:37 -0700 Subject: kasan: detect negative size in memory operation function Patch series "fix the missing underflow in memory operation function", v4. The patchset helps to produce a KASAN report when size is negative in memory operation functions. It is helpful for programmer to solve an undefined behavior issue. Patch 1 based on Dmitry's review and suggestion, patch 2 is a test in order to verify the patch 1. [1]https://bugzilla.kernel.org/show_bug.cgi?id=199341 [2]https://lore.kernel.org/linux-arm-kernel/20190927034338.15813-1-walter-zh.wu@mediatek.com/ This patch (of 2): KASAN missed detecting size is a negative number in memset(), memcpy(), and memmove(), it will cause out-of-bounds bug. So needs to be detected by KASAN. If size is a negative number, then it has a reason to be defined as out-of-bounds bug type. Casting negative numbers to size_t would indeed turn up as a large size_t and its value will be larger than ULONG_MAX/2, so that this can qualify as out-of-bounds. KASAN report is shown below: BUG: KASAN: out-of-bounds in kmalloc_memmove_invalid_size+0x70/0xa0 Read of size 18446744073709551608 at addr ffffff8069660904 by task cat/72 CPU: 2 PID: 72 Comm: cat Not tainted 5.4.0-rc1-next-20191004ajb-00001-gdb8af2f372b2-dirty #1 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x288 show_stack+0x14/0x20 dump_stack+0x10c/0x164 print_address_description.isra.9+0x68/0x378 __kasan_report+0x164/0x1a0 kasan_report+0xc/0x18 check_memory_region+0x174/0x1d0 memmove+0x34/0x88 kmalloc_memmove_invalid_size+0x70/0xa0 [1] https://bugzilla.kernel.org/show_bug.cgi?id=199341 [cai@lca.pw: fix -Wdeclaration-after-statement warn] Link: http://lkml.kernel.org/r/1583509030-27939-1-git-send-email-cai@lca.pw [peterz@infradead.org: fix objtool warning] Link: http://lkml.kernel.org/r/20200305095436.GV2596@hirez.programming.kicks-ass.net Reported-by: kernel test robot Reported-by: Dmitry Vyukov Suggested-by: Dmitry Vyukov Signed-off-by: Walter Wu Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Reviewed-by: Dmitry Vyukov Reviewed-by: Andrey Ryabinin Cc: Alexander Potapenko Link: http://lkml.kernel.org/r/20191112065302.7015-1-walter-zh.wu@mediatek.com Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5cde9e7c2664..31314ca7c635 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -190,7 +190,7 @@ void kasan_init_tags(void); void *kasan_reset_tag(const void *addr); -void kasan_report(unsigned long addr, size_t size, +bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); #else /* CONFIG_KASAN_SW_TAGS */ -- cgit v1.2.3 From 20ca87f22b82d5fbf1938e1668ac4f55d749fb8f Mon Sep 17 00:00:00 2001 From: Li Xinhai Date: Wed, 1 Apr 2020 21:10:52 -0700 Subject: mm/mempolicy: check hugepage migration is supported by arch in vma_migratable() vma_migratable() is called to check if pages in vma can be migrated before go ahead to further actions. Currently it is used in below code path: - task_numa_work - mbind - move_pages For hugetlb mapping, whether vma is migratable or not is determined by: - CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION - arch_hugetlb_migration_supported Issue: current code only checks for CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION alone, and no code should use it directly. (note that current code in vma_migratable don't cause failure or bug because unmap_and_move_huge_page() will catch unsupported hugepage and handle it properly) This patch checks the two factors by hugepage_migration_supported for impoving code logic and robustness. It will enable early bail out of hugepage migration procedure, but because currently all architecture supporting hugepage migration is able to support all page size, we would not see performance gain with this patch applied. vma_migratable() is moved to mm/mempolicy.c, because of the circular reference of mempolicy.h and hugetlb.h cause defining it as inline not feasible. Signed-off-by: Li Xinhai Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Cc: Anshuman Khandual Cc: Naoya Horiguchi Link: http://lkml.kernel.org/r/1579786179-30633-1-git-send-email-lixinhai.lxh@gmail.com Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5228c62af416..8165278c348a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -173,34 +173,7 @@ extern int mpol_parse_str(char *str, struct mempolicy **mpol); extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ -static inline bool vma_migratable(struct vm_area_struct *vma) -{ - if (vma->vm_flags & (VM_IO | VM_PFNMAP)) - return false; - - /* - * DAX device mappings require predictable access latency, so avoid - * incurring periodic faults. - */ - if (vma_is_dax(vma)) - return false; - -#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION - if (vma->vm_flags & VM_HUGETLB) - return false; -#endif - - /* - * Migration allocates pages in the highest zone. If we cannot - * do so then migration (at least from node to node) is not - * possible. - */ - if (vma->vm_file && - gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) - < policy_zone) - return false; - return true; -} +extern bool vma_migratable(struct vm_area_struct *vma); extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); -- cgit v1.2.3 From c0d0381ade79885c04a04c303284b040616b116e Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 1 Apr 2020 21:11:05 -0700 Subject: hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization Patch series "hugetlbfs: use i_mmap_rwsem for more synchronization", v2. While discussing the issue with huge_pte_offset [1], I remembered that there were more outstanding hugetlb races. These issues are: 1) For shared pmds, huge PTE pointers returned by huge_pte_alloc can become invalid via a call to huge_pmd_unshare by another thread. 2) hugetlbfs page faults can race with truncation causing invalid global reserve counts and state. A previous attempt was made to use i_mmap_rwsem in this manner as described at [2]. However, those patches were reverted starting with [3] due to locking issues. To effectively use i_mmap_rwsem to address the above issues it needs to be held (in read mode) during page fault processing. However, during fault processing we need to lock the page we will be adding. Lock ordering requires we take page lock before i_mmap_rwsem. Waiting until after taking the page lock is too late in the fault process for the synchronization we want to do. To address this lock ordering issue, the following patches change the lock ordering for hugetlb pages. This is not too invasive as hugetlbfs processing is done separate from core mm in many places. However, I don't really like this idea. Much ugliness is contained in the new routine hugetlb_page_mapping_lock_write() of patch 1. The only other way I can think of to address these issues is by catching all the races. After catching a race, cleanup, backout, retry ... etc, as needed. This can get really ugly, especially for huge page reservations. At one time, I started writing some of the reservation backout code for page faults and it got so ugly and complicated I went down the path of adding synchronization to avoid the races. Any other suggestions would be welcome. [1] https://lore.kernel.org/linux-mm/1582342427-230392-1-git-send-email-longpeng2@huawei.com/ [2] https://lore.kernel.org/linux-mm/20181222223013.22193-1-mike.kravetz@oracle.com/ [3] https://lore.kernel.org/linux-mm/20190103235452.29335-1-mike.kravetz@oracle.com [4] https://lore.kernel.org/linux-mm/1584028670.7365.182.camel@lca.pw/ [5] https://lore.kernel.org/lkml/20200312183142.108df9ac@canb.auug.org.au/ This patch (of 2): While looking at BUGs associated with invalid huge page map counts, it was discovered and observed that a huge pte pointer could become 'invalid' and point to another task's page table. Consider the following: A task takes a page fault on a shared hugetlbfs file and calls huge_pte_alloc to get a ptep. Suppose the returned ptep points to a shared pmd. Now, another task truncates the hugetlbfs file. As part of truncation, it unmaps everyone who has the file mapped. If the range being truncated is covered by a shared pmd, huge_pmd_unshare will be called. For all but the last user of the shared pmd, huge_pmd_unshare will clear the pud pointing to the pmd. If the task in the middle of the page fault is not the last user, the ptep returned by huge_pte_alloc now points to another task's page table or worse. This leads to bad things such as incorrect page map/reference counts or invalid memory references. To fix, expand the use of i_mmap_rwsem as follows: - i_mmap_rwsem is held in read mode whenever huge_pmd_share is called. huge_pmd_share is only called via huge_pte_alloc, so callers of huge_pte_alloc take i_mmap_rwsem before calling. In addition, callers of huge_pte_alloc continue to hold the semaphore until finished with the ptep. - i_mmap_rwsem is held in write mode whenever huge_pmd_unshare is called. One problem with this scheme is that it requires taking i_mmap_rwsem before taking the page lock during page faults. This is not the order specified in the rest of mm code. Handling of hugetlbfs pages is mostly isolated today. Therefore, we use this alternative locking order for PageHuge() pages. mapping->i_mmap_rwsem hugetlb_fault_mutex (hugetlbfs specific page fault mutex) page->flags PG_locked (lock_page) To help with lock ordering issues, hugetlb_page_mapping_lock_write() is introduced to write lock the i_mmap_rwsem associated with a page. In most cases it is easy to get address_space via vma->vm_file->f_mapping. However, in the case of migration or memory errors for anon pages we do not have an associated vma. A new routine _get_hugetlb_page_mapping() will use anon_vma to get address_space in these cases. Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Cc: Michal Hocko Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: "Aneesh Kumar K . V" Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Davidlohr Bueso Cc: Prakash Sangappa Link: http://lkml.kernel.org/r/20200316205756.146666-2-mike.kravetz@oracle.com Signed-off-by: Linus Torvalds --- include/linux/fs.h | 5 +++++ include/linux/hugetlb.h | 8 ++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3d69de600494..f81c822f4d89 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -526,6 +526,11 @@ static inline void i_mmap_lock_write(struct address_space *mapping) down_write(&mapping->i_mmap_rwsem); } +static inline int i_mmap_trylock_write(struct address_space *mapping) +{ + return down_write_trylock(&mapping->i_mmap_rwsem); +} + static inline void i_mmap_unlock_write(struct address_space *mapping) { up_write(&mapping->i_mmap_rwsem); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 1e897e4168ac..4ba379eae152 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -109,6 +109,8 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); +struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); + extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages; @@ -151,6 +153,12 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } +static inline struct address_space *hugetlb_page_mapping_lock_write( + struct page *hpage) +{ + return NULL; +} + static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { -- cgit v1.2.3 From cdc2fcfea79b9873bb63159f8ed973f4046018c8 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 1 Apr 2020 21:11:11 -0700 Subject: hugetlb_cgroup: add hugetlb_cgroup reservation counter These counters will track hugetlb reservations rather than hugetlb memory faulted in. This patch only adds the counter, following patches add the charging and uncharging of the counter. This is patch 1 of an 9 patch series. Problem: Currently tasks attempting to reserve more hugetlb memory than is available get a failure at mmap/shmget time. This is thanks to Hugetlbfs Reservations [1]. However, if a task attempts to reserve more hugetlb memory than its hugetlb_cgroup limit allows, the kernel will allow the mmap/shmget call, but will SIGBUS the task when it attempts to fault in the excess memory. We have users hitting their hugetlb_cgroup limits and thus we've been looking at this failure mode. We'd like to improve this behavior such that users violating the hugetlb_cgroup limits get an error on mmap/shmget time, rather than getting SIGBUS'd when they try to fault the excess memory in. This gives the user an opportunity to fallback more gracefully to non-hugetlbfs memory for example. The underlying problem is that today's hugetlb_cgroup accounting happens at hugetlb memory *fault* time, rather than at *reservation* time. Thus, enforcing the hugetlb_cgroup limit only happens at fault time, and the offending task gets SIGBUS'd. Proposed Solution: A new page counter named 'hugetlb.xMB.rsvd.[limit|usage|max_usage]_in_bytes'. This counter has slightly different semantics than 'hugetlb.xMB.[limit|usage|max_usage]_in_bytes': - While usage_in_bytes tracks all *faulted* hugetlb memory, rsvd.usage_in_bytes tracks all *reserved* hugetlb memory and hugetlb memory faulted in without a prior reservation. - If a task attempts to reserve more memory than limit_in_bytes allows, the kernel will allow it to do so. But if a task attempts to reserve more memory than rsvd.limit_in_bytes, the kernel will fail this reservation. This proposal is implemented in this patch series, with tests to verify functionality and show the usage. Alternatives considered: 1. A new cgroup, instead of only a new page_counter attached to the existing hugetlb_cgroup. Adding a new cgroup seemed like a lot of code duplication with hugetlb_cgroup. Keeping hugetlb related page counters under hugetlb_cgroup seemed cleaner as well. 2. Instead of adding a new counter, we considered adding a sysctl that modifies the behavior of hugetlb.xMB.[limit|usage]_in_bytes, to do accounting at reservation time rather than fault time. Adding a new page_counter seems better as userspace could, if it wants, choose to enforce different cgroups differently: one via limit_in_bytes, and another via rsvd.limit_in_bytes. This could be very useful if you're transitioning how hugetlb memory is partitioned on your system one cgroup at a time, for example. Also, someone may find usage for both limit_in_bytes and rsvd.limit_in_bytes concurrently, and this approach gives them the option to do so. Testing: - Added tests passing. - Used libhugetlbfs for regression testing. [1]: https://www.kernel.org/doc/html/latest/vm/hugetlbfs_reserv.html Signed-off-by: Mina Almasry Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: David Rientjes Cc: Shuah Khan Cc: Shakeel Butt Cc: Greg Thelen Cc: Sandipan Das Link: http://lkml.kernel.org/r/20200211213128.73302-1-almasrymina@google.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 4ba379eae152..83ae629c9beb 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -440,8 +440,8 @@ struct hstate { unsigned int surplus_huge_pages_node[MAX_NUMNODES]; #ifdef CONFIG_CGROUP_HUGETLB /* cgroup control files */ - struct cftype cgroup_files_dfl[5]; - struct cftype cgroup_files_legacy[5]; + struct cftype cgroup_files_dfl[7]; + struct cftype cgroup_files_legacy[9]; #endif char name[HSTATE_NAME_LEN]; }; -- cgit v1.2.3 From 1adc4d419aa282ed6d86f01935ce45d2215d8b8d Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 1 Apr 2020 21:11:15 -0700 Subject: hugetlb_cgroup: add interface for charge/uncharge hugetlb reservations Augments hugetlb_cgroup_charge_cgroup to be able to charge hugetlb usage or hugetlb reservation counter. Adds a new interface to uncharge a hugetlb_cgroup counter via hugetlb_cgroup_uncharge_counter. Integrates the counter with hugetlb_cgroup, via hugetlb_cgroup_init, hugetlb_cgroup_have_usage, and hugetlb_cgroup_css_offline. Signed-off-by: Mina Almasry Signed-off-by: Andrew Morton Acked-by: Mike Kravetz Acked-by: David Rientjes Cc: Greg Thelen Cc: Sandipan Das Cc: Shakeel Butt Cc: Shuah Khan Link: http://lkml.kernel.org/r/20200211213128.73302-2-almasrymina@google.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb_cgroup.h | 123 +++++++++++++++++++++++++++++++++++------ 1 file changed, 105 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 063962f6dfc6..5443f4548d52 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -20,32 +20,64 @@ struct hugetlb_cgroup; /* * Minimum page order trackable by hugetlb cgroup. - * At least 3 pages are necessary for all the tracking information. + * At least 4 pages are necessary for all the tracking information. + * The second tail page (hpage[2]) is the fault usage cgroup. + * The third tail page (hpage[3]) is the reservation usage cgroup. */ #define HUGETLB_CGROUP_MIN_ORDER 2 #ifdef CONFIG_CGROUP_HUGETLB -static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +static inline struct hugetlb_cgroup * +__hugetlb_cgroup_from_page(struct page *page, bool rsvd) { VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL; - return (struct hugetlb_cgroup *)page[2].private; + if (rsvd) + return (struct hugetlb_cgroup *)page[3].private; + else + return (struct hugetlb_cgroup *)page[2].private; +} + +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +{ + return __hugetlb_cgroup_from_page(page, false); } -static inline -int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) +static inline struct hugetlb_cgroup * +hugetlb_cgroup_from_page_rsvd(struct page *page) +{ + return __hugetlb_cgroup_from_page(page, true); +} + +static inline int __set_hugetlb_cgroup(struct page *page, + struct hugetlb_cgroup *h_cg, bool rsvd) { VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1; - page[2].private = (unsigned long)h_cg; + if (rsvd) + page[3].private = (unsigned long)h_cg; + else + page[2].private = (unsigned long)h_cg; return 0; } +static inline int set_hugetlb_cgroup(struct page *page, + struct hugetlb_cgroup *h_cg) +{ + return __set_hugetlb_cgroup(page, h_cg, false); +} + +static inline int set_hugetlb_cgroup_rsvd(struct page *page, + struct hugetlb_cgroup *h_cg) +{ + return __set_hugetlb_cgroup(page, h_cg, true); +} + static inline bool hugetlb_cgroup_disabled(void) { return !cgroup_subsys_enabled(hugetlb_cgrp_subsys); @@ -53,13 +85,27 @@ static inline bool hugetlb_cgroup_disabled(void) extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr); +extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup **ptr); extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct page *page); +extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg, + struct page *page); extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page); +extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, + struct page *page); + extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg); +extern void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg); +extern void hugetlb_cgroup_uncharge_counter(struct page_counter *p, + unsigned long nr_pages, + struct cgroup_subsys_state *css); + extern void hugetlb_cgroup_file_init(void) __init; extern void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage); @@ -70,8 +116,26 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) return NULL; } -static inline -int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) +static inline struct hugetlb_cgroup * +hugetlb_cgroup_from_page_resv(struct page *page) +{ + return NULL; +} + +static inline struct hugetlb_cgroup * +hugetlb_cgroup_from_page_rsvd(struct page *page) +{ + return NULL; +} + +static inline int set_hugetlb_cgroup(struct page *page, + struct hugetlb_cgroup *h_cg) +{ + return 0; +} + +static inline int set_hugetlb_cgroup_rsvd(struct page *page, + struct hugetlb_cgroup *h_cg) { return 0; } @@ -81,28 +145,51 @@ static inline bool hugetlb_cgroup_disabled(void) return true; } -static inline int -hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, - struct hugetlb_cgroup **ptr) +static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, + struct hugetlb_cgroup **ptr) { return 0; } -static inline void -hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, - struct hugetlb_cgroup *h_cg, - struct page *page) +static inline int hugetlb_cgroup_charge_cgroup_rsvd(int idx, + unsigned long nr_pages, + struct hugetlb_cgroup **ptr) +{ + return 0; +} + +static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg, + struct page *page) { } static inline void -hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page) +hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg, + struct page *page) +{ +} + +static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, + struct page *page) +{ +} + +static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx, + unsigned long nr_pages, + struct page *page) +{ +} +static inline void hugetlb_cgroup_uncharge_cgroup(int idx, + unsigned long nr_pages, + struct hugetlb_cgroup *h_cg) { } static inline void -hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, - struct hugetlb_cgroup *h_cg) +hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg) { } -- cgit v1.2.3 From e9fe92ae0cd28aac5cf6d3fb8442825c22fbd3a6 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 1 Apr 2020 21:11:21 -0700 Subject: hugetlb_cgroup: add reservation accounting for private mappings Normally the pointer to the cgroup to uncharge hangs off the struct page, and gets queried when it's time to free the page. With hugetlb_cgroup reservations, this is not possible. Because it's possible for a page to be reserved by one task and actually faulted in by another task. The best place to put the hugetlb_cgroup pointer to uncharge for reservations is in the resv_map. But, because the resv_map has different semantics for private and shared mappings, the code patch to charge/uncharge shared and private mappings is different. This patch implements charging and uncharging for private mappings. For private mappings, the counter to uncharge is in resv_map->reservation_counter. On initializing the resv_map this is set to NULL. On reservation of a region in private mapping, the tasks hugetlb_cgroup is charged and the hugetlb_cgroup is placed is resv_map->reservation_counter. On hugetlb_vm_op_close, we uncharge resv_map->reservation_counter. [akpm@linux-foundation.org: forward declare struct resv_map] Signed-off-by: Mina Almasry Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: David Rientjes Cc: Greg Thelen Cc: Sandipan Das Cc: Shakeel Butt Cc: Shuah Khan Link: http://lkml.kernel.org/r/20200211213128.73302-3-almasrymina@google.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 10 ++++++++++ include/linux/hugetlb_cgroup.h | 41 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 83ae629c9beb..8f8c0b915fbb 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -46,6 +46,16 @@ struct resv_map { long adds_in_progress; struct list_head region_cache; long region_cache_count; +#ifdef CONFIG_CGROUP_HUGETLB + /* + * On private mappings, the counter to uncharge reservations is stored + * here. If these fields are 0, then either the mapping is shared, or + * cgroup accounting is disabled for this resv_map. + */ + struct page_counter *reservation_counter; + unsigned long pages_per_hpage; + struct cgroup_subsys_state *css; +#endif }; extern struct resv_map *resv_map_alloc(void); void resv_map_release(struct kref *ref); diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 5443f4548d52..0699cd26e3ec 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -18,6 +18,8 @@ #include struct hugetlb_cgroup; +struct resv_map; + /* * Minimum page order trackable by hugetlb cgroup. * At least 4 pages are necessary for all the tracking information. @@ -27,6 +29,33 @@ struct hugetlb_cgroup; #define HUGETLB_CGROUP_MIN_ORDER 2 #ifdef CONFIG_CGROUP_HUGETLB +enum hugetlb_memory_event { + HUGETLB_MAX, + HUGETLB_NR_MEMORY_EVENTS, +}; + +struct hugetlb_cgroup { + struct cgroup_subsys_state css; + + /* + * the counter to account for hugepages from hugetlb. + */ + struct page_counter hugepage[HUGE_MAX_HSTATE]; + + /* + * the counter to account for hugepage reservations from hugetlb. + */ + struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE]; + + atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS]; + atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS]; + + /* Handle for "hugetlb.events" */ + struct cgroup_file events_file[HUGE_MAX_HSTATE]; + + /* Handle for "hugetlb.events.local" */ + struct cgroup_file events_local_file[HUGE_MAX_HSTATE]; +}; static inline struct hugetlb_cgroup * __hugetlb_cgroup_from_page(struct page *page, bool rsvd) @@ -102,9 +131,9 @@ extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg); extern void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg); -extern void hugetlb_cgroup_uncharge_counter(struct page_counter *p, - unsigned long nr_pages, - struct cgroup_subsys_state *css); +extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, + unsigned long start, + unsigned long end); extern void hugetlb_cgroup_file_init(void) __init; extern void hugetlb_cgroup_migrate(struct page *oldhpage, @@ -193,6 +222,12 @@ hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, { } +static inline void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, + unsigned long start, + unsigned long end) +{ +} + static inline void hugetlb_cgroup_file_init(void) { } -- cgit v1.2.3 From 075a61d07a8eca2fe980acd94105ed5d6429c55d Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 1 Apr 2020 21:11:28 -0700 Subject: hugetlb_cgroup: add accounting for shared mappings For shared mappings, the pointer to the hugetlb_cgroup to uncharge lives in the resv_map entries, in file_region->reservation_counter. After a call to region_chg, we charge the approprate hugetlb_cgroup, and if successful, we pass on the hugetlb_cgroup info to a follow up region_add call. When a file_region entry is added to the resv_map via region_add, we put the pointer to that cgroup in file_region->reservation_counter. If charging doesn't succeed, we report the error to the caller, so that the kernel fails the reservation. On region_del, which is when the hugetlb memory is unreserved, we also uncharge the file_region->reservation_counter. [akpm@linux-foundation.org: forward declare struct file_region] Signed-off-by: Mina Almasry Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Cc: David Rientjes Cc: Greg Thelen Cc: Mike Kravetz Cc: Sandipan Das Cc: Shakeel Butt Cc: Shuah Khan Link: http://lkml.kernel.org/r/20200211213128.73302-5-almasrymina@google.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 35 +++++++++++++++++++++++++++++++++++ include/linux/hugetlb_cgroup.h | 11 +++++++++++ 2 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8f8c0b915fbb..219962c9517e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -57,6 +57,41 @@ struct resv_map { struct cgroup_subsys_state *css; #endif }; + +/* + * Region tracking -- allows tracking of reservations and instantiated pages + * across the pages in a mapping. + * + * The region data structures are embedded into a resv_map and protected + * by a resv_map's lock. The set of regions within the resv_map represent + * reservations for huge pages, or huge pages that have already been + * instantiated within the map. The from and to elements are huge page + * indicies into the associated mapping. from indicates the starting index + * of the region. to represents the first index past the end of the region. + * + * For example, a file region structure with from == 0 and to == 4 represents + * four huge pages in a mapping. It is important to note that the to element + * represents the first element past the end of the region. This is used in + * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. + * + * Interval notation of the form [from, to) will be used to indicate that + * the endpoint from is inclusive and to is exclusive. + */ +struct file_region { + struct list_head link; + long from; + long to; +#ifdef CONFIG_CGROUP_HUGETLB + /* + * On shared mappings, each reserved region appears as a struct + * file_region in resv_map. These fields hold the info needed to + * uncharge each reservation. + */ + struct page_counter *reservation_counter; + struct cgroup_subsys_state *css; +#endif +}; + extern struct resv_map *resv_map_alloc(void); void resv_map_release(struct kref *ref); diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 0699cd26e3ec..2ad6e92f124a 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -19,6 +19,7 @@ struct hugetlb_cgroup; struct resv_map; +struct file_region; /* * Minimum page order trackable by hugetlb cgroup. @@ -135,11 +136,21 @@ extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, unsigned long end); +extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, + struct file_region *rg, + unsigned long nr_pages); + extern void hugetlb_cgroup_file_init(void) __init; extern void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage); #else +static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, + struct file_region *rg, + unsigned long nr_pages) +{ +} + static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) { return NULL; -- cgit v1.2.3 From bb297bb2de517e41199185021f043bbc5d75b377 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Apr 2020 21:11:54 -0700 Subject: mm/hugetlb: fix build failure with HUGETLB_PAGE but not HUGEBTLBFS When CONFIG_HUGETLB_PAGE is set but not CONFIG_HUGETLBFS, the following build failure is encoutered: In file included from arch/powerpc/mm/fault.c:33:0: include/linux/hugetlb.h: In function 'hstate_inode': include/linux/hugetlb.h:477:9: error: implicit declaration of function 'HUGETLBFS_SB' [-Werror=implicit-function-declaration] return HUGETLBFS_SB(i->i_sb)->hstate; ^ include/linux/hugetlb.h:477:30: error: invalid type argument of '->' (have 'int') return HUGETLBFS_SB(i->i_sb)->hstate; ^ Gate hstate_inode() with CONFIG_HUGETLBFS instead of CONFIG_HUGETLB_PAGE. Fixes: a137e1cc6d6e ("hugetlbfs: per mount huge page sizes") Reported-by: kbuild test robot Signed-off-by: Christophe Leroy Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Cc: Baoquan He Cc: Nishanth Aravamudan Cc: Nick Piggin Cc: Adam Litke Cc: Andi Kleen Link: http://lkml.kernel.org/r/7e8c3a3c9a587b9cd8a2f146df32a421b961f3a2.1584432148.git.christophe.leroy@c-s.fr Link: https://patchwork.ozlabs.org/patch/1255548/#2386036 Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 219962c9517e..5ea05879a0a9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -443,7 +443,10 @@ static inline bool is_file_hugepages(struct file *file) return is_file_shm_hugepages(file); } - +static inline struct hstate *hstate_inode(struct inode *i) +{ + return HUGETLBFS_SB(i->i_sb)->hstate; +} #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) false @@ -455,6 +458,10 @@ hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, return ERR_PTR(-ENOSYS); } +static inline struct hstate *hstate_inode(struct inode *i) +{ + return NULL; +} #endif /* !CONFIG_HUGETLBFS */ #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA @@ -525,11 +532,6 @@ extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) -static inline struct hstate *hstate_inode(struct inode *i) -{ - return HUGETLBFS_SB(i->i_sb)->hstate; -} - static inline struct hstate *hstate_file(struct file *f) { return hstate_inode(file_inode(f)); @@ -782,11 +784,6 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma) return NULL; } -static inline struct hstate *hstate_inode(struct inode *i) -{ - return NULL; -} - static inline struct hstate *page_hstate(struct page *page) { return NULL; -- cgit v1.2.3 From 77d6b9094819ba55353de0ef92957f3f54f2c36c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 1 Apr 2020 21:11:58 -0700 Subject: include/linux/huge_mm.h: check PageTail in hpage_nr_pages even when !THP It's even more important to check that we don't have a tail page when calling hpage_nr_pages() when THP are disabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: Christoph Hellwig Acked-by: Kirill A. Shutemov Cc: Aneesh Kumar K.V Cc: Pankaj Gupta Link: http://lkml.kernel.org/r/20200318140253.6141-4-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 31c39f4a5518..680a0d9a9721 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -287,7 +287,11 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) -#define hpage_nr_pages(x) 1 +static inline int hpage_nr_pages(struct page *page) +{ + VM_BUG_ON_PAGE(PageTail(page), page); + return 1; +} static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { -- cgit v1.2.3 From 6487a8019b3e9f9d79e5f6ad3ea49f9379209b7e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 30 Mar 2020 22:15:08 +0200 Subject: rtc: remove rtc_time_to_tm and rtc_tm_to_time There are no callers of the 32bit versions of rtc_time conversion functions, drop them. Link: https://lore.kernel.org/r/20200330201510.861217-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 23990bd29040..bba3db3f7efa 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -34,18 +34,6 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs) return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs); } -static inline void rtc_time_to_tm(unsigned long time, struct rtc_time *tm) -{ - rtc_time64_to_tm(time, tm); -} - -static inline int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time) -{ - *time = rtc_tm_to_time64(tm); - - return 0; -} - #include #include #include -- cgit v1.2.3 From 83153d9f36e24978c6211d246cb6f532bf54e5dc Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 25 Feb 2020 13:47:01 +0530 Subject: PCI: endpoint: Fix ->set_msix() to take BIR and offset as arguments commit 8963106eabdc ("PCI: endpoint: Add MSI-X interfaces") while adding support to raise MSI-X interrupts from endpoint didn't include BAR Indicator register (BIR) configuration and MSI-X table offset as arguments in pci_epc_set_msix(). This would result in endpoint controller register using random BAR indicator register, the memory for which might not be allocated by the endpoint function driver. Add BAR indicator register and MSI-X table offset as arguments in pci_epc_set_msix() and allocate space for MSI-X table and pending bit array (PBA) in pci-epf-test endpoint function driver. Fixes: 8963106eabdc ("PCI: endpoint: Add MSI-X interfaces") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi --- include/linux/pci-epc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 0d7e91bad91e..e0ed9d01f6e5 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -53,7 +53,8 @@ struct pci_epc_ops { phys_addr_t addr); int (*set_msi)(struct pci_epc *epc, u8 func_no, u8 interrupts); int (*get_msi)(struct pci_epc *epc, u8 func_no); - int (*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts); + int (*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts, + enum pci_barno, u32 offset); int (*get_msix)(struct pci_epc *epc, u8 func_no); int (*raise_irq)(struct pci_epc *epc, u8 func_no, enum pci_epc_irq_type type, u16 interrupt_num); @@ -180,7 +181,8 @@ void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, phys_addr_t phys_addr); int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts); int pci_epc_get_msi(struct pci_epc *epc, u8 func_no); -int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts); +int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts, + enum pci_barno, u32 offset); int pci_epc_get_msix(struct pci_epc *epc, u8 func_no); int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no, enum pci_epc_irq_type type, u16 interrupt_num); -- cgit v1.2.3 From 6f5e193bfb55963ce5f4f68cc927f371ddb0913b Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 25 Feb 2020 13:47:02 +0530 Subject: PCI: dwc: Fix dw_pcie_ep_raise_msix_irq() to get correct MSI-X table address commit beb4641a787d ("PCI: dwc: Add MSI-X callbacks handler"), in order to raise MSI-X interrupt, obtained MSIX table address from Base Address Register (BAR). However BAR only holds PCI address programmed by the host whereas the MSI-X table should be in the local memory. Store the MSI-X table address (virtual address) as part of ->set_bar() callback and use that to get the message address and message data here. Fixes: beb4641a787d ("PCI: dwc: Add MSI-X callbacks handler") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Lorenzo Pieralisi --- include/linux/pci-epf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 0c628e30c582..6644ff3b0702 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -94,10 +94,12 @@ struct pci_epf_driver { /** * struct pci_epf_bar - represents the BAR of EPF device * @phys_addr: physical address that should be mapped to the BAR + * @addr: virtual address corresponding to the @phys_addr * @size: the size of the address space present in BAR */ struct pci_epf_bar { dma_addr_t phys_addr; + void *addr; size_t size; enum pci_barno barno; int flags; @@ -134,6 +136,19 @@ struct pci_epf { struct mutex lock; }; +/** + * struct pci_epf_msix_tbl - represents the MSIX table entry structure + * @msg_addr: Writes to this address will trigger MSIX interrupt in host + * @msg_data: Data that should be written to @msg_addr to trigger MSIX interrupt + * @vector_ctrl: Identifies if the function is prohibited from sending a message + * using this MSIX table entry + */ +struct pci_epf_msix_tbl { + u64 msg_addr; + u32 msg_data; + u32 vector_ctrl; +}; + #define to_pci_epf(epf_dev) container_of((epf_dev), struct pci_epf, dev) #define pci_epf_register_driver(driver) \ -- cgit v1.2.3 From f605a263e0690177ecc180417eacf2b5507dd177 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 28 Feb 2020 11:34:52 -0500 Subject: dax, pmem: Add a dax operation zero_page_range Add a dax operation zero_page_range, to zero a page. This will also clear any known poison in the page being zeroed. As of now, zeroing of one page is allowed in a single call. There are no callers which are trying to zero more than a page in a single call. Once we grow the callers which zero more than a page in single call, we can add that support. Primary reason for not doing that yet is that this will add little complexity in dm implementation where a range might be spanning multiple underlying targets and one will have to split the range into multiple sub ranges and call zero_page_range() on individual targets. Suggested-by: Christoph Hellwig Signed-off-by: Vivek Goyal Reviewed-by: Pankaj Gupta Link: https://lore.kernel.org/r/20200228163456.1587-3-vgoyal@redhat.com Signed-off-by: Dan Williams --- include/linux/dax.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 328c2dbb4409..71735c430c05 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -34,6 +34,8 @@ struct dax_operations { /* copy_to_iter: required operation for fs-dax direct-i/o */ size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); + /* zero_page_range: required operation. Zero page range */ + int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); }; extern struct attribute_group dax_attribute_group; @@ -199,6 +201,8 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, + size_t nr_pages); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, -- cgit v1.2.3 From cdf6cdcd3b99a99ea9ecc1b05d1d040d5a69a134 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 28 Feb 2020 11:34:54 -0500 Subject: dm,dax: Add dax zero_page_range operation This patch adds support for dax zero_page_range operation to dm targets. Signed-off-by: Vivek Goyal Acked-by: Mike Snitzer Link: https://lore.kernel.org/r/20200228163456.1587-5-vgoyal@redhat.com Signed-off-by: Dan Williams --- include/linux/device-mapper.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 475668c69dbc..af48d9da3916 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -141,6 +141,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, + size_t nr_pages); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ -195,6 +197,7 @@ struct target_type { dm_dax_direct_access_fn direct_access; dm_dax_copy_iter_fn dax_copy_from_iter; dm_dax_copy_iter_fn dax_copy_to_iter; + dm_dax_zero_page_range_fn dax_zero_page_range; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3 From 4f3b4f161d7a070d2181dbcf7fbd97c7631d5c24 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 28 Feb 2020 11:34:56 -0500 Subject: dax,iomap: Add helper dax_iomap_zero() to zero a range Add a helper dax_ioamp_zero() to zero a range. This patch basically merges __dax_zero_page_range() and iomap_dax_zero(). Suggested-by: Christoph Hellwig Signed-off-by: Vivek Goyal Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200228163456.1587-7-vgoyal@redhat.com Signed-off-by: Dan Williams --- include/linux/dax.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 71735c430c05..d7af5d243f24 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -13,6 +13,7 @@ typedef unsigned long dax_entry_t; struct iomap_ops; +struct iomap; struct dax_device; struct dax_operations { /* @@ -214,20 +215,8 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); - -#ifdef CONFIG_FS_DAX -int __dax_zero_page_range(struct block_device *bdev, - struct dax_device *dax_dev, sector_t sector, - unsigned int offset, unsigned int length); -#else -static inline int __dax_zero_page_range(struct block_device *bdev, - struct dax_device *dax_dev, sector_t sector, - unsigned int offset, unsigned int length) -{ - return -ENXIO; -} -#endif - +int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, + struct iomap *iomap); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); -- cgit v1.2.3 From 2b729fe7f3e9478a21a336231daf35768e7cf37b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Apr 2020 11:32:13 -0400 Subject: Revert "cpuset: Make cpuset hotplug synchronous" This reverts commit a49e4629b5ed ("cpuset: Make cpuset hotplug synchronous") as it may deadlock with cpu hotplug path. Link: http://lkml.kernel.org/r/F0388D99-84D7-453B-9B6B-EEFF0E7BE4CC@lca.pw Signed-off-by: Tejun Heo Reported-by: Qian Cai Cc: Prateek Sood --- include/linux/cpuset.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index cede4cb98b78..04c20de66afc 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -54,6 +54,7 @@ extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); +extern void cpuset_wait_for_hotplug(void); extern void cpuset_read_lock(void); extern void cpuset_read_unlock(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); @@ -175,6 +176,8 @@ static inline void cpuset_update_active_cpus(void) partition_sched_domains(1, NULL, NULL); } +static inline void cpuset_wait_for_hotplug(void) { } + static inline void cpuset_read_lock(void) { } static inline void cpuset_read_unlock(void) { } -- cgit v1.2.3 From 8c5c660529209a0e324c1c1a35ce3f83d67a2aa5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 3 Apr 2020 07:33:20 -0700 Subject: nvme-fc: Revert "add module to ops template to allow module references" The original patch was to resolve the lldd being able to be unloaded while being used to talk to the boot device of the system. However, the end result of the original patch is that any driver unload while a nvme controller is live via the lldd is now being prohibited. Given the module reference, the module teardown routine can't be called, thus there's no way, other than manual actions to terminate the controllers. Fixes: 863fbae929c7 ("nvme_fc: add module to ops template to allow module references") Cc: # v5.4+ Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc-driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 6d0d70f3219c..10f81629b9ce 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -270,8 +270,6 @@ struct nvme_fc_remote_port { * * Host/Initiator Transport Entrypoints/Parameters: * - * @module: The LLDD module using the interface - * * @localport_delete: The LLDD initiates deletion of a localport via * nvme_fc_deregister_localport(). However, the teardown is * asynchronous. This routine is called upon the completion of the @@ -385,8 +383,6 @@ struct nvme_fc_remote_port { * Value is Mandatory. Allowed to be zero. */ struct nvme_fc_port_template { - struct module *module; - /* initiator-based functions */ void (*localport_delete)(struct nvme_fc_local_port *); void (*remoteport_delete)(struct nvme_fc_remote_port *); -- cgit v1.2.3 From ddfd9dcf270ce23ed1985b66fcfa163920e2e1b8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 3 Apr 2020 17:48:33 +0200 Subject: ACPI: PM: Add acpi_[un]register_wakeup_handler() Since commit fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") the SCI triggering without there being a wakeup cause recognized by the ACPI sleep code will no longer wakeup the system. This works as intended, but this is a problem for devices where the SCI is shared with another device which is also a wakeup source. In the past these, from the pov of the ACPI sleep code, spurious SCIs would still cause a wakeup so the wakeup from the device sharing the interrupt would actually wakeup the system. This now no longer works. This is a problem on e.g. Bay Trail-T and Cherry Trail devices where some peripherals (typically the XHCI controller) can signal a Power Management Event (PME) to the Power Management Controller (PMC) to wakeup the system, this uses the same interrupt as the SCI. These wakeups are handled through a special INT0002 ACPI device which checks for events in the GPE0a_STS for this and takes care of acking the PME so that the shared interrupt stops triggering. The change to the ACPI sleep code to ignore the spurious SCI, causes the system to no longer wakeup on these PME events. To make things worse this means that the INT0002 device driver interrupt handler will no longer run, causing the PME to not get cleared and resulting in the system hanging. Trying to wakeup the system after such a PME through e.g. the power button no longer works. Add an acpi_register_wakeup_handler() function which registers a handler to be called from acpi_s2idle_wake() and when the handler returns true, return true from acpi_s2idle_wake(). The INT0002 driver will use this mechanism to check the GPE0a_STS register from acpi_s2idle_wake() and to tell the system to wakeup if a PME is signaled in the register. Fixes: fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") Cc: 5.4+ # 5.4+ Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0f24d701fbdc..efac0f9c01a2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -488,6 +488,11 @@ void __init acpi_nvs_nosave_s3(void); void __init acpi_sleep_no_blacklist(void); #endif /* CONFIG_PM_SLEEP */ +int acpi_register_wakeup_handler( + int wake_irq, bool (*wakeup)(void *context), void *context); +void acpi_unregister_wakeup_handler( + bool (*wakeup)(void *context), void *context); + struct acpi_osc_context { char *uuid_str; /* UUID string */ int rev; -- cgit v1.2.3 From 70fbdfef4ba63eeef83b2c94eac9a5a9f913e442 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Apr 2020 11:34:35 -0700 Subject: sysfs: remove redundant __compat_only_sysfs_link_entry_to_kobj fn Commit 9255782f7061 ("sysfs: Wrap __compat_only_sysfs_link_entry_to_kobj function to change the symlink name") made this function a wrapper around a new non-underscored function, which is a bit odd. The normal naming convention is the other way around: the underscored function is the wrappee, and the non-underscored function is the wrapper. There's only one single user (well, two call-sites in that user) of the more limited double underscore version of this function, so just remove the oddly named wrapper entirely and just add the extra NULL argument to the user. I considered just doing that in the merge, but that tends to make history really hard to read. Link: https://lore.kernel.org/lkml/CAHk-=wgkkmNV5tMzQDmPAQuNJBuMcry--Jb+h8H1o4RA3kF7QQ@mail.gmail.com/ Cc: Sourabh Jain Cc: Michael Ellerman Signed-off-by: Linus Torvalds --- include/linux/sysfs.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fd0fcb4d4f4d..80bb865b3a33 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -297,9 +297,6 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct kobject *target, const char *link_name); void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, const char *link_name); -int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, - struct kobject *target_kobj, - const char *target_name); int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name, @@ -516,14 +513,6 @@ static inline void sysfs_remove_link_from_group(struct kobject *kobj, { } -static inline int __compat_only_sysfs_link_entry_to_kobj( - struct kobject *kobj, - struct kobject *target_kobj, - const char *target_name) -{ - return 0; -} - static inline int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, struct kobject *target_kobj, const char *target_name, -- cgit v1.2.3 From db1f00fb8ff793889e83f2e37e0c7bbb6fc9934e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sun, 5 Apr 2020 18:59:24 -0700 Subject: skbuff.h: Improve the checksum related comments Fixed the punctuation and some typos. Improved some sentences with minor changes. No change of semantics or code. Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Randy Dunlap Signed-off-by: Dexuan Cui Signed-off-by: David S. Miller --- include/linux/skbuff.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 28b1a2b4459e..3a2ac7072dbb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -47,8 +47,8 @@ * A. IP checksum related features * * Drivers advertise checksum offload capabilities in the features of a device. - * From the stack's point of view these are capabilities offered by the driver, - * a driver typically only advertises features that it is capable of offloading + * From the stack's point of view these are capabilities offered by the driver. + * A driver typically only advertises features that it is capable of offloading * to its device. * * The checksum related features are: @@ -63,7 +63,7 @@ * TCP or UDP packets over IPv4. These are specifically * unencapsulated packets of the form IPv4|TCP or * IPv4|UDP where the Protocol field in the IPv4 header - * is TCP or UDP. The IPv4 header may contain IP options + * is TCP or UDP. The IPv4 header may contain IP options. * This feature cannot be set in features for a device * with NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). @@ -79,13 +79,13 @@ * DEPRECATED (see below). * * NETIF_F_RXCSUM - Driver (device) performs receive checksum offload. - * This flag is used only used to disable the RX checksum + * This flag is only used to disable the RX checksum * feature for a device. The stack will accept receive * checksum indication in packets received on a device * regardless of whether NETIF_F_RXCSUM is set. * * B. Checksumming of received packets by device. Indication of checksum - * verification is in set skb->ip_summed. Possible values are: + * verification is set in skb->ip_summed. Possible values are: * * CHECKSUM_NONE: * @@ -115,16 +115,16 @@ * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet * and a device is able to verify the checksums for UDP (possibly zero), - * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to + * GRE (checksum flag is set) and TCP, skb->csum_level would be set to * two. If the device were only able to verify the UDP checksum and not - * GRE, either because it doesn't support GRE checksum of because GRE + * GRE, either because it doesn't support GRE checksum or because GRE * checksum is bad, skb->csum_level would be set to zero (TCP checksum is * not considered in this case). * * CHECKSUM_COMPLETE: * * This is the most generic way. The device supplied checksum of the _whole_ - * packet as seen by netif_rx() and fills out in skb->csum. Meaning, the + * packet as seen by netif_rx() and fills in skb->csum. This means the * hardware doesn't need to parse L3/L4 headers to implement this. * * Notes: @@ -153,8 +153,8 @@ * from skb->csum_start up to the end, and to record/write the checksum at * offset skb->csum_start + skb->csum_offset. A driver may verify that the * csum_start and csum_offset values are valid values given the length and - * offset of the packet, however they should not attempt to validate that the - * checksum refers to a legitimate transport layer checksum-- it is the + * offset of the packet, but it should not attempt to validate that the + * checksum refers to a legitimate transport layer checksum -- it is the * purview of the stack to validate that csum_start and csum_offset are set * correctly. * @@ -178,18 +178,18 @@ * * CHECKSUM_UNNECESSARY: * - * This has the same meaning on as CHECKSUM_NONE for checksum offload on + * This has the same meaning as CHECKSUM_NONE for checksum offload on * output. * * CHECKSUM_COMPLETE: * Not used in checksum output. If a driver observes a packet with this value - * set in skbuff, if should treat as CHECKSUM_NONE being set. + * set in skbuff, it should treat the packet as if CHECKSUM_NONE were set. * * D. Non-IP checksum (CRC) offloads * * NETIF_F_SCTP_CRC - This feature indicates that a device is capable of * offloading the SCTP CRC in a packet. To perform this offload the stack - * will set set csum_start and csum_offset accordingly, set ip_summed to + * will set csum_start and csum_offset accordingly, set ip_summed to * CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication in * the skbuff that the CHECKSUM_PARTIAL refers to CRC32c. * A driver that supports both IP checksum offload and SCTP CRC32c offload @@ -200,10 +200,10 @@ * NETIF_F_FCOE_CRC - This feature indicates that a device is capable of * offloading the FCOE CRC in a packet. To perform this offload the stack * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset - * accordingly. Note the there is no indication in the skbuff that the - * CHECKSUM_PARTIAL refers to an FCOE checksum, a driver that supports + * accordingly. Note that there is no indication in the skbuff that the + * CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports * both IP checksum offload and FCOE CRC offload must verify which offload - * is configured for a packet presumably by inspecting packet headers. + * is configured for a packet, presumably by inspecting packet headers. * * E. Checksumming on output with GSO. * @@ -211,9 +211,9 @@ * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the * gso_type is SKB_GSO_TCPV4 or SKB_GSO_TCPV6, TCP checksum offload as * part of the GSO operation is implied. If a checksum is being offloaded - * with GSO then ip_summed is CHECKSUM_PARTIAL, csum_start and csum_offset - * are set to refer to the outermost checksum being offload (two offloaded - * checksums are possible with UDP encapsulation). + * with GSO then ip_summed is CHECKSUM_PARTIAL, and both csum_start and + * csum_offset are set to refer to the outermost checksum being offloaded + * (two offloaded checksums are possible with UDP encapsulation). */ /* Don't change this without changing skb_csum_unnecessary! */ -- cgit v1.2.3 From 93ce4af774bc3d8a72ce2271d03241c96383629d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Apr 2020 13:39:29 -0400 Subject: NFS: Clean up process of marking inode stale. Instead of the various open coded calls to set the NFS_INO_STALE bit and call nfs_zap_caches(), consolidate them into a single function nfs_set_inode_stale(). Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5d5b91e54f73..73eda45f1cfd 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -354,6 +354,7 @@ static inline unsigned long nfs_save_change_attribute(struct inode *dir) extern int nfs_sync_mapping(struct address_space *mapping); extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping); extern void nfs_zap_caches(struct inode *); +extern void nfs_set_inode_stale(struct inode *inode); extern void nfs_invalidate_atime(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *); -- cgit v1.2.3 From c7e4ea68c1626cceb966323a4b572e2f8d805138 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 21 Mar 2020 17:01:53 +0100 Subject: leds: old enums are not really applicable to new code Warn about old defines that probably should not be used. Signed-off-by: Pavel Machek --- include/linux/leds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 75353e5f9d13..2451962d1ec5 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -25,6 +25,7 @@ struct device_node; * LED Core */ +/* This is obsolete/useless. We now support variable maximum brightness. */ enum led_brightness { LED_OFF = 0, LED_ON = 1, -- cgit v1.2.3 From 3f5b9959041e0db6dacbea80bb833bff5900999f Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Fri, 3 Apr 2020 22:51:33 +0200 Subject: thermal: devfreq_cooling: inline all stubs for CONFIG_DEVFREQ_THERMAL=n When CONFIG_DEVFREQ_THERMAL is disabled all functions except of_devfreq_cooling_register_power() were already inlined. Also inline the last function to avoid compile errors when multiple drivers call of_devfreq_cooling_register_power() when CONFIG_DEVFREQ_THERMAL is not set. Compilation failed with the following message: multiple definition of `of_devfreq_cooling_register_power' (which then lists all usages of of_devfreq_cooling_register_power()) Thomas Zimmermann reported this problem [0] on a kernel config with CONFIG_DRM_LIMA={m,y}, CONFIG_DRM_PANFROST={m,y} and CONFIG_DEVFREQ_THERMAL=n after both, the lima and panfrost drivers gained devfreq cooling support. [0] https://www.spinics.net/lists/dri-devel/msg252825.html Fixes: a76caf55e5b356 ("thermal: Add devfreq cooling") Cc: stable@vger.kernel.org Reported-by: Thomas Zimmermann Signed-off-by: Martin Blumenstingl Tested-by: Thomas Zimmermann Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200403205133.1101808-1-martin.blumenstingl@googlemail.com --- include/linux/devfreq_cooling.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index 4635f95000a4..79a6e37a1d6f 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -75,7 +75,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); #else /* !CONFIG_DEVFREQ_THERMAL */ -struct thermal_cooling_device * +static inline struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power) { -- cgit v1.2.3 From 3122e80efc0faf4a2accba7a46c7ed795edbfded Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 6 Apr 2020 20:03:47 -0700 Subject: mm/vma: make vma_is_accessible() available for general use Lets move vma_is_accessible() helper to include/linux/mm.h which makes it available for general use. While here, this replaces all remaining open encodings for VMA access check with vma_is_accessible(). Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Geert Uytterhoeven Acked-by: Guo Ren Acked-by: Vlastimil Babka Cc: Guo Ren Cc: Geert Uytterhoeven Cc: Ralf Baechle Cc: Paul Burton Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Yoshinori Sato Cc: Rich Felker Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Steven Rostedt Cc: Mel Gorman Cc: Alexander Viro Cc: "Aneesh Kumar K.V" Cc: Arnaldo Carvalho de Melo Cc: Arnd Bergmann Cc: Nick Piggin Cc: Paul Mackerras Cc: Will Deacon Link: http://lkml.kernel.org/r/1582520593-30704-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7dd5c4ccbf85..be49e371e4b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -629,6 +629,12 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) return false; } + +static inline bool vma_is_accessible(struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC); +} + #ifdef CONFIG_SHMEM /* * The vma_is_shmem is not inline because it is used only by slow -- cgit v1.2.3 From 29fd1897070125ab49634524a20f146fb4240a51 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 6 Apr 2020 20:04:06 -0700 Subject: mm: make it clear that gfp reclaim modifiers are valid only for sleepable allocations While it might be really clear to MM developers that gfp reclaim modifiers are applicable only to sleepable allocations (those with __GFP_DIRECT_RECLAIM) it seems that actual users of the API are not always sure. Make it explicit that they are not applicable for GFP_NOWAIT or GFP_ATOMIC allocations which are the most commonly used non-sleepable allocation masks. Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Reviewed-by: Joel Fernandes (Google) Acked-by: Paul E. McKenney Acked-by: David Rientjes Cc: Neil Brown Link: http://lkml.kernel.org/r/20200403083543.11552-3-mhocko@kernel.org Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index be2754841369..4aba4c86c626 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -124,6 +124,8 @@ struct vm_area_struct; * * Reclaim modifiers * ~~~~~~~~~~~~~~~~~ + * Please note that all the following flags are only applicable to sleepable + * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). * * %__GFP_IO can start physical IO. * -- cgit v1.2.3 From dcdf11ee144133328664d90836e712d840d047d9 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 6 Apr 2020 20:04:25 -0700 Subject: mm, shmem: add vmstat for hugepage fallback The existing thp_fault_fallback indicates when thp attempts to allocate a hugepage but fails, or if the hugepage cannot be charged to the mem cgroup hierarchy. Extend this to shmem as well. Adds a new thp_file_fallback to complement thp_file_alloc that gets incremented when a hugepage is attempted to be allocated but fails, or if it cannot be charged to the mem cgroup hierarchy. Additionally, remove the check for CONFIG_TRANSPARENT_HUGE_PAGECACHE from shmem_alloc_hugepage() since it is only called with this configuration option. Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Reviewed-by: Yang Shi Acked-by: Kirill A. Shutemov Cc: Mike Rapoport Cc: Jeremy Cline Cc: Andrea Arcangeli Cc: Mike Kravetz Cc: Michal Hocko Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/alpine.DEB.2.21.2003061421240.7412@chino.kir.corp.google.com Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 47a3441cf4c4..41a4c7568748 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -76,6 +76,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_COLLAPSE_ALLOC, THP_COLLAPSE_ALLOC_FAILED, THP_FILE_ALLOC, + THP_FILE_FALLBACK, THP_FILE_MAPPED, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, @@ -115,6 +116,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define THP_FILE_ALLOC ({ BUILD_BUG(); 0; }) +#define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; }) #define THP_FILE_MAPPED ({ BUILD_BUG(); 0; }) #endif -- cgit v1.2.3 From 85b9f46e8ea451633ccd60a7d8cacbfff9f34047 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 6 Apr 2020 20:04:28 -0700 Subject: mm, thp: track fallbacks due to failed memcg charges separately The thp_fault_fallback and thp_file_fallback vmstats are incremented if either the hugepage allocation fails through the page allocator or the hugepage charge fails through mem cgroup. This patch leaves this field untouched but adds two new fields, thp_{fault,file}_fallback_charge, which is incremented only when the mem cgroup charge fails. This distinguishes between attempted hugepage allocations that fail due to fragmentation (or low memory conditions) and those that fail due to mem cgroup limits. That can be used to determine the impact of fragmentation on the system by excluding faults that failed due to memcg usage. Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Reviewed-by: Yang Shi Acked-by: Kirill A. Shutemov Cc: Mike Rapoport Cc: Jeremy Cline Cc: Andrea Arcangeli Cc: Mike Kravetz Cc: Michal Hocko Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/alpine.DEB.2.21.2003061422070.7412@chino.kir.corp.google.com Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 41a4c7568748..ffef0f279747 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -73,10 +73,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_TRANSPARENT_HUGEPAGE THP_FAULT_ALLOC, THP_FAULT_FALLBACK, + THP_FAULT_FALLBACK_CHARGE, THP_COLLAPSE_ALLOC, THP_COLLAPSE_ALLOC_FAILED, THP_FILE_ALLOC, THP_FILE_FALLBACK, + THP_FILE_FALLBACK_CHARGE, THP_FILE_MAPPED, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, @@ -117,6 +119,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define THP_FILE_ALLOC ({ BUILD_BUG(); 0; }) #define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; }) +#define THP_FILE_FALLBACK_CHARGE ({ BUILD_BUG(); 0; }) #define THP_FILE_MAPPED ({ BUILD_BUG(); 0; }) #endif -- cgit v1.2.3 From a0650604a707b1926cbc32feb2f006ebb74ef47b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Apr 2020 20:04:31 -0700 Subject: include/linux/pagemap.h: optimise find_subpage for !THP If THP is disabled, find_subpage() can become a no-op by using hpage_nr_pages() instead of compound_nr(). hpage_nr_pages() embeds a check for PageTail, so we can drop the check here. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: Christoph Hellwig Acked-by: Kirill A. Shutemov Cc: Aneesh Kumar K.V Cc: Pankaj Gupta Link: http://lkml.kernel.org/r/20200318140253.6141-5-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f56282491a48..a8f7bd8ea1c6 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -341,9 +341,7 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) if (PageHuge(head)) return head; - VM_BUG_ON_PAGE(PageTail(head), head); - - return head + (index & (compound_nr(head) - 1)); + return head + (index & (hpage_nr_pages(head) - 1)); } struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); -- cgit v1.2.3 From 396bcc5299c281e9cf1737ad0efcd97be9f83845 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Apr 2020 20:04:35 -0700 Subject: mm: remove CONFIG_TRANSPARENT_HUGE_PAGECACHE Commit e496cf3d7821 ("thp: introduce CONFIG_TRANSPARENT_HUGE_PAGECACHE") notes that it should be reverted when the PowerPC problem was fixed. The commit fixing the PowerPC problem (953c66c2b22a) did not revert the commit; instead setting CONFIG_TRANSPARENT_HUGE_PAGECACHE to the same as CONFIG_TRANSPARENT_HUGEPAGE. Checking with Kirill and Aneesh, this was an oversight, so remove the Kconfig symbol and undo the work of commit e496cf3d7821. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Acked-by: Kirill A. Shutemov Cc: Aneesh Kumar K.V Cc: Christoph Hellwig Cc: Pankaj Gupta Link: http://lkml.kernel.org/r/20200318140253.6141-6-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index d56fefef8905..7a35a6901221 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -78,6 +78,7 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(unsigned int type, bool frontswap, unsigned long *fs_pages_to_unuse); +extern bool shmem_huge_enabled(struct vm_area_struct *vma); extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); @@ -114,15 +115,6 @@ static inline bool shmem_file(struct file *file) extern bool shmem_charge(struct inode *inode, long pages); extern void shmem_uncharge(struct inode *inode, long pages); -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE -extern bool shmem_huge_enabled(struct vm_area_struct *vma); -#else -static inline bool shmem_huge_enabled(struct vm_area_struct *vma) -{ - return false; -} -#endif - #ifdef CONFIG_SHMEM extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, struct vm_area_struct *dst_vma, -- cgit v1.2.3 From 9de4f22a60f731943f050f4448bf2933ed3fa70b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 6 Apr 2020 20:04:41 -0700 Subject: mm: code cleanup for MADV_FREE Some comments for MADV_FREE is revised and added to help people understand the MADV_FREE code, especially the page flag, PG_swapbacked. This makes page_is_file_cache() isn't consistent with its comments. So the function is renamed to page_is_file_lru() to make them consistent again. All these are put in one patch as one logical change. Suggested-by: David Hildenbrand Suggested-by: Johannes Weiner Suggested-by: David Rientjes Signed-off-by: "Huang, Ying" Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: David Rientjes Acked-by: Michal Hocko Acked-by: Pankaj Gupta Acked-by: Vlastimil Babka Cc: Dave Hansen Cc: Mel Gorman Cc: Minchan Kim Cc: Hugh Dickins Cc: Rik van Riel Link: http://lkml.kernel.org/r/20200317100342.2730705-1-ying.huang@intel.com Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 15 ++++++++------- include/linux/page-flags.h | 5 +++++ 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 6f2fef7b0784..219bef41d87c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -6,19 +6,20 @@ #include /** - * page_is_file_cache - should the page be on a file LRU or anon LRU? + * page_is_file_lru - should the page be on a file LRU or anon LRU? * @page: the page to test * - * Returns 1 if @page is page cache page backed by a regular filesystem, - * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed. - * Used by functions that manipulate the LRU lists, to sort a page - * onto the right LRU list. + * Returns 1 if @page is a regular filesystem backed page cache page or a lazily + * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal + * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by + * functions that manipulate the LRU lists, to sort a page onto the right LRU + * list. * * We would like to get this info without a page flag, but the state * needs to survive until the page is last deleted from the LRU, which * could be as far down as __page_cache_release. */ -static inline int page_is_file_cache(struct page *page) +static inline int page_is_file_lru(struct page *page) { return !PageSwapBacked(page); } @@ -75,7 +76,7 @@ static __always_inline void del_page_from_lru_list(struct page *page, */ static inline enum lru_list page_lru_base_type(struct page *page) { - if (page_is_file_cache(page)) + if (page_is_file_lru(page)) return LRU_INACTIVE_FILE; return LRU_INACTIVE_ANON; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 77de28bfefb0..acf7988fd640 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -63,6 +63,11 @@ * page_waitqueue(page) is a wait queue of all tasks waiting for the page * to become unlocked. * + * PG_swapbacked is set when a page uses swap as a backing storage. This are + * usually PageAnon or shmem pages but please note that even anonymous pages + * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as + * a result of MADV_FREE). + * * PG_uptodate tells whether the page's contents is valid. When a read * completes, the page becomes uptodate, unless a disk I/O error happened. * -- cgit v1.2.3 From a2129f24798a993abde9b4bf8b3713b52d56c121 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 6 Apr 2020 20:04:45 -0700 Subject: mm: adjust shuffle code to allow for future coalescing Patch series "mm / virtio: Provide support for free page reporting", v17. This series provides an asynchronous means of reporting free guest pages to a hypervisor so that the memory associated with those pages can be dropped and reused by other processes and/or guests on the host. Using this it is possible to avoid unnecessary I/O to disk and greatly improve performance in the case of memory overcommit on the host. When enabled we will be performing a scan of free memory every 2 seconds while pages of sufficiently high order are being freed. In each pass at least one sixteenth of each free list will be reported. By doing this we avoid racing against other threads that may be causing a high amount of memory churn. The lowest page order currently scanned when reporting pages is pageblock_order so that this feature will not interfere with the use of Transparent Huge Pages in the case of virtualization. Currently this is only in use by virtio-balloon however there is the hope that at some point in the future other hypervisors might be able to make use of it. In the virtio-balloon/QEMU implementation the hypervisor is currently using MADV_DONTNEED to indicate to the host kernel that the page is currently free. It will be zeroed and faulted back into the guest the next time the page is accessed. To track if a page is reported or not the Uptodate flag was repurposed and used as a Reported flag for Buddy pages. We walk though the free list isolating pages and adding them to the scatterlist until we either encounter the end of the list or have processed at least one sixteenth of the pages that were listed in nr_free prior to us starting. If we fill the scatterlist before we reach the end of the list we rotate the list so that the first unreported page we encounter is moved to the head of the list as that is where we will resume after we have freed the reported pages back into the tail of the list. Below are the results from various benchmarks. I primarily focused on two tests. The first is the will-it-scale/page_fault2 test, and the other is a modified version of will-it-scale/page_fault1 that was enabled to use THP. I did this as it allows for better visibility into different parts of the memory subsystem. The guest is running with 32G for RAM on one node of a E5-2630 v3. The host has had some features such as CPU turbo disabled in the BIOS. Test page_fault1 (THP) page_fault2 Name tasks Process Iter STDEV Process Iter STDEV Baseline 1 1012402.50 0.14% 361855.25 0.81% 16 8827457.25 0.09% 3282347.00 0.34% Patches Applied 1 1007897.00 0.23% 361887.00 0.26% 16 8784741.75 0.39% 3240669.25 0.48% Patches Enabled 1 1010227.50 0.39% 359749.25 0.56% 16 8756219.00 0.24% 3226608.75 0.97% Patches Enabled 1 1050982.00 4.26% 357966.25 0.14% page shuffle 16 8672601.25 0.49% 3223177.75 0.40% Patches enabled 1 1003238.00 0.22% 360211.00 0.22% shuffle w/ RFC 16 8767010.50 0.32% 3199874.00 0.71% The results above are for a baseline with a linux-next-20191219 kernel, that kernel with this patch set applied but page reporting disabled in virtio-balloon, the patches applied and page reporting fully enabled, the patches enabled with page shuffling enabled, and the patches applied with page shuffling enabled and an RFC patch that makes used of MADV_FREE in QEMU. These results include the deviation seen between the average value reported here versus the high and/or low value. I observed that during the test memory usage for the first three tests never dropped whereas with the patches fully enabled the VM would drop to using only a few GB of the host's memory when switching from memhog to page fault tests. Any of the overhead visible with this patch set enabled seems due to page faults caused by accessing the reported pages and the host zeroing the page before giving it back to the guest. This overhead is much more visible when using THP than with standard 4K pages. In addition page shuffling seemed to increase the amount of faults generated due to an increase in memory churn. The overehad is reduced when using MADV_FREE as we can avoid the extra zeroing of the pages when they are reintroduced to the host, as can be seen when the RFC is applied with shuffling enabled. The overall guest size is kept fairly small to only a few GB while the test is running. If the host memory were oversubscribed this patch set should result in a performance improvement as swapping memory in the host can be avoided. A brief history on the background of free page reporting can be found at: https://lore.kernel.org/lkml/29f43d5796feed0dec8e8bb98b187d9dac03b900.camel@linux.intel.com/ This patch (of 9): Move the head/tail adding logic out of the shuffle code and into the __free_one_page function since ultimately that is where it is really needed anyway. By doing this we should be able to reduce the overhead and can consolidate all of the list addition bits in one spot. Signed-off-by: Alexander Duyck Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Acked-by: Mel Gorman Acked-by: David Hildenbrand Cc: Yang Zhang Cc: Pankaj Gupta Cc: Konrad Rzeszutek Wilk Cc: Nitesh Narayan Lal Cc: Rik van Riel Cc: Matthew Wilcox Cc: Luiz Capitulino Cc: Dave Hansen Cc: Wei Wang Cc: Andrea Arcangeli Cc: Paolo Bonzini Cc: Michal Hocko Cc: Vlastimil Babka Cc: Oscar Salvador Cc: Michael S. Tsirkin Cc: wei qi Link: http://lkml.kernel.org/r/20200211224602.29318.84523.stgit@localhost.localdomain Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e84d448988b6..c023d7968b14 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -116,18 +116,6 @@ static inline void add_to_free_area_tail(struct page *page, struct free_area *ar area->nr_free++; } -#ifdef CONFIG_SHUFFLE_PAGE_ALLOCATOR -/* Used to preserve page allocation order entropy */ -void add_to_free_area_random(struct page *page, struct free_area *area, - int migratetype); -#else -static inline void add_to_free_area_random(struct page *page, - struct free_area *area, int migratetype) -{ - add_to_free_area(page, area, migratetype); -} -#endif - /* Used for pages which are on another list */ static inline void move_to_free_area(struct page *page, struct free_area *area, int migratetype) -- cgit v1.2.3 From 6ab0136310961ebf4b5ecb565f0bf52c233dc093 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 6 Apr 2020 20:04:49 -0700 Subject: mm: use zone and order instead of free area in free_list manipulators In order to enable the use of the zone from the list manipulator functions I will need access to the zone pointer. As it turns out most of the accessors were always just being directly passed &zone->free_area[order] anyway so it would make sense to just fold that into the function itself and pass the zone and order as arguments instead of the free area. In order to be able to reference the zone we need to move the declaration of the functions down so that we have the zone defined before we define the list manipulation functions. Since the functions are only used in the file mm/page_alloc.c we can just move them there to reduce noise in the header. Signed-off-by: Alexander Duyck Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Reviewed-by: David Hildenbrand Reviewed-by: Pankaj Gupta Acked-by: Mel Gorman Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Konrad Rzeszutek Wilk Cc: Luiz Capitulino Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Nitesh Narayan Lal Cc: Oscar Salvador Cc: Paolo Bonzini Cc: Rik van Riel Cc: Vlastimil Babka Cc: Wei Wang Cc: Yang Zhang Cc: wei qi Link: http://lkml.kernel.org/r/20200211224613.29318.43080.stgit@localhost.localdomain Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c023d7968b14..42b77d3b68e8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -100,29 +100,6 @@ struct free_area { unsigned long nr_free; }; -/* Used for pages not on another list */ -static inline void add_to_free_area(struct page *page, struct free_area *area, - int migratetype) -{ - list_add(&page->lru, &area->free_list[migratetype]); - area->nr_free++; -} - -/* Used for pages not on another list */ -static inline void add_to_free_area_tail(struct page *page, struct free_area *area, - int migratetype) -{ - list_add_tail(&page->lru, &area->free_list[migratetype]); - area->nr_free++; -} - -/* Used for pages which are on another list */ -static inline void move_to_free_area(struct page *page, struct free_area *area, - int migratetype) -{ - list_move(&page->lru, &area->free_list[migratetype]); -} - static inline struct page *get_page_from_free_area(struct free_area *area, int migratetype) { @@ -130,15 +107,6 @@ static inline struct page *get_page_from_free_area(struct free_area *area, struct page, lru); } -static inline void del_page_from_free_area(struct page *page, - struct free_area *area) -{ - list_del(&page->lru); - __ClearPageBuddy(page); - set_page_private(page, 0); - area->nr_free--; -} - static inline bool free_area_empty(struct free_area *area, int migratetype) { return list_empty(&area->free_list[migratetype]); -- cgit v1.2.3 From 36e66c554b5c6a9d17a229faca7a61693527b0bd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 6 Apr 2020 20:04:56 -0700 Subject: mm: introduce Reported pages In order to pave the way for free page reporting in virtualized environments we will need a way to get pages out of the free lists and identify those pages after they have been returned. To accomplish this, this patch adds the concept of a Reported Buddy, which is essentially meant to just be the Uptodate flag used in conjunction with the Buddy page type. To prevent the reported pages from leaking outside of the buddy lists I added a check to clear the PageReported bit in the del_page_from_free_list function. As a result any reported page that is split, merged, or allocated will have the flag cleared prior to the PageBuddy value being cleared. The process for reporting pages is fairly simple. Once we free a page that meets the minimum order for page reporting we will schedule a worker thread to start 2s or more in the future. That worker thread will begin working from the lowest supported page reporting order up to MAX_ORDER - 1 pulling unreported pages from the free list and storing them in the scatterlist. When processing each individual free list it is necessary for the worker thread to release the zone lock when it needs to stop and report the full scatterlist of pages. To reduce the work of the next iteration the worker thread will rotate the free list so that the first unreported page in the free list becomes the first entry in the list. It will then call a reporting function providing information on how many entries are in the scatterlist. Once the function completes it will return the pages to the free area from which they were allocated and start over pulling more pages from the free areas until there are no longer enough pages to report on to keep the worker busy, or we have processed as many pages as were contained in the free area when we started processing the list. The worker thread will work in a round-robin fashion making its way though each zone requesting reporting, and through each reportable free list within that zone. Once all free areas within the zone have been processed it will check to see if there have been any requests for reporting while it was processing. If so it will reschedule the worker thread to start up again in roughly 2s and exit. Signed-off-by: Alexander Duyck Signed-off-by: Andrew Morton Acked-by: Mel Gorman Cc: Andrea Arcangeli Cc: Dan Williams Cc: Dave Hansen Cc: David Hildenbrand Cc: Konrad Rzeszutek Wilk Cc: Luiz Capitulino Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Nitesh Narayan Lal Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Paolo Bonzini Cc: Rik van Riel Cc: Vlastimil Babka Cc: Wei Wang Cc: Yang Zhang Cc: wei qi Link: http://lkml.kernel.org/r/20200211224635.29318.19750.stgit@localhost.localdomain Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 11 +++++++++++ include/linux/page_reporting.h | 25 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 include/linux/page_reporting.h (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index acf7988fd640..222f6f7b2bb3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -168,6 +168,9 @@ enum pageflags { /* non-lru isolated movable page */ PG_isolated = PG_reclaim, + + /* Only valid for buddy pages. Used to track pages that are reported */ + PG_reported = PG_uptodate, }; #ifndef __GENERATING_BOUNDS_H @@ -436,6 +439,14 @@ TESTCLEARFLAG(Young, young, PF_ANY) PAGEFLAG(Idle, idle, PF_ANY) #endif +/* + * PageReported() is used to track reported free pages within the Buddy + * allocator. We can use the non-atomic version of the test and set + * operations as both should be shielded with the zone lock to prevent + * any possible races on the setting or clearing of the bit. + */ +__PAGEFLAG(Reported, reported, PF_NO_COMPOUND) + /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h new file mode 100644 index 000000000000..32355486f572 --- /dev/null +++ b/include/linux/page_reporting.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PAGE_REPORTING_H +#define _LINUX_PAGE_REPORTING_H + +#include +#include + +#define PAGE_REPORTING_CAPACITY 32 + +struct page_reporting_dev_info { + /* function that alters pages to make them "reported" */ + int (*report)(struct page_reporting_dev_info *prdev, + struct scatterlist *sg, unsigned int nents); + + /* work struct for processing reports */ + struct delayed_work work; + + /* Current state of page reporting */ + atomic_t state; +}; + +/* Tear-down and bring-up for page reporting devices */ +void page_reporting_unregister(struct page_reporting_dev_info *prdev); +int page_reporting_register(struct page_reporting_dev_info *prdev); +#endif /*_LINUX_PAGE_REPORTING_H */ -- cgit v1.2.3 From 43b76f298f023d32273c2b9c25dd83ae02711019 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 6 Apr 2020 20:05:14 -0700 Subject: mm/page_reporting: add budget limit on how many pages can be reported per pass In order to keep ourselves from reporting pages that are just going to be reused again in the case of heavy churn we can put a limit on how many total pages we will process per pass. Doing this will allow the worker thread to go into idle much more quickly so that we avoid competing with other threads that might be allocating or freeing pages. The logic added here will limit the worker thread to no more than one sixteenth of the total free pages in a given area per list. Once that limit is reached it will update the state so that at the end of the pass we will reschedule the worker to try again in 2 seconds when the memory churn has hopefully settled down. Again this optimization doesn't show much of a benefit in the standard case as the memory churn is minmal. However with page allocator shuffling enabled the gain is quite noticeable. Below are the results with a THP enabled version of the will-it-scale page_fault1 test showing the improvement in iterations for 16 processes or threads. Without: tasks processes processes_idle threads threads_idle 16 8283274.75 0.17 5594261.00 38.15 With: tasks processes processes_idle threads threads_idle 16 8767010.50 0.21 5791312.75 36.98 Signed-off-by: Alexander Duyck Signed-off-by: Andrew Morton Acked-by: Mel Gorman Cc: Andrea Arcangeli Cc: Dan Williams Cc: Dave Hansen Cc: David Hildenbrand Cc: Konrad Rzeszutek Wilk Cc: Luiz Capitulino Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Nitesh Narayan Lal Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Paolo Bonzini Cc: Rik van Riel Cc: Vlastimil Babka Cc: Wei Wang Cc: Yang Zhang Cc: wei qi Link: http://lkml.kernel.org/r/20200211224719.29318.72113.stgit@localhost.localdomain Signed-off-by: Linus Torvalds --- include/linux/page_reporting.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h index 32355486f572..3b99e0ec24f2 100644 --- a/include/linux/page_reporting.h +++ b/include/linux/page_reporting.h @@ -5,6 +5,7 @@ #include #include +/* This value should always be a power of 2, see page_reporting_cycle() */ #define PAGE_REPORTING_CAPACITY 32 struct page_reporting_dev_info { -- cgit v1.2.3 From 1df319e0b4dee11436fe2ab1a0d536d3fad7cfef Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 6 Apr 2020 20:05:25 -0700 Subject: userfaultfd: wp: add helper for writeprotect check Patch series "userfaultfd: write protection support", v6. Overview ======== The uffd-wp work was initialized by Shaohua Li [1], and later continued by Andrea [2]. This series is based upon Andrea's latest userfaultfd tree, and it is a continuous works from both Shaohua and Andrea. Many of the follow up ideas come from Andrea too. Besides the old MISSING register mode of userfaultfd, the new uffd-wp support provides another alternative register mode called UFFDIO_REGISTER_MODE_WP that can be used to listen to not only missing page faults but also write protection page faults, or even they can be registered together. At the same time, the new feature also provides a new userfaultfd ioctl called UFFDIO_WRITEPROTECT which allows the userspace to write protect a range or memory or fixup write permission of faulted pages. Please refer to the document patch "userfaultfd: wp: UFFDIO_REGISTER_MODE_WP documentation update" for more information on the new interface and what it can do. The major workflow of an uffd-wp program should be: 1. Register a memory region with WP mode using UFFDIO_REGISTER_MODE_WP 2. Write protect part of the whole registered region using UFFDIO_WRITEPROTECT, passing in UFFDIO_WRITEPROTECT_MODE_WP to show that we want to write protect the range. 3. Start a working thread that modifies the protected pages, meanwhile listening to UFFD messages. 4. When a write is detected upon the protected range, page fault happens, a UFFD message will be generated and reported to the page fault handling thread 5. The page fault handler thread resolves the page fault using the new UFFDIO_WRITEPROTECT ioctl, but this time passing in !UFFDIO_WRITEPROTECT_MODE_WP instead showing that we want to recover the write permission. Before this operation, the fault handler thread can do anything it wants, e.g., dumps the page to a persistent storage. 6. The worker thread will continue running with the correctly applied write permission from step 5. Currently there are already two projects that are based on this new userfaultfd feature. QEMU Live Snapshot: The project provides a way to allow the QEMU hypervisor to take snapshot of VMs without stopping the VM [3]. LLNL umap library: The project provides a mmap-like interface and "allow to have an application specific buffer of pages cached from a large file, i.e. out-of-core execution using memory map" [4][5]. Before posting the patchset, this series was smoke tested against QEMU live snapshot and the LLNL umap library (by doing parallel quicksort using 128 sorting threads + 80 uffd servicing threads). My sincere thanks to Marty Mcfadden and Denis Plotnikov for the help along the way. TODO ==== - hugetlbfs/shmem support - performance - more architectures - cooperate with mprotect()-allowed processes (???) - ... References ========== [1] https://lwn.net/Articles/666187/ [2] https://git.kernel.org/pub/scm/linux/kernel/git/andrea/aa.git/log/?h=userfault [3] https://github.com/denis-plotnikov/qemu/commits/background-snapshot-kvm [4] https://github.com/LLNL/umap [5] https://llnl-umap.readthedocs.io/en/develop/ [6] https://git.kernel.org/pub/scm/linux/kernel/git/andrea/aa.git/commit/?h=userfault&id=b245ecf6cf59156966f3da6e6b674f6695a5ffa5 [7] https://lkml.org/lkml/2018/11/21/370 [8] https://lkml.org/lkml/2018/12/30/64 This patch (of 19): Add helper for writeprotect check. Will use it later. Signed-off-by: Shaohua Li Signed-off-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Reviewed-by: Mike Rapoport Cc: Rik van Riel Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Hugh Dickins Cc: Johannes Weiner Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mike Kravetz Cc: Pavel Emelyanov Link: http://lkml.kernel.org/r/20200220163112.11409-2-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index ac9d71e24b81..5dc247af0f2e 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -52,6 +52,11 @@ static inline bool userfaultfd_missing(struct vm_area_struct *vma) return vma->vm_flags & VM_UFFD_MISSING; } +static inline bool userfaultfd_wp(struct vm_area_struct *vma) +{ + return vma->vm_flags & VM_UFFD_WP; +} + static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP); @@ -96,6 +101,11 @@ static inline bool userfaultfd_missing(struct vm_area_struct *vma) return false; } +static inline bool userfaultfd_wp(struct vm_area_struct *vma) +{ + return false; +} + static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return false; -- cgit v1.2.3 From 55adf4de30346e0ec6b988cb9b885a5dddc954af Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 6 Apr 2020 20:05:37 -0700 Subject: userfaultfd: wp: userfaultfd_pte/huge_pmd_wp() helpers Implement helpers methods to invoke userfaultfd wp faults more selectively: not only when a wp fault triggers on a vma with vma->vm_flags VM_UFFD_WP set, but only if the _PAGE_UFFD_WP bit is set in the pagetable too. Signed-off-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Reviewed-by: Mike Rapoport Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-5-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 5dc247af0f2e..7b91b76aac58 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -14,6 +14,8 @@ #include /* linux/include/uapi/linux/userfaultfd.h */ #include +#include +#include /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining @@ -57,6 +59,18 @@ static inline bool userfaultfd_wp(struct vm_area_struct *vma) return vma->vm_flags & VM_UFFD_WP; } +static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, + pte_t pte) +{ + return userfaultfd_wp(vma) && pte_uffd_wp(pte); +} + +static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, + pmd_t pmd) +{ + return userfaultfd_wp(vma) && pmd_uffd_wp(pmd); +} + static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP); @@ -106,6 +120,19 @@ static inline bool userfaultfd_wp(struct vm_area_struct *vma) return false; } +static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, + pte_t pte) +{ + return false; +} + +static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, + pmd_t pmd) +{ + return false; +} + + static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return false; -- cgit v1.2.3 From 72981e0e7b609c741d7764cc920c8fec00920bd5 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 6 Apr 2020 20:05:41 -0700 Subject: userfaultfd: wp: add UFFDIO_COPY_MODE_WP This allows UFFDIO_COPY to map pages write-protected. [peterx@redhat.com: switch to VM_WARN_ON_ONCE in mfill_atomic_pte; add brackets around "dst_vma->vm_flags & VM_WRITE"; fix wordings in comments and commit messages] Signed-off-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Reviewed-by: Mike Rapoport Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-6-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 7b91b76aac58..dcd33172b728 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -36,7 +36,7 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, - bool *mmap_changing); + bool *mmap_changing, __u64 mode); extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, -- cgit v1.2.3 From 58705444c45b3ca987b03bd9beb41bbbe41ae439 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 6 Apr 2020 20:05:45 -0700 Subject: mm: merge parameters for change_protection() change_protection() was used by either the NUMA or mprotect() code, there's one parameter for each of the callers (dirty_accountable and prot_numa). Further, these parameters are passed along the calls: - change_protection_range() - change_p4d_range() - change_pud_range() - change_pmd_range() - ... Now we introduce a flag for change_protect() and all these helpers to replace these parameters. Then we can avoid passing multiple parameters multiple times along the way. More importantly, it'll greatly simplify the work if we want to introduce any new parameters to change_protection(). In the follow up patches, a new parameter for userfaultfd write protection will be introduced. No functional change at all. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Cc: Andrea Arcangeli Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Pavel Emelyanov Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-7-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- include/linux/mm.h | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f2df2247026a..cfbb0a87c5f0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -46,7 +46,7 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, - int prot_numa); + unsigned long cp_flags); vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, pgprot_t pgprot, bool write); diff --git a/include/linux/mm.h b/include/linux/mm.h index be49e371e4b5..c7d87ff5027b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1771,9 +1771,21 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, bool need_rmap_locks); + +/* + * Flags used by change_protection(). For now we make it a bitmap so + * that we can pass in multiple flags just like parameters. However + * for now all the callers are only use one of the flags at the same + * time. + */ +/* Whether we should allow dirty bit accounting */ +#define MM_CP_DIRTY_ACCT (1UL << 0) +/* Whether this protection change is for NUMA hints */ +#define MM_CP_PROT_NUMA (1UL << 1) + extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, - int dirty_accountable, int prot_numa); + unsigned long cp_flags); extern int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); -- cgit v1.2.3 From 292924b260247483a58916f6d3550d8c92f32f55 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 6 Apr 2020 20:05:49 -0700 Subject: userfaultfd: wp: apply _PAGE_UFFD_WP bit Firstly, introduce two new flags MM_CP_UFFD_WP[_RESOLVE] for change_protection() when used with uffd-wp and make sure the two new flags are exclusively used. Then, - For MM_CP_UFFD_WP: apply the _PAGE_UFFD_WP bit and remove _PAGE_RW when a range of memory is write protected by uffd - For MM_CP_UFFD_WP_RESOLVE: remove the _PAGE_UFFD_WP bit and recover _PAGE_RW when write protection is resolved from userspace And use this new interface in mwriteprotect_range() to replace the old MM_CP_DIRTY_ACCT. Do this change for both PTEs and huge PMDs. Then we can start to identify which PTE/PMD is write protected by general (e.g., COW or soft dirty tracking), and which is for userfaultfd-wp. Since we should keep the _PAGE_UFFD_WP when doing pte_modify(), add it into _PAGE_CHG_MASK as well. Meanwhile, since we have this new bit, we can be even more strict when detecting uffd-wp page faults in either do_wp_page() or wp_huge_pmd(). After we're with _PAGE_UFFD_WP, a special case is when a page is both protected by the general COW logic and also userfault-wp. Here the userfault-wp will have higher priority and will be handled first. Only after the uffd-wp bit is cleared on the PTE/PMD will we continue to handle the general COW. These are the steps on what will happen with such a page: 1. CPU accesses write protected shared page (so both protected by general COW and uffd-wp), blocked by uffd-wp first because in do_wp_page we'll handle uffd-wp first, so it has higher priority than general COW. 2. Uffd service thread receives the request, do UFFDIO_WRITEPROTECT to remove the uffd-wp bit upon the PTE/PMD. However here we still keep the write bit cleared. Notify the blocked CPU. 3. The blocked CPU resumes the page fault process with a fault retry, during retry it'll notice it was not with the uffd-wp bit this time but it is still write protected by general COW, then it'll go though the COW path in the fault handler, copy the page, apply write bit where necessary, and retry again. 4. The CPU will be able to access this page with write bit set. Suggested-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Brian Geffon Cc: Pavel Emelyanov Cc: Mike Kravetz Cc: David Hildenbrand Cc: Martin Cracauer Cc: Mel Gorman Cc: Bobby Powers Cc: Mike Rapoport Cc: "Kirill A . Shutemov" Cc: Maya Gokhale Cc: Johannes Weiner Cc: Marty McFadden Cc: Denis Plotnikov Cc: Hugh Dickins Cc: "Dr . David Alan Gilbert" Cc: Jerome Glisse Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-8-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c7d87ff5027b..e2f938c5a9d8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1782,6 +1782,11 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, #define MM_CP_DIRTY_ACCT (1UL << 0) /* Whether this protection change is for NUMA hints */ #define MM_CP_PROT_NUMA (1UL << 1) +/* Whether this change is for write protecting */ +#define MM_CP_UFFD_WP (1UL << 2) /* do wp */ +#define MM_CP_UFFD_WP_RESOLVE (1UL << 3) /* Resolve wp */ +#define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ + MM_CP_UFFD_WP_RESOLVE) extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, -- cgit v1.2.3 From f45ec5ff16a75f96dac8c89862d75f1d8739efd4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 6 Apr 2020 20:06:01 -0700 Subject: userfaultfd: wp: support swap and page migration For either swap and page migration, we all use the bit 2 of the entry to identify whether this entry is uffd write-protected. It plays a similar role as the existing soft dirty bit in swap entries but only for keeping the uffd-wp tracking for a specific PTE/PMD. Something special here is that when we want to recover the uffd-wp bit from a swap/migration entry to the PTE bit we'll also need to take care of the _PAGE_RW bit and make sure it's cleared, otherwise even with the _PAGE_UFFD_WP bit we can't trap it at all. In change_pte_range() we do nothing for uffd if the PTE is a swap entry. That can lead to data mismatch if the page that we are going to write protect is swapped out when sending the UFFDIO_WRITEPROTECT. This patch also applies/removes the uffd-wp bit even for the swap entries. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Andrea Arcangeli Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Hugh Dickins Cc: Jerome Glisse Cc: Johannes Weiner Cc: "Kirill A . Shutemov" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mel Gorman Cc: Mike Kravetz Cc: Mike Rapoport Cc: Pavel Emelyanov Cc: Rik van Riel Cc: Shaohua Li Link: http://lkml.kernel.org/r/20200220163112.11409-11-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 877fd239b6ff..9a6f06de183b 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte) if (pte_swp_soft_dirty(pte)) pte = pte_swp_clear_soft_dirty(pte); + if (pte_swp_uffd_wp(pte)) + pte = pte_swp_clear_uffd_wp(pte); arch_entry = __pte_to_swp_entry(pte); return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } -- cgit v1.2.3 From ffd05793963a44bd119311df3c02b191982574ee Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 6 Apr 2020 20:06:09 -0700 Subject: userfaultfd: wp: support write protection for userfault vma range Add API to enable/disable writeprotect a vma range. Unlike mprotect, this doesn't split/merge vmas. [peterx@redhat.com: - use the helper to find VMA; - return -ENOENT if not found to match mcopy case; - use the new MM_CP_UFFD_WP* flags for change_protection - check against mmap_changing for failures - replace find_dst_vma with vma_find_uffd] Signed-off-by: Shaohua Li Signed-off-by: Andrea Arcangeli Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: Jerome Glisse Reviewed-by: Mike Rapoport Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Hugh Dickins Cc: Johannes Weiner Cc: Bobby Powers Cc: Brian Geffon Cc: David Hildenbrand Cc: Denis Plotnikov Cc: "Dr . David Alan Gilbert" Cc: Martin Cracauer Cc: Marty McFadden Cc: Maya Gokhale Cc: Mike Kravetz Cc: Pavel Emelyanov Link: http://lkml.kernel.org/r/20200220163112.11409-13-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index dcd33172b728..a8e5f3ea9bb2 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -41,6 +41,9 @@ extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, bool *mmap_changing); +extern int mwriteprotect_range(struct mm_struct *dst_mm, + unsigned long start, unsigned long len, + bool enable_wp, bool *mmap_changing); /* mm helpers */ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, -- cgit v1.2.3 From 68c3a6ac65f675b4b783635787fa0ed896f5b3d5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 6 Apr 2020 20:06:40 -0700 Subject: drivers/base/memory.c: drop section_count Patch series "mm: drop superfluous section checks when onlining/offlining". Let's drop some superfluous section checks on the onlining/offlining path. This patch (of 3): Since commit c5e79ef561b0 ("mm/memory_hotplug.c: don't allow to online/offline memory blocks with holes") we have a generic check in offline_pages() that disallows offlining memory blocks with holes. Memory blocks with missing sections are just another variant of these type of blocks. We can stop checking (and especially storing) present sections. A proper error message is now printed why offlining failed. section_count was initially introduced in commit 07681215975e ("Driver core: Add section count to memory_block struct") in order to detect when it is okay to remove a memory block. It was used in commit 26bbe7ef6d5c ("drivers/base/memory.c: prohibit offlining of memory blocks with missing sections") to disallow offlining memory blocks with missing sections. As we refactored creation/removal of memory devices and have a proper check for holes in place, we can drop the section_count. This also removes a leftover comment regarding the mem_sysfs_mutex, which was removed in commit 848e19ad3c33 ("drivers/base/memory.c: drop the mem_sysfs_mutex"). Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Michal Hocko Cc: Dan Williams Cc: Pavel Tatashin Cc: Anshuman Khandual Link: http://lkml.kernel.org/r/20200127110424.5757-2-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memory.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 0b8d791b6669..439a89e758d8 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -26,7 +26,6 @@ struct memory_block { unsigned long start_section_nr; unsigned long state; /* serialized by the dev->lock */ - int section_count; /* serialized by mem_sysfs_mutex */ int online_type; /* for passing data to online routine */ int phys_device; /* to which fru does this belong? */ struct device dev; -- cgit v1.2.3 From 0a9f9f62316606ee827fa3318e95a1c489d9acf5 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Mon, 6 Apr 2020 20:07:06 -0700 Subject: mm/sparse.c: only use subsection map in VMEMMAP case Currently, to support subsection aligned memory region adding for pmem, subsection map is added to track which subsection is present. However, config ZONE_DEVICE depends on SPARSEMEM_VMEMMAP. It means subsection map only makes sense when SPARSEMEM_VMEMMAP enabled. For the classic sparse, it's meaningless. Even worse, it may confuse people when checking code related to the classic sparse. About the classic sparse which doesn't support subsection hotplug, Dan said it's more because the effort and maintenance burden outweighs the benefit. Besides, the current 64 bit ARCHes all enable SPARSEMEM_VMEMMAP_ENABLE by default. Combining the above reasons, no need to provide subsection map and the relevant handling for the classic sparse. Let's remove them. Signed-off-by: Baoquan He Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Cc: Dan Williams Cc: Michal Hocko Cc: Pankaj Gupta Cc: Wei Yang Link: http://lkml.kernel.org/r/20200312124414.439-4-bhe@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 42b77d3b68e8..f3f264826423 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1143,7 +1143,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { +#ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); +#endif /* See declaration of similar field in struct zone */ unsigned long pageblock_flags[0]; }; -- cgit v1.2.3 From 956f8b445061667c3545baa24778f890d1d522f4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 6 Apr 2020 20:07:16 -0700 Subject: drivers/base/memory: rename MMOP_ONLINE_KEEP to MMOP_ONLINE Patch series "mm/memory_hotplug: allow to specify a default online_type", v3. Distributions nowadays use udev rules ([1] [2]) to specify if and how to online hotplugged memory. The rules seem to get more complex with many special cases. Due to the various special cases, CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE cannot be used. All memory hotplug is handled via udev rules. Every time we hotplug memory, the udev rule will come to the same conclusion. Especially Hyper-V (but also soon virtio-mem) add a lot of memory in separate memory blocks and wait for memory to get onlined by user space before continuing to add more memory blocks (to not add memory faster than it is getting onlined). This of course slows down the whole memory hotplug process. To make the job of distributions easier and to avoid udev rules that get more and more complicated, let's extend the mechanism provided by - /sys/devices/system/memory/auto_online_blocks - "memhp_default_state=" on the kernel cmdline to be able to specify also "online_movable" as well as "online_kernel" === Example /usr/libexec/config-memhotplug === #!/bin/bash VIRT=`systemd-detect-virt --vm` ARCH=`uname -p` sense_virtio_mem() { if [ -d "/sys/bus/virtio/drivers/virtio_mem/" ]; then DEVICES=`find /sys/bus/virtio/drivers/virtio_mem/ -maxdepth 1 -type l | wc -l` if [ $DEVICES != "0" ]; then return 0 fi fi return 1 } if [ ! -e "/sys/devices/system/memory/auto_online_blocks" ]; then echo "Memory hotplug configuration support missing in the kernel" exit 1 fi if grep "memhp_default_state=" /proc/cmdline > /dev/null; then echo "Memory hotplug configuration overridden in kernel cmdline (memhp_default_state=)" exit 1 fi if [ $VIRT == "microsoft" ]; then echo "Detected Hyper-V on $ARCH" # Hyper-V wants all memory in ZONE_NORMAL ONLINE_TYPE="online_kernel" elif sense_virtio_mem; then echo "Detected virtio-mem on $ARCH" # virtio-mem wants all memory in ZONE_NORMAL ONLINE_TYPE="online_kernel" elif [ $ARCH == "s390x" ] || [ $ARCH == "s390" ]; then echo "Detected $ARCH" # standby memory should not be onlined automatically ONLINE_TYPE="offline" elif [ $ARCH == "ppc64" ] || [ $ARCH == "ppc64le" ]; then echo "Detected" $ARCH # PPC64 onlines all hotplugged memory right from the kernel ONLINE_TYPE="offline" elif [ $VIRT == "none" ]; then echo "Detected bare-metal on $ARCH" # Bare metal users expect hotplugged memory to be unpluggable. We assume # that ZONE imbalances on such enterpise servers cannot happen and is # properly documented ONLINE_TYPE="online_movable" else # TODO: Hypervisors that want to unplug DIMMs and can guarantee that ZONE # imbalances won't happen echo "Detected $VIRT on $ARCH" # Usually, ballooning is used in virtual environments, so memory should go to # ZONE_NORMAL. However, sometimes "movable_node" is relevant. ONLINE_TYPE="online" fi echo "Selected online_type:" $ONLINE_TYPE # Configure what to do with memory that will be hotplugged in the future echo $ONLINE_TYPE 2>/dev/null > /sys/devices/system/memory/auto_online_blocks if [ $? != "0" ]; then echo "Memory hotplug cannot be configured (e.g., old kernel or missing permissions)" # A backup udev rule should handle old kernels if necessary exit 1 fi # Process all already pluggedd blocks (e.g., DIMMs, but also Hyper-V or virtio-mem) if [ $ONLINE_TYPE != "offline" ]; then for MEMORY in /sys/devices/system/memory/memory*; do STATE=`cat $MEMORY/state` if [ $STATE == "offline" ]; then echo $ONLINE_TYPE > $MEMORY/state fi done fi === Example /usr/lib/systemd/system/config-memhotplug.service === [Unit] Description=Configure memory hotplug behavior DefaultDependencies=no Conflicts=shutdown.target Before=sysinit.target shutdown.target After=systemd-modules-load.service ConditionPathExists=|/sys/devices/system/memory/auto_online_blocks [Service] ExecStart=/usr/libexec/config-memhotplug Type=oneshot TimeoutSec=0 RemainAfterExit=yes [Install] WantedBy=sysinit.target === Example modification to the 40-redhat.rules [2] === : diff --git a/40-redhat.rules b/40-redhat.rules-new : index 2c690e5..168fd03 100644 : --- a/40-redhat.rules : +++ b/40-redhat.rules-new : @@ -6,6 +6,9 @@ SUBSYSTEM=="cpu", ACTION=="add", TEST=="online", ATTR{online}=="0", ATTR{online} : # Memory hotadd request : SUBSYSTEM!="memory", GOTO="memory_hotplug_end" : ACTION!="add", GOTO="memory_hotplug_end" : +# memory hotplug behavior configured : +PROGRAM=="grep online /sys/devices/system/memory/auto_online_blocks", GOTO="memory_hotplug_end" : + : PROGRAM="/bin/uname -p", RESULT=="s390*", GOTO="memory_hotplug_end" : : ENV{.state}="online" === [1] https://github.com/lnykryn/systemd-rhel/pull/281 [2] https://github.com/lnykryn/systemd-rhel/blob/staging/rules/40-redhat.rules This patch (of 8): The name is misleading and it's not really clear what is "kept". Let's just name it like the online_type name we expose to user space ("online"). Add some documentation to the types. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Wei Yang Reviewed-by: Baoquan He Acked-by: Pankaj Gupta Cc: Greg Kroah-Hartman Cc: Michal Hocko Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Wei Yang Cc: Vitaly Kuznetsov Cc: Yumei Huang Cc: Igor Mammedov Cc: Eduardo Habkost Cc: Benjamin Herrenschmidt Cc: Haiyang Zhang Cc: K. Y. Srinivasan Cc: Michael Ellerman (powerpc) Cc: Paul Mackerras Cc: Stephen Hemminger Cc: Wei Liu Link: http://lkml.kernel.org/r/20200319131221.14044-1-david@redhat.com Link: http://lkml.kernel.org/r/20200317104942.11178-2-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index f4d59155f3d4..261dbf010d5d 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -47,9 +47,13 @@ enum { /* Types for control the zone type of onlined and offlined memory */ enum { + /* Offline the memory. */ MMOP_OFFLINE = -1, - MMOP_ONLINE_KEEP, + /* Online the memory. Zone depends, see default_zone_for_pfn(). */ + MMOP_ONLINE, + /* Online the memory to ZONE_NORMAL. */ MMOP_ONLINE_KERNEL, + /* Online the memory to ZONE_MOVABLE. */ MMOP_ONLINE_MOVABLE, }; -- cgit v1.2.3 From efc978ad0e05ed6401c7854811750bf55b67f4b9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 6 Apr 2020 20:07:20 -0700 Subject: drivers/base/memory: map MMOP_OFFLINE to 0 Historically, we used the value -1. Just treat 0 as the special case now. Clarify a comment (which was wrong, when we come via device_online() the first time, the online_type would have been 0 / MEM_ONLINE). The default is now always MMOP_OFFLINE. This removes the last user of the manual "-1", which didn't use the enum value. This is a preparation to use the online_type as an array index. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Wei Yang Reviewed-by: Baoquan He Acked-by: Michal Hocko Acked-by: Pankaj Gupta Cc: Greg Kroah-Hartman Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Wei Yang Cc: Benjamin Herrenschmidt Cc: Eduardo Habkost Cc: Haiyang Zhang Cc: Igor Mammedov Cc: "K. Y. Srinivasan" Cc: Michael Ellerman Cc: Paul Mackerras Cc: Stephen Hemminger Cc: Vitaly Kuznetsov Cc: Wei Liu Cc: Yumei Huang Link: http://lkml.kernel.org/r/20200317104942.11178-3-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 261dbf010d5d..c2e06ed5e0e9 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -48,7 +48,7 @@ enum { /* Types for control the zone type of onlined and offlined memory */ enum { /* Offline the memory. */ - MMOP_OFFLINE = -1, + MMOP_OFFLINE = 0, /* Online the memory. Zone depends, see default_zone_for_pfn(). */ MMOP_ONLINE, /* Online the memory to ZONE_NORMAL. */ -- cgit v1.2.3 From 862919e568356cc36288a11b42cd88ec3a7100e9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 6 Apr 2020 20:07:40 -0700 Subject: mm/memory_hotplug: convert memhp_auto_online to store an online_type ... and rename it to memhp_default_online_type. This is a preparation for more detailed default online behavior. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Wei Yang Reviewed-by: Baoquan He Acked-by: Michal Hocko Acked-by: Pankaj Gupta Cc: Greg Kroah-Hartman Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Wei Yang Cc: Benjamin Herrenschmidt Cc: Eduardo Habkost Cc: Haiyang Zhang Cc: Igor Mammedov Cc: "K. Y. Srinivasan" Cc: Michael Ellerman Cc: Paul Mackerras Cc: Stephen Hemminger Cc: Vitaly Kuznetsov Cc: Wei Liu Cc: Yumei Huang Link: http://lkml.kernel.org/r/20200317104942.11178-8-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c2e06ed5e0e9..c6e090b34c4b 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -117,7 +117,8 @@ extern int arch_add_memory(int nid, u64 start, u64 size, struct mhp_restrictions *restrictions); extern u64 max_mem_size; -extern bool memhp_auto_online; +/* Default online_type (MMOP_*) when new memory blocks are added. */ +extern int memhp_default_online_type; /* If movable_node boot option specified */ extern bool movable_node_enabled; static inline bool movable_node_is_enabled(void) -- cgit v1.2.3 From 5f47adf762b78cae97de58d9ff01d2d44db09467 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 6 Apr 2020 20:07:44 -0700 Subject: mm/memory_hotplug: allow to specify a default online_type For now, distributions implement advanced udev rules to essentially - Don't online any hotplugged memory (s390x) - Online all memory to ZONE_NORMAL (e.g., most virt environments like hyperv) - Online all memory to ZONE_MOVABLE in case the zone imbalance is taken care of (e.g., bare metal, special virt environments) In summary: All memory is usually onlined the same way, however, the kernel always has to ask user space to come up with the same answer. E.g., Hyper-V always waits for a memory block to get onlined before continuing, otherwise it might end up adding memory faster than onlining it, which can result in strange OOM situations. This waiting slows down adding of a bigger amount of memory. Let's allow to specify a default online_type, not just "online" and "offline". This allows distributions to configure the default online_type when booting up and be done with it. We can now specify "offline", "online", "online_movable" and "online_kernel" via - "memhp_default_state=" on the kernel cmdline - /sys/devices/system/memory/auto_online_blocks just like we are able to specify for a single memory block via /sys/devices/system/memory/memoryX/state Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Wei Yang Reviewed-by: Baoquan He Acked-by: Michal Hocko Acked-by: Pankaj Gupta Cc: Greg Kroah-Hartman Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Wei Yang Cc: Benjamin Herrenschmidt Cc: Eduardo Habkost Cc: Haiyang Zhang Cc: Igor Mammedov Cc: "K. Y. Srinivasan" Cc: Michael Ellerman Cc: Paul Mackerras Cc: Stephen Hemminger Cc: Vitaly Kuznetsov Cc: Wei Liu Cc: Yumei Huang Link: http://lkml.kernel.org/r/20200317104942.11178-9-david@redhat.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c6e090b34c4b..ef55115320fb 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -117,6 +117,8 @@ extern int arch_add_memory(int nid, u64 start, u64 size, struct mhp_restrictions *restrictions); extern u64 max_mem_size; +extern int memhp_online_type_from_str(const char *str); + /* Default online_type (MMOP_*) when new memory blocks are added. */ extern int memhp_default_online_type; /* If movable_node boot option specified */ -- cgit v1.2.3 From 552657b7b3343851916fde7e4fd6bfb6516d2bcb Mon Sep 17 00:00:00 2001 From: chenqiwu Date: Mon, 6 Apr 2020 20:08:33 -0700 Subject: mm: fix ambiguous comments for better code readability The parameter of remap_pfn_range() @pfn passed from the caller is actually a page-frame number converted by corresponding physical address of kernel memory, the original comment is ambiguous that may mislead the users. Meanwhile, there is an ambiguous typo "VMM" in the comment of vm_area_struct. So fixing them will make the code more readable. Signed-off-by: chenqiwu Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/1583026921-15279-1-git-send-email-qiwuchen55@gmail.com Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index dd555e6d23f3..4aba6c0c2ba8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -289,8 +289,8 @@ struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ /* - * This struct defines a memory VMM memory area. There is one of these - * per VM-area/task. A VM area is any part of the process virtual memory + * This struct describes a virtual memory area. There is one of these + * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). */ -- cgit v1.2.3 From 3f3673d7d324d872d9d8ddb73b3e5e47fbf12e0d Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 6 Apr 2020 20:08:43 -0700 Subject: include/linux/swapops.h: correct guards for non_swap_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_DEVICE_PRIVATE is defined, but neither CONFIG_MEMORY_FAILURE nor CONFIG_MIGRATION, then non_swap_entry() will return 0, meaning that the condition (non_swap_entry(entry) && is_device_private_entry(entry)) in zap_pte_range() will never be true even if the entry is a device private one. Equally any other code depending on non_swap_entry() will not function as expected. I originally spotted this just by looking at the code, I haven't actually observed any problems. Looking a bit more closely it appears that actually this situation (currently at least) cannot occur: DEVICE_PRIVATE depends on ZONE_DEVICE ZONE_DEVICE depends on MEMORY_HOTREMOVE MEMORY_HOTREMOVE depends on MIGRATION Fixes: 5042db43cc26 ("mm/ZONE_DEVICE: new type of ZONE_DEVICE for unaddressable memory") Signed-off-by: Steven Price Signed-off-by: Andrew Morton Cc: Jérôme Glisse Cc: Arnd Bergmann Cc: Dan Williams Cc: John Hubbard Link: http://lkml.kernel.org/r/20200305130550.22693-1-steven.price@arm.com Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 9a6f06de183b..d9b7c9132c2f 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -350,7 +350,8 @@ static inline void num_poisoned_pages_inc(void) } #endif -#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || \ + defined(CONFIG_DEVICE_PRIVATE) static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; -- cgit v1.2.3 From 1d90b6491014ead775146726b81a78ed993c3188 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 6 Apr 2020 20:08:46 -0700 Subject: include/linux/memremap.h: remove stale comments Fixes: 80a72d0af05a ("memremap: remove the data field in struct dev_pagemap") Fixes: fdc029b19dfd ("memremap: remove the dev field in struct dev_pagemap") Signed-off-by: Ira Weiny Signed-off-by: Andrew Morton Reviewed-by: Christoph Hellwig Cc: Jason Gunthorpe Cc: Dan Williams Link: http://lkml.kernel.org/r/20200316213205.145333-1-ira.weiny@intel.com Signed-off-by: Linus Torvalds --- include/linux/memremap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 60d97e8fd3c0..8b37c4c9222c 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -98,8 +98,6 @@ struct dev_pagemap_ops { * @ref: reference count that pins the devm_memremap_pages() mapping * @internal_ref: internal reference if @ref is not provided by the caller * @done: completion for @internal_ref - * @dev: host device of the mapping for debug - * @data: private data pointer for page_free() * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table -- cgit v1.2.3 From 6218d740ac1bc723d57900b865d3e52b83550c2b Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 6 Apr 2020 20:08:52 -0700 Subject: mm: remove dummy struct bootmem_data/bootmem_data_t Both bootmem_data and bootmem_data_t structures are no longer defined. Remove the dummy forward declarations. Signed-off-by: Waiman Long Signed-off-by: Andrew Morton Reviewed-by: Baoquan He Acked-by: Mike Rapoport Link: http://lkml.kernel.org/r/20200326022617.26208-1-longman@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f3f264826423..e9892bf9eba9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -664,7 +664,6 @@ struct deferred_split { * Memory statistics and page replacement data structures are maintained on a * per-zone basis. */ -struct bootmem_data; typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; struct zonelist node_zonelists[MAX_ZONELISTS]; -- cgit v1.2.3 From d919b33dafb3e222d23671b2bb06d119aede625f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 6 Apr 2020 20:09:01 -0700 Subject: proc: faster open/read/close with "permanent" files Now that "struct proc_ops" exist we can start putting there stuff which could not fly with VFS "struct file_operations"... Most of fs/proc/inode.c file is dedicated to make open/read/.../close reliable in the event of disappearing /proc entries which usually happens if module is getting removed. Files like /proc/cpuinfo which never disappear simply do not need such protection. Save 2 atomic ops, 1 allocation, 1 free per open/read/close sequence for such "permanent" files. Enable "permanent" flag for /proc/cpuinfo /proc/kmsg /proc/modules /proc/slabinfo /proc/stat /proc/sysvipc/* /proc/swaps More will come once I figure out foolproof way to prevent out module authors from marking their stuff "permanent" for performance reasons when it is not. This should help with scalability: benchmark is "read /proc/cpuinfo R times by N threads scattered over the system". N R t, s (before) t, s (after) ----------------------------------------------------- 64 4096 1.582458 1.530502 -3.2% 256 4096 6.371926 6.125168 -3.9% 1024 4096 25.64888 24.47528 -4.6% Benchmark source: #include #include #include #include #include #include #include #include const int NR_CPUS = sysconf(_SC_NPROCESSORS_ONLN); int N; const char *filename; int R; int xxx = 0; int glue(int n) { cpu_set_t m; CPU_ZERO(&m); CPU_SET(n, &m); return sched_setaffinity(0, sizeof(cpu_set_t), &m); } void f(int n) { glue(n % NR_CPUS); while (*(volatile int *)&xxx == 0) { } for (int i = 0; i < R; i++) { int fd = open(filename, O_RDONLY); char buf[4096]; ssize_t rv = read(fd, buf, sizeof(buf)); asm volatile ("" :: "g" (rv)); close(fd); } } int main(int argc, char *argv[]) { if (argc < 4) { std::cerr << "usage: " << argv[0] << ' ' << "N /proc/filename R "; return 1; } N = atoi(argv[1]); filename = argv[2]; R = atoi(argv[3]); for (int i = 0; i < NR_CPUS; i++) { if (glue(i) == 0) break; } std::vector T; T.reserve(N); for (int i = 0; i < N; i++) { T.emplace_back(f, i); } auto t0 = std::chrono::system_clock::now(); { *(volatile int *)&xxx = 1; for (auto& t: T) { t.join(); } } auto t1 = std::chrono::system_clock::now(); std::chrono::duration dt = t1 - t0; std::cout << dt.count() << ' '; return 0; } P.S.: Explicit randomization marker is added because adding non-function pointer will silently disable structure layout randomization. [akpm@linux-foundation.org: coding style fixes] Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Cc: Al Viro Cc: Joe Perches Link: http://lkml.kernel.org/r/20200222201539.GA22576@avx2 Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 40a7982b7285..45c05fd9c99d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -5,6 +5,7 @@ #ifndef _LINUX_PROC_FS_H #define _LINUX_PROC_FS_H +#include #include #include @@ -12,7 +13,21 @@ struct proc_dir_entry; struct seq_file; struct seq_operations; +enum { + /* + * All /proc entries using this ->proc_ops instance are never removed. + * + * If in doubt, ignore this flag. + */ +#ifdef MODULE + PROC_ENTRY_PERMANENT = 0U, +#else + PROC_ENTRY_PERMANENT = 1U << 0, +#endif +}; + struct proc_ops { + unsigned int proc_flags; int (*proc_open)(struct inode *, struct file *); ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *); @@ -25,7 +40,7 @@ struct proc_ops { #endif int (*proc_mmap)(struct file *, struct vm_area_struct *); unsigned long (*proc_get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); -}; +} __randomize_layout; #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From b829a0f0f2f2094c1e40637259c44b854e6ebe96 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Apr 2020 20:09:17 -0700 Subject: seq_file: remove m->version The process maps file was the only user of version (introduced back in 2005). Now that it uses ppos instead, we can remove it. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200317193201.9924-4-adobriyan@gmail.com Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 770c2bf3aa43..1672cf6f7614 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -21,7 +21,6 @@ struct seq_file { size_t pad_until; loff_t index; loff_t read_pos; - u64 version; struct mutex lock; const struct seq_operations *op; int poll_event; -- cgit v1.2.3 From 889b3c1245de48ed0cacf7aebb25c489d3e4a3e9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 6 Apr 2020 20:09:33 -0700 Subject: compiler: remove CONFIG_OPTIMIZE_INLINING entirely Commit ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") made this always-on option. We released v5.4 and v5.5 including that commit. Remove the CONFIG option and clean up the code now. Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Reviewed-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Cc: Arnd Bergmann Cc: Borislav Petkov Cc: David Miller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20200220110807.32534-2-masahiroy@kernel.org Signed-off-by: Linus Torvalds --- include/linux/compiler_types.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 72393a8c1a6c..e970f97a7fcb 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -129,22 +129,13 @@ struct ftrace_likely_data { #define __compiler_offsetof(a, b) __builtin_offsetof(a, b) /* - * Force always-inline if the user requests it so via the .config. * Prefer gnu_inline, so that extern inline functions do not emit an * externally visible function. This makes extern inline behave as per gnu89 * semantics rather than c99. This prevents multiple symbol definition errors * of extern inline functions at link time. * A lot of inline functions can cause havoc with function tracing. - * Do not use __always_inline here, since currently it expands to inline again - * (which would break users of __always_inline). */ -#if !defined(CONFIG_OPTIMIZE_INLINING) -#define inline inline __attribute__((__always_inline__)) __gnu_inline \ - __inline_maybe_unused notrace -#else -#define inline inline __gnu_inline \ - __inline_maybe_unused notrace -#endif +#define inline inline __gnu_inline __inline_maybe_unused notrace /* * gcc provides both __inline__ and __inline as alternate spellings of -- cgit v1.2.3 From af9c5d2e3b355854ff0e4acfbfbfadcd5198a349 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 6 Apr 2020 20:09:37 -0700 Subject: compiler.h: fix error in BUILD_BUG_ON() reporting compiletime_assert() uses __LINE__ to create a unique function name. This means that if you have more than one BUILD_BUG_ON() in the same source line (which can happen if they appear e.g. in a macro), then the error message from the compiler might output the wrong condition. For this source file: #include #define macro() \ BUILD_BUG_ON(1); \ BUILD_BUG_ON(0); void foo() { macro(); } gcc would output: ./include/linux/compiler.h:350:38: error: call to `__compiletime_assert_9' declared with attribute error: BUILD_BUG_ON failed: 0 _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) However, it was not the BUILD_BUG_ON(0) that failed, so it should say 1 instead of 0. With this patch, we use __COUNTER__ instead of __LINE__, so each BUILD_BUG_ON() gets a different function name and the correct condition is printed: ./include/linux/compiler.h:350:38: error: call to `__compiletime_assert_0' declared with attribute error: BUILD_BUG_ON failed: 1 _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) Signed-off-by: Vegard Nossum Signed-off-by: Andrew Morton Reviewed-by: Masahiro Yamada Reviewed-by: Daniel Santos Cc: Rasmus Villemoes Cc: Ian Abbott Cc: Joe Perches Link: http://lkml.kernel.org/r/20200331112637.25047-1-vegard.nossum@oracle.com Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 5e88e7e33abe..034b0a644efc 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -347,7 +347,7 @@ static inline void *offset_to_ptr(const int *off) * compiler has support to do so. */ #define compiletime_assert(condition, msg) \ - _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) #define compiletime_assert_atomic_type(t) \ compiletime_assert(__native_word(t), \ -- cgit v1.2.3 From f80ac98a641a03097cbc9fdfd4b6a41a8dd3b7ae Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 6 Apr 2020 20:09:43 -0700 Subject: bitops: always inline sign extension helpers With CONFIG_CC_OPTIMIZE_FOR_SIZE, objtool reports: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.o: warning: objtool: i915_gem_execbuffer2_ioctl()+0x5b7: call to gen8_canonical_addr() with UACCESS enabled This means i915_gem_execbuffer2_ioctl() is calling gen8_canonical_addr() from the user_access_begin/end critical region (i.e, with SMAP disabled). While it's probably harmless in this case, in general we like to avoid extra function calls in SMAP-disabled regions because it can open up inadvertent security holes. Fix the warning by changing the sign extension helpers to __always_inline. This convinces GCC to inline gen8_canonical_addr(). The sign extension functions are trivial anyway, so it makes sense to always inline them. With my test optimize-for-size-based config, this actually shrinks the text size of i915_gem_execbuffer.o by 45 bytes -- and no change for vmlinux. Reported-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Signed-off-by: Andrew Morton Cc: Peter Zijlstra Cc: Al Viro Cc: Chris Wilson Link: http://lkml.kernel.org/r/740179324b2b18b750b16295c48357f00b5fa9ed.1582982020.git.jpoimboe@redhat.com Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 47f54b459c26..9acf654f0b19 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -162,7 +162,7 @@ static inline __u8 ror8(__u8 word, unsigned int shift) * * This is safe to use for 16- and 8-bit types as well. */ -static inline __s32 sign_extend32(__u32 value, int index) +static __always_inline __s32 sign_extend32(__u32 value, int index) { __u8 shift = 31 - index; return (__s32)(value << shift) >> shift; @@ -173,7 +173,7 @@ static inline __s32 sign_extend32(__u32 value, int index) * @value: value to sign extend * @index: 0 based bit index (0<=index<64) to sign bit */ -static inline __s64 sign_extend64(__u64 value, int index) +static __always_inline __s64 sign_extend64(__u64 value, int index) { __u8 shift = 63 - index; return (__s64)(value << shift) >> shift; -- cgit v1.2.3 From 505a0ef15f96c6c43ec719c9fc1833d98957bb39 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 6 Apr 2020 20:10:22 -0700 Subject: kasan: stackdepot: move filter_irq_stacks() to stackdepot.c filter_irq_stacks() can be used by other tools (e.g. KMSAN), so it needs to be moved to a common location. lib/stackdepot.c seems a good place, as filter_irq_stacks() is usually applied to the output of stack_trace_save(). This patch has been previously mailed as part of KMSAN RFC patch series. [glider@google.co: nds32: linker script: add SOFTIRQENTRY_TEXT\ Link: http://lkml.kernel.org/r/20200311121002.241430-1-glider@google.com [glider@google.com: add IRQENTRY_TEXT and SOFTIRQENTRY_TEXT to linker script] Link: http://lkml.kernel.org/r/20200311121124.243352-1-glider@google.com Signed-off-by: Alexander Potapenko Signed-off-by: Andrew Morton Cc: Vegard Nossum Cc: Dmitry Vyukov Cc: Marco Elver Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Sergey Senozhatsky Link: http://lkml.kernel.org/r/20200220141916.55455-3-glider@google.com Signed-off-by: Linus Torvalds --- include/linux/stackdepot.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 3efa97d482cb..24d49c732341 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -19,4 +19,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); + #endif -- cgit v1.2.3 From 7e2345200262e4a6056580f0231cccdaffc825f3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 6 Apr 2020 20:10:25 -0700 Subject: percpu_counter: fix a data race at vm_committed_as "vm_committed_as.count" could be accessed concurrently as reported by KCSAN, BUG: KCSAN: data-race in __vm_enough_memory / percpu_counter_add_batch write to 0xffffffff9451c538 of 8 bytes by task 65879 on cpu 35: percpu_counter_add_batch+0x83/0xd0 percpu_counter_add_batch at lib/percpu_counter.c:91 __vm_enough_memory+0xb9/0x260 dup_mm+0x3a4/0x8f0 copy_process+0x2458/0x3240 _do_fork+0xaa/0x9f0 __do_sys_clone+0x125/0x160 __x64_sys_clone+0x70/0x90 do_syscall_64+0x91/0xb05 entry_SYSCALL_64_after_hwframe+0x49/0xbe read to 0xffffffff9451c538 of 8 bytes by task 66773 on cpu 19: __vm_enough_memory+0x199/0x260 percpu_counter_read_positive at include/linux/percpu_counter.h:81 (inlined by) __vm_enough_memory at mm/util.c:839 mmap_region+0x1b2/0xa10 do_mmap+0x45c/0x700 vm_mmap_pgoff+0xc0/0x130 ksys_mmap_pgoff+0x6e/0x300 __x64_sys_mmap+0x33/0x40 do_syscall_64+0x91/0xb05 entry_SYSCALL_64_after_hwframe+0x49/0xbe The read is outside percpu_counter::lock critical section which results in a data race. Fix it by adding a READ_ONCE() in percpu_counter_read_positive() which could also service as the existing compiler memory barrier. Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Acked-by: Marco Elver Link: http://lkml.kernel.org/r/1582302724-2804-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 4f052496cdfd..0a4f54dd4737 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -78,9 +78,9 @@ static inline s64 percpu_counter_read(struct percpu_counter *fbc) */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { - s64 ret = fbc->count; + /* Prevent reloads of fbc->count */ + s64 ret = READ_ONCE(fbc->count); - barrier(); /* Prevent reloads of fbc->count */ if (ret >= 0) return ret; return 0; -- cgit v1.2.3 From 295bcca84916cb5079140a89fccb472bb8d1f6e2 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Mon, 6 Apr 2020 20:10:38 -0700 Subject: linux/bits.h: add compile time sanity check of GENMASK inputs GENMASK() and GENMASK_ULL() are supposed to be called with the high bit as the first argument and the low bit as the second argument. Mixing them will return a mask with zero bits set. Recent commits show getting this wrong is not uncommon, see e.g. commit aa4c0c9091b0 ("net: stmmac: Fix misuses of GENMASK macro") and commit 9bdd7bb3a844 ("clocksource/drivers/npcm: Fix misuse of GENMASK macro"). To prevent such mistakes from appearing again, add compile time sanity checking to the arguments of GENMASK() and GENMASK_ULL(). If both arguments are known at compile time, and the low bit is higher than the high bit, break the build to detect the mistake immediately. Since GENMASK() is used in declarations, BUILD_BUG_ON_ZERO() must be used instead of BUILD_BUG_ON(). __builtin_constant_p does not evaluate is argument, it only checks if it is a constant or not at compile time, and __builtin_choose_expr does not evaluate the expression that is not chosen. Therefore, GENMASK(x++, 0) does only evaluate x++ once. Commit 95b980d62d52 ("linux/bits.h: make BIT(), GENMASK(), and friends available in assembly") made the macros in linux/bits.h available in assembly. Since BUILD_BUG_OR_ZERO() is not asm compatible, disable the checks if the file is included in an asm file. Due to bugs in GCC versions before 4.9 [0], disable the check if building with a too old GCC compiler. [0]: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19449 Signed-off-by: Rikard Falkeborn Signed-off-by: Andrew Morton Reviewed-by: Masahiro Yamada Reviewed-by: Kees Cook Cc: Borislav Petkov Cc: Geert Uytterhoeven Cc: Haren Myneni Cc: Joe Perches Cc: Johannes Berg Cc: lkml Cc: Ingo Molnar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20200308193954.2372399-1-rikard.falkeborn@gmail.com Signed-off-by: Linus Torvalds --- include/linux/bits.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bits.h b/include/linux/bits.h index a740bbcf3cd2..4671fbf28842 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -18,12 +18,30 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#define GENMASK(h, l) \ +#if !defined(__ASSEMBLY__) && \ + (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#include +#define GENMASK_INPUT_CHECK(h, l) \ + (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ + __builtin_constant_p((l) > (h)), (l) > (h), 0))) +#else +/* + * BUILD_BUG_ON_ZERO is not available in h files included from asm files, + * disable the input check if that is the case. + */ +#define GENMASK_INPUT_CHECK(h, l) 0 +#endif + +#define __GENMASK(h, l) \ (((~UL(0)) - (UL(1) << (l)) + 1) & \ (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define GENMASK(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ +#define __GENMASK_ULL(h, l) \ (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define GENMASK_ULL(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) #endif /* __LINUX_BITS_H */ -- cgit v1.2.3 From a13f58a0cafa7b0416a2898bc3b0defbb305d108 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 3 Mar 2020 11:54:27 +0100 Subject: locking/refcount: Document interaction with PID_MAX_LIMIT Document the circumstances under which refcount_t's saturation mechanism works deterministically. Acked-by: Kees Cook Acked-by: Will Deacon Signed-off-by: Jann Horn Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200303105427.260620-1-jannh@google.com --- include/linux/refcount.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 0ac50cf62d06..0e3ee25eb156 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -38,11 +38,24 @@ * atomic operations, then the count will continue to edge closer to 0. If it * reaches a value of 1 before /any/ of the threads reset it to the saturated * value, then a concurrent refcount_dec_and_test() may erroneously free the - * underlying object. Given the precise timing details involved with the - * round-robin scheduling of each thread manipulating the refcount and the need - * to hit the race multiple times in succession, there doesn't appear to be a - * practical avenue of attack even if using refcount_add() operations with - * larger increments. + * underlying object. + * Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently + * 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK). + * With the current PID limit, if no batched refcounting operations are used and + * the attacker can't repeatedly trigger kernel oopses in the middle of refcount + * operations, this makes it impossible for a saturated refcount to leave the + * saturation range, even if it is possible for multiple uses of the same + * refcount to nest in the context of a single task: + * + * (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT = + * 0x40000000 / 0x400000 = 0x100 = 256 + * + * If hundreds of references are added/removed with a single refcounting + * operation, it may potentially be possible to leave the saturation range; but + * given the precise timing details involved with the round-robin scheduling of + * each thread manipulating the refcount and the need to hit the race multiple + * times in succession, there doesn't appear to be a practical avenue of attack + * even if using refcount_add() operations with larger increments. * * Memory ordering * =============== -- cgit v1.2.3 From 63f818f46af9f8b3f17b9695501e8d08959feb60 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Apr 2020 09:43:04 -0500 Subject: proc: Use a dedicated lock in struct pid syzbot wrote: > ======================================================== > WARNING: possible irq lock inversion dependency detected > 5.6.0-syzkaller #0 Not tainted > -------------------------------------------------------- > swapper/1/0 just changed the state of lock: > ffffffff898090d8 (tasklist_lock){.+.?}-{2:2}, at: send_sigurg+0x9f/0x320 fs/fcntl.c:840 > but this lock took another, SOFTIRQ-unsafe lock in the past: > (&pid->wait_pidfd){+.+.}-{2:2} > > > and interrupts could create inverse lock ordering between them. > > > other info that might help us debug this: > Possible interrupt unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(&pid->wait_pidfd); > local_irq_disable(); > lock(tasklist_lock); > lock(&pid->wait_pidfd); > > lock(tasklist_lock); > > *** DEADLOCK *** > > 4 locks held by swapper/1/0: The problem is that because wait_pidfd.lock is taken under the tasklist lock. It must always be taken with irqs disabled as tasklist_lock can be taken from interrupt context and if wait_pidfd.lock was already taken this would create a lock order inversion. Oleg suggested just disabling irqs where I have added extra calls to wait_pidfd.lock. That should be safe and I think the code will eventually do that. It was rightly pointed out by Christian that sharing the wait_pidfd.lock was a premature optimization. It is also true that my pre-merge window testing was insufficient. So remove the premature optimization and give struct pid a dedicated lock of it's own for struct pid things. I have verified that lockdep sees all 3 paths where we take the new pid->lock and lockdep does not complain. It is my current day dream that one day pid->lock can be used to guard the task lists as well and then the tasklist_lock won't need to be held to deliver signals. That will require taking pid->lock with irqs disabled. Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/00000000000011d66805a25cd73f@google.com/ Cc: Oleg Nesterov Cc: Christian Brauner Reported-by: syzbot+343f75cdeea091340956@syzkaller.appspotmail.com Reported-by: syzbot+832aabf700bc3ec920b9@syzkaller.appspotmail.com Reported-by: syzbot+f675f964019f884dbd0f@syzkaller.appspotmail.com Reported-by: syzbot+a9fb1457d720a55d6dc5@syzkaller.appspotmail.com Fixes: 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc") Signed-off-by: "Eric W. Biederman" --- include/linux/pid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 01a0d4e28506..cc896f0fc4e3 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -60,6 +60,7 @@ struct pid { refcount_t count; unsigned int level; + spinlock_t lock; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; -- cgit v1.2.3 From ab6f762f0f53162d41497708b33c9a3236d3609e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 3 Mar 2020 20:30:02 +0900 Subject: printk: queue wake_up_klogd irq_work only if per-CPU areas are ready printk_deferred(), similarly to printk_safe/printk_nmi, does not immediately attempt to print a new message on the consoles, avoiding calls into non-reentrant kernel paths, e.g. scheduler or timekeeping, which potentially can deadlock the system. Those printk() flavors, instead, rely on per-CPU flush irq_work to print messages from safer contexts. For same reasons (recursive scheduler or timekeeping calls) printk() uses per-CPU irq_work in order to wake up user space syslog/kmsg readers. However, only printk_safe/printk_nmi do make sure that per-CPU areas have been initialised and that it's safe to modify per-CPU irq_work. This means that, for instance, should printk_deferred() be invoked "too early", that is before per-CPU areas are initialised, printk_deferred() will perform illegal per-CPU access. Lech Perczak [0] reports that after commit 1b710b1b10ef ("char/random: silence a lockdep splat with printk()") user-space syslog/kmsg readers are not able to read new kernel messages. The reason is printk_deferred() being called too early (as was pointed out by Petr and John). Fix printk_deferred() and do not queue per-CPU irq_work before per-CPU areas are initialized. Link: https://lore.kernel.org/lkml/aa0732c6-5c4e-8a8b-a1c1-75ebe3dca05b@camlintechnologies.com/ Reported-by: Lech Perczak Signed-off-by: Sergey Senozhatsky Tested-by: Jann Horn Reviewed-by: Petr Mladek Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Cc: John Ogness Signed-off-by: Linus Torvalds --- include/linux/printk.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 1e6108b8d15f..e061635e0409 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -202,7 +202,6 @@ __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack(void) __cold; -extern void printk_safe_init(void); extern void printk_safe_flush(void); extern void printk_safe_flush_on_panic(void); #else @@ -269,10 +268,6 @@ static inline void dump_stack(void) { } -static inline void printk_safe_init(void) -{ -} - static inline void printk_safe_flush(void) { } -- cgit v1.2.3 From 2370ae4b1d5aa7eb70bd7539a420e791d4b0123b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 10 Apr 2020 14:32:25 -0700 Subject: docs: mm: slab.h: fix a broken cross-reference There is a typo at the cross-reference link, causing this warning: include/linux/slab.h:11: WARNING: undefined label: memory-allocation (if the link has no caption the label must precede a section header) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Cc: Jonathan Corbet Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/0aeac24235d356ebd935d11e147dcc6edbb6465c.1586359676.git.mchehab+huawei@kernel.org Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 03a389358562..6d454886bcaf 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -501,7 +501,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * :ref:`Documentation/core-api/mm-api.rst ` * * The recommended usage of the @flags is described at - * :ref:`Documentation/core-api/memory-allocation.rst ` + * :ref:`Documentation/core-api/memory-allocation.rst ` * * Below is a brief outline of the most useful GFP flags * -- cgit v1.2.3 From 8676af1ff2d28e64e5636147821bda7524cf007d Mon Sep 17 00:00:00 2001 From: Aslan Bakirov Date: Fri, 10 Apr 2020 14:32:42 -0700 Subject: mm: cma: NUMA node interface I've noticed that there is no interface exposed by CMA which would let me to declare contigous memory on particular NUMA node. This patchset adds the ability to try to allocate contiguous memory on a specific node. It will fallback to other nodes if the specified one doesn't work. Implement a new method for declaring contigous memory on particular node and keep cma_declare_contiguous() as a wrapper. [akpm@linux-foundation.org: build fix] Signed-off-by: Aslan Bakirov Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Andreas Schaufler Cc: Mike Kravetz Cc: Rik van Riel Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200407163840.92263-2-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/cma.h | 14 ++++++++++++-- include/linux/memblock.h | 3 +++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index 190184b5ff32..6ff79fefd01f 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -4,6 +4,7 @@ #include #include +#include /* * There is always at least global CMA area and a few optional @@ -24,10 +25,19 @@ extern phys_addr_t cma_get_base(const struct cma *cma); extern unsigned long cma_get_size(const struct cma *cma); extern const char *cma_get_name(const struct cma *cma); -extern int __init cma_declare_contiguous(phys_addr_t base, +extern int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, const char *name, struct cma **res_cma); + bool fixed, const char *name, struct cma **res_cma, + int nid); +static inline int __init cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + bool fixed, const char *name, struct cma **res_cma) +{ + return cma_declare_contiguous_nid(base, size, limit, alignment, + order_per_bit, fixed, name, res_cma, NUMA_NO_NODE); +} extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 079d17d96410..6bc37a731d27 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -348,6 +348,9 @@ static inline int memblock_get_region_node(const struct memblock_region *r) phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end); +phys_addr_t memblock_alloc_range_nid(phys_addr_t size, + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, -- cgit v1.2.3 From cf11e85fc08cc6a4fe3ac2ba2e610c962bf20bc3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 10 Apr 2020 14:32:45 -0700 Subject: mm: hugetlb: optionally allocate gigantic hugepages using cma Commit 944d9fec8d7a ("hugetlb: add support for gigantic page allocation at runtime") has added the run-time allocation of gigantic pages. However it actually works only at early stages of the system loading, when the majority of memory is free. After some time the memory gets fragmented by non-movable pages, so the chances to find a contiguous 1GB block are getting close to zero. Even dropping caches manually doesn't help a lot. At large scale rebooting servers in order to allocate gigantic hugepages is quite expensive and complex. At the same time keeping some constant percentage of memory in reserved hugepages even if the workload isn't using it is a big waste: not all workloads can benefit from using 1 GB pages. The following solution can solve the problem: 1) On boot time a dedicated cma area* is reserved. The size is passed as a kernel argument. 2) Run-time allocations of gigantic hugepages are performed using the cma allocator and the dedicated cma area In this case gigantic hugepages can be allocated successfully with a high probability, however the memory isn't completely wasted if nobody is using 1GB hugepages: it can be used for pagecache, anon memory, THPs, etc. * On a multi-node machine a per-node cma area is allocated on each node. Following gigantic hugetlb allocation are using the first available numa node if the mask isn't specified by a user. Usage: 1) configure the kernel to allocate a cma area for hugetlb allocations: pass hugetlb_cma=10G as a kernel argument 2) allocate hugetlb pages as usual, e.g. echo 10 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages If the option isn't enabled or the allocation of the cma area failed, the current behavior of the system is preserved. x86 and arm-64 are covered by this patch, other architectures can be trivially added later. The patch contains clean-ups and fixes proposed and implemented by Aslan Bakirov and Randy Dunlap. It also contains ideas and suggestions proposed by Rik van Riel, Michal Hocko and Mike Kravetz. Thanks! Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Tested-by: Andreas Schaufler Acked-by: Mike Kravetz Acked-by: Michal Hocko Cc: Aslan Bakirov Cc: Randy Dunlap Cc: Rik van Riel Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200407163840.92263-3-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5ea05879a0a9..43a1cef8f0f1 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -895,4 +895,16 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h, return ptl; } +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) +extern void __init hugetlb_cma_reserve(int order); +extern void __init hugetlb_cma_check(void); +#else +static inline __init void hugetlb_cma_reserve(int order) +{ +} +static inline __init void hugetlb_cma_check(void) +{ +} +#endif + #endif /* _LINUX_HUGETLB_H */ -- cgit v1.2.3 From 8cd3984d81d5fd5e18bccb12d7d228a114ec2508 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:33:01 -0700 Subject: mm/memory.c: add vm_insert_pages() Add the ability to insert multiple pages at once to a user VM with lower PTE spinlock operations. The intention of this patch-set is to reduce atomic ops for tcp zerocopy receives, which normally hits the same spinlock multiple times consecutively. [akpm@linux-foundation.org: pte_alloc() no longer takes the `addr' argument] [arjunroy@google.com: add missing page_count() check to vm_insert_pages()] Link: http://lkml.kernel.org/r/20200214005929.104481-1-arjunroy.kdev@gmail.com [arjunroy@google.com: vm_insert_pages() checks if pte_index defined] Link: http://lkml.kernel.org/r/20200228054714.204424-2-arjunroy.kdev@gmail.com Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Cc: David Miller Cc: Matthew Wilcox Cc: Jason Gunthorpe Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200128025958.43490-2-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index e2f938c5a9d8..ed896cedd4c4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2689,6 +2689,8 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num); int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, -- cgit v1.2.3 From c62da0c35d58518ddb26ff641d2485596567fd96 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:05 -0700 Subject: mm/vma: define a default value for VM_DATA_DEFAULT_FLAGS There are many platforms with exact same value for VM_DATA_DEFAULT_FLAGS This creates a default value for VM_DATA_DEFAULT_FLAGS in line with the existing VM_STACK_DEFAULT_FLAGS. While here, also define some more macros with standard VMA access flag combinations that are used frequently across many platforms. Apart from simplification, this reduces code duplication as well. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Acked-by: Geert Uytterhoeven Cc: Richard Henderson Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Mark Salter Cc: Guo Ren Cc: Yoshinori Sato Cc: Brian Cain Cc: Tony Luck Cc: Michal Simek Cc: Ralf Baechle Cc: Paul Burton Cc: Nick Hu Cc: Ley Foon Tan Cc: Jonas Bonn Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Walmsley Cc: Heiko Carstens Cc: Rich Felker Cc: "David S. Miller" Cc: Guan Xuetao Cc: Thomas Gleixner Cc: Jeff Dike Cc: Chris Zankel Link: http://lkml.kernel.org/r/1583391014-8170-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ed896cedd4c4..33076fa149c8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -343,6 +343,20 @@ extern unsigned int kobjsize(const void *objp); /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) +#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) + +/* Common data flag combinations */ +#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ + VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC +#endif + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif -- cgit v1.2.3 From 6cb4d9a2870d2062e34c93bfef4d52fca3fe42d1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:09 -0700 Subject: mm/vma: introduce VM_ACCESS_FLAGS There are many places where all basic VMA access flags (read, write, exec) are initialized or checked against as a group. One such example is during page fault. Existing vma_is_accessible() wrapper already creates the notion of VMA accessibility as a group access permissions. Hence lets just create VM_ACCESS_FLAGS (VM_READ|VM_WRITE|VM_EXEC) which will not only reduce code duplication but also extend the VMA accessibility concept in general. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Cc: Russell King Cc: Catalin Marinas Cc: Mark Salter Cc: Nick Hu Cc: Ley Foon Tan Cc: Michael Ellerman Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Guan Xuetao Cc: Dave Hansen Cc: Thomas Gleixner Cc: Rob Springer Cc: Greg Kroah-Hartman Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/1583391014-8170-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 33076fa149c8..4db1522d7c48 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -369,6 +369,10 @@ extern unsigned int kobjsize(const void *objp); #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) +/* VMA basic access permission flags */ +#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) + + /* * Special vmas that are non-mergable, non-mlock()able. */ @@ -646,7 +650,7 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) static inline bool vma_is_accessible(struct vm_area_struct *vma) { - return vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + return vma->vm_flags & VM_ACCESS_FLAGS; } #ifdef CONFIG_SHMEM -- cgit v1.2.3 From 78e7c5af080b86e9f28afac5a8307ddab1d2c1a3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:13 -0700 Subject: mm/special: create generic fallbacks for pte_special() and pte_mkspecial() Currently there are many platforms that dont enable ARCH_HAS_PTE_SPECIAL but required to define quite similar fallback stubs for special page table entry helpers such as pte_special() and pte_mkspecial(), as they get build in generic MM without a config check. This creates two generic fallback stub definitions for these helpers, eliminating much code duplication. mips platform has a special case where pte_special() and pte_mkspecial() visibility is wider than what ARCH_HAS_PTE_SPECIAL enablement requires. This restricts those symbol visibility in order to avoid redefinitions which is now exposed through this new generic stubs and subsequent build failure. arm platform set_pte_at() definition needs to be moved into a C file just to prevent a build failure. [anshuman.khandual@arm.com: use defined(CONFIG_ARCH_HAS_PTE_SPECIAL) in mips per Thomas] Link: http://lkml.kernel.org/r/1583851924-21603-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Guo Ren [csky] Acked-by: Geert Uytterhoeven [m68k] Acked-by: Stafford Horne [openrisc] Acked-by: Helge Deller [parisc] Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Russell King Cc: Brian Cain Cc: Tony Luck Cc: Fenghua Yu Cc: Sam Creasey Cc: Michal Simek Cc: Ralf Baechle Cc: Paul Burton Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Ley Foon Tan Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: "James E.J. Bottomley" Cc: "David S. Miller" Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Cc: Guan Xuetao Cc: Chris Zankel Cc: Max Filippov Cc: Thomas Bogendoerfer Link: http://lkml.kernel.org/r/1583802551-15406-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4db1522d7c48..5a323422d783 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1927,6 +1927,18 @@ static inline void sync_mm_rss(struct mm_struct *mm) } #endif +#ifndef CONFIG_ARCH_HAS_PTE_SPECIAL +static inline int pte_special(pte_t pte) +{ + return 0; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} +#endif + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { -- cgit v1.2.3 From 96c6b598135e7cec66161e8943823470c7c8954e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:17 -0700 Subject: mm/memory_hotplug: drop the flags field from struct mhp_restrictions Patch series "Allow setting caching mode in arch_add_memory() for P2PDMA", v4. Currently, the page tables created using memremap_pages() are always created with the PAGE_KERNEL cacheing mode. However, the P2PDMA code is creating pages for PCI BAR memory which should never be accessed through the cache and instead use either WC or UC. This still works in most cases, on x86, because the MTRR registers typically override the caching settings in the page tables for all of the IO memory to be UC-. However, this tends not to work so well on other arches or some rare x86 machines that have firmware which does not setup the MTRR registers in this way. Instead of this, this series proposes a change to arch_add_memory() to take the pgprot required by the mapping which allows us to explicitly set pagetable entries for P2PDMA memory to UC. This changes is pretty routine for most of the arches: x86_64, arm64 and powerpc simply need to thread the pgprot through to where the page tables are setup. x86_32 unfortunately sets up the page tables at boot so must use _set_memory_prot() to change their caching mode. ia64, s390 and sh don't appear to have an easy way to change the page tables so, for now at least, we just return -EINVAL on such mappings and thus they will not support P2PDMA memory until the work for this is done. This should be fine as they don't yet support ZONE_DEVICE. This patch (of 7): This variable is not used anywhere and should therefore be removed from the structure. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Link: http://lkml.kernel.org/r/20200306170846.9333-2-logang@deltatee.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ef55115320fb..7c1bcff11672 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -59,11 +59,9 @@ enum { /* * Restrictions for the memory hotplug: - * flags: MHP_ flags * altmap: alternative allocator for memmap array */ struct mhp_restrictions { - unsigned long flags; struct vmem_altmap *altmap; }; -- cgit v1.2.3 From f5637d3b42ab0465ef71d5fb8461bce97fba95e8 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:21 -0700 Subject: mm/memory_hotplug: rename mhp_restrictions to mhp_params The mhp_restrictions struct really doesn't specify anything resembling a restriction anymore so rename it to be mhp_params as it is a list of extended parameters. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dave Hansen Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-3-logang@deltatee.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7c1bcff11672..75f0f6304735 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -58,10 +58,10 @@ enum { }; /* - * Restrictions for the memory hotplug: - * altmap: alternative allocator for memmap array + * Extended parameters for memory hotplug: + * altmap: alternative allocator for memmap array (optional) */ -struct mhp_restrictions { +struct mhp_params { struct vmem_altmap *altmap; }; @@ -112,7 +112,7 @@ extern int restore_online_page_callback(online_page_callback_t callback); extern int try_online_node(int nid); extern int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions); + struct mhp_params *params); extern u64 max_mem_size; extern int memhp_online_type_from_str(const char *str); @@ -133,17 +133,17 @@ extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, struct mhp_restrictions *restrictions) + unsigned long nr_pages, struct mhp_params *params) { - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } #else /* ARCH_HAS_ADD_PAGES */ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA -- cgit v1.2.3 From bfeb022f8fe4c5afdcfd7a3d868fac9765f9bcad Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:36 -0700 Subject: mm/memory_hotplug: add pgprot_t to mhp_params devm_memremap_pages() is currently used by the PCI P2PDMA code to create struct page mappings for IO memory. At present, these mappings are created with PAGE_KERNEL which implies setting the PAT bits to be WB. However, on x86, an mtrr register will typically override this and force the cache type to be UC-. In the case firmware doesn't set this register it is effectively WB and will typically result in a machine check exception when it's accessed. Other arches are not currently likely to function correctly seeing they don't have any MTRR registers to fall back on. To solve this, provide a way to specify the pgprot value explicitly to arch_add_memory(). Of the arches that support MEMORY_HOTPLUG: x86_64, and arm64 need a simple change to pass the pgprot_t down to their respective functions which set up the page tables. For x86_32, set the page tables explicitly using _set_memory_prot() (seeing they are already mapped). For ia64, s390 and sh, reject anything but PAGE_KERNEL settings -- this should be fine, for now, seeing these architectures don't support ZONE_DEVICE. A check in __add_pages() is also added to ensure the pgprot parameter was set for all arches. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Acked-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Dan Williams Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dave Hansen Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-7-logang@deltatee.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 75f0f6304735..93d9ada74ddd 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -60,9 +60,12 @@ enum { /* * Extended parameters for memory hotplug: * altmap: alternative allocator for memmap array (optional) + * pgprot: page protection flags to apply to newly created page tables + * (required) */ struct mhp_params { struct vmem_altmap *altmap; + pgprot_t pgprot; }; /* -- cgit v1.2.3 From 149ed3d404c9bd00f0fadc35215a9e7a54c5cfd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 10 Apr 2020 14:34:00 -0700 Subject: =?UTF-8?q?change=20email=20address=20for=20Pali=20Roh=C3=A1r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For security reasons I stopped using gmail account and kernel address is now up-to-date alias to my personal address. People periodically send me emails to address which they found in source code of drivers, so this change reflects state where people can contact me. [ Added .mailmap entry as per Joe Perches - Linus ] Signed-off-by: Pali Rohár Signed-off-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: Joe Perches Link: http://lkml.kernel.org/r/20200307104237.8199-1-pali@kernel.org Signed-off-by: Linus Torvalds --- include/linux/power/bq2415x_charger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h index 7a91b357e3ac..4ca08321e251 100644 --- a/include/linux/power/bq2415x_charger.h +++ b/include/linux/power/bq2415x_charger.h @@ -2,7 +2,7 @@ /* * bq2415x charger driver * - * Copyright (C) 2011-2013 Pali Rohár + * Copyright (C) 2011-2013 Pali Rohár */ #ifndef BQ2415X_CHARGER_H -- cgit v1.2.3 From eec8fd0277e37cf447b88c6be181e81df867bcf1 Mon Sep 17 00:00:00 2001 From: Odin Ugedal Date: Fri, 3 Apr 2020 19:55:28 +0200 Subject: device_cgroup: Cleanup cgroup eBPF device filter code Original cgroup v2 eBPF code for filtering device access made it possible to compile with CONFIG_CGROUP_DEVICE=n and still use the eBPF filtering. Change commit 4b7d4d453fc4 ("device_cgroup: Export devcgroup_check_permission") reverted this, making it required to set it to y. Since the device filtering (and all the docs) for cgroup v2 is no longer a "device controller" like it was in v1, someone might compile their kernel with CONFIG_CGROUP_DEVICE=n. Then (for linux 5.5+) the eBPF filter will not be invoked, and all processes will be allowed access to all devices, no matter what the eBPF filter says. Signed-off-by: Odin Ugedal Acked-by: Roman Gushchin Signed-off-by: Tejun Heo --- include/linux/device_cgroup.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index fa35b52e0002..9a72214496e5 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include -#include #define DEVCG_ACC_MKNOD 1 #define DEVCG_ACC_READ 2 @@ -11,16 +10,10 @@ #define DEVCG_DEV_CHAR 2 #define DEVCG_DEV_ALL 4 /* this represents all devices */ -#ifdef CONFIG_CGROUP_DEVICE -int devcgroup_check_permission(short type, u32 major, u32 minor, - short access); -#else -static inline int devcgroup_check_permission(short type, u32 major, u32 minor, - short access) -{ return 0; } -#endif #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) +int devcgroup_check_permission(short type, u32 major, u32 minor, + short access); static inline int devcgroup_inode_permission(struct inode *inode, int mask) { short type, access = 0; @@ -61,6 +54,9 @@ static inline int devcgroup_inode_mknod(int mode, dev_t dev) } #else +static inline int devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ return 0; } static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } static inline int devcgroup_inode_mknod(int mode, dev_t dev) -- cgit v1.2.3 From 07d8350ede4c4c29634b26c163a1eecdf39dfcfb Mon Sep 17 00:00:00 2001 From: afzal mohammed Date: Fri, 27 Mar 2020 21:41:16 +0530 Subject: genirq: Remove setup_irq() and remove_irq() Now that all the users of setup_irq() & remove_irq() have been replaced by request_irq() & free_irq() respectively, delete them. Signed-off-by: afzal mohammed Signed-off-by: Thomas Gleixner Reviewed-by: Linus Walleij Link: https://lkml.kernel.org/r/0aa8771ada1ac8e1312f6882980c9c08bd023148.1585320721.git.afzal.mohd.ma@gmail.com --- include/linux/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 9315fbb87db3..c63c2aa915ff 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -573,8 +573,6 @@ enum { #define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS struct irqaction; -extern int setup_irq(unsigned int irq, struct irqaction *new); -extern void remove_irq(unsigned int irq, struct irqaction *act); extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); -- cgit v1.2.3 From b6467ab142b708dd076f6186ca274f14af379c72 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Apr 2020 23:40:58 -0700 Subject: KVM: Check validity of resolved slot when searching memslots Check that the resolved slot (somewhat confusingly named 'start') is a valid/allocated slot before doing the final comparison to see if the specified gfn resides in the associated slot. The resolved slot can be invalid if the binary search loop terminated because the search index was incremented beyond the number of used slots. This bug has existed since the binary search algorithm was introduced, but went unnoticed because KVM statically allocated memory for the max number of slots, i.e. the access would only be truly out-of-bounds if all possible slots were allocated and the specified gfn was less than the base of the lowest memslot. Commit 36947254e5f98 ("KVM: Dynamically size memslot array based on number of used slots") eliminated the "all possible slots allocated" condition and made the bug embarrasingly easy to hit. Fixes: 9c1a5d38780e6 ("kvm: optimize GFN to memslot lookup with large slots amount") Reported-by: syzbot+d889b59b2bb87d4047a2@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20200408064059.8957-2-sean.j.christopherson@intel.com> Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6d58beb65454..01276e3d01b9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1048,7 +1048,7 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn) start = slot + 1; } - if (gfn >= memslots[start].base_gfn && + if (start < slots->used_slots && gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { atomic_set(&slots->lru_slot, start); return &memslots[start]; -- cgit v1.2.3 From 3c1d1613be80c2e17f1ddf672df1d8a8caebfd0d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 6 Apr 2020 14:25:31 +0200 Subject: i2c: remove i2c_new_probed_device API All in-tree users have been converted to the new i2c_new_scanned_device function, so remove this deprecated one. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 456fc17ecb1c..45d36ba4826b 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -461,12 +461,6 @@ i2c_new_scanned_device(struct i2c_adapter *adap, unsigned short const *addr_list, int (*probe)(struct i2c_adapter *adap, unsigned short addr)); -struct i2c_client * -i2c_new_probed_device(struct i2c_adapter *adap, - struct i2c_board_info *info, - unsigned short const *addr_list, - int (*probe)(struct i2c_adapter *adap, unsigned short addr)); - /* Common custom probe functions */ int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); -- cgit v1.2.3 From 20d60f6364474a978ab2a2146fb4c2bd9b6bbe3f Mon Sep 17 00:00:00 2001 From: Maciej Grochowski Date: Tue, 14 Apr 2020 00:17:03 -0400 Subject: include/linux/dmaengine: Typos fixes in API documentation Signed-off-by: Maciej Grochowski Link: https://lore.kernel.org/r/20200414041703.6661-1-maciek.grochowski@gmail.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 21065c04c4ac..31e58ec9f741 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -83,9 +83,9 @@ enum dma_transfer_direction { /** * Interleaved Transfer Request * ---------------------------- - * A chunk is collection of contiguous bytes to be transfered. + * A chunk is collection of contiguous bytes to be transferred. * The gap(in bytes) between two chunks is called inter-chunk-gap(ICG). - * ICGs may or maynot change between chunks. + * ICGs may or may not change between chunks. * A FRAME is the smallest series of contiguous {chunk,icg} pairs, * that when repeated an integral number of times, specifies the transfer. * A transfer template is specification of a Frame, the number of times @@ -1069,7 +1069,7 @@ static inline int dmaengine_terminate_all(struct dma_chan *chan) * dmaengine_synchronize() needs to be called before it is safe to free * any memory that is accessed by previously submitted descriptors or before * freeing any resources accessed from within the completion callback of any - * perviously submitted descriptors. + * previously submitted descriptors. * * This function can be called from atomic context as well as from within a * complete callback of a descriptor submitted on the same channel. @@ -1091,7 +1091,7 @@ static inline int dmaengine_terminate_async(struct dma_chan *chan) * * Synchronizes to the DMA channel termination to the current context. When this * function returns it is guaranteed that all transfers for previously issued - * descriptors have stopped and and it is safe to free the memory assoicated + * descriptors have stopped and it is safe to free the memory associated * with them. Furthermore it is guaranteed that all complete callback functions * for a previously submitted descriptor have finished running and it is safe to * free resources accessed from within the complete callbacks. -- cgit v1.2.3 From d87f639258a6a5980183f11876c884931ad93da2 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Feb 2020 16:14:11 -0800 Subject: ext4: use non-movable memory for superblock readahead Since commit a8ac900b8163 ("ext4: use non-movable memory for the superblock") buffers for ext4 superblock were allocated using the sb_bread_unmovable() helper which allocated buffer heads out of non-movable memory blocks. It was necessarily to not block page migrations and do not cause cma allocation failures. However commit 85c8f176a611 ("ext4: preload block group descriptors") broke this by introducing pre-reading of the ext4 superblock. The problem is that __breadahead() is using __getblk() underneath, which allocates buffer heads out of movable memory. It resulted in page migration failures I've seen on a machine with an ext4 partition and a preallocated cma area. Fix this by introducing sb_breadahead_unmovable() and __breadahead_gfp() helpers which use non-movable memory for buffer head allocations and use them for the ext4 superblock readahead. Reviewed-by: Andreas Dilger Fixes: 85c8f176a611 ("ext4: preload block group descriptors") Signed-off-by: Roman Gushchin Link: https://lore.kernel.org/r/20200229001411.128010-1-guro@fb.com Signed-off-by: Theodore Ts'o --- include/linux/buffer_head.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e0b020eaf32e..15b765a181b8 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -189,6 +189,8 @@ struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); +void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, + gfp_t gfp); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); @@ -319,6 +321,12 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } +static inline void +sb_breadahead_unmovable(struct super_block *sb, sector_t block) +{ + __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0); +} + static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { -- cgit v1.2.3 From 96806229ca033f85310bc5c203410189f8a1d2ee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 10 Apr 2020 11:13:26 +0100 Subject: irqchip/gic-v4.1: Add support for VPENDBASER's Dirty+Valid signaling When a vPE is made resident, the GIC starts parsing the virtual pending table to deliver pending interrupts. This takes place asynchronously, and can at times take a long while. Long enough that the vcpu enters the guest and hits WFI before any interrupt has been signaled yet. The vcpu then exits, blocks, and now gets a doorbell. Rince, repeat. In order to avoid the above, a (optional on GICv4, mandatory on v4.1) feature allows the GIC to feedback to the hypervisor whether it is done parsing the VPT by clearing the GICR_VPENDBASER.Dirty bit. The hypervisor can then wait until the GIC is ready before actually running the vPE. Plug the detection code as well as polling on vPE schedule. While at it, tidy-up the kernel message that displays the GICv4 optional features. Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 765d9b769b69..6c36b6cc3edf 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -243,6 +243,7 @@ #define GICR_TYPER_PLPIS (1U << 0) #define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_DIRTY (1U << 2) #define GICR_TYPER_DirectLPIS (1U << 3) #define GICR_TYPER_LAST (1U << 4) #define GICR_TYPER_RVPEID (1U << 7) @@ -686,6 +687,7 @@ struct rdists { bool has_vlpis; bool has_rvpeid; bool has_direct_lpi; + bool has_vpend_valid_dirty; }; struct irq_domain; -- cgit v1.2.3 From 3302363a27fb38a3581921a74aff855f4dcbfe0a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 1 Apr 2020 12:46:22 -0400 Subject: virtio/test: fix up after IOTLB changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow building vringh without IOTLB (that's the case for userspace builds, will be useful for CAIF/VOD down the road too). Update for API tweaks. Don't include vringh with userspace builds. Cc: Jason Wang Cc: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/linux/vringh.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index bd0503ca6f8f..9e2763d7c159 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -14,8 +14,10 @@ #include #include #include +#if IS_REACHABLE(CONFIG_VHOST_IOTLB) #include #include +#endif #include /* virtio_ring with information needed for host access. */ @@ -254,6 +256,8 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) return __cpu_to_virtio64(vringh_is_little_endian(vrh), val); } +#if IS_REACHABLE(CONFIG_VHOST_IOTLB) + void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb); int vringh_init_iotlb(struct vringh *vrh, u64 features, @@ -284,4 +288,6 @@ void vringh_notify_disable_iotlb(struct vringh *vrh); int vringh_need_notify_iotlb(struct vringh *vrh); +#endif /* CONFIG_VHOST_IOTLB */ + #endif /* _LINUX_VRINGH_H */ -- cgit v1.2.3 From 425a5070239aac22ed8fa4732eca624293f88546 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 9 Apr 2020 16:26:21 -0400 Subject: vdpa: allow a 32 bit vq alignment get_vq_align returns u16 now, but that's not enough for systems/devices with 64K pages. All callers assign it to a u32 variable anyway, so let's just change the return value type to u32. Reported-by: Arnd Bergmann Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 733acfb7ef84..5453af87a33e 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -164,7 +164,7 @@ struct vdpa_config_ops { u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx); /* Device ops */ - u16 (*get_vq_align)(struct vdpa_device *vdev); + u32 (*get_vq_align)(struct vdpa_device *vdev); u64 (*get_features)(struct vdpa_device *vdev); int (*set_features)(struct vdpa_device *vdev, u64 features); void (*set_config_cb)(struct vdpa_device *vdev, -- cgit v1.2.3 From 15064e70909cd8cca092ed6dd888bdbd9fefaae3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 17 Apr 2020 03:17:26 -0400 Subject: virtio: drop vringh.h dependency Most virtio drivers don't depend on vringh, let's not pull that dependency, include it directly as needed. Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 15f906e4a748..a493eac08393 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -9,7 +9,6 @@ #include #include #include -#include /** * virtqueue - a queue to register buffers for sending or receiving. -- cgit v1.2.3 From 2b07021a940ce1cdec736ec0cacad6af77717afc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 Apr 2020 16:54:48 +0200 Subject: debugfs: remove return value of debugfs_create_u32() No one checks the return value of debugfs_create_u32(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200416145448.GA1380878@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index a274d95fa66e..63cb3606dea7 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -103,8 +103,8 @@ void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value); void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value); -struct dentry *debugfs_create_u32(const char *name, umode_t mode, - struct dentry *parent, u32 *value); +void debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, + u32 *value); void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value); struct dentry *debugfs_create_ulong(const char *name, umode_t mode, @@ -250,12 +250,8 @@ static inline void debugfs_create_u8(const char *name, umode_t mode, static inline void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { } -static inline struct dentry *debugfs_create_u32(const char *name, umode_t mode, - struct dentry *parent, - u32 *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_u32(const char *name, umode_t mode, + struct dentry *parent, u32 *value) { } static inline void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value) { } -- cgit v1.2.3 From 23cf1ee1f1869966b75518c59b5cbda4c6c92450 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 31 Mar 2020 17:02:33 -0400 Subject: svcrdma: Fix leak of svc_rdma_recv_ctxt objects Utilize the xpo_release_rqst transport method to ensure that each rqstp's svc_rdma_recv_ctxt object is released even when the server cannot return a Reply for that rqstp. Without this fix, each RPC whose Reply cannot be sent leaks one svc_rdma_recv_ctxt. This is a 2.5KB structure, a 4KB DMA-mapped Receive buffer, and any pages that might be part of the Reply message. The leak is infrequent unless the network fabric is unreliable or Kerberos is in use, as GSS sequence window overruns, which result in connection loss, are more common on fast transports. Fixes: 3a88092ee319 ("svcrdma: Preserve Receive buffer until svc_rdma_sendto") Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 78fe2ac6dc6c..cbcfbd0521e3 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -170,6 +170,7 @@ extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *ctxt); extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); +extern void svc_rdma_release_rqst(struct svc_rqst *rqstp); extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ -- cgit v1.2.3 From c4b4c2a78a9fc0c532c58504e8cb5441224ff1d9 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Mon, 13 Apr 2020 13:12:10 +0800 Subject: buffer: remove useless comment and WB_REASON_FREE_MORE_MEM, reason. free_more_memory func has been completely removed in commit bc48f001de12 ("buffer: eliminate the need to call free_more_memory() in __getblk_slow()") So comment and `WB_REASON_FREE_MORE_MEM` reason about free_more_memory are no longer needed. Fixes: bc48f001de12 ("buffer: eliminate the need to call free_more_memory() in __getblk_slow()") Reviewed-by: Jan Kara Signed-off-by: Zhiqiang Liu Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 4fc87dee005a..ee577a83cfe6 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -54,7 +54,6 @@ enum wb_reason { WB_REASON_SYNC, WB_REASON_PERIODIC, WB_REASON_LAPTOP_TIMER, - WB_REASON_FREE_MORE_MEM, WB_REASON_FS_FREE_SPACE, /* * There is no bdi forker thread any more and works are done -- cgit v1.2.3 From 0a368bf00e3a7c57a57efc1bf79b79facb97639c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 16:40:21 -0500 Subject: bio: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index c1c0f9ea4e63..a0ee494a6329 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -319,7 +319,7 @@ struct bio_integrity_payload { struct work_struct bip_work; /* I/O completion */ struct bio_vec *bip_vec; - struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ + struct bio_vec bip_inline_vecs[];/* embedded bvec array */ }; #if defined(CONFIG_BLK_DEV_INTEGRITY) -- cgit v1.2.3 From f36aaf8be421099103193c49796a14213d3be315 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 16:43:39 -0500 Subject: blk-mq: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f389d7c724bd..b45148ba3291 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -173,7 +173,7 @@ struct blk_mq_hw_ctx { * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also * blk_mq_hw_ctx_size(). */ - struct srcu_struct srcu[0]; + struct srcu_struct srcu[]; }; /** -- cgit v1.2.3 From 5a58ec8cfc8621f5bdbd610202f62f817e5da204 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 16:45:36 -0500 Subject: blk_types: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 70254ae11769..31eb92876be7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -198,7 +198,7 @@ struct bio { * double allocations for a small number of bio_vecs. This member * MUST obviously be kept at the very end of the bio. */ - struct bio_vec bi_inline_vecs[0]; + struct bio_vec bi_inline_vecs[]; }; #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) -- cgit v1.2.3 From e76018cb604ace486de9cf85898c14bb2b47faff Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 16:48:10 -0500 Subject: can: dev: peak_canfd.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/can/dev/peak_canfd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h index 511a37302fea..5fd627e9da19 100644 --- a/include/linux/can/dev/peak_canfd.h +++ b/include/linux/can/dev/peak_canfd.h @@ -189,7 +189,7 @@ struct __packed pucan_rx_msg { u8 client; __le16 flags; __le32 can_id; - u8 d[0]; + u8 d[]; }; /* uCAN error types */ @@ -266,7 +266,7 @@ struct __packed pucan_tx_msg { u8 client; __le16 flags; __le32 can_id; - u8 d[0]; + u8 d[]; }; /* build the cmd opcode_channel field with respect to the correct endianness */ -- cgit v1.2.3 From 1fa0949bede6de2b595da535c3ce69de8e130db2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 17:03:49 -0500 Subject: digsig.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/digsig.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/digsig.h b/include/linux/digsig.h index 594fc66a395a..2ace69e41088 100644 --- a/include/linux/digsig.h +++ b/include/linux/digsig.h @@ -29,7 +29,7 @@ struct pubkey_hdr { uint32_t timestamp; /* key made, always 0 for now */ uint8_t algo; uint8_t nmpi; - char mpi[0]; + char mpi[]; } __packed; struct signature_hdr { @@ -39,7 +39,7 @@ struct signature_hdr { uint8_t hash; uint8_t keyid[8]; uint8_t nmpi; - char mpi[0]; + char mpi[]; } __packed; #if defined(CONFIG_SIGNATURE) || defined(CONFIG_SIGNATURE_MODULE) -- cgit v1.2.3 From a2008395fe2ebd9cd82f220d034d36cc887f35fe Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 17:17:52 -0500 Subject: dirent.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/dirent.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dirent.h b/include/linux/dirent.h index fc61f3cff72f..99002220cd45 100644 --- a/include/linux/dirent.h +++ b/include/linux/dirent.h @@ -7,7 +7,7 @@ struct linux_dirent64 { s64 d_off; unsigned short d_reclen; unsigned char d_type; - char d_name[0]; + char d_name[]; }; #endif -- cgit v1.2.3 From 192199464d6cccb084356add54b3a48d6dde9f96 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 17:21:19 -0500 Subject: enclosure.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/enclosure.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index 564e96f625ff..1c630e2c2756 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -101,7 +101,7 @@ struct enclosure_device { struct device edev; struct enclosure_component_callbacks *cb; int components; - struct enclosure_component component[0]; + struct enclosure_component component[]; }; static inline struct enclosure_device * -- cgit v1.2.3 From beb69f15a095245c5cc62389eea93002b41d2eb9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 17:23:01 -0500 Subject: energy_model.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/energy_model.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index d249b88a4d5a..ade6486a3382 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -36,7 +36,7 @@ struct em_cap_state { struct em_perf_domain { struct em_cap_state *table; int nr_cap_states; - unsigned long cpus[0]; + unsigned long cpus[]; }; #ifdef CONFIG_ENERGY_MODEL -- cgit v1.2.3 From 5299a11a9378e8c68e3b8e2040f7aa7e401d50b7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 17:24:53 -0500 Subject: ethtool.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c1d379bf6ee1..a23b26eab479 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -35,7 +35,7 @@ struct compat_ethtool_rxnfc { compat_u64 data; struct compat_ethtool_rx_flow_spec fs; u32 rule_cnt; - u32 rule_locs[0]; + u32 rule_locs[]; }; #endif /* CONFIG_COMPAT */ @@ -462,7 +462,7 @@ int ethtool_check_ops(const struct ethtool_ops *ops); struct ethtool_rx_flow_rule { struct flow_rule *rule; - unsigned long priv[0]; + unsigned long priv[]; }; struct ethtool_rx_flow_spec_input { -- cgit v1.2.3 From 89f60a5d9bf5a6b9b16dfdd56a91c4a2d7b8830d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 17:43:59 -0500 Subject: genalloc.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/genalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 5b14a0f38124..0bd581003cd5 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -76,7 +76,7 @@ struct gen_pool_chunk { void *owner; /* private data to retrieve at alloc time */ unsigned long start_addr; /* start address of memory chunk */ unsigned long end_addr; /* end address of memory chunk (inclusive) */ - unsigned long bits[0]; /* bitmap for allocating memory chunk */ + unsigned long bits[]; /* bitmap for allocating memory chunk */ }; /* -- cgit v1.2.3 From 0ead33642f1df89699f2e4dda8eea59c326b68f6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 17:59:00 -0500 Subject: igmp.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 463047d0190b..faa6586a5783 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -38,7 +38,7 @@ struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; - __be32 sl_addr[0]; + __be32 sl_addr[]; }; #define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \ -- cgit v1.2.3 From 1d9e13e8ef05029c61d52ad9a6f48f14771d14b7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 18:00:04 -0500 Subject: ihex.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/ihex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 98cb5ce0b0a0..b824877e6d1b 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -18,7 +18,7 @@ struct ihex_binrec { __be32 addr; __be16 len; - uint8_t data[0]; + uint8_t data[]; } __attribute__((packed)); static inline uint16_t ihex_binrec_size(const struct ihex_binrec *p) -- cgit v1.2.3 From 7856e9f12f1f59cc6abb25f92b336528d0660ebb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 18:01:11 -0500 Subject: irq.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/irq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 9315fbb87db3..fa8ad93029ad 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1043,7 +1043,7 @@ struct irq_chip_generic { unsigned long unused; struct irq_domain *domain; struct list_head list; - struct irq_chip_type chip_types[0]; + struct irq_chip_type chip_types[]; }; /** @@ -1079,7 +1079,7 @@ struct irq_domain_chip_generic { unsigned int irq_flags_to_clear; unsigned int irq_flags_to_set; enum irq_gc_flags gc_flags; - struct irq_chip_generic *gc[0]; + struct irq_chip_generic *gc[]; }; /* Generic chip callback functions */ -- cgit v1.2.3 From 312322722872324939f0d0347a6e41807c2d4c56 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 16:58:49 -0500 Subject: lib: cpu_rmap: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/cpu_rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index 02edeafcb2bf..be8aea04d023 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -28,7 +28,7 @@ struct cpu_rmap { struct { u16 index; u16 dist; - } near[0]; + } near[]; }; #define CPU_RMAP_DIST_INF 0xffff -- cgit v1.2.3 From 859b494111b196853fd8c1852c6b57ef33738b50 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 18:32:01 -0500 Subject: list_lru.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/list_lru.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index d5ceb2839a2d..9dcaa3e582c9 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -34,7 +34,7 @@ struct list_lru_one { struct list_lru_memcg { struct rcu_head rcu; /* array of per cgroup lists, indexed by memcg_cache_id */ - struct list_lru_one *lru[0]; + struct list_lru_one *lru[]; }; struct list_lru_node { -- cgit v1.2.3 From 307ed94c37f842676d336cf5f2162022f4d7cdc4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 18:36:10 -0500 Subject: memcontrol.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/memcontrol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1b4150ff64be..d275c72c4f8e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -106,7 +106,7 @@ struct lruvec_stat { */ struct memcg_shrinker_map { struct rcu_head rcu; - unsigned long map[0]; + unsigned long map[]; }; /* @@ -148,7 +148,7 @@ struct mem_cgroup_threshold_ary { /* Size of entries[] */ unsigned int size; /* Array of thresholds */ - struct mem_cgroup_threshold entries[0]; + struct mem_cgroup_threshold entries[]; }; struct mem_cgroup_thresholds { -- cgit v1.2.3 From 1223f3db71ba7bbcf2e77c7a5d4f440c2a2fa9c3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:07:49 -0500 Subject: platform_data: wilco-ec.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/platform_data/wilco-ec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h index 25f46a939637..3e268e636b5b 100644 --- a/include/linux/platform_data/wilco-ec.h +++ b/include/linux/platform_data/wilco-ec.h @@ -83,7 +83,7 @@ struct wilco_ec_response { u16 result; u16 data_size; u8 reserved[2]; - u8 data[0]; + u8 data[]; } __packed; /** -- cgit v1.2.3 From 70f1451ec98ee43d2c66d2caa5ae6935ee97f90a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:08:58 -0500 Subject: posix_acl.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/posix_acl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 540595a321a7..90797f1b421d 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -28,7 +28,7 @@ struct posix_acl { refcount_t a_refcount; struct rcu_head a_rcu; unsigned int a_count; - struct posix_acl_entry a_entries[0]; + struct posix_acl_entry a_entries[]; }; #define FOREACH_ACL_ENTRY(pa, acl, pe) \ -- cgit v1.2.3 From a1c4b9247ddfb62fe3a23eb53d250382e82fae77 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:12:17 -0500 Subject: rio.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/rio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rio.h b/include/linux/rio.h index 317bace5ac64..2cd637268b4f 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -100,7 +100,7 @@ struct rio_switch { u32 port_ok; struct rio_switch_ops *ops; spinlock_t lock; - struct rio_dev *nextdev[0]; + struct rio_dev *nextdev[]; }; /** @@ -201,7 +201,7 @@ struct rio_dev { u8 hopcount; struct rio_dev *prev; atomic_t state; - struct rio_switch rswitch[0]; /* RIO switch info */ + struct rio_switch rswitch[]; /* RIO switch info */ }; #define rio_dev_g(n) list_entry(n, struct rio_dev, global_list) -- cgit v1.2.3 From 9dd8bb5f8c449e87cc0084a118673c6d4182bab2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:13:20 -0500 Subject: rslib.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/rslib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rslib.h b/include/linux/rslib.h index 5974cedd008c..238bb85243d3 100644 --- a/include/linux/rslib.h +++ b/include/linux/rslib.h @@ -54,7 +54,7 @@ struct rs_codec { */ struct rs_control { struct rs_codec *codec; - uint16_t buffers[0]; + uint16_t buffers[]; }; /* General purpose RS codec, 8-bit data width, symbol width 1-15 bit */ -- cgit v1.2.3 From fe946db6ca851a0cd8c2f9c9dd96ef74e051cf2f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:14:37 -0500 Subject: sched: topology.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/sched/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index af9319e4cfb9..95253ad792b0 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -142,7 +142,7 @@ struct sched_domain { * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) */ - unsigned long span[0]; + unsigned long span[]; }; static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -- cgit v1.2.3 From 5c91aa1df00ec4fa283c35e92736392df3137d81 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:22:24 -0500 Subject: skbuff.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a2ac7072dbb..3000c526f552 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4162,7 +4162,7 @@ struct skb_ext { refcount_t refcnt; u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */ u8 chunks; /* same */ - char data[0] __aligned(8); + char data[] __aligned(8); }; struct skb_ext *__skb_ext_alloc(void); -- cgit v1.2.3 From 16c3380f8c2e7ed3d75a30776a89aabf5512027a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:23:10 -0500 Subject: swap.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index b835d8dbea0e..e1bbf7a16b27 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -275,7 +275,7 @@ struct swap_info_struct { */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ - struct plist_node avail_lists[0]; /* + struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. * Must be last as the number of the -- cgit v1.2.3 From 4ea19ecf322c2f98ef87fc980b3851625b082ac2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:25:06 -0500 Subject: ti_wilink_st.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/ti_wilink_st.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index eb6cbdf10e50..44a7f9169ac6 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -295,7 +295,7 @@ struct bts_header { u32 magic; u32 version; u8 future[24]; - u8 actions[0]; + u8 actions[]; } __attribute__ ((packed)); /** @@ -305,7 +305,7 @@ struct bts_header { struct bts_action { u16 type; u16 size; - u8 data[0]; + u8 data[]; } __attribute__ ((packed)); struct bts_action_send { @@ -315,7 +315,7 @@ struct bts_action_send { struct bts_action_wait { u32 msec; u32 size; - u8 data[0]; + u8 data[]; } __attribute__ ((packed)); struct bts_action_delay { -- cgit v1.2.3 From 06ccf63da5d8e90e4dff8b741972a9b279b5bf4c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:38:18 -0500 Subject: tpm_eventlog.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/tpm_eventlog.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 131ea1bad458..c253461b1c4e 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -28,7 +28,7 @@ struct tcpa_event { u32 event_type; u8 pcr_value[20]; /* SHA1 */ u32 event_size; - u8 event_data[0]; + u8 event_data[]; }; enum tcpa_event_types { @@ -55,7 +55,7 @@ enum tcpa_event_types { struct tcpa_pc_event { u32 event_id; u32 event_size; - u8 event_data[0]; + u8 event_data[]; }; enum tcpa_pc_event_ids { @@ -102,7 +102,7 @@ struct tcg_pcr_event { struct tcg_event_field { u32 event_size; - u8 event[0]; + u8 event[]; } __packed; struct tcg_pcr_event2_head { -- cgit v1.2.3 From 43951585e1308b322c8ee31a4aafd08213f5c5d7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Mar 2020 19:41:14 -0500 Subject: xattr.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva --- include/linux/xattr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 4cf6e11f4a3c..47eaa34f8761 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -73,7 +73,7 @@ struct simple_xattr { struct list_head list; char *name; size_t size; - char value[0]; + char value[]; }; /* -- cgit v1.2.3 From a07479147be03d2450376ebaff9ea1a0682f25d6 Mon Sep 17 00:00:00 2001 From: Lars Engebretsen Date: Wed, 15 Apr 2020 12:10:43 +0200 Subject: iio: core: remove extra semi-colon from devm_iio_device_register() macro This change removes the semi-colon from the devm_iio_device_register() macro which seems to have been added by accident. Fixes: 63b19547cc3d9 ("iio: Use macro magic to avoid manual assign of driver_module") Signed-off-by: Lars Engebretsen Cc: Reviewed-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index eed58ed2f368..4e7848415c4f 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -600,7 +600,7 @@ void iio_device_unregister(struct iio_dev *indio_dev); * 0 on success, negative error number on failure. */ #define devm_iio_device_register(dev, indio_dev) \ - __devm_iio_device_register((dev), (indio_dev), THIS_MODULE); + __devm_iio_device_register((dev), (indio_dev), THIS_MODULE) int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, struct module *this_mod); void devm_iio_device_unregister(struct device *dev, struct iio_dev *indio_dev); -- cgit v1.2.3 From bdebd6a2831b6fab69eb85cee74a8ba77f1a1cc2 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 20 Apr 2020 18:14:11 -0700 Subject: vmalloc: fix remap_vmalloc_range() bounds checks remap_vmalloc_range() has had various issues with the bounds checks it promises to perform ("This function checks that addr is a valid vmalloc'ed area, and that it is big enough to cover the vma") over time, e.g.: - not detecting pgoff< Signed-off-by: Andrew Morton Cc: stable@vger.kernel.org Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: Andrii Nakryiko Cc: John Fastabend Cc: KP Singh Link: http://lkml.kernel.org/r/20200415222312.236431-1-jannh@google.com Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0507a162ccd0..a95d3cc74d79 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -137,7 +137,7 @@ extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, void *kaddr, - unsigned long size); + unsigned long pgoff, unsigned long size); extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); -- cgit v1.2.3 From b9663b7ca6ff780555108394c9c1b409f63b99a7 Mon Sep 17 00:00:00 2001 From: Voon Weifeng Date: Mon, 20 Apr 2020 23:42:52 +0800 Subject: net: stmmac: Enable SERDES power up/down sequence This patch is to enable Intel SERDES power up/down sequence. The SERDES converts 8/10 bits data to SGMII signal. Below is an example of HW configuration for SGMII mode. The SERDES is located in the PHY IF in the diagram below. <-----------------GBE Controller---------->|<--External PHY chip--> +----------+ +----+ +---+ +----------+ | EQoS | <-GMII->| DW | < ------ > |PHY| <-SGMII-> | External | | MAC | |xPCS| |IF | | PHY | +----------+ +----+ +---+ +----------+ ^ ^ ^ ^ | | | | +---------------------MDIO-------------------------+ PHY IF configuration and status registers are accessible through mdio address 0x15 which is defined as mdio_adhoc_addr. During D0, The driver will need to power up PHY IF by changing the power state to P0. Likewise, for D3, the driver sets PHY IF power state to P3. Signed-off-by: Voon Weifeng Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index fbafb353e9be..bd964c31d333 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -177,6 +177,8 @@ struct plat_stmmacenet_data { struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; void (*fix_mac_speed)(void *priv, unsigned int speed); + int (*serdes_powerup)(struct net_device *ndev, void *priv); + void (*serdes_powerdown)(struct net_device *ndev, void *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); struct mac_device_info *(*setup)(void *priv); -- cgit v1.2.3 From 01b2bafe57b19d9119413f138765ef57990921ce Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 14 Apr 2020 12:10:50 -0300 Subject: pnp: Use list_for_each_entry() instead of open coding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aside from good practice, this avoids a warning from gcc 10: ./include/linux/kernel.h:997:3: warning: array subscript -31 is outside array bounds of ‘struct list_head[1]’ [-Warray-bounds] 997 | ((type *)(__mptr - offsetof(type, member))); }) | ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/list.h:493:2: note: in expansion of macro ‘container_of’ 493 | container_of(ptr, type, member) | ^~~~~~~~~~~~ ./include/linux/pnp.h:275:30: note: in expansion of macro ‘list_entry’ 275 | #define global_to_pnp_dev(n) list_entry(n, struct pnp_dev, global_list) | ^~~~~~~~~~ ./include/linux/pnp.h:281:11: note: in expansion of macro ‘global_to_pnp_dev’ 281 | (dev) != global_to_pnp_dev(&pnp_global); \ | ^~~~~~~~~~~~~~~~~ arch/x86/kernel/rtc.c:189:2: note: in expansion of macro ‘pnp_for_each_dev’ 189 | pnp_for_each_dev(dev) { Because the common code doesn't cast the starting list_head to the containing struct. Signed-off-by: Jason Gunthorpe [ rjw: Whitespace adjustments ] Signed-off-by: Rafael J. Wysocki --- include/linux/pnp.h | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index b18dca67253d..c2a7cfbca713 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -220,10 +220,8 @@ struct pnp_card { #define global_to_pnp_card(n) list_entry(n, struct pnp_card, global_list) #define protocol_to_pnp_card(n) list_entry(n, struct pnp_card, protocol_list) #define to_pnp_card(n) container_of(n, struct pnp_card, dev) -#define pnp_for_each_card(card) \ - for((card) = global_to_pnp_card(pnp_cards.next); \ - (card) != global_to_pnp_card(&pnp_cards); \ - (card) = global_to_pnp_card((card)->global_list.next)) +#define pnp_for_each_card(card) \ + list_for_each_entry(card, &pnp_cards, global_list) struct pnp_card_link { struct pnp_card *card; @@ -276,14 +274,9 @@ struct pnp_dev { #define card_to_pnp_dev(n) list_entry(n, struct pnp_dev, card_list) #define protocol_to_pnp_dev(n) list_entry(n, struct pnp_dev, protocol_list) #define to_pnp_dev(n) container_of(n, struct pnp_dev, dev) -#define pnp_for_each_dev(dev) \ - for((dev) = global_to_pnp_dev(pnp_global.next); \ - (dev) != global_to_pnp_dev(&pnp_global); \ - (dev) = global_to_pnp_dev((dev)->global_list.next)) -#define card_for_each_dev(card,dev) \ - for((dev) = card_to_pnp_dev((card)->devices.next); \ - (dev) != card_to_pnp_dev(&(card)->devices); \ - (dev) = card_to_pnp_dev((dev)->card_list.next)) +#define pnp_for_each_dev(dev) list_for_each_entry(dev, &pnp_global, global_list) +#define card_for_each_dev(card, dev) \ + list_for_each_entry(dev, &(card)->devices, card_list) #define pnp_dev_name(dev) (dev)->name static inline void *pnp_get_drvdata(struct pnp_dev *pdev) @@ -437,14 +430,10 @@ struct pnp_protocol { }; #define to_pnp_protocol(n) list_entry(n, struct pnp_protocol, protocol_list) -#define protocol_for_each_card(protocol,card) \ - for((card) = protocol_to_pnp_card((protocol)->cards.next); \ - (card) != protocol_to_pnp_card(&(protocol)->cards); \ - (card) = protocol_to_pnp_card((card)->protocol_list.next)) -#define protocol_for_each_dev(protocol,dev) \ - for((dev) = protocol_to_pnp_dev((protocol)->devices.next); \ - (dev) != protocol_to_pnp_dev(&(protocol)->devices); \ - (dev) = protocol_to_pnp_dev((dev)->protocol_list.next)) +#define protocol_for_each_card(protocol, card) \ + list_for_each_entry(card, &(protocol)->cards, protocol_list) +#define protocol_for_each_dev(protocol, dev) \ + list_for_each_entry(dev, &(protocol)->devices, protocol_list) extern struct bus_type pnp_bus_type; -- cgit v1.2.3 From 62d0fd591db1f9dcf68fb963b3a94af085a6b166 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 22 Apr 2020 01:13:55 +0900 Subject: arch: split MODULE_ARCH_VERMAGIC definitions out to As the bug report [1] pointed out, must be included after . I believe we should not impose any include order restriction. We often sort include directives alphabetically, but it is just coding style convention. Technically, we can include header files in any order by making every header self-contained. Currently, arch-specific MODULE_ARCH_VERMAGIC is defined in , which is not included from . Hence, the straight-forward fix-up would be as follows: |--- a/include/linux/vermagic.h |+++ b/include/linux/vermagic.h |@@ -1,5 +1,6 @@ | /* SPDX-License-Identifier: GPL-2.0 */ | #include |+#include | | /* Simply sanity version stamp for modules. */ | #ifdef CONFIG_SMP This works enough, but for further cleanups, I split MODULE_ARCH_VERMAGIC definitions into . With this, and will be orthogonal, and the location of MODULE_ARCH_VERMAGIC definitions will be consistent. For arc and ia64, MODULE_PROC_FAMILY is only used for defining MODULE_ARCH_VERMAGIC. I squashed it. For hexagon, nds32, and xtensa, I removed entirely because they contained nothing but MODULE_ARCH_VERMAGIC definition. Kbuild will automatically generate at build-time, wrapping . [1] https://lore.kernel.org/lkml/20200411155623.GA22175@zn.tnic Reported-by: Borislav Petkov Signed-off-by: Masahiro Yamada Acked-by: Jessica Yu --- include/linux/vermagic.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 9aced11e9000..dc236577b92f 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -1,5 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VERMAGIC_H +#define _LINUX_VERMAGIC_H + #include +#include /* Simply sanity version stamp for modules. */ #ifdef CONFIG_SMP @@ -24,9 +28,6 @@ #else #define MODULE_VERMAGIC_MODVERSIONS "" #endif -#ifndef MODULE_ARCH_VERMAGIC -#define MODULE_ARCH_VERMAGIC "" -#endif #ifdef RANDSTRUCT_PLUGIN #include #define MODULE_RANDSTRUCT_PLUGIN "RANDSTRUCT_PLUGIN_" RANDSTRUCT_HASHED_SEED @@ -41,3 +42,4 @@ MODULE_ARCH_VERMAGIC \ MODULE_RANDSTRUCT_PLUGIN +#endif /* _LINUX_VERMAGIC_H */ -- cgit v1.2.3 From a2806ef77ff9a965a70d6c194bb3a4801bbdb07d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 13 Apr 2020 18:32:46 +0300 Subject: tracing: Remove DECLARE_TRACE_NOARGS This macro was intentionally broken so that the kernel code is not poluted with such noargs macro used simply as markers. This use case can be satisfied by using dummy no inline functions. Just remove it. Link: http://lkml.kernel.org/r/20200413153246.8511-1-nborisov@suse.com Signed-off-by: Nikolay Borisov Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 1fb11daa5c53..a1fecf311621 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -156,8 +156,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * Note, the proto and args passed in includes "__data" as the first parameter. * The reason for this is to handle the "void" prototype. If a tracepoint * has a "void" prototype, then it is invalid to declare a function - * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just - * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". + * as "(void *, void)". */ #define __DO_TRACE(tp, proto, args, cond, rcuidle) \ do { \ @@ -373,25 +372,6 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) # define __tracepoint_string #endif -/* - * The need for the DECLARE_TRACE_NOARGS() is to handle the prototype - * (void). "void" is a special value in a function prototype and can - * not be combined with other arguments. Since the DECLARE_TRACE() - * macro adds a data element at the beginning of the prototype, - * we need a way to differentiate "(void *data, proto)" from - * "(void *data, void)". The second prototype is invalid. - * - * DECLARE_TRACE_NOARGS() passes "void" as the tracepoint prototype - * and "void *__data" as the callback prototype. - * - * DECLARE_TRACE() passes "proto" as the tracepoint protoype and - * "void *__data, proto" as the callback prototype. - */ -#define DECLARE_TRACE_NOARGS(name) \ - __DECLARE_TRACE(name, void, , \ - cpu_online(raw_smp_processor_id()), \ - void *__data, __data) - #define DECLARE_TRACE(name, proto, args) \ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()), \ -- cgit v1.2.3 From 31c9590ae468478fe47dc0f5f0d3562b2f69450e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 18 Apr 2020 21:06:23 -0400 Subject: SUNRPC: Add "@len" parameter to gss_unwrap() Refactor: This is a pre-requisite to fixing the client-side ralign computation in gss_unwrap_resp_priv(). The length value is passed in explicitly rather that as the value of buf->len. This will subsequently allow gss_unwrap_kerberos_v1() to compute a slack and align value, instead of computing it in gss_unwrap_resp_priv(). Fixes: 35e77d21baa0 ("SUNRPC: Add rpc_auth::au_ralign field") Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_api.h | 2 ++ include/linux/sunrpc/gss_krb5.h | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 48c1b1674cbf..e9a79518d652 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -66,6 +66,7 @@ u32 gss_wrap( u32 gss_unwrap( struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *inbuf); u32 gss_delete_sec_context( struct gss_ctx **ctx_id); @@ -126,6 +127,7 @@ struct gss_api_ops { u32 (*gss_unwrap)( struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *buf); void (*gss_delete_sec_context)( void *internal_ctx_id); diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index c1d77dd8ed41..e8f8ffe7448b 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -83,7 +83,7 @@ struct gss_krb5_enctype { u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages); /* v2 encryption function */ - u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, + u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *headskip, u32 *tailskip); /* v2 decryption function */ }; @@ -255,7 +255,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, struct xdr_buf *outbuf, struct page **pages); u32 -gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, +gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len, struct xdr_buf *buf); @@ -312,7 +312,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, struct page **pages); u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, +gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *plainoffset, u32 *plainlen); -- cgit v1.2.3 From a7e429a6fa6d612d1dacde96c885dc1bb4a9f400 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 18 Apr 2020 14:38:19 -0400 Subject: SUNRPC: Fix GSS privacy computation of auth->au_ralign When the au_ralign field was added to gss_unwrap_resp_priv, the wrong calculation was used. Setting au_rslack == au_ralign is probably correct for kerberos_v1 privacy, but kerberos_v2 privacy adds additional GSS data after the clear text RPC message. au_ralign needs to be smaller than au_rslack in that fairly common case. When xdr_buf_trim() is restored to gss_unwrap_kerberos_v2(), it does exactly what I feared it would: it trims off part of the clear text RPC message. However, that's because rpc_prepare_reply_pages() does not set up the rq_rcv_buf's tail correctly because au_ralign is too large. Fixing the au_ralign computation also corrects the alignment of rq_rcv_buf->pages so that the client does not have to shift reply data payloads after they are received. Fixes: 35e77d21baa0 ("SUNRPC: Add rpc_auth::au_ralign field") Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_api.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index e9a79518d652..bc07e51f20d1 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -21,6 +21,7 @@ struct gss_ctx { struct gss_api_mech *mech_type; void *internal_ctx_id; + unsigned int slack, align; }; #define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) -- cgit v1.2.3 From 0a8e7b7d08466b5fc52f8e96070acc116d82a8bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 15 Apr 2020 17:36:22 -0400 Subject: SUNRPC: Revert 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()") I've noticed that when krb5i or krb5p security is in use, retransmitted requests are missing the server's duplicate reply cache. The computed checksum on the retransmitted request does not match the cached checksum, resulting in the server performing the retransmitted request again instead of returning the cached reply. The assumptions made when removing xdr_buf_trim() were not correct. In the send paths, the upper layer has already set the segment lengths correctly, and shorting the buffer's content is simply a matter of reducing buf->len. xdr_buf_trim() is the right answer in the receive/unwrap path on both the client and the server. The buffer segment lengths have to be shortened one-by-one. On the server side in particular, head.iov_len needs to be updated correctly to enable nfsd_cache_csum() to work correctly. The simple buf->len computation doesn't do that, and that results in checksumming stale data in the buffer. The problem isn't noticed until there's significant instability of the RPC transport. At that point, the reliability of retransmit detection on the server becomes crucial. Fixes: 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()") Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 01bb41908c93..22c207b2425f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -184,6 +184,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -- cgit v1.2.3 From 0821009445a8261ac4d32a6df4b83938e007c765 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Apr 2020 10:40:12 -0700 Subject: dmaengine: fix channel index enumeration When the channel register code was changed to allow hotplug operations, dynamic indexing wasn't taken into account. When channels are randomly plugged and unplugged out of order, the serial indexing breaks. Convert channel indexing to using IDA tracking in order to allow dynamic assignment. The previous code does not cause any regression bug for existing channel allocation besides idxd driver since the hotplug usage case is only used by idxd at this point. With this change, the chan->idr_ref is also not needed any longer. We can have a device with no channels registered due to hot plug. The channel device release code no longer should attempt to free the dma device id on the last channel release. Fixes: e81274cd6b52 ("dmaengine: add support to dynamic register/unregister of channels") Reported-by: Yixin Zhang Signed-off-by: Dave Jiang Tested-by: Yixin Zhang Link: https://lore.kernel.org/r/158679961260.7674.8485924270472851852.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 31e58ec9f741..e1c03339918f 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -341,13 +341,11 @@ struct dma_chan { * @chan: driver channel device * @device: sysfs device * @dev_id: parent dma_device dev_id - * @idr_ref: reference count to gate release of dma_device dev_id */ struct dma_chan_dev { struct dma_chan *chan; struct device device; int dev_id; - atomic_t *idr_ref; }; /** @@ -835,6 +833,8 @@ struct dma_device { int dev_id; struct device *dev; struct module *owner; + struct ida chan_ida; + struct mutex chan_mutex; /* to protect chan_ida */ u32 src_addr_widths; u32 dst_addr_widths; -- cgit v1.2.3 From a78d163978567adc2733465289293dad479d842a Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 24 Apr 2020 17:08:30 +0200 Subject: vsock/virtio: fix multiple packet delivery to monitoring devices In virtio_transport.c, if the virtqueue is full, the transmitting packet is queued up and it will be sent in the next iteration. This causes the same packet to be delivered multiple times to monitoring devices. We want to continue to deliver packets to monitoring devices before it is put in the virtqueue, to avoid that replies can appear in the packet capture before the transmitted packet. This patch fixes the issue, adding a new flag (tap_delivered) in struct virtio_vsock_pkt, to check if the packet is already delivered to monitoring devices. In vhost/vsock.c, we are splitting packets, so we must set 'tap_delivered' to false when we queue up the same virtio_vsock_pkt to handle the remaining bytes. Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 71c81e0dc8f2..dc636b727179 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -48,6 +48,7 @@ struct virtio_vsock_pkt { u32 len; u32 off; bool reply; + bool tap_delivered; }; struct virtio_vsock_pkt_info { -- cgit v1.2.3 From 501be6c1c72417eab05e7413671a38ea991a8ebc Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 25 Mar 2020 21:16:03 +0100 Subject: drm/tegra: Fix SMMU support on Tegra124 and Tegra210 When testing whether or not to enable the use of the SMMU, consult the supported DMA mask rather than the actually configured DMA mask, since the latter might already have been restricted. Fixes: 2d9384ff9177 ("drm/tegra: Relax IOMMU usage criteria on old Tegra") Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- include/linux/host1x.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 62d216ff1097..c230b4e70d75 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -17,9 +17,12 @@ enum host1x_class { HOST1X_CLASS_GR3D = 0x60, }; +struct host1x; struct host1x_client; struct iommu_group; +u64 host1x_get_dma_mask(struct host1x *host1x); + /** * struct host1x_client_ops - host1x client operations * @init: host1x client initialization code -- cgit v1.2.3 From 9495b7e92f716ab2bd6814fab5e97ab4a39adfdd Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 22 Apr 2020 12:09:54 +0200 Subject: driver core: platform: Initialize dma_parms for platform devices It's currently the platform driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common platform bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Suggested-by: Christoph Hellwig Cc: Tested-by: Haibo Chen Reviewed-by: Arnd Bergmann Signed-off-by: Ulf Hansson Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200422100954.31211-1-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index bdc35753ef7c..77a2aada106d 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -25,6 +25,7 @@ struct platform_device { bool id_auto; struct device dev; u64 platform_dma_mask; + struct device_dma_parameters dma_parms; u32 num_resources; struct resource *resource; -- cgit v1.2.3 From f458488425f1cc9a396aa1d09bb00c48783936da Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 22 Apr 2020 12:10:13 +0200 Subject: amba: Initialize dma_parms for amba devices It's currently the amba driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common amba bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Suggested-by: Christoph Hellwig Cc: Russell King Cc: Tested-by: Haibo Chen Reviewed-by: Arnd Bergmann Signed-off-by: Ulf Hansson Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200422101013.31267-1-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/amba/bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 26f0ecf401ea..0bbfd647f5c6 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -65,6 +65,7 @@ struct amba_device { struct device dev; struct resource res; struct clk *pclk; + struct device_dma_parameters dma_parms; unsigned int periphid; unsigned int cid; struct amba_cs_uci_id uci; -- cgit v1.2.3 From 7c4310ff56422ea43418305d22bbc5fe19150ec4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 3 Apr 2020 14:33:41 +1100 Subject: SUNRPC: defer slow parts of rpc_free_client() to a workqueue. The rpciod workqueue is on the write-out path for freeing dirty memory, so it is important that it never block waiting for memory to be allocated - this can lead to a deadlock. rpc_execute() - which is often called by an rpciod work item - calls rcp_task_release_client() which can lead to rpc_free_client(). rpc_free_client() makes two calls which could potentially block wating for memory allocation. rpc_clnt_debugfs_unregister() calls into debugfs and will block while any of the debugfs files are being accessed. In particular it can block while any of the 'open' methods are being called and all of these use malloc for one thing or another. So this can deadlock if the memory allocation waits for NFS to complete some writes via rpciod. rpc_clnt_remove_pipedir() can take the inode_lock() and while it isn't obvious that memory allocations can happen while the lock it held, it is safer to assume they might and to not let rpciod call rpc_clnt_remove_pipedir(). So this patch moves these two calls (together with the final kfree() and rpciod_down()) into a work-item to be run from the system work-queue. rpciod can continue its important work, and the final stages of the free can happen whenever they happen. I have seen this deadlock on a 4.12 based kernel where debugfs used synchronize_srcu() when removing objects. synchronize_srcu() requires a workqueue and there were no free workther threads and none could be allocated. While debugsfs no longer uses SRCU, I believe the deadlock is still possible. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ca7e108248e2..7bd124e06b36 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -71,7 +71,13 @@ struct rpc_clnt { #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *cl_debugfs; /* debugfs directory */ #endif - struct rpc_xprt_iter cl_xpi; + /* cl_work is only needed after cl_xpi is no longer used, + * and that are of similar size + */ + union { + struct rpc_xprt_iter cl_xpi; + struct work_struct cl_work; + }; const struct cred *cl_cred; }; -- cgit v1.2.3 From dff58530c4ca8ce7ee5a74db431c6e35362cf682 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 24 Apr 2020 17:45:50 -0400 Subject: NFSv4.1: fix handling of backchannel binding in BIND_CONN_TO_SESSION Currently, if the client sends BIND_CONN_TO_SESSION with NFS4_CDFC4_FORE_OR_BOTH but only gets NFS4_CDFS4_FORE back it ignores that it wasn't able to enable a backchannel. To make sure, the client sends BIND_CONN_TO_SESSION as the first operation on the connections (ie., no other session compounds haven't been sent before), and if the client's request to bind the backchannel is not satisfied, then reset the connection and retry. Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 ++ include/linux/sunrpc/clnt.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 440230488025..e5f3e7d8d3d5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1317,11 +1317,13 @@ struct nfs41_impl_id { struct nfstime4 date; }; +#define MAX_BIND_CONN_TO_SESSION_RETRIES 3 struct nfs41_bind_conn_to_session_args { struct nfs_client *client; struct nfs4_sessionid sessionid; u32 dir; bool use_conn_in_rdma_mode; + int retries; }; struct nfs41_bind_conn_to_session_res { diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 7bd124e06b36..02e7a5863d28 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -242,4 +242,9 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } +static inline void rpc_task_close_connection(struct rpc_task *task) +{ + if (task->tk_xprt) + xprt_force_disconnect(task->tk_xprt); +} #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From b31d1d2b1c3a8452f425b09ebd374ecd3ddd5179 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Mon, 27 Apr 2020 15:59:02 -0700 Subject: platform/chrome: cros_ec_sensorhub: Allocate sensorhub resource before claiming sensors Allocate callbacks array before enumerating the sensors: The probe routine for these sensors (for instance cros_ec_sensors_probe) can be called within the sensorhub probe routine (cros_ec_sensors_probe()) Fixes: 145d59baff594 ("platform/chrome: cros_ec_sensorhub: Add FIFO support") Signed-off-by: Gwendal Grignou Reported-by: Douglas Anderson Tested-by: Douglas Anderson Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_sensorhub.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_sensorhub.h b/include/linux/platform_data/cros_ec_sensorhub.h index c588be843f61..0ecce6aa69d5 100644 --- a/include/linux/platform_data/cros_ec_sensorhub.h +++ b/include/linux/platform_data/cros_ec_sensorhub.h @@ -185,6 +185,7 @@ int cros_ec_sensorhub_register_push_data(struct cros_ec_sensorhub *sensorhub, void cros_ec_sensorhub_unregister_push_data(struct cros_ec_sensorhub *sensorhub, u8 sensor_num); +int cros_ec_sensorhub_ring_allocate(struct cros_ec_sensorhub *sensorhub); int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub); void cros_ec_sensorhub_ring_remove(void *arg); int cros_ec_sensorhub_ring_fifo_enable(struct cros_ec_sensorhub *sensorhub, -- cgit v1.2.3 From 6f49c2515e2258f08f2b905c9772dbf729610415 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 7 Apr 2020 21:20:34 -0700 Subject: dma-buf: fix documentation build warnings Fix documentation warnings in dma-buf.[hc]: ../drivers/dma-buf/dma-buf.c:678: warning: Function parameter or member 'importer_ops' not described in 'dma_buf_dynamic_attach' ../drivers/dma-buf/dma-buf.c:678: warning: Function parameter or member 'importer_priv' not described in 'dma_buf_dynamic_attach' ../include/linux/dma-buf.h:339: warning: Incorrect use of kernel-doc format: * @move_notify Signed-off-by: Randy Dunlap Cc: Sumit Semwal Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/7bcbe6fe-0b4b-87da-d003-b68a26eb4cf0@infradead.org --- include/linux/dma-buf.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 1ade486fc2bb..57bcef6f988a 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -329,13 +329,12 @@ struct dma_buf { /** * struct dma_buf_attach_ops - importer operations for an attachment - * @move_notify: [optional] notification that the DMA-buf is moving * * Attachment operations implemented by the importer. */ struct dma_buf_attach_ops { /** - * @move_notify + * @move_notify: [optional] notification that the DMA-buf is moving * * If this callback is provided the framework can avoid pinning the * backing store while mappings exists. -- cgit v1.2.3 From cfde141ea3faa30e362bbdb5c28001bbbdb0b8e0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 30 Apr 2020 15:01:52 +0200 Subject: mptcp: move option parsing into mptcp_incoming_options() The mptcp_options_received structure carries several per packet flags (mp_capable, mp_join, etc.). Such fields must be cleared on each packet, even on dropped ones or packet not carrying any MPTCP options, but the current mptcp code clears them only on TCP option reset. On several races/corner cases we end-up with stray bits in incoming options, leading to WARN_ON splats. e.g.: [ 171.164906] Bad mapping: ssn=32714 map_seq=1 map_data_len=32713 [ 171.165006] WARNING: CPU: 1 PID: 5026 at net/mptcp/subflow.c:533 warn_bad_map (linux-mptcp/net/mptcp/subflow.c:533 linux-mptcp/net/mptcp/subflow.c:531) [ 171.167632] Modules linked in: ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel geneve ip6_udp_tunnel udp_tunnel macsec macvtap tap ipvlan macvlan 8021q garp mrp xfrm_interface veth netdevsim nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun binfmt_misc intel_rapl_msr intel_rapl_common rfkill kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel joydev virtio_balloon pcspkr i2c_piix4 sunrpc ip_tables xfs libcrc32c crc32c_intel serio_raw virtio_console ata_generic virtio_blk virtio_net net_failover failover ata_piix libata [ 171.199464] CPU: 1 PID: 5026 Comm: repro Not tainted 5.7.0-rc1.mptcp_f227fdf5d388+ #95 [ 171.200886] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014 [ 171.202546] RIP: 0010:warn_bad_map (linux-mptcp/net/mptcp/subflow.c:533 linux-mptcp/net/mptcp/subflow.c:531) [ 171.206537] Code: c1 ea 03 0f b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 1d 8b 55 3c 44 89 e6 48 c7 c7 20 51 13 95 e8 37 8b 22 fe <0f> 0b 48 83 c4 08 5b 5d 41 5c c3 89 4c 24 04 e8 db d6 94 fe 8b 4c [ 171.220473] RSP: 0018:ffffc90000150560 EFLAGS: 00010282 [ 171.221639] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 171.223108] RDX: 0000000000000000 RSI: 0000000000000008 RDI: fffff5200002a09e [ 171.224388] RBP: ffff8880aa6e3c00 R08: 0000000000000001 R09: fffffbfff2ec9955 [ 171.225706] R10: ffffffff9764caa7 R11: fffffbfff2ec9954 R12: 0000000000007fca [ 171.227211] R13: ffff8881066f4a7f R14: ffff8880aa6e3c00 R15: 0000000000000020 [ 171.228460] FS: 00007f8623719740(0000) GS:ffff88810be00000(0000) knlGS:0000000000000000 [ 171.230065] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 171.231303] CR2: 00007ffdab190a50 CR3: 00000001038ea006 CR4: 0000000000160ee0 [ 171.232586] Call Trace: [ 171.233109] [ 171.233531] get_mapping_status (linux-mptcp/net/mptcp/subflow.c:691) [ 171.234371] mptcp_subflow_data_available (linux-mptcp/net/mptcp/subflow.c:736 linux-mptcp/net/mptcp/subflow.c:832) [ 171.238181] subflow_state_change (linux-mptcp/net/mptcp/subflow.c:1085 (discriminator 1)) [ 171.239066] tcp_fin (linux-mptcp/net/ipv4/tcp_input.c:4217) [ 171.240123] tcp_data_queue (linux-mptcp/./include/linux/compiler.h:199 linux-mptcp/net/ipv4/tcp_input.c:4822) [ 171.245083] tcp_rcv_established (linux-mptcp/./include/linux/skbuff.h:1785 linux-mptcp/./include/net/tcp.h:1774 linux-mptcp/./include/net/tcp.h:1847 linux-mptcp/net/ipv4/tcp_input.c:5238 linux-mptcp/net/ipv4/tcp_input.c:5730) [ 171.254089] tcp_v4_rcv (linux-mptcp/./include/linux/spinlock.h:393 linux-mptcp/net/ipv4/tcp_ipv4.c:2009) [ 171.258969] ip_protocol_deliver_rcu (linux-mptcp/net/ipv4/ip_input.c:204 (discriminator 1)) [ 171.260214] ip_local_deliver_finish (linux-mptcp/./include/linux/rcupdate.h:651 linux-mptcp/net/ipv4/ip_input.c:232) [ 171.261389] ip_local_deliver (linux-mptcp/./include/linux/netfilter.h:307 linux-mptcp/./include/linux/netfilter.h:301 linux-mptcp/net/ipv4/ip_input.c:252) [ 171.265884] ip_rcv (linux-mptcp/./include/linux/netfilter.h:307 linux-mptcp/./include/linux/netfilter.h:301 linux-mptcp/net/ipv4/ip_input.c:539) [ 171.273666] process_backlog (linux-mptcp/./include/linux/rcupdate.h:651 linux-mptcp/net/core/dev.c:6135) [ 171.275328] net_rx_action (linux-mptcp/net/core/dev.c:6572 linux-mptcp/net/core/dev.c:6640) [ 171.280472] __do_softirq (linux-mptcp/./arch/x86/include/asm/jump_label.h:25 linux-mptcp/./include/linux/jump_label.h:200 linux-mptcp/./include/trace/events/irq.h:142 linux-mptcp/kernel/softirq.c:293) [ 171.281379] do_softirq_own_stack (linux-mptcp/arch/x86/entry/entry_64.S:1083) [ 171.282358] We could address the issue clearing explicitly the relevant fields in several places - tcp_parse_option, tcp_fast_parse_options, possibly others. Instead we move the MPTCP option parsing into the already existing mptcp ingress hook, so that we need to clear the fields in a single place. This allows us dropping an MPTCP hook from the TCP code and removing the quite large mptcp_options_received from the tcp_sock struct. On the flip side, the MPTCP sockets will traverse the option space twice (in tcp_parse_option() and in mptcp_incoming_options(). That looks acceptable: we already do that for syn and 3rd ack packets, plain TCP socket will benefit from it, and even MPTCP sockets will experience better code locality, reducing the jumps between TCP and MPTCP code. v1 -> v2: - rebased on current '-net' tree Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/tcp.h | 51 --------------------------------------------------- 1 file changed, 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 421c99c12291..4f8159e90ce1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -78,47 +78,6 @@ struct tcp_sack_block { #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ #define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ -#if IS_ENABLED(CONFIG_MPTCP) -struct mptcp_options_received { - u64 sndr_key; - u64 rcvr_key; - u64 data_ack; - u64 data_seq; - u32 subflow_seq; - u16 data_len; - u16 mp_capable : 1, - mp_join : 1, - dss : 1, - add_addr : 1, - rm_addr : 1, - family : 4, - echo : 1, - backup : 1; - u32 token; - u32 nonce; - u64 thmac; - u8 hmac[20]; - u8 join_id; - u8 use_map:1, - dsn64:1, - data_fin:1, - use_ack:1, - ack64:1, - mpc_map:1, - __unused:2; - u8 addr_id; - u8 rm_id; - union { - struct in_addr addr; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - struct in6_addr addr6; -#endif - }; - u64 ahmac; - u16 port; -}; -#endif - struct tcp_options_received { /* PAWS/RTTM data */ int ts_recent_stamp;/* Time we stored ts_recent (for aging) */ @@ -136,9 +95,6 @@ struct tcp_options_received { u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ -#if IS_ENABLED(CONFIG_MPTCP) - struct mptcp_options_received mptcp; -#endif }; static inline void tcp_clear_options(struct tcp_options_received *rx_opt) @@ -148,13 +104,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) #if IS_ENABLED(CONFIG_SMC) rx_opt->smc_ok = 0; #endif -#if IS_ENABLED(CONFIG_MPTCP) - rx_opt->mptcp.mp_capable = 0; - rx_opt->mptcp.mp_join = 0; - rx_opt->mptcp.add_addr = 0; - rx_opt->mptcp.rm_addr = 0; - rx_opt->mptcp.dss = 0; -#endif } /* This is the max number of SACKS that we'll generate and process. It's safe -- cgit v1.2.3 From 54261af473be4c5481f6196064445d2945f2bdab Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 30 Apr 2020 17:52:40 +0200 Subject: security: Fix the default value of fs_context_parse_param hook security_fs_context_parse_param is called by vfs_parse_fs_param and a succussful return value (i.e 0) implies that a parameter will be consumed by the LSM framework. This stops all further parsing of the parmeter by VFS. Furthermore, if an LSM hook returns a success, the remaining LSM hooks are not invoked for the parameter. The current default behavior of returning success means that all the parameters are expected to be parsed by the LSM hook and none of them end up being populated by vfs in fs_context This was noticed when lsm=bpf is supplied on the command line before any other LSM. As the bpf lsm uses this default value to implement a default hook, this resulted in a failure to parse any fs_context parameters and a failure to mount the root filesystem. Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Reported-by: Mikko Ylinen Signed-off-by: KP Singh Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9cd4455528e5..1bdd027766d4 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -55,7 +55,7 @@ LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) -LSM_HOOK(int, 0, fs_context_parse_param, struct fs_context *fc, +LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc, struct fs_parameter *param) LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) -- cgit v1.2.3 From 9d82973e032e246ff5663c9805fbb5407ae932e3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 4 May 2020 09:16:37 -0700 Subject: gcc-10 warnings: fix low-hanging fruit Due to a bug-report that was compiler-dependent, I updated one of my machines to gcc-10. That shows a lot of new warnings. Happily they seem to be mostly the valid kind, but it's going to cause a round of churn for getting rid of them.. This is the really low-hanging fruit of removing a couple of zero-sized arrays in some core code. We have had a round of these patches before, and we'll have many more coming, and there is nothing special about these except that they were particularly trivial, and triggered more warnings than most. Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/tty.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f6f59b4f22a..45cc10cdf6dd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -983,7 +983,7 @@ struct file_handle { __u32 handle_bytes; int handle_type; /* file identifier */ - unsigned char f_handle[0]; + unsigned char f_handle[]; }; static inline struct file *get_file(struct file *f) diff --git a/include/linux/tty.h b/include/linux/tty.h index bd5fe0e907e8..a99e9b8e4e31 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -66,7 +66,7 @@ struct tty_buffer { int read; int flags; /* Data points here */ - unsigned long data[0]; + unsigned long data[]; }; /* Values for .flags field of tty_buffer */ -- cgit v1.2.3 From 2f5a55c52c00fcded796db5f961057ba3fec8910 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 2 May 2020 14:18:35 +0200 Subject: i2c: use my kernel.org address from now on The old email is still active, but for easier handling, I am going to use my kernel.org address from now on. Also, add a mailmap for the now defunct Pengutronix address. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 45d36ba4826b..49d29054e657 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -2,7 +2,7 @@ /* * i2c.h - definitions for the Linux i2c bus interface * Copyright (C) 1995-2000 Simon G. Vogl - * Copyright (C) 2013-2019 Wolfram Sang + * Copyright (C) 2013-2019 Wolfram Sang * * With some changes from Kyösti Mälkki and * Frodo Looijaard -- cgit v1.2.3 From 115f32512f13c0280161908e9de45a97a87673bb Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 1 May 2020 00:35:50 +0530 Subject: bus: mhi: Fix parsing of mhi_flags With the current parsing of mhi_flags, the following statement always return false: eob = !!(flags & MHI_EOB); This is due to the fact that 'enum mhi_flags' starts with index 0 and we are using direct AND operation to extract each bit. Fix this by using BIT() macros for defining the flags so that the reset of the code need not be touched. Fixes: 189ff97cca53 ("bus: mhi: core: Add support for data transfer") Reported-by: Dan Carpenter Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index ad1996001965..5642806360f3 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -53,9 +53,9 @@ enum mhi_callback { * @MHI_CHAIN: Linked transfer */ enum mhi_flags { - MHI_EOB, - MHI_EOT, - MHI_CHAIN, + MHI_EOB = BIT(0), + MHI_EOT = BIT(1), + MHI_CHAIN = BIT(2), }; /** -- cgit v1.2.3 From 85a087df4a719ebab940efa3c79625e68161f57b Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:52 +0530 Subject: bus: mhi: core: Remove link_status() callback If the MHI core detects invalid data due to a PCI read, it calls into the controller via link_status() to double check that the link is infact down. All in all, this is pretty pointless, and racy. There are no good reasons for this, and only drawbacks. Its pointless because chances are, the controller is going to do the same thing to determine if the link is down - attempt a PCI access and compare the result. This does not make the link status decision any smarter. Its racy because its possible that the link was down at the time of the MHI core access, but then recovered before the controller access. In this case, the controller will indicate the link is not down, and the MHI core will precede to use a bad value as the MHI core does not attempt to retry the access. Retrying the access in the MHI core is a bad idea because again, it is racy - what if the link is down again? Furthermore, there may be some higher level state associated with the link status, that is now invalid because the link went down. The only reason why the MHI core could see "invalid" data when doing a PCI access, that is actually valid, is if the register actually contained the PCI spec defined sentinel for an invalid access. In this case, it is arguable that the MHI implementation broken, and should be fixed, not worked around. Therefore, remove the link_status() callback before anyone attempts to implement it. Signed-off-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Reviewed-by: Hemant Kumar Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-4-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 5642806360f3..c80ba559face 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -335,7 +335,6 @@ struct mhi_controller_config { * @syserr_worker: System error worker * @state_event: State change event * @status_cb: CB function to notify power states of the device (required) - * @link_status: CB function to query link status of the device (required) * @wake_get: CB function to assert device wake (optional) * @wake_put: CB function to de-assert device wake (optional) * @wake_toggle: CB function to assert and de-assert device wake (optional) @@ -417,7 +416,6 @@ struct mhi_controller { void (*status_cb)(struct mhi_controller *mhi_cntrl, enum mhi_callback cb); - int (*link_status)(struct mhi_controller *mhi_cntrl); void (*wake_get)(struct mhi_controller *mhi_cntrl, bool override); void (*wake_put)(struct mhi_controller *mhi_cntrl, bool override); void (*wake_toggle)(struct mhi_controller *mhi_cntrl); -- cgit v1.2.3 From 45723a44845c90c8e859fd0e2b0bb492322b5d0b Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:53 +0530 Subject: bus: mhi: core: Offload register accesses to the controller When reading or writing MHI registers, the core assumes that the physical link is a memory mapped PCI link. This assumption may not hold for all MHI devices. The controller knows what is the physical link (ie PCI, I2C, SPI, etc), and therefore knows the proper methods to access that link. The controller can also handle link specific error scenarios, such as reading -1 when the PCI link went down. Therefore, it is appropriate that the MHI core requests the controller to make register accesses on behalf of the core, which abstracts the core from link specifics, and end up removing an unnecessary assumption. Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-5-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index c80ba559face..84a6c9e72f52 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -342,6 +342,8 @@ struct mhi_controller_config { * @runtimet_put: CB function to decrement pm usage (required) * @map_single: CB function to create TRE buffer * @unmap_single: CB function to destroy TRE buffer + * @read_reg: Read a MHI register via the physical link (required) + * @write_reg: Write a MHI register via the physical link (required) * @buffer_len: Bounce buffer length * @bounce_buf: Use of bounce buffer * @fbc_download: MHI host needs to do complete image transfer (optional) @@ -425,6 +427,10 @@ struct mhi_controller { struct mhi_buf_info *buf); void (*unmap_single)(struct mhi_controller *mhi_cntrl, struct mhi_buf_info *buf); + int (*read_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr, + u32 *out); + void (*write_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr, + u32 val); size_t buffer_len; bool bounce_buf; -- cgit v1.2.3 From af2e58818082ac0db29539444ca17eb1e77f6000 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:54 +0530 Subject: bus: mhi: core: Fix typo in comment There is a typo - "runtimet" should be "runtime". Fix it. Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-6-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 84a6c9e72f52..3d7c3c26eeb9 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -339,7 +339,7 @@ struct mhi_controller_config { * @wake_put: CB function to de-assert device wake (optional) * @wake_toggle: CB function to assert and de-assert device wake (optional) * @runtime_get: CB function to controller runtime resume (required) - * @runtimet_put: CB function to decrement pm usage (required) + * @runtime_put: CB function to decrement pm usage (required) * @map_single: CB function to create TRE buffer * @unmap_single: CB function to destroy TRE buffer * @read_reg: Read a MHI register via the physical link (required) -- cgit v1.2.3 From 81aabbb9fb7b4b1efd073b62f0505d3adad442f3 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 4 May 2020 10:21:44 -0700 Subject: bpf, sockmap: bpf_tcp_ingress needs to subtract bytes from sg.size In bpf_tcp_ingress we used apply_bytes to subtract bytes from sg.size which is used to track total bytes in a message. But this is not correct because apply_bytes is itself modified in the main loop doing the mem_charge. Then at the end of this we have sg.size incorrectly set and out of sync with actual sk values. Then we can get a splat if we try to cork the data later and again try to redirect the msg to ingress. To fix instead of trying to track msg.size do the easy thing and include it as part of the sk_msg_xfer logic so that when the msg is moved the sg.size is always correct. To reproduce the below users will need ingress + cork and hit an error path that will then try to 'free' the skmsg. [ 173.699981] BUG: KASAN: null-ptr-deref in sk_msg_free_elem+0xdd/0x120 [ 173.699987] Read of size 8 at addr 0000000000000008 by task test_sockmap/5317 [ 173.700000] CPU: 2 PID: 5317 Comm: test_sockmap Tainted: G I 5.7.0-rc1+ #43 [ 173.700005] Hardware name: Dell Inc. Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019 [ 173.700009] Call Trace: [ 173.700021] dump_stack+0x8e/0xcb [ 173.700029] ? sk_msg_free_elem+0xdd/0x120 [ 173.700034] ? sk_msg_free_elem+0xdd/0x120 [ 173.700042] __kasan_report+0x102/0x15f [ 173.700052] ? sk_msg_free_elem+0xdd/0x120 [ 173.700060] kasan_report+0x32/0x50 [ 173.700070] sk_msg_free_elem+0xdd/0x120 [ 173.700080] __sk_msg_free+0x87/0x150 [ 173.700094] tcp_bpf_send_verdict+0x179/0x4f0 [ 173.700109] tcp_bpf_sendpage+0x3ce/0x5d0 Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/158861290407.14306.5327773422227552482.stgit@john-Precision-5820-Tower --- include/linux/skmsg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 8a709f63c5e5..ad31c9fb7158 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -187,6 +187,7 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, dst->sg.data[which] = src->sg.data[which]; dst->sg.data[which].length = size; dst->sg.size += size; + src->sg.size -= size; src->sg.data[which].length -= size; src->sg.data[which].offset += size; } -- cgit v1.2.3 From 9274124f023b5c56dc4326637d4f787968b03607 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 May 2020 12:48:54 -0400 Subject: net: stricter validation of untrusted gso packets Syzkaller again found a path to a kernel crash through bad gso input: a packet with transport header extending beyond skb_headlen(skb). Tighten validation at kernel entry: - Verify that the transport header lies within the linear section. To avoid pulling linux/tcp.h, verify just sizeof tcphdr. tcp_gso_segment will call pskb_may_pull (th->doff * 4) before use. - Match the gso_type against the ip_proto found by the flow dissector. Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.") Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 0d1fe9297ac6..6f6ade63b04c 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,6 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include +#include +#include #include static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, @@ -28,17 +30,25 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, bool little_endian) { unsigned int gso_type = 0; + unsigned int thlen = 0; + unsigned int ip_proto; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: gso_type = SKB_GSO_TCPV4; + ip_proto = IPPROTO_TCP; + thlen = sizeof(struct tcphdr); break; case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; + ip_proto = IPPROTO_TCP; + thlen = sizeof(struct tcphdr); break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; + ip_proto = IPPROTO_UDP; + thlen = sizeof(struct udphdr); break; default: return -EINVAL; @@ -57,16 +67,22 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; + + if (skb_transport_offset(skb) + thlen > skb_headlen(skb)) + return -EINVAL; } else { /* gso packets without NEEDS_CSUM do not set transport_offset. * probe and drop if does not match one of the above types. */ if (gso_type && skb->network_header) { + struct flow_keys_basic keys; + if (!skb->protocol) virtio_net_hdr_set_proto(skb, hdr); retry: - skb_probe_transport_header(skb); - if (!skb_transport_header_was_set(skb)) { + if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, + 0)) { /* UFO does not specify ipv4 or 6: try both */ if (gso_type & SKB_GSO_UDP && skb->protocol == htons(ETH_P_IP)) { @@ -75,6 +91,12 @@ retry: } return -EINVAL; } + + if (keys.control.thoff + thlen > skb_headlen(skb) || + keys.basic.ip_proto != ip_proto) + return -EINVAL; + + skb_set_transport_header(skb, keys.control.thoff); } } -- cgit v1.2.3 From eb7ae5e06bb6e6ac6bb86872d27c43ebab92f6b2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 May 2020 14:47:54 +0200 Subject: bdi: move bdi_dev_name out of line bdi_dev_name is not a fast path function, move it out of line. This prepares for using it from modular callers without having to export an implementation detail like bdi_unknown_name. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Greg Kroah-Hartman Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f88197c1ffc2..c9ad5c3b7b4b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } -extern const char *bdi_unknown_name; - -static inline const char *bdi_dev_name(struct backing_dev_info *bdi) -{ - if (!bdi || !bdi->dev) - return bdi_unknown_name; - return dev_name(bdi->dev); -} +const char *bdi_dev_name(struct backing_dev_info *bdi); #endif /* _LINUX_BACKING_DEV_H */ -- cgit v1.2.3 From 54163a346d4a0a1b93f2ff6dc1f488419a605fa9 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 May 2020 08:17:53 -0500 Subject: KVM: Introduce kvm_make_all_cpus_request_except() This allows making request to all other vcpus except the one specified in the parameter. Signed-off-by: Suravee Suthikulpanit Message-Id: <1588771076-73790-2-git-send-email-suravee.suthikulpanit@amd.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 01276e3d01b9..131cc1527d68 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -813,8 +813,11 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except, unsigned long *vcpu_bitmap, cpumask_var_t tmp); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); +bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except); bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap); -- cgit v1.2.3 From 6bd87eec23cbc9ed222bed0f5b5b02bf300e9a8d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 May 2020 14:47:56 +0200 Subject: bdi: add a ->dev_name field to struct backing_dev_info Cache a copy of the name for the life time of the backing_dev_info structure so that we can reference it even after unregistering. Fixes: 68f23b89067f ("memcg: fix a crash in wb_workfn when a device disappears") Reported-by: Yufen Yu Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ee577a83cfe6..7367150f962a 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -219,6 +219,7 @@ struct backing_dev_info { wait_queue_head_t wb_waitq; struct device *dev; + char dev_name[64]; struct device *owner; struct timer_list laptop_mode_wb_timer; -- cgit v1.2.3 From 2c864c78c2386ada7433268cdfa8cb77cfe31bf3 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 11 May 2020 14:02:15 -0700 Subject: ptp: fix struct member comment for do_aux_work The do_aux_work callback had documentation in the structure comment which referred to it as "do_work". Signed-off-by: Jacob Keller Cc: Richard Cochran Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 121a7eda4593..c602670bbffb 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -105,10 +105,10 @@ struct ptp_system_timestamp { * parameter func: the desired function to use. * parameter chan: the function channel index to use. * - * @do_work: Request driver to perform auxiliary (periodic) operations - * Driver should return delay of the next auxiliary work scheduling - * time (>=0) or negative value in case further scheduling - * is not required. + * @do_aux_work: Request driver to perform auxiliary (periodic) operations + * Driver should return delay of the next auxiliary work + * scheduling time (>=0) or negative value in case further + * scheduling is not required. * * Drivers should embed their ptp_clock_info within a private * structure, obtaining a reference to it using container_of(). -- cgit v1.2.3 From 59566b0b622e3e6ea928c0b8cac8a5601b00b383 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 30 Apr 2020 20:21:47 -0400 Subject: x86/ftrace: Have ftrace trampolines turn read-only at the end of system boot up Booting one of my machines, it triggered the following crash: Kernel/User page tables isolation: enabled ftrace: allocating 36577 entries in 143 pages Starting tracer 'function' BUG: unable to handle page fault for address: ffffffffa000005c #PF: supervisor write access in kernel mode #PF: error_code(0x0003) - permissions violation PGD 2014067 P4D 2014067 PUD 2015063 PMD 7b253067 PTE 7b252061 Oops: 0003 [#1] PREEMPT SMP PTI CPU: 0 PID: 0 Comm: swapper Not tainted 5.4.0-test+ #24 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 RIP: 0010:text_poke_early+0x4a/0x58 Code: 34 24 48 89 54 24 08 e8 bf 72 0b 00 48 8b 34 24 48 8b 4c 24 08 84 c0 74 0b 48 89 df f3 a4 48 83 c4 10 5b c3 9c 58 fa 48 89 df a4 50 9d 48 83 c4 10 5b e9 d6 f9 ff ff 0 41 57 49 RSP: 0000:ffffffff82003d38 EFLAGS: 00010046 RAX: 0000000000000046 RBX: ffffffffa000005c RCX: 0000000000000005 RDX: 0000000000000005 RSI: ffffffff825b9a90 RDI: ffffffffa000005c RBP: ffffffffa000005c R08: 0000000000000000 R09: ffffffff8206e6e0 R10: ffff88807b01f4c0 R11: ffffffff8176c106 R12: ffffffff8206e6e0 R13: ffffffff824f2440 R14: 0000000000000000 R15: ffffffff8206eac0 FS: 0000000000000000(0000) GS:ffff88807d400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa000005c CR3: 0000000002012000 CR4: 00000000000006b0 Call Trace: text_poke_bp+0x27/0x64 ? mutex_lock+0x36/0x5d arch_ftrace_update_trampoline+0x287/0x2d5 ? ftrace_replace_code+0x14b/0x160 ? ftrace_update_ftrace_func+0x65/0x6c __register_ftrace_function+0x6d/0x81 ftrace_startup+0x23/0xc1 register_ftrace_function+0x20/0x37 func_set_flag+0x59/0x77 __set_tracer_option.isra.19+0x20/0x3e trace_set_options+0xd6/0x13e apply_trace_boot_options+0x44/0x6d register_tracer+0x19e/0x1ac early_trace_init+0x21b/0x2c9 start_kernel+0x241/0x518 ? load_ucode_intel_bsp+0x21/0x52 secondary_startup_64+0xa4/0xb0 I was able to trigger it on other machines, when I added to the kernel command line of both "ftrace=function" and "trace_options=func_stack_trace". The cause is the "ftrace=function" would register the function tracer and create a trampoline, and it will set it as executable and read-only. Then the "trace_options=func_stack_trace" would then update the same trampoline to include the stack tracer version of the function tracer. But since the trampoline already exists, it updates it with text_poke_bp(). The problem is that text_poke_bp() called while system_state == SYSTEM_BOOTING, it will simply do a memcpy() and not the page mapping, as it would think that the text is still read-write. But in this case it is not, and we take a fault and crash. Instead, lets keep the ftrace trampolines read-write during boot up, and then when the kernel executable text is set to read-only, the ftrace trampolines get set to read-only as well. Link: https://lkml.kernel.org/r/20200430202147.4dc6e2de@oasis.local.home Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: "H. Peter Anvin" Cc: stable@vger.kernel.org Fixes: 768ae4406a5c ("x86/ftrace: Use text_poke()") Acked-by: Peter Zijlstra Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db95244a62d4..ab4bd15cbcdb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -210,6 +210,29 @@ struct ftrace_ops { #endif }; +extern struct ftrace_ops __rcu *ftrace_ops_list; +extern struct ftrace_ops ftrace_list_end; + +/* + * Traverse the ftrace_global_list, invoking all entries. The reason that we + * can use rcu_dereference_raw_check() is that elements removed from this list + * are simply leaked, so there is no need to interact with a grace-period + * mechanism. The rcu_dereference_raw_check() calls are needed to handle + * concurrent insertions into the ftrace_global_list. + * + * Silly Alpha and silly pointer-speculation compiler optimizations! + */ +#define do_for_each_ftrace_op(op, list) \ + op = rcu_dereference_raw_check(list); \ + do + +/* + * Optimized for just a single item in the list (as that is the normal case). + */ +#define while_for_each_ftrace_op(op) \ + while (likely(op = rcu_dereference_raw_check((op)->next)) && \ + unlikely((op) != &ftrace_list_end)) + /* * Type of the current tracing. */ -- cgit v1.2.3 From 3d8c11efd528d56972d44ed0de51c4e11a9a4fa9 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 12 May 2020 13:55:02 +0900 Subject: efi: cper: Add support for printing Firmware Error Record Reference While debugging a boot failure, the following unknown error record was seen in the boot logs. <...> BERT: Error records from previous boot: [Hardware Error]: event severity: fatal [Hardware Error]: Error 0, type: fatal [Hardware Error]: section type: unknown, 81212a96-09ed-4996-9471-8d729c8e69ed [Hardware Error]: section length: 0x290 [Hardware Error]: 00000000: 00000001 00000000 00000000 00020002 ................ [Hardware Error]: 00000010: 00020002 0000001f 00000320 00000000 ........ ....... [Hardware Error]: 00000020: 00000000 00000000 00000000 00000000 ................ [Hardware Error]: 00000030: 00000000 00000000 00000000 00000000 ................ <...> On further investigation, it was found that the error record with UUID (81212a96-09ed-4996-9471-8d729c8e69ed) has been defined in the UEFI Specification at least since v2.4 and has recently had additional fields defined in v2.7 Section N.2.10 Firmware Error Record Reference. Add support for parsing and printing the defined fields to give users a chance to figure out what went wrong. Signed-off-by: Punit Agrawal Cc: Ard Biesheuvel Cc: "Rafael J. Wysocki" Cc: Borislav Petkov Cc: James Morse Cc: linux-acpi@vger.kernel.org Cc: linux-efi@vger.kernel.org Link: https://lore.kernel.org/r/20200512045502.3810339-1-punit1.agrawal@toshiba.co.jp Signed-off-by: Ard Biesheuvel --- include/linux/cper.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cper.h b/include/linux/cper.h index 4f005d95ce88..8537e9282a65 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -521,6 +521,15 @@ struct cper_sec_pcie { u8 aer_info[96]; }; +/* Firmware Error Record Reference, UEFI v2.7 sec N.2.10 */ +struct cper_sec_fw_err_rec_ref { + u8 record_type; + u8 revision; + u8 reserved[6]; + u64 record_identifier; + guid_t record_identifier_guid; +}; + /* Reset to default packing */ #pragma pack() -- cgit v1.2.3 From 04fd61a4e01028210a91f0efc408c8bc61a3018c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 13 May 2020 17:50:34 -0700 Subject: mm, memcg: fix inconsistent oom event behavior A recent commit 9852ae3fe529 ("mm, memcg: consider subtrees in memory.events") changed the behavior of memcg events, which will now consider subtrees in memory.events. But oom_kill event is a special one as it is used in both cgroup1 and cgroup2. In cgroup1, it is displayed in memory.oom_control. The file memory.oom_control is in both root memcg and non root memcg, that is different with memory.event as it only in non-root memcg. That commit is okay for cgroup2, but it is not okay for cgroup1 as it will cause inconsistent behavior between root memcg and non-root memcg. Here's an example on why this behavior is inconsistent in cgroup1. root memcg / memcg foo / memcg bar Suppose there's an oom_kill in memcg bar, then the oon_kill will be root memcg : memory.oom_control(oom_kill) 0 / memcg foo : memory.oom_control(oom_kill) 1 / memcg bar : memory.oom_control(oom_kill) 1 For the non-root memcg, its memory.oom_control(oom_kill) includes its descendants' oom_kill, but for root memcg, it doesn't include its descendants' oom_kill. That means, memory.oom_control(oom_kill) has different meanings in different memcgs. That is inconsistent. Then the user has to know whether the memcg is root or not. If we can't fully support it in cgroup1, for example by adding memory.events.local into cgroup1 as well, then let's don't touch its original behavior. Fixes: 9852ae3fe529 ("mm, memcg: consider subtrees in memory.events") Reported-by: Randy Dunlap Signed-off-by: Yafang Shao Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Chris Down Acked-by: Michal Hocko Cc: Link: http://lkml.kernel.org/r/20200502141055.7378-1-laoar.shao@gmail.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d275c72c4f8e..977edd3b7bd8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -783,6 +783,8 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, atomic_long_inc(&memcg->memory_events[event]); cgroup_file_notify(&memcg->events_file); + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + break; if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) break; } while ((memcg = parent_mem_cgroup(memcg)) && -- cgit v1.2.3 From 625236ba3832ae947cb3ebb7acc1f30788b274ef Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 12 May 2020 19:46:07 +0200 Subject: security: Fix the default value of secid_to_secctx hook security_secid_to_secctx is called by the bpf_lsm hook and a successful return value (i.e 0) implies that the parameter will be consumed by the LSM framework. The current behaviour return success when the pointer isn't initialized when CONFIG_BPF_LSM is enabled, with the default return from kernel/bpf/bpf_lsm.c. This is the internal error: [ 1229.341488][ T2659] usercopy: Kernel memory exposure attempt detected from null address (offset 0, size 280)! [ 1229.374977][ T2659] ------------[ cut here ]------------ [ 1229.376813][ T2659] kernel BUG at mm/usercopy.c:99! [ 1229.378398][ T2659] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 1229.380348][ T2659] Modules linked in: [ 1229.381654][ T2659] CPU: 0 PID: 2659 Comm: systemd-journal Tainted: G B W 5.7.0-rc5-next-20200511-00019-g864e0c6319b8-dirty #13 [ 1229.385429][ T2659] Hardware name: linux,dummy-virt (DT) [ 1229.387143][ T2659] pstate: 80400005 (Nzcv daif +PAN -UAO BTYPE=--) [ 1229.389165][ T2659] pc : usercopy_abort+0xc8/0xcc [ 1229.390705][ T2659] lr : usercopy_abort+0xc8/0xcc [ 1229.392225][ T2659] sp : ffff000064247450 [ 1229.393533][ T2659] x29: ffff000064247460 x28: 0000000000000000 [ 1229.395449][ T2659] x27: 0000000000000118 x26: 0000000000000000 [ 1229.397384][ T2659] x25: ffffa000127049e0 x24: ffffa000127049e0 [ 1229.399306][ T2659] x23: ffffa000127048e0 x22: ffffa000127048a0 [ 1229.401241][ T2659] x21: ffffa00012704b80 x20: ffffa000127049e0 [ 1229.403163][ T2659] x19: ffffa00012704820 x18: 0000000000000000 [ 1229.405094][ T2659] x17: 0000000000000000 x16: 0000000000000000 [ 1229.407008][ T2659] x15: 0000000000000000 x14: 003d090000000000 [ 1229.408942][ T2659] x13: ffff80000d5b25b2 x12: 1fffe0000d5b25b1 [ 1229.410859][ T2659] x11: 1fffe0000d5b25b1 x10: ffff80000d5b25b1 [ 1229.412791][ T2659] x9 : ffffa0001034bee0 x8 : ffff00006ad92d8f [ 1229.414707][ T2659] x7 : 0000000000000000 x6 : ffffa00015eacb20 [ 1229.416642][ T2659] x5 : ffff0000693c8040 x4 : 0000000000000000 [ 1229.418558][ T2659] x3 : ffffa0001034befc x2 : d57a7483a01c6300 [ 1229.420610][ T2659] x1 : 0000000000000000 x0 : 0000000000000059 [ 1229.422526][ T2659] Call trace: [ 1229.423631][ T2659] usercopy_abort+0xc8/0xcc [ 1229.425091][ T2659] __check_object_size+0xdc/0x7d4 [ 1229.426729][ T2659] put_cmsg+0xa30/0xa90 [ 1229.428132][ T2659] unix_dgram_recvmsg+0x80c/0x930 [ 1229.429731][ T2659] sock_recvmsg+0x9c/0xc0 [ 1229.431123][ T2659] ____sys_recvmsg+0x1cc/0x5f8 [ 1229.432663][ T2659] ___sys_recvmsg+0x100/0x160 [ 1229.434151][ T2659] __sys_recvmsg+0x110/0x1a8 [ 1229.435623][ T2659] __arm64_sys_recvmsg+0x58/0x70 [ 1229.437218][ T2659] el0_svc_common.constprop.1+0x29c/0x340 [ 1229.438994][ T2659] do_el0_svc+0xe8/0x108 [ 1229.440587][ T2659] el0_svc+0x74/0x88 [ 1229.441917][ T2659] el0_sync_handler+0xe4/0x8b4 [ 1229.443464][ T2659] el0_sync+0x17c/0x180 [ 1229.444920][ T2659] Code: aa1703e2 aa1603e1 910a8260 97ecc860 (d4210000) [ 1229.447070][ T2659] ---[ end trace 400497d91baeaf51 ]--- [ 1229.448791][ T2659] Kernel panic - not syncing: Fatal exception [ 1229.450692][ T2659] Kernel Offset: disabled [ 1229.452061][ T2659] CPU features: 0x240002,20002004 [ 1229.453647][ T2659] Memory Limit: none [ 1229.455015][ T2659] ---[ end Kernel panic - not syncing: Fatal exception ]--- Rework the so the default return value is -EOPNOTSUPP. There are likely other callbacks such as security_inode_getsecctx() that may have the same problem, and that someone that understand the code better needs to audit them. Thank you Arnd for helping me figure out what went wrong. Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: Anders Roxell Signed-off-by: Alexei Starovoitov Acked-by: James Morris Cc: Arnd Bergmann Link: https://lore.kernel.org/bpf/20200512174607.9630-1-anders.roxell@linaro.org --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9cd4455528e5..21f4fff9e4cd 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -243,7 +243,7 @@ LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) -LSM_HOOK(int, 0, secid_to_secctx, u32 secid, char **secdata, +LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, u32 *seclen) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) -- cgit v1.2.3 From cc8a677a76f419016b5e231207d09b073f9b1d3f Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Thu, 14 May 2020 08:57:33 +0800 Subject: net: phy: broadcom: fix BCM54XX_SHD_SCR3_TRDDAPD value for BCM54810 Set the correct bit when checking for PHY_BRCM_DIS_TXCRXC_NOENRGY on the BCM54810 PHY. Fixes: 0ececcfc9267 ("net: phy: broadcom: Allow BCM54810 to use bcm54xx_adjust_rxrefclk()") Signed-off-by: Kevin Lo Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6462c5447872..f4b77018c625 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -245,6 +245,7 @@ #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) #define BCM54810_SHD_CLK_CTL 0x3 #define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) +#define BCM54810_SHD_SCR3_TRDDAPD 0x0100 /* BCM54612E Registers */ #define BCM54612E_EXP_SPARE0 (MII_BCM54XX_EXP_SEL_ETC + 0x34) -- cgit v1.2.3 From 8695e0b1b964f6d7caee667f14dceb7e8a4a3b3c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:53:29 -0500 Subject: i2c: mux: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Peter Rosin Signed-off-by: Wolfram Sang --- include/linux/i2c-mux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index c5a977320f82..98ef73b7c8fd 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -29,7 +29,7 @@ struct i2c_mux_core { int num_adapters; int max_adapters; - struct i2c_adapter *adapter[0]; + struct i2c_adapter *adapter[]; }; struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent, -- cgit v1.2.3 From a9a3ed1eff3601b63aea4fb462d8b3b92c7c1e7e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 22 Apr 2020 18:11:30 +0200 Subject: x86: Fix early boot crash on gcc-10, third try MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... or the odyssey of trying to disable the stack protector for the function which generates the stack canary value. The whole story started with Sergei reporting a boot crash with a kernel built with gcc-10: Kernel panic — not syncing: stack-protector: Kernel stack is corrupted in: start_secondary CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.6.0-rc5—00235—gfffb08b37df9 #139 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./H77M—D3H, BIOS F12 11/14/2013 Call Trace: dump_stack panic ? start_secondary __stack_chk_fail start_secondary secondary_startup_64 -—-[ end Kernel panic — not syncing: stack—protector: Kernel stack is corrupted in: start_secondary This happens because gcc-10 tail-call optimizes the last function call in start_secondary() - cpu_startup_entry() - and thus emits a stack canary check which fails because the canary value changes after the boot_init_stack_canary() call. To fix that, the initial attempt was to mark the one function which generates the stack canary with: __attribute__((optimize("-fno-stack-protector"))) ... start_secondary(void *unused) however, using the optimize attribute doesn't work cumulatively as the attribute does not add to but rather replaces previously supplied optimization options - roughly all -fxxx options. The key one among them being -fno-omit-frame-pointer and thus leading to not present frame pointer - frame pointer which the kernel needs. The next attempt to prevent compilers from tail-call optimizing the last function call cpu_startup_entry(), shy of carving out start_secondary() into a separate compilation unit and building it with -fno-stack-protector, was to add an empty asm(""). This current solution was short and sweet, and reportedly, is supported by both compilers but we didn't get very far this time: future (LTO?) optimization passes could potentially eliminate this, which leads us to the third attempt: having an actual memory barrier there which the compiler cannot ignore or move around etc. That should hold for a long time, but hey we said that about the other two solutions too so... Reported-by: Sergei Trofimovich Signed-off-by: Borislav Petkov Tested-by: Kalle Valo Cc: Link: https://lkml.kernel.org/r/20200314164451.346497-1-slyfox@gentoo.org --- include/linux/compiler.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 034b0a644efc..448c91bf543b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -356,4 +356,10 @@ static inline void *offset_to_ptr(const int *off) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +/* + * This is needed in functions which generate the stack canary, see + * arch/x86/kernel/smpboot.c::start_secondary() for an example. + */ +#define prevent_tail_call_optimization() mb() + #endif /* __LINUX_COMPILER_H */ -- cgit v1.2.3 From e8da08a088236aff4b51d4ec97c750051f9fe417 Mon Sep 17 00:00:00 2001 From: Benjamin Thiel Date: Sat, 16 May 2020 15:26:47 +0200 Subject: efi: Pull up arch-specific prototype efi_systab_show_arch() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull up arch-specific prototype efi_systab_show_arch() in order to fix a -Wmissing-prototypes warning: arch/x86/platform/efi/efi.c:957:7: warning: no previous prototype for ‘efi_systab_show_arch’ [-Wmissing-prototypes] char *efi_systab_show_arch(char *str) Signed-off-by: Benjamin Thiel Link: https://lore.kernel.org/r/20200516132647.14568-1-b.thiel@posteo.de Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 251f1f783cdf..9430d01c0c3d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1245,4 +1245,6 @@ struct linux_efi_memreserve { void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size); +char *efi_systab_show_arch(char *str); + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From 17d00e839d3b592da9659c1977d45f85b77f986a Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 27 Dec 2019 07:01:53 +0200 Subject: net/mlx5: Add command entry handling completion When FW response to commands is very slow and all command entries in use are waiting for completion we can have a race where commands can get timeout before they get out of the queue and handled. Timeout completion on uninitialized command will cause releasing command's buffers before accessing it for initialization and then we will get NULL pointer exception while trying access it. It may also cause releasing buffers of another command since we may have timeout completion before even allocating entry index for this command. Add entry handling completion to avoid this race. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Moshe Shemesh Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6f8f79ef829b..9b1f29f26c27 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -743,6 +743,7 @@ struct mlx5_cmd_work_ent { struct delayed_work cb_timeout_work; void *context; int idx; + struct completion handling; struct completion done; struct mlx5_cmd *cmd; struct work_struct work; -- cgit v1.2.3 From d43b7007dbd1195a5b6b83213e49b1516aaf6f5e Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 18 Mar 2020 21:44:32 +0200 Subject: net/mlx5: Fix a race when moving command interface to events mode After driver creates (via FW command) an EQ for commands, the driver will be informed on new commands completion by EQE. However, due to a race in driver's internal command mode metadata update, some new commands will still be miss-handled by driver as if we are in polling mode. Such commands can get two non forced completion, leading to already freed command entry access. CREATE_EQ command, that maps EQ to the command queue must be posted to the command queue while it is empty and no other command should be posted. Add SW mechanism that once the CREATE_EQ command is about to be executed, all other commands will return error without being sent to the FW. Allow sending other commands only after successfully changing the driver's internal command mode metadata. We can safely return error to all other commands while creating the command EQ, as all other commands might be sent from the user/application during driver load. Application can rerun them later after driver's load was finished. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9b1f29f26c27..c03778c75dfa 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -284,6 +284,7 @@ struct mlx5_cmd { struct semaphore sem; struct semaphore pages_sem; int mode; + u16 allowed_opcode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; struct dma_pool *pool; struct mlx5_cmd_debug dbg; @@ -875,10 +876,15 @@ mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix) return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); } +enum { + CMD_ALLOWED_OPCODE_ALL, +}; + int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); struct mlx5_async_ctx { struct mlx5_core_dev *dev; -- cgit v1.2.3 From f7936ddd35d8b849daf0372770c7c9dbe7910fca Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 19 Mar 2020 21:43:13 +0200 Subject: net/mlx5: Avoid processing commands before cmdif is ready When driver is reloading during recovery flow, it can't get new commands till command interface is up again. Otherwise we may get to null pointer trying to access non initialized command structures. Add cmdif state to avoid processing commands while cmdif is not ready. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c03778c75dfa..8397b6558dc7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -213,6 +213,12 @@ enum mlx5_port_status { MLX5_PORT_DOWN = 2, }; +enum mlx5_cmdif_state { + MLX5_CMDIF_STATE_UNINITIALIZED, + MLX5_CMDIF_STATE_UP, + MLX5_CMDIF_STATE_DOWN, +}; + struct mlx5_cmd_first { __be32 data[4]; }; @@ -258,6 +264,7 @@ struct mlx5_cmd_stats { struct mlx5_cmd { struct mlx5_nb nb; + enum mlx5_cmdif_state state; void *cmd_alloc_buf; dma_addr_t alloc_dma; int alloc_size; @@ -882,6 +889,8 @@ enum { int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); +void mlx5_cmd_set_state(struct mlx5_core_dev *dev, + enum mlx5_cmdif_state cmdif_state); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); -- cgit v1.2.3 From d031781bdabe1027858a3220f868866586bf6e7c Mon Sep 17 00:00:00 2001 From: Pradeep Kumar Chitrapu Date: Wed, 6 May 2020 03:24:30 -0700 Subject: ieee80211: Fix incorrect mask for default PE duration Fixes bitmask for HE opration's default PE duration. Fixes: daa5b83513a7 ("mac80211: update HE operation fields to D3.0") Signed-off-by: Pradeep Kumar Chitrapu Link: https://lore.kernel.org/r/20200506102430.5153-1-pradeepc@codeaurora.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 16268ef1cbcc..5d3e48d02033 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2047,7 +2047,7 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) } /* HE Operation defines */ -#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK 0x00000003 +#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK 0x00000007 #define IEEE80211_HE_OPERATION_TWT_REQUIRED 0x00000008 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK 0x00003ff0 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET 4 -- cgit v1.2.3 From 4c559f15efcc43b996f4da528cd7f9483aaca36d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 May 2020 14:14:23 +0200 Subject: netfilter: nf_conntrack_pptp: prevent buffer overflows in debug code Dan Carpenter says: "Smatch complains that the value for "cmd" comes from the network and can't be trusted." Add pptp_msg_name() helper function that checks for the array boundary. Fixes: f09943fefe6b ("[NETFILTER]: nf_conntrack/nf_nat: add PPTP helper port") Reported-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_pptp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index fcc409de31a4..6a4ff6d5ebc2 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -10,7 +10,7 @@ #include #include -extern const char *const pptp_msg_name[]; +extern const char *const pptp_msg_name(u_int16_t msg); /* state of the control session */ enum pptp_ctrlsess_state { -- cgit v1.2.3 From 6dd912f82680761d8fb6b1bb274a69d4c7010988 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 25 May 2020 15:07:40 -0400 Subject: net: check untrusted gso_size at kernel entry Syzkaller again found a path to a kernel crash through bad gso input: a packet with gso size exceeding len. These packets are dropped in tcp_gso_segment and udp[46]_ufo_fragment. But they may affect gso size calculations earlier in the path. Now that we have thlen as of commit 9274124f023b ("net: stricter validation of untrusted gso packets"), check gso_size at entry too. Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.") Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 6f6ade63b04c..88997022a4b5 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -31,6 +31,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, { unsigned int gso_type = 0; unsigned int thlen = 0; + unsigned int p_off = 0; unsigned int ip_proto; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { @@ -68,7 +69,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; - if (skb_transport_offset(skb) + thlen > skb_headlen(skb)) + p_off = skb_transport_offset(skb) + thlen; + if (p_off > skb_headlen(skb)) return -EINVAL; } else { /* gso packets without NEEDS_CSUM do not set transport_offset. @@ -92,17 +94,25 @@ retry: return -EINVAL; } - if (keys.control.thoff + thlen > skb_headlen(skb) || + p_off = keys.control.thoff + thlen; + if (p_off > skb_headlen(skb) || keys.basic.ip_proto != ip_proto) return -EINVAL; skb_set_transport_header(skb, keys.control.thoff); + } else if (gso_type) { + p_off = thlen; + if (p_off > skb_headlen(skb)) + return -EINVAL; } } if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + if (skb->len - p_off <= gso_size) + return -EINVAL; + skb_shinfo(skb)->gso_size = gso_size; skb_shinfo(skb)->gso_type = gso_type; -- cgit v1.2.3 From 4946ea5c1237036155c3b3a24f049fd5f849f8f6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 27 May 2020 12:24:10 +0200 Subject: netfilter: nf_conntrack_pptp: fix compilation warning with W=1 build >> include/linux/netfilter/nf_conntrack_pptp.h:13:20: warning: 'const' type qualifier on return type has no effect [-Wignored-qualifiers] extern const char *const pptp_msg_name(u_int16_t msg); ^~~~~~ Reported-by: kbuild test robot Fixes: 4c559f15efcc ("netfilter: nf_conntrack_pptp: prevent buffer overflows in debug code") Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_pptp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index 6a4ff6d5ebc2..a28aa289afdc 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -10,7 +10,7 @@ #include #include -extern const char *const pptp_msg_name(u_int16_t msg); +const char *pptp_msg_name(u_int16_t msg); /* state of the control session */ enum pptp_ctrlsess_state { -- cgit v1.2.3 From f17936993af054b16725d0c54baa58115f9e052a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 27 May 2020 15:54:55 +0300 Subject: fanotify: turn off support for FAN_DIR_MODIFY FAN_DIR_MODIFY has been enabled by commit 44d705b0370b ("fanotify: report name info for FAN_DIR_MODIFY event") in 5.7-rc1. Now we are planning further extensions to the fanotify API and during that we realized that FAN_DIR_MODIFY may behave slightly differently to be more consistent with extensions we plan. So until we finalize these extensions, let's not bind our hands with exposing FAN_DIR_MODIFY to userland. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 3049a6c06d9e..b79fa9bb7359 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -47,8 +47,7 @@ * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. */ -#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ - FAN_DIR_MODIFY) +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ -- cgit v1.2.3 From 6988f31d558aa8c744464a7f6d91d34ada48ad12 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 27 May 2020 22:20:47 -0700 Subject: mm: remove VM_BUG_ON(PageSlab()) from page_mapcount() Replace superfluous VM_BUG_ON() with comment about correct usage. Technically reverts commit 1d148e218a0d ("mm: add VM_BUG_ON_PAGE() to page_mapcount()"), but context lines have changed. Function isolate_migratepages_block() runs some checks out of lru_lock when choose pages for migration. After checking PageLRU() it checks extra page references by comparing page_count() and page_mapcount(). Between these two checks page could be removed from lru, freed and taken by slab. As a result this race triggers VM_BUG_ON(PageSlab()) in page_mapcount(). Race window is tiny. For certain workload this happens around once a year. page:ffffea0105ca9380 count:1 mapcount:0 mapping:ffff88ff7712c180 index:0x0 compound_mapcount: 0 flags: 0x500000000008100(slab|head) raw: 0500000000008100 dead000000000100 dead000000000200 ffff88ff7712c180 raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000 page dumped because: VM_BUG_ON_PAGE(PageSlab(page)) ------------[ cut here ]------------ kernel BUG at ./include/linux/mm.h:628! invalid opcode: 0000 [#1] SMP NOPTI CPU: 77 PID: 504 Comm: kcompactd1 Tainted: G W 4.19.109-27 #1 Hardware name: Yandex T175-N41-Y3N/MY81-EX0-Y3N, BIOS R05 06/20/2019 RIP: 0010:isolate_migratepages_block+0x986/0x9b0 The code in isolate_migratepages_block() was added in commit 119d6d59dcc0 ("mm, compaction: avoid isolating pinned pages") before adding VM_BUG_ON into page_mapcount(). This race has been predicted in 2015 by Vlastimil Babka (see link below). [akpm@linux-foundation.org: comment tweaks, per Hugh] Fixes: 1d148e218a0d ("mm: add VM_BUG_ON_PAGE() to page_mapcount()") Signed-off-by: Konstantin Khlebnikov Signed-off-by: Andrew Morton Acked-by: Hugh Dickins Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: David Rientjes Cc: Link: http://lkml.kernel.org/r/159032779896.957378.7852761411265662220.stgit@buzz Link: https://lore.kernel.org/lkml/557710E1.6060103@suse.cz/ Link: https://lore.kernel.org/linux-mm/158937872515.474360.5066096871639561424.stgit@buzz/T/ (v1) Signed-off-by: Linus Torvalds --- include/linux/mm.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a323422d783..f3fe7371855c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -782,6 +782,11 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) extern void kvfree(const void *addr); +/* + * Mapcount of compound page as a whole, does not include mapped sub-pages. + * + * Must be called only for compound pages or any their tail sub-pages. + */ static inline int compound_mapcount(struct page *page) { VM_BUG_ON_PAGE(!PageCompound(page), page); @@ -801,10 +806,16 @@ static inline void page_mapcount_reset(struct page *page) int __page_mapcount(struct page *page); +/* + * Mapcount of 0-order page; when compound sub-page, includes + * compound_mapcount(). + * + * Result is undefined for pages which cannot be mapped into userspace. + * For example SLAB or special types of pages. See function page_has_type(). + * They use this place in struct page differently. + */ static inline int page_mapcount(struct page *page) { - VM_BUG_ON_PAGE(PageSlab(page), page); - if (unlikely(PageCompound(page))) return __page_mapcount(page); return atomic_read(&page->_mapcount) + 1; -- cgit v1.2.3 From 7c6d2ecbda83150b2036a2b36b21381ad4667762 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 May 2020 14:57:47 -0700 Subject: net: be more gentle about silly gso requests coming from user Recent change in virtio_net_hdr_to_skb() broke some packetdrill tests. When --mss=XXX option is set, packetdrill always provide gso_type & gso_size for its inbound packets, regardless of packet size. if (packet->tcp && packet->mss) { if (packet->ipv4) gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; gso.gso_size = packet->mss; } Since many other programs could do the same, relax virtio_net_hdr_to_skb() to no longer return an error, but instead ignore gso settings. This keeps Willem intent to make sure no malicious packet could reach gso stack. Note that TCP stack has a special logic in tcp_set_skb_tso_segs() to clear gso_size for small packets. Fixes: 6dd912f82680 ("net: check untrusted gso_size at kernel entry") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 88997022a4b5..e8a924eeea3d 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -109,16 +109,17 @@ retry: if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + struct skb_shared_info *shinfo = skb_shinfo(skb); - if (skb->len - p_off <= gso_size) - return -EINVAL; - - skb_shinfo(skb)->gso_size = gso_size; - skb_shinfo(skb)->gso_type = gso_type; + /* Too small packets are not really GSO ones. */ + if (skb->len - p_off > gso_size) { + shinfo->gso_size = gso_size; + shinfo->gso_type = gso_type; - /* Header must be checked, and gso_segs computed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; + /* Header must be checked, and gso_segs computed. */ + shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_segs = 0; + } } return 0; -- cgit v1.2.3 From 53aab92dec447f93489e07924e310d605a389dea Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 21 Jul 2020 10:17:16 -0700 Subject: Input: synaptics-rmi4 - drop a duplicated word Drop the repeated word "to" in a comment. Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20200719003131.21050-1-rdunlap@infradead.org Signed-off-by: Dmitry Torokhov --- include/linux/rmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmi.h b/include/linux/rmi.h index 7b22366d0065..8ed37f93f3c8 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -206,7 +206,7 @@ struct rmi_device_platform_data_spi { * * @reset_delay_ms - after issuing a reset command to the touch sensor, the * driver waits a few milliseconds to give the firmware a chance to - * to re-initialize. You can override the default wait period here. + * re-initialize. You can override the default wait period here. * @irq: irq associated with the attn gpio line, or negative */ struct rmi_device_platform_data { -- cgit v1.2.3