From a9d887dc1c60ed67f2271d66560cdcf864c4a578 Mon Sep 17 00:00:00 2001 From: Guru Das Srinagesh Date: Tue, 2 Jun 2020 15:31:16 -0700 Subject: pwm: Convert period and duty cycle to u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because period and duty cycle are defined as ints with units of nanoseconds, the maximum time duration that can be set is limited to ~2.147 seconds. Change their definitions to u64 in the structs of the PWM framework so that higher durations may be set. Also use the right format specifiers in debug prints in both core.c, pwm-stm32-lp.c as well as video/fbdev/ssd1307fb.c. Reported-by: kbuild test robot Signed-off-by: Guru Das Srinagesh Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 2635b2a55090..a13ff383fa1d 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -39,7 +39,7 @@ enum pwm_polarity { * current PWM hardware state. */ struct pwm_args { - unsigned int period; + u64 period; enum pwm_polarity polarity; }; @@ -56,8 +56,8 @@ enum { * @enabled: PWM enabled status */ struct pwm_state { - unsigned int period; - unsigned int duty_cycle; + u64 period; + u64 duty_cycle; enum pwm_polarity polarity; bool enabled; }; @@ -107,13 +107,13 @@ static inline bool pwm_is_enabled(const struct pwm_device *pwm) return state.enabled; } -static inline void pwm_set_period(struct pwm_device *pwm, unsigned int period) +static inline void pwm_set_period(struct pwm_device *pwm, u64 period) { if (pwm) pwm->state.period = period; } -static inline unsigned int pwm_get_period(const struct pwm_device *pwm) +static inline u64 pwm_get_period(const struct pwm_device *pwm) { struct pwm_state state; @@ -128,7 +128,7 @@ static inline void pwm_set_duty_cycle(struct pwm_device *pwm, unsigned int duty) pwm->state.duty_cycle = duty; } -static inline unsigned int pwm_get_duty_cycle(const struct pwm_device *pwm) +static inline u64 pwm_get_duty_cycle(const struct pwm_device *pwm) { struct pwm_state state; -- cgit v1.2.3 From e0bcc58d876c6ece9720310509a908b7637e37cf Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Wed, 3 Jun 2020 14:54:36 +0200 Subject: mfd: stm32: Add defines to be used for clkevent purpose Add defines to be able to enable/clear irq and configure one shot mode. Signed-off-by: Benjamin Gaignard Signed-off-by: Lee Jones --- include/linux/mfd/stm32-lptimer.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h index 605f62264825..90b20550c1c8 100644 --- a/include/linux/mfd/stm32-lptimer.h +++ b/include/linux/mfd/stm32-lptimer.h @@ -27,10 +27,15 @@ #define STM32_LPTIM_CMPOK BIT(3) /* STM32_LPTIM_ICR - bit fields */ +#define STM32_LPTIM_ARRMCF BIT(1) #define STM32_LPTIM_CMPOKCF_ARROKCF GENMASK(4, 3) +/* STM32_LPTIM_IER - bit flieds */ +#define STM32_LPTIM_ARRMIE BIT(1) + /* STM32_LPTIM_CR - bit fields */ #define STM32_LPTIM_CNTSTRT BIT(2) +#define STM32_LPTIM_SNGSTRT BIT(1) #define STM32_LPTIM_ENABLE BIT(0) /* STM32_LPTIM_CFGR - bit fields */ -- cgit v1.2.3 From 7f8a137f736f7366820c485c5a0d34d65b9d0125 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 15 Jun 2020 14:53:22 +0100 Subject: mfd: madera: Remove unused forward declaration of madera_codec_pdata This forward declaration is redundant since the header including the full data structure is included. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/madera/pdata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/madera/pdata.h b/include/linux/mfd/madera/pdata.h index fa9595dd42ba..601cbbc10370 100644 --- a/include/linux/mfd/madera/pdata.h +++ b/include/linux/mfd/madera/pdata.h @@ -21,7 +21,6 @@ struct gpio_desc; struct pinctrl_map; -struct madera_codec_pdata; /** * struct madera_pdata - Configuration data for Madera devices -- cgit v1.2.3 From 6c27219e34911601955b72c754adfc11c527ba7b Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 8 Jun 2020 11:17:36 +0200 Subject: mfd: Add support for the Khadas System control Microcontroller This Microcontroller is present on the Khadas VIM1, VIM2, VIM3 and Edge boards. It has multiple boot control features like password check, power-on options, power-off control and system FAN control on recent boards. This implements a very basic MFD driver with the fan control and User NVMEM cells. Signed-off-by: Neil Armstrong Signed-off-by: Lee Jones --- include/linux/mfd/khadas-mcu.h | 91 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 include/linux/mfd/khadas-mcu.h (limited to 'include/linux') diff --git a/include/linux/mfd/khadas-mcu.h b/include/linux/mfd/khadas-mcu.h new file mode 100644 index 000000000000..a99ba2ed0e4e --- /dev/null +++ b/include/linux/mfd/khadas-mcu.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Khadas System control Microcontroller Register map + * + * Copyright (C) 2020 BayLibre SAS + * + * Author(s): Neil Armstrong + */ + +#ifndef MFD_KHADAS_MCU_H +#define MFD_KHADAS_MCU_H + +#define KHADAS_MCU_PASSWD_VEN_0_REG 0x00 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_1_REG 0x01 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_2_REG 0x02 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_3_REG 0x03 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_4_REG 0x04 /* RO */ +#define KHADAS_MCU_PASSWD_VEN_5_REG 0x05 /* RO */ +#define KHADAS_MCU_MAC_0_REG 0x06 /* RO */ +#define KHADAS_MCU_MAC_1_REG 0x07 /* RO */ +#define KHADAS_MCU_MAC_2_REG 0x08 /* RO */ +#define KHADAS_MCU_MAC_3_REG 0x09 /* RO */ +#define KHADAS_MCU_MAC_4_REG 0x0a /* RO */ +#define KHADAS_MCU_MAC_5_REG 0x0b /* RO */ +#define KHADAS_MCU_USID_0_REG 0x0c /* RO */ +#define KHADAS_MCU_USID_1_REG 0x0d /* RO */ +#define KHADAS_MCU_USID_2_REG 0x0e /* RO */ +#define KHADAS_MCU_USID_3_REG 0x0f /* RO */ +#define KHADAS_MCU_USID_4_REG 0x10 /* RO */ +#define KHADAS_MCU_USID_5_REG 0x11 /* RO */ +#define KHADAS_MCU_VERSION_0_REG 0x12 /* RO */ +#define KHADAS_MCU_VERSION_1_REG 0x13 /* RO */ +#define KHADAS_MCU_DEVICE_NO_0_REG 0x14 /* RO */ +#define KHADAS_MCU_DEVICE_NO_1_REG 0x15 /* RO */ +#define KHADAS_MCU_FACTORY_TEST_REG 0x16 /* R */ +#define KHADAS_MCU_BOOT_MODE_REG 0x20 /* RW */ +#define KHADAS_MCU_BOOT_EN_WOL_REG 0x21 /* RW */ +#define KHADAS_MCU_BOOT_EN_RTC_REG 0x22 /* RW */ +#define KHADAS_MCU_BOOT_EN_EXP_REG 0x23 /* RW */ +#define KHADAS_MCU_BOOT_EN_IR_REG 0x24 /* RW */ +#define KHADAS_MCU_BOOT_EN_DCIN_REG 0x25 /* RW */ +#define KHADAS_MCU_BOOT_EN_KEY_REG 0x26 /* RW */ +#define KHADAS_MCU_KEY_MODE_REG 0x27 /* RW */ +#define KHADAS_MCU_LED_MODE_ON_REG 0x28 /* RW */ +#define KHADAS_MCU_LED_MODE_OFF_REG 0x29 /* RW */ +#define KHADAS_MCU_SHUTDOWN_NORMAL_REG 0x2c /* RW */ +#define KHADAS_MCU_MAC_SWITCH_REG 0x2d /* RW */ +#define KHADAS_MCU_MCU_SLEEP_MODE_REG 0x2e /* RW */ +#define KHADAS_MCU_IR_CODE1_0_REG 0x2f /* RW */ +#define KHADAS_MCU_IR_CODE1_1_REG 0x30 /* RW */ +#define KHADAS_MCU_IR_CODE1_2_REG 0x31 /* RW */ +#define KHADAS_MCU_IR_CODE1_3_REG 0x32 /* RW */ +#define KHADAS_MCU_USB_PCIE_SWITCH_REG 0x33 /* RW */ +#define KHADAS_MCU_IR_CODE2_0_REG 0x34 /* RW */ +#define KHADAS_MCU_IR_CODE2_1_REG 0x35 /* RW */ +#define KHADAS_MCU_IR_CODE2_2_REG 0x36 /* RW */ +#define KHADAS_MCU_IR_CODE2_3_REG 0x37 /* RW */ +#define KHADAS_MCU_PASSWD_USER_0_REG 0x40 /* RW */ +#define KHADAS_MCU_PASSWD_USER_1_REG 0x41 /* RW */ +#define KHADAS_MCU_PASSWD_USER_2_REG 0x42 /* RW */ +#define KHADAS_MCU_PASSWD_USER_3_REG 0x43 /* RW */ +#define KHADAS_MCU_PASSWD_USER_4_REG 0x44 /* RW */ +#define KHADAS_MCU_PASSWD_USER_5_REG 0x45 /* RW */ +#define KHADAS_MCU_USER_DATA_0_REG 0x46 /* RW 56 bytes */ +#define KHADAS_MCU_PWR_OFF_CMD_REG 0x80 /* WO */ +#define KHADAS_MCU_PASSWD_START_REG 0x81 /* WO */ +#define KHADAS_MCU_CHECK_VEN_PASSWD_REG 0x82 /* WO */ +#define KHADAS_MCU_CHECK_USER_PASSWD_REG 0x83 /* WO */ +#define KHADAS_MCU_SHUTDOWN_NORMAL_STATUS_REG 0x86 /* RO */ +#define KHADAS_MCU_WOL_INIT_START_REG 0x87 /* WO */ +#define KHADAS_MCU_CMD_FAN_STATUS_CTRL_REG 0x88 /* WO */ + +enum { + KHADAS_BOARD_VIM1 = 0x1, + KHADAS_BOARD_VIM2, + KHADAS_BOARD_VIM3, + KHADAS_BOARD_EDGE = 0x11, + KHADAS_BOARD_EDGE_V, +}; + +/** + * struct khadas_mcu - Khadas MCU structure + * @device: device reference used for logs + * @regmap: register map + */ +struct khadas_mcu { + struct device *dev; + struct regmap *regmap; +}; + +#endif /* MFD_KHADAS_MCU_H */ -- cgit v1.2.3 From 9a67fcc8f3fd1e294922f28f20003c31d7f6cfeb Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:53 +0000 Subject: nfs: add client side only definitions for user xattrs Add client-side only definitions for user extended attributes (RFC8276). These are the access bits as used by the client code, and the CLNT procedure number definition. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 5 +++++ include/linux/nfs_fs.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index e6ca9d1d2e76..db13026ac7d1 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -553,6 +553,11 @@ enum { NFSPROC4_CLNT_LAYOUTERROR, NFSPROC4_CLNT_COPY_NOTIFY, + + NFSPROC4_CLNT_GETXATTR, + NFSPROC4_CLNT_SETXATTR, + NFSPROC4_CLNT_LISTXATTRS, + NFSPROC4_CLNT_REMOVEXATTR, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6ee9119acc5d..b743988fcbd0 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -212,6 +212,9 @@ struct nfs4_copy_state { #define NFS_ACCESS_EXTEND 0x0008 #define NFS_ACCESS_DELETE 0x0010 #define NFS_ACCESS_EXECUTE 0x0020 +#define NFS_ACCESS_XAREAD 0x0040 +#define NFS_ACCESS_XAWRITE 0x0080 +#define NFS_ACCESS_XALIST 0x0100 /* * Cache validity bit flags -- cgit v1.2.3 From 04a5da690e8f2da23c2ac940f2921e3aa622db82 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:54 +0000 Subject: NFSv4.2: define limits and sizes for user xattr handling Set limits for extended attributes (attribute value size and listxattr buffer size), based on the fs-independent limits (XATTR_*_MAX). Define the maximum XDR sizes for the RFC 8276 XATTR operations. In the case of operations that carry a larger payload (SETXATTR, GETXATTR, LISTXATTR), these exclude that payload, which is added as separate pages, like other operations do. Define, much like for read and write operations, the maximum overhead sizes for get/set/listxattr, and use them to limit the maximum payload size for those operations, in combination with the channel attributes. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 465fa98258a3..128e01acb4ca 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -163,6 +163,11 @@ struct nfs_server { unsigned int dtsize; /* readdir size */ unsigned short port; /* "port=" setting */ unsigned int bsize; /* server block size */ +#ifdef CONFIG_NFS_V4_2 + unsigned int gxasize; /* getxattr size */ + unsigned int sxasize; /* setxattr size */ + unsigned int lxasize; /* listxattr size */ +#endif unsigned int acregmin; /* attr cache timeouts */ unsigned int acregmax; unsigned int acdirmin; -- cgit v1.2.3 From b78ef845c35dbae25e57b598901a65b13d940c81 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:55 +0000 Subject: NFSv4.2: query the server for extended attribute support Query the server for extended attribute support, and record it as the NFS_CAP_XATTR flag in the server capabilities. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 128e01acb4ca..7eae72a8762e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -286,5 +286,6 @@ struct nfs_server { #define NFS_CAP_OFFLOAD_CANCEL (1U << 25) #define NFS_CAP_LAYOUTERROR (1U << 26) #define NFS_CAP_COPY_NOTIFY (1U << 27) +#define NFS_CAP_XATTR (1U << 28) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5fd0a9ef425f..dee9b1cfa972 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -150,6 +150,7 @@ struct nfs_fsinfo { __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ + __u32 xattr_support; /* User xattrs supported */ }; struct nfs_fsstat { -- cgit v1.2.3 From 3e1f02123fba086d32dfd5729e6f4e2b54654acc Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:56 +0000 Subject: NFSv4.2: add client side XDR handling for extended attributes Define the argument and response structures that will be used for RFC 8276 extended attribute RPC calls, and implement the necessary functions to encode/decode the extended attribute operations. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index dee9b1cfa972..9408f3252c8e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1498,7 +1498,64 @@ struct nfs42_seek_res { u32 sr_eof; u64 sr_offset; }; -#endif + +struct nfs42_setxattrargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + const char *xattr_name; + u32 xattr_flags; + size_t xattr_len; + struct page **xattr_pages; +}; + +struct nfs42_setxattrres { + struct nfs4_sequence_res seq_res; + struct nfs4_change_info cinfo; +}; + +struct nfs42_getxattrargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + const char *xattr_name; + size_t xattr_len; + struct page **xattr_pages; +}; + +struct nfs42_getxattrres { + struct nfs4_sequence_res seq_res; + size_t xattr_len; +}; + +struct nfs42_listxattrsargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + u32 count; + u64 cookie; + struct page **xattr_pages; +}; + +struct nfs42_listxattrsres { + struct nfs4_sequence_res seq_res; + struct page *scratch; + void *xattr_buf; + size_t xattr_len; + u64 cookie; + bool eof; + size_t copied; +}; + +struct nfs42_removexattrargs { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + const char *xattr_name; +}; + +struct nfs42_removexattrres { + struct nfs4_sequence_res seq_res; + struct nfs4_change_info cinfo; +}; + +#endif /* CONFIG_NFS_V4_2 */ struct nfs_page; -- cgit v1.2.3 From d2ae4f8b21c111bb795c557588d89dccd005828d Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:38:57 +0000 Subject: nfs: define nfs_access_get_cached function The only consumer of nfs_access_get_cached_rcu and nfs_access_cached calls these static functions in order to first try RCU access, and then locked access. Combine them in to a single function, and call that. Make this function available to the rest of the NFS code. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b743988fcbd0..714b577dce19 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -493,6 +493,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); extern void nfs_access_zap_cache(struct inode *inode); +extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, + bool may_block); /* * linux/fs/nfs/symlink.c -- cgit v1.2.3 From 0f44da51aeef9c974ea744c0d9e24d54eec4e94c Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:00 +0000 Subject: nfs: define and use the NFS_INO_INVALID_XATTR flag Define the NFS_INO_INVALID_XATTR flag, to be used for the NFSv4.2 xattr cache, and use it where appropriate. No functional change as yet. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 714b577dce19..943ee750d68c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -234,6 +234,7 @@ struct nfs4_copy_state { #define NFS_INO_DATA_INVAL_DEFER \ BIT(13) /* Deferred cache invalidation */ #define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ +#define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ -- cgit v1.2.3 From 95ad37f90c338e3fd4abf61cecfe02b6f3e080f0 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:04 +0000 Subject: NFSv4.2: add client side xattr caching. Implement client side caching for NFSv4.2 extended attributes. The cache is a per-inode hashtable, with name/value entries. There is one special entry for the listxattr cache. NFS inodes have a pointer to a cache structure. The cache structure is allocated on demand, freed when the cache is invalidated. Memory shrinkers keep the size in check. Large entries (> PAGE_SIZE) are collected by a separate shrinker, and freed more aggressively than others. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 943ee750d68c..a2c6455ea3fa 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -102,6 +102,8 @@ struct nfs_delegation; struct posix_acl; +struct nfs4_xattr_cache; + /* * nfs fs inode data in memory */ @@ -188,6 +190,10 @@ struct nfs_inode { struct fscache_cookie *fscache; #endif struct inode vfs_inode; + +#ifdef CONFIG_NFS_V4_2 + struct nfs4_xattr_cache *xattr_cache; +#endif }; struct nfs4_copy_state { -- cgit v1.2.3 From a5e6f964bb2c613933de58a35ddfa306128ba004 Mon Sep 17 00:00:00 2001 From: Misono Tomohiro Date: Fri, 10 Jul 2020 17:00:03 +0900 Subject: rtc: cleanup obsolete comment about struct rtc_class_ops Commit ea369ea6d828 ("rtc: remove .open() and .release()") removes open/release callback from struct rtc_class_ops. Also commit 80d4bb515b78 ("RTC: Cleanup rtc_class_ops->irq_set_state") and commit 696160fec162 ("RTC: Cleanup rtc_class_ops->irq_set_freq()") removes irq callbacks. So, just remove related comments so that readers will not be confused. Signed-off-by: Misono Tomohiro Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200710080003.7986-1-misono.tomohiro@jp.fujitsu.com --- include/linux/rtc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index bba3db3f7efa..22d1575e4991 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -55,10 +55,6 @@ extern struct class *rtc_class; * * The (current) exceptions are mostly filesystem hooks: * - the proc() hook for procfs - * - non-ioctl() chardev hooks: open(), release() - * - * REVISIT those periodic irq calls *do* have ops_lock when they're - * issued through ioctl() ... */ struct rtc_class_ops { int (*ioctl)(struct device *, unsigned int, unsigned long); -- cgit v1.2.3 From ab91e7a6da7eeb8aa54843748652c186daee43eb Mon Sep 17 00:00:00 2001 From: He Zhe Date: Mon, 6 Jul 2020 17:52:24 +0800 Subject: freezer: Add unsafe versions of freezable_schedule_timeout_interruptible for NFS commit 0688e64bc600 ("NFS: Allow signal interruption of NFS4ERR_DELAYed operations") introduces nfs4_delay_interruptible which also needs an _unsafe version to avoid the following call trace for the same reason explained in commit 416ad3c9c006 ("freezer: add unsafe versions of freezable helpers for NFS") CPU: 4 PID: 3968 Comm: rm Tainted: G W 5.8.0-rc4 #1 Hardware name: Marvell OcteonTX CN96XX board (DT) Call trace: dump_backtrace+0x0/0x1dc show_stack+0x20/0x30 dump_stack+0xdc/0x150 debug_check_no_locks_held+0x98/0xa0 nfs4_delay_interruptible+0xd8/0x120 nfs4_handle_exception+0x130/0x170 nfs4_proc_rmdir+0x8c/0x220 nfs_rmdir+0xa4/0x360 vfs_rmdir.part.0+0x6c/0x1b0 do_rmdir+0x18c/0x210 __arm64_sys_unlinkat+0x64/0x7c el0_svc_common.constprop.0+0x7c/0x110 do_el0_svc+0x24/0xa0 el0_sync_handler+0x13c/0x1b8 el0_sync+0x158/0x180 Signed-off-by: He Zhe Signed-off-by: Trond Myklebust --- include/linux/freezer.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 21f5aa0b217f..27828145ca09 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -207,6 +207,17 @@ static inline long freezable_schedule_timeout_interruptible(long timeout) return __retval; } +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout) +{ + long __retval; + + freezer_do_not_count(); + __retval = schedule_timeout_interruptible(timeout); + freezer_count_unsafe(); + return __retval; +} + /* Like schedule_timeout_killable(), but should not block the freezer. */ static inline long freezable_schedule_timeout_killable(long timeout) { @@ -285,6 +296,9 @@ static inline void set_freezable(void) {} #define freezable_schedule_timeout_interruptible(timeout) \ schedule_timeout_interruptible(timeout) +#define freezable_schedule_timeout_interruptible_unsafe(timeout) \ + schedule_timeout_interruptible(timeout) + #define freezable_schedule_timeout_killable(timeout) \ schedule_timeout_killable(timeout) -- cgit v1.2.3 From c6a8b84da4c28bda61b842a089651c3ec9d89a48 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jul 2020 16:36:50 -0700 Subject: modules: linux/moduleparam.h: drop duplicated word in a comment Drop the doubled word "the" in a comment. Signed-off-by: Randy Dunlap Cc: Jessica Yu Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 3ef917ff0964..1ad5aa3b86d9 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -108,7 +108,7 @@ struct kparam_array * ".") the kernel commandline parameter. Note that - is changed to _, so * the user can use "foo-bar=1" even for variable "foo_bar". * - * @perm is 0 if the the variable is not to appear in sysfs, or 0444 + * @perm is 0 if the variable is not to appear in sysfs, or 0444 * for world-readable, 0644 for root-writable, etc. Note that if it * is writable, you may need to use kernel_param_lock() around * accesses (esp. charp, which can be kfreed when it changes). -- cgit v1.2.3 From 75c88143f3b879664cc5bf68b91854c1a98f5e5b Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:20 +0300 Subject: clk: at91: clk-master: add master clock support for SAMA7G5 Add master clock support (MCK1..4) for SAMA7G5. SAMA7G5's PMC has multiple master clocks feeding different subsystems. One of them feeds image subsystem and is changeable based on image subsystem needs. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-13-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- include/linux/clk/at91_pmc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 49a53a137610..77d6dabc4c3c 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -174,6 +174,7 @@ #define AT91_PMC_MOSCRCS (1 << 17) /* Main On-Chip RC [some SAM9] */ #define AT91_PMC_CFDEV (1 << 18) /* Clock Failure Detector Event [some SAM9] */ #define AT91_PMC_GCKRDY (1 << 24) /* Generated Clocks */ +#define AT91_PMC_MCKXRDY (1 << 26) /* Master Clock x [x=1..4] Ready Status */ #define AT91_PMC_IMR 0x6c /* Interrupt Mask Register */ #define AT91_PMC_FSMR 0x70 /* Fast Startup Mode Register */ -- cgit v1.2.3 From 0416824edca1cdcb6e00e6f909423bf0fc529eef Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:23 +0300 Subject: clk: at91: add macro for pll ids mask Add macro for PLL IDs mask. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-16-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- include/linux/clk/at91_pmc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 77d6dabc4c3c..dc5e85f124e0 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -59,6 +59,7 @@ #define AT91_PMC_PLL_UPDT 0x1C /* PMC PLL update register [for SAM9X60] */ #define AT91_PMC_PLL_UPDT_UPDATE (1 << 8) /* Update PLL settings */ #define AT91_PMC_PLL_UPDT_ID (1 << 0) /* PLL ID */ +#define AT91_PMC_PLL_UPDT_ID_MSK (0xf) /* PLL ID mask */ #define AT91_PMC_PLL_UPDT_STUPTIM (0xff << 16) /* Startup time */ #define AT91_CKGR_MOR 0x20 /* Main Oscillator Register [not on SAM9RL] */ -- cgit v1.2.3 From ef396df99251b848596c717b63ff4fe74a941193 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 22 Jul 2020 10:38:25 +0300 Subject: clk: at91: clk-utmi: add utmi support for sama7g5 Add UTMI support for SAMA7G5. SAMA7G5's UTMI control is done via XTALF register. Values written at bits 2..0 in this register correspond to the on board crystal oscillator frequency. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/1595403506-8209-18-git-send-email-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd --- include/linux/clk/at91_pmc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index dc5e85f124e0..a4f82e836a7c 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -137,6 +137,8 @@ #define AT91_PMC_PLLADIV2_ON (1 << 12) #define AT91_PMC_H32MXDIV BIT(24) +#define AT91_PMC_XTALF 0x34 /* Main XTAL Frequency Register [SAMA7G5 only] */ + #define AT91_PMC_USB 0x38 /* USB Clock Register [some SAM9 only] */ #define AT91_PMC_USBS (0x1 << 0) /* USB OHCI Input clock selection */ #define AT91_PMC_USBS_PLLA (0 << 0) -- cgit v1.2.3 From 0c2a34937f7e4c4776bb261114c475392da2355c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Jun 2020 18:24:40 +0200 Subject: i2c: revert "i2c: core: Allow drivers to disable i2c-core irq mapping" This manually reverts commit d1d84bb95364ed604015c2b788caaf3dbca0262f. The only user has gone two years ago with commit 589edb56b424 ("ACPI / scan: Create platform device for INT33FE ACPI nodes") and no new user has showed up. Remove and hope we will never need it again. Signed-off-by: Wolfram Sang Acked-by: Hans de Goede Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b8b8963f8bb9..098405df431f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -231,7 +231,6 @@ enum i2c_alert_protocol { * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) - * @disable_i2c_core_irq_mapping: Tell the i2c-core to not do irq-mapping * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. @@ -290,8 +289,6 @@ struct i2c_driver { int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; struct list_head clients; - - bool disable_i2c_core_irq_mapping; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) -- cgit v1.2.3 From 8b7beaf9f185249f29912b5e2d7bc4147c5c2a6a Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 27 Jul 2020 13:43:39 -0600 Subject: PCI: Add Intel QuickAssist device IDs Add device IDs for the following Intel QuickAssist devices: DH895XCC, C3XXX and C62X. The defines in this patch are going to be referenced in two independent drivers, qat and vfio-pci. Signed-off-by: Giovanni Cabiddu Acked-by: Bjorn Helgaas Reviewed-by: Fiona Trahe Reviewed-by: Andy Shevchenko Signed-off-by: Alex Williamson --- include/linux/pci_ids.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0ad57693f392..f3166b1425ca 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2659,6 +2659,8 @@ #define PCI_DEVICE_ID_INTEL_80332_1 0x0332 #define PCI_DEVICE_ID_INTEL_80333_0 0x0370 #define PCI_DEVICE_ID_INTEL_80333_1 0x0372 +#define PCI_DEVICE_ID_INTEL_QAT_DH895XCC 0x0435 +#define PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF 0x0443 #define PCI_DEVICE_ID_INTEL_82375 0x0482 #define PCI_DEVICE_ID_INTEL_82424 0x0483 #define PCI_DEVICE_ID_INTEL_82378 0x0484 @@ -2708,6 +2710,8 @@ #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI 0x1577 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_BRIDGE 0x1578 #define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 +#define PCI_DEVICE_ID_INTEL_QAT_C3XXX 0x19e2 +#define PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF 0x19e3 #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 #define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 @@ -2924,6 +2928,8 @@ #define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 #define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 #define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 +#define PCI_DEVICE_ID_INTEL_QAT_C62X 0x37c8 +#define PCI_DEVICE_ID_INTEL_QAT_C62X_VF 0x37c9 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 -- cgit v1.2.3 From f369bc3f9096f5d355e8b80540bc30ac9a602912 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 1 Aug 2020 08:17:13 +0200 Subject: vgaarb: mark vga_tryget static This symbols isn't used anywhere outside of vgaarb.c. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200801061713.307434-1-hch@lst.de --- include/linux/vgaarb.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 553b34c8b5f7..977caf96c8d2 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -109,12 +109,6 @@ static inline int vga_get_uninterruptible(struct pci_dev *pdev, return vga_get(pdev, rsrc, 0); } -#if defined(CONFIG_VGA_ARB) -extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); -#else -static inline int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { return 0; } -#endif - #if defined(CONFIG_VGA_ARB) extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); #else -- cgit v1.2.3 From 7ef5264de773279b9f23b6cc8afb5addb30e970b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:20 +0200 Subject: modules: mark ref_module static ref_module isn't used anywhere outside of module.c. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 2e6670860d27..f1fdbeef2153 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -657,7 +657,6 @@ static inline void __module_get(struct module *module) #define symbol_put_addr(p) do { } while (0) #endif /* CONFIG_MODULE_UNLOAD */ -int ref_module(struct module *a, struct module *b); /* This is a #define so the string doesn't get put in every .o file */ #define module_name(mod) \ -- cgit v1.2.3 From 773110470e2fa3839523384ae014f8a723c4d178 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:21 +0200 Subject: modules: mark find_symbol static find_symbol is only used in module.c. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index f1fdbeef2153..90bdc362be36 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -590,17 +590,6 @@ struct symsearch { bool unused; }; -/* - * Search for an exported symbol by name. - * - * Must be called with module_mutex held or preemption disabled. - */ -const struct kernel_symbol *find_symbol(const char *name, - struct module **owner, - const s32 **crc, - bool gplok, - bool warn); - /* * Walk the exported symbol table * -- cgit v1.2.3 From a54e04914c211b5678602a46b3ede5d82ec1327d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:22 +0200 Subject: modules: mark each_symbol_section static each_symbol_section is only used inside of module.c. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 90bdc362be36..b79219eed83c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -590,15 +590,6 @@ struct symsearch { bool unused; }; -/* - * Walk the exported symbol table - * - * Must be called with module_mutex held or preemption disabled. - */ -bool each_symbol_section(bool (*fn)(const struct symsearch *arr, - struct module *owner, - void *data), void *data); - /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, -- cgit v1.2.3 From cd8732cdcc37d7077c4fa2c966b748c0662b607e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:25 +0200 Subject: modules: rename the licence field in struct symsearch to license Use the same spelling variant as the rest of the file. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index b79219eed83c..be04ba2f881d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,7 +586,7 @@ struct symsearch { NOT_GPL_ONLY, GPL_ONLY, WILL_BE_GPL_ONLY, - } licence; + } license; bool unused; }; -- cgit v1.2.3 From ef1dac6021cc8ec5de02ce31722bf26ac4ed5523 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 08:10:26 +0200 Subject: modules: return licensing information from find_symbol Report the GPLONLY status through a new argument. Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index be04ba2f881d..30b0f5fcdb3c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -582,7 +582,7 @@ struct module *find_module(const char *name); struct symsearch { const struct kernel_symbol *start, *stop; const s32 *crcs; - enum { + enum mod_license { NOT_GPL_ONLY, GPL_ONLY, WILL_BE_GPL_ONLY, -- cgit v1.2.3 From 042f649810f61c4a834f3d6d866c567f7f6b3f8c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 1 Jul 2020 11:54:43 -0400 Subject: libceph: just have osd_req_op_init() return a pointer The caller can just ignore the return. No need for this wrapper that just casts the other function to void. [ idryomov: argument alignment ] Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c60b59e9291b..83fa08a06507 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -404,7 +404,7 @@ void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc); &__oreq->r_ops[__whch].typ.fld; \ }) -extern void osd_req_op_init(struct ceph_osd_request *osd_req, +struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags); extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, -- cgit v1.2.3 From 94f17c00d6687993101372f996cf6690ec9adf83 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 8 Jul 2020 08:53:28 +0200 Subject: libceph: replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. [ idryomov: Do the same for the CRUSH paper and replace ceph.newdream.net with ceph.io. ] Signed-off-by: Alexander A. Klimov Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 33c16f2de7f6..2f811baf78d2 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -17,7 +17,7 @@ * The algorithm was originally described in detail in this paper * (although the algorithm has evolved somewhat since then): * - * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf + * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf * * LGPL2 */ -- cgit v1.2.3 From 18f473b384a64cef69f166a3e2b73d3d2eca82c6 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 16 Jul 2020 10:05:57 -0400 Subject: ceph: periodically send perf metrics to MDSes This will send the caps/read/write/metadata metrics to any available MDS once per second, which will be the same as the userland client. It will skip the MDS sessions which don't support the metric collection, as the MDSs will close socket connections when they get an unknown type message. We can disable the metric sending via the disable_send_metrics module parameter. [ jlayton: fix up endianness bug in ceph_mdsc_send_metrics() ] URL: https://tracker.ceph.com/issues/43215 Signed-off-by: Xiubo Li Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ebf5ba62b772..455e9b9e2adf 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -130,6 +130,7 @@ struct ceph_dir_layout { #define CEPH_MSG_CLIENT_REQUEST 24 #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 #define CEPH_MSG_CLIENT_REPLY 26 +#define CEPH_MSG_CLIENT_METRICS 29 #define CEPH_MSG_CLIENT_CAPS 0x310 #define CEPH_MSG_CLIENT_LEASE 0x311 #define CEPH_MSG_CLIENT_SNAP 0x312 -- cgit v1.2.3 From f1f565a26976612121f97464f9245307422d0ce8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jul 2020 16:36:04 -0700 Subject: ceph: delete repeated words in fs/ceph/ Drop duplicated words "down" and "the" in fs/ceph/. [ idryomov: merge into a single patch ] Signed-off-by: Randy Dunlap Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 39e6f4c57580..fcd84e8d88f4 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -58,7 +58,7 @@ * because 10.2.z (jewel) did not care if its peers advertised this * feature bit. * - * - In the second phase we stop advertising the the bit and call it + * - In the second phase we stop advertising the bit and call it * RETIRED. This can normally be done in the *next* major release * following the one in which we marked the feature DEPRECATED. In * the above example, for 12.0.z (luminous) we can say: -- cgit v1.2.3 From 6cfde88418fe95240e32d2955b51988360ee0942 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 1 Aug 2020 10:53:42 +0200 Subject: clk: drop unused function __clk_get_flags The function __clk_get_flags has not been used since the April 2019 commit a348f05361c9 ("ARM: omap2+: hwmod: drop CLK_IS_BASIC flag usage"). Other uses were removed in June 2015, eg by commit 98d8a60eccee ("clk: Convert __clk_get_flags() to clk_hw_get_flags()"), which shows how clk_hw_get_flags can easily be used instead. Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/1596272022-14173-1-git-send-email-Julia.Lawall@inria.fr Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bd1ee9039558..93a78a5256d1 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1096,7 +1096,6 @@ int clk_hw_get_parent_index(struct clk_hw *hw); int clk_hw_set_parent(struct clk_hw *hw, struct clk_hw *new_parent); unsigned int __clk_get_enable_count(struct clk *clk); unsigned long clk_hw_get_rate(const struct clk_hw *hw); -unsigned long __clk_get_flags(struct clk *clk); unsigned long clk_hw_get_flags(const struct clk_hw *hw); #define clk_hw_can_set_rate_parent(hw) \ (clk_hw_get_flags((hw)) & CLK_SET_RATE_PARENT) -- cgit v1.2.3 From d88ca7e1a27eb2df056bbf37ddef62e1c73d37ea Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 30 Jul 2020 19:47:14 +0900 Subject: fbmem: pull fbcon_update_vcs() out of fb_set_var() syzbot is reporting OOB read bug in vc_do_resize() [1] caused by memcpy() based on outdated old_{rows,row_size} values, for resize_screen() can recurse into vc_do_resize() which changes vc->vc_{cols,rows} that outdates old_{rows,row_size} values which were saved before calling resize_screen(). Daniel Vetter explained that resize_screen() should not recurse into fbcon_update_vcs() path due to FBINFO_MISC_USEREVENT being still set when calling resize_screen(). Instead of masking FBINFO_MISC_USEREVENT before calling fbcon_update_vcs(), we can remove FBINFO_MISC_USEREVENT by calling fbcon_update_vcs() only if fb_set_var() returned 0. This change assumes that it is harmless to call fbcon_update_vcs() when fb_set_var() returned 0 without reaching fb_notifier_call_chain(). [1] https://syzkaller.appspot.com/bug?id=c70c88cfd16dcf6e1d3c7f0ab8648b3144b5b25e Reported-and-tested-by: syzbot Suggested-by: Daniel Vetter Signed-off-by: Tetsuo Handa Reported-by: kernel test robot for missing #include Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/075b7e37-3278-cd7d-31ab-c5073cfa8e92@i-love.sakura.ne.jp --- include/linux/fb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 2b530e6d86e4..850f79e9a7cb 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -400,8 +400,6 @@ struct fb_tile_ops { #define FBINFO_HWACCEL_YPAN 0x2000 /* optional */ #define FBINFO_HWACCEL_YWRAP 0x4000 /* optional */ -#define FBINFO_MISC_USEREVENT 0x10000 /* event request - from userspace */ #define FBINFO_MISC_TILEBLITTING 0x20000 /* use tile blitting */ /* A driver may set this flag to indicate that it does want a set_par to be -- cgit v1.2.3 From a0102bda5bc0991c5c8c7c07770b236894a810fd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Jul 2020 11:03:55 -0400 Subject: ceph: move sb->wb_pagevec_pool to be a global mempool When doing some testing recently, I hit some page allocation failures on mount, when creating the wb_pagevec_pool for the mount. That requires 128k (32 contiguous pages), and after thrashing the memory during an xfstests run, sometimes that would fail. 128k for each mount seems like a lot to hold in reserve for a rainy day, so let's change this to a global mempool that gets allocated when the module is plugged in. Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index e5ed1c541e7f..c8645f0b797d 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -282,6 +282,7 @@ extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; extern struct kmem_cache *ceph_dir_file_cachep; extern struct kmem_cache *ceph_mds_request_cachep; +extern mempool_t *ceph_wb_pagevec_pool; /* ceph_common.c */ extern bool libceph_compatible(void *data); -- cgit v1.2.3 From 7de62bc09fe6d100ebd6c931c3f9a6fa7e6ed10f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 15 Jul 2020 13:17:52 -0400 Subject: SUNRPC dont update timeout value on connection reset Current behaviour: every time a v3 operation is re-sent to the server we update (double) the timeout. There is no distinction between whether or not the previous timer had expired before the re-sent happened. Here's the scenario: 1. Client sends a v3 operation 2. Server RST-s the connection (prior to the timeout) (eg., connection is immediately reset) 3. Client re-sends a v3 operation but the timeout is now 120sec. As a result, an application sees 2mins pause before a retry in case server again does not reply. Instead, this patch proposes to keep track off when the minor timeout should happen and if it didn't, then don't update the new timeout. Value is updated based on the previous value to make timeouts predictable. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e64bd8222f55..a603d48d2b2c 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -101,6 +101,7 @@ struct rpc_rqst { * used in the softirq. */ unsigned long rq_majortimeo; /* major timeout alarm */ + unsigned long rq_minortimeo; /* minor timeout alarm */ unsigned long rq_timeout; /* Current timeout value */ ktime_t rq_rtt; /* round-trip time */ unsigned int rq_retries; /* # of retries */ -- cgit v1.2.3 From 262e6ae7081df304fc625cf368d5c2cbba2bb991 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 23:33:33 +0200 Subject: modules: inherit TAINT_PROPRIETARY_MODULE If a TAINT_PROPRIETARY_MODULE exports symbol, inherit the taint flag for all modules importing these symbols, and don't allow loading symbols from TAINT_PROPRIETARY_MODULE modules if the module previously imported gplonly symbols. Add a anti-circumvention devices so people don't accidentally get themselves into trouble this way. Comment from Greg: "Ah, the proven-to-be-illegal "GPL Condom" defense :)" [jeyu: pr_info -> pr_err and pr_warn as per discussion] Link: http://lore.kernel.org/r/20200730162957.GA22469@lst.de Acked-by: Daniel Vetter Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- include/linux/module.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 30b0f5fcdb3c..e30ed5fa33a7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -389,6 +389,7 @@ struct module { unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; const s32 *gpl_crcs; + bool using_gplonly_symbols; #ifdef CONFIG_UNUSED_SYMBOLS /* unused exported symbols. */ -- cgit v1.2.3 From 75820314de26b00aaea0d0e79269c0d17914c5c4 Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Tue, 4 Aug 2020 12:59:24 +0300 Subject: i2c: core: add generic I2C GPIO recovery Multiple I2C bus drivers use similar bindings to obtain information needed for I2C recovery. For example, for platforms using device-tree, the properties look something like this: &i2c { ... pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c_default>; pinctrl-1 = <&pinctrl_i2c_gpio>; sda-gpios = <&pio 0 GPIO_ACTIVE_HIGH>; scl-gpios = <&pio 1 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; ... } For this reason, we can add this common initialization in the core. This way, other I2C bus drivers will be able to support GPIO recovery just by providing a pointer to platform's pinctrl and calling i2c_recover_bus() when SDA is stuck low. Signed-off-by: Codrin Ciubotariu [wsa: inverted one logic for better readability, minor update to kdoc] Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 8ea9c3f86dba..d387d3786429 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -606,6 +606,14 @@ struct i2c_timings { * may configure padmux here for SDA/SCL line or something else they want. * @scl_gpiod: gpiod of the SCL line. Only required for GPIO recovery. * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. + * @pinctrl: pinctrl used by GPIO recovery to change the state of the I2C pins. + * Optional. + * @pins_default: default pinctrl state of SCL/SDA lines, when they are assigned + * to the I2C bus. Optional. Populated internally for GPIO recovery, if + * state with the name PINCTRL_STATE_DEFAULT is found and pinctrl is valid. + * @pins_gpio: recovery pinctrl state of SCL/SDA lines, when they are used as + * GPIOs. Optional. Populated internally for GPIO recovery, if this state + * is called "gpio" or "recovery" and pinctrl is valid. */ struct i2c_bus_recovery_info { int (*recover_bus)(struct i2c_adapter *adap); @@ -622,6 +630,9 @@ struct i2c_bus_recovery_info { /* gpio recovery */ struct gpio_desc *scl_gpiod; struct gpio_desc *sda_gpiod; + struct pinctrl *pinctrl; + struct pinctrl_state *pins_default; + struct pinctrl_state *pins_gpio; }; int i2c_recover_bus(struct i2c_adapter *adap); -- cgit v1.2.3 From 923a3a863ae0c26876d704fb3453069e11ebdcb6 Mon Sep 17 00:00:00 2001 From: Michael Shych Date: Mon, 4 May 2020 17:14:24 +0300 Subject: platform_data/mlxreg: support new watchdog type with longer timeout period Add new watchdog type 3 with longer timeout period. Extend size of health_cntr field that that can be used to init watchdog timeout period. Signed-off-by: Michael Shych Reviewed-by: Vadim Pasternak Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200504141427.17685-2-michaelsh@mellanox.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/platform_data/mlxreg.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index b8da8aef2446..2c5e58d1d77b 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -43,10 +43,13 @@ * * TYPE1 HW watchdog implementation exist in old systems. * All new systems have TYPE2 HW watchdog. + * TYPE3 HW watchdog can exist on all systems with new CPLD. + * TYPE3 is selected by WD capability bit. */ enum mlxreg_wdt_type { MLX_WDT_TYPE1, MLX_WDT_TYPE2, + MLX_WDT_TYPE3, }; /** @@ -90,7 +93,7 @@ struct mlxreg_core_data { umode_t mode; struct device_node *np; struct mlxreg_hotplug_device hpdev; - u8 health_cntr; + u32 health_cntr; bool attached; }; -- cgit v1.2.3 From cef9572e9af373cefd1adc9e771e89670da5da3c Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 17 Jul 2020 16:29:56 +0300 Subject: watchdog: add support for adjusting last known HW keepalive time Certain watchdogs require the watchdog only to be pinged within a specific time window, pinging too early or too late cause the watchdog to fire. In cases where this sort of watchdog has been started before kernel comes up, we must adjust the watchdog keepalive window to match the actually running timer, so add a new driver API for this purpose. Signed-off-by: Tero Kristo Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200717132958.14304-3-t-kristo@ti.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 1464ce6ffa31..9b19e6bb68b5 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -210,6 +210,8 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd, extern int watchdog_register_device(struct watchdog_device *); extern void watchdog_unregister_device(struct watchdog_device *); +int watchdog_set_last_hw_keepalive(struct watchdog_device *, unsigned int); + /* devres register variant */ int devm_watchdog_register_device(struct device *dev, struct watchdog_device *); -- cgit v1.2.3 From 1fb497dd003009be95ce67689ac800c446b7acc5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 Jul 2020 12:14:06 +0200 Subject: posix-cpu-timers: Provide mechanisms to defer timer handling to task_work Running posix CPU timers in hard interrupt context has a few downsides: - For PREEMPT_RT it cannot work as the expiry code needs to take sighand lock, which is a 'sleeping spinlock' in RT. The original RT approach of offloading the posix CPU timer handling into a high priority thread was clumsy and provided no real benefit in general. - For fine grained accounting it's just wrong to run this in context of the timer interrupt because that way a process specific CPU time is accounted to the timer interrupt. - Long running timer interrupts caused by a large amount of expiring timers which can be created and armed by unpriviledged user space. There is no hard requirement to expire them in interrupt context. If the signal is targeted at the task itself then it won't be delivered before the task returns to user space anyway. If the signal is targeted at a supervisor process then it might be slightly delayed, but posix CPU timers are inaccurate anyway due to the fact that they are tied to the tick. Provide infrastructure to schedule task work which allows splitting the posix CPU timer code into a quick check in interrupt context and a thread context expiry and signal delivery function. This has to be enabled by architectures as it requires that the architecture specific KVM implementation handles pending task work before exiting to guest mode. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20200730102337.783470146@linutronix.de --- include/linux/posix-timers.h | 17 +++++++++++++++++ include/linux/sched.h | 4 ++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index e3f0f8585da4..896c16d2c5fb 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -6,6 +6,7 @@ #include #include #include +#include struct kernel_siginfo; struct task_struct; @@ -125,6 +126,16 @@ struct posix_cputimers { unsigned int expiry_active; }; +/** + * posix_cputimers_work - Container for task work based posix CPU timer expiry + * @work: The task work to be scheduled + * @scheduled: @work has been scheduled already, no further processing + */ +struct posix_cputimers_work { + struct callback_head work; + unsigned int scheduled; +}; + static inline void posix_cputimers_init(struct posix_cputimers *pct) { memset(pct, 0, sizeof(*pct)); @@ -165,6 +176,12 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { } #endif +#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK +void posix_cputimers_init_work(void); +#else +static inline void posix_cputimers_init_work(void) { } +#endif + #define REQUEUE_PENDING 1 /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 06ec60462af0..e9942ce07ef1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -889,6 +889,10 @@ struct task_struct { /* Empty if CONFIG_POSIX_CPUTIMERS=n */ struct posix_cputimers posix_cputimers; +#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK + struct posix_cputimers_work posix_cputimers_work; +#endif + /* Process credentials: */ /* Tracer's credentials at attach: */ -- cgit v1.2.3 From 09fc67b500c7f0bb1b5ed774197ac7f2c5285655 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 17 Jul 2020 17:42:55 +0900 Subject: kprobes: Remove show_registers() function prototype Remove show_registers() function prototype because this function has been renamed by commit: 57da8b960b9a ("x86: Avoid double stack traces with show_regs()") and this commit has removed the caller in kprobes altogether: 80006dbee674 ("kprobes/x86: Remove jprobe implementation") So this doesn't exist anymore - remove the orphan prototype. Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 45b8cdc9fad7..9be1bff4f586 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -227,7 +227,6 @@ extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); -extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); -- cgit v1.2.3 From b55b3fdce3e554a6bbe8f8ca6a01a892d720e64e Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Fri, 17 Jul 2020 13:01:00 +0530 Subject: hw_breakpoint: Remove unused __register_perf_hw_breakpoint() declaration Commit: b326e9560a28 ("hw-breakpoints: Use overflow handler instead of the event callback") removed __register_perf_hw_breakpoint() function usage and replaced it with register_perf_hw_breakpoint() function. Remove the left-over unused __register_perf_hw_breakpoint() declaration from as well. Signed-off-by: Bhupesh Sharma Signed-off-by: Ingo Molnar Acked-by: Mark Rutland Link: https://lore.kernel.org/r/1594971060-14180-1-git-send-email-bhsharma@redhat.com --- include/linux/hw_breakpoint.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index d7d4250cd1e4..78dd7035d1e5 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -72,7 +72,6 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, void *context); extern int register_perf_hw_breakpoint(struct perf_event *bp); -extern int __register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events); @@ -119,8 +118,6 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, void *context) { return NULL; } static inline int register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } -static inline int -__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline void unregister_hw_breakpoint(struct perf_event *bp) { } static inline void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { } -- cgit v1.2.3 From 5e7b30205cef80f6bb922e61834437ca7bff5837 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Aug 2020 22:50:56 -0700 Subject: bpf: Change uapi for bpf iterator map elements Commit a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") added bpf iterator support for map elements. The map element bpf iterator requires info to identify a particular map. In the above commit, the attr->link_create.target_fd is used to carry map_fd and an enum bpf_iter_link_info is added to uapi to specify the target_fd actually representing a map_fd: enum bpf_iter_link_info { BPF_ITER_LINK_UNSPEC = 0, BPF_ITER_LINK_MAP_FD = 1, MAX_BPF_ITER_LINK_INFO, }; This is an extensible approach as we can grow enumerator for pid, cgroup_id, etc. and we can unionize target_fd for pid, cgroup_id, etc. But in the future, there are chances that more complex customization may happen, e.g., for tasks, it could be filtered based on both cgroup_id and user_id. This patch changed the uapi to have fields __aligned_u64 iter_info; __u32 iter_info_len; for additional iter_info for link_create. The iter_info is defined as union bpf_iter_link_info { struct { __u32 map_fd; } map; }; So future extension for additional customization will be easier. The bpf_iter_link_info will be passed to target callback to validate and generic bpf_iter framework does not need to deal it any more. Note that map_fd = 0 will be considered invalid and -EBADF will be returned to user space. Fixes: a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805055056.1457463-1-yhs@fb.com --- include/linux/bpf.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cef4ef0d2b4e..55f694b63164 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info { struct bpf_map *map; }; -typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux); +typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; - bpf_iter_check_target_t check_target; + bpf_iter_attach_target_t attach_target; + bpf_iter_detach_target_t detach_target; u32 ctx_arg_info_size; - enum bpf_iter_link_info req_linfo; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; -- cgit v1.2.3 From d58669b093997e4e5f98c38a54f99761657c19d2 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Fri, 31 Jul 2020 19:06:01 +0530 Subject: ACPI: APD: Change name from ST to FCH AMD SoC general pupose clk is present in new platforms with same MMIO mappings. We can reuse the same clk handler support for other platforms. Hence, changing name from ST(SoC) to FCH(IP) Signed-off-by: Akshu Agrawal Acked-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/platform_data/clk-fch.h | 17 +++++++++++++++++ include/linux/platform_data/clk-st.h | 17 ----------------- 2 files changed, 17 insertions(+), 17 deletions(-) create mode 100644 include/linux/platform_data/clk-fch.h delete mode 100644 include/linux/platform_data/clk-st.h (limited to 'include/linux') diff --git a/include/linux/platform_data/clk-fch.h b/include/linux/platform_data/clk-fch.h new file mode 100644 index 000000000000..850ca776156d --- /dev/null +++ b/include/linux/platform_data/clk-fch.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * clock framework for AMD misc clocks + * + * Copyright 2018 Advanced Micro Devices, Inc. + */ + +#ifndef __CLK_FCH_H +#define __CLK_FCH_H + +#include + +struct fch_clk_data { + void __iomem *base; +}; + +#endif /* __CLK_FCH_H */ diff --git a/include/linux/platform_data/clk-st.h b/include/linux/platform_data/clk-st.h deleted file mode 100644 index 7cdb6a402b35..000000000000 --- a/include/linux/platform_data/clk-st.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * clock framework for AMD Stoney based clock - * - * Copyright 2018 Advanced Micro Devices, Inc. - */ - -#ifndef __CLK_ST_H -#define __CLK_ST_H - -#include - -struct st_clk_data { - void __iomem *base; -}; - -#endif /* __CLK_ST_H */ -- cgit v1.2.3 From 7f8802f2d2ed67ffbac9264f946a52507f749e19 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Fri, 31 Jul 2020 19:06:03 +0530 Subject: ACPI: APD: Add a fmw property is_raven Since there is slight difference in AMD RV based soc in misc clk architecture. The fmw property will help in differentiating the SoCs. Signed-off-by: Akshu Agrawal Signed-off-by: Rafael J. Wysocki --- include/linux/platform_data/clk-fch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/clk-fch.h b/include/linux/platform_data/clk-fch.h index 850ca776156d..b9f682459f08 100644 --- a/include/linux/platform_data/clk-fch.h +++ b/include/linux/platform_data/clk-fch.h @@ -12,6 +12,7 @@ struct fch_clk_data { void __iomem *base; + u32 is_rv; }; #endif /* __CLK_FCH_H */ -- cgit v1.2.3 From 519a8a6cf91dda095be2d36216fc4ebc525270a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2020 18:42:14 +0200 Subject: net: Revert "net: optimize the sockptr_t for unified kernel/user address spaces" This reverts commits 6d04fe15f78acdf8e32329e208552e226f7a8ae6 and a31edb2059ed4e498f9aa8230c734b59d0ad797a. It turns out the idea to share a single pointer for both kernel and user space address causes various kinds of problems. So use the slightly less optimal version that uses an extra bit, but which is guaranteed to be safe everywhere. Fixes: 6d04fe15f78a ("net: optimize the sockptr_t for unified kernel/user address spaces") Reported-by: Eric Dumazet Reported-by: John Stultz Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/sockptr.h | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 96840def9d69..ea193414298b 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -8,26 +8,9 @@ #ifndef _LINUX_SOCKPTR_H #define _LINUX_SOCKPTR_H -#include #include #include -#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE -typedef union { - void *kernel; - void __user *user; -} sockptr_t; - -static inline bool sockptr_is_kernel(sockptr_t sockptr) -{ - return (unsigned long)sockptr.kernel >= TASK_SIZE; -} - -static inline sockptr_t KERNEL_SOCKPTR(void *p) -{ - return (sockptr_t) { .kernel = p }; -} -#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ typedef struct { union { void *kernel; @@ -45,15 +28,10 @@ static inline sockptr_t KERNEL_SOCKPTR(void *p) { return (sockptr_t) { .kernel = p, .is_kernel = true }; } -#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ -static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p, - size_t size) +static inline sockptr_t USER_SOCKPTR(void __user *p) { - if (!access_ok(p, size)) - return -EFAULT; - *sp = (sockptr_t) { .user = p }; - return 0; + return (sockptr_t) { .user = p }; } static inline bool sockptr_is_null(sockptr_t sockptr) -- cgit v1.2.3 From 444da3f52407d74c9aa12187ac6b01f76ee47d62 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 10 Aug 2020 11:21:11 -0700 Subject: bitfield.h: don't compile-time validate _val in FIELD_FIT When ur_load_imm_any() is inlined into jeq_imm(), it's possible for the compiler to deduce a case where _val can only have the value of -1 at compile time. Specifically, /* struct bpf_insn: _s32 imm */ u64 imm = insn->imm; /* sign extend */ if (imm >> 32) { /* non-zero only if insn->imm is negative */ /* inlined from ur_load_imm_any */ u32 __imm = imm >> 32; /* therefore, always 0xffffffff */ if (__builtin_constant_p(__imm) && __imm > 255) compiletime_assert_XXX() This can result in tripping a BUILD_BUG_ON() in __BF_FIELD_CHECK() that checks that a given value is representable in one byte (interpreted as unsigned). FIELD_FIT() should return true or false at runtime for whether a value can fit for not. Don't break the build over a value that's too large for the mask. We'd prefer to keep the inlining and compiler optimizations though we know this case will always return false. Cc: stable@vger.kernel.org Fixes: 1697599ee301a ("bitfield.h: add FIELD_FIT() helper") Link: https://lore.kernel.org/kernel-hardening/CAK7LNASvb0UDJ0U5wkYYRzTAdnEs64HjXpEUL7d=V0CXiAXcNw@mail.gmail.com/ Reported-by: Masahiro Yamada Debugged-by: Sami Tolvanen Signed-off-by: Jakub Kicinski Signed-off-by: Nick Desaulniers Signed-off-by: David S. Miller --- include/linux/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 48ea093ff04c..4e035aca6f7e 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -77,7 +77,7 @@ */ #define FIELD_FIT(_mask, _val) \ ({ \ - __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_FIT: "); \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ }) -- cgit v1.2.3 From efa8480a831673bb52400df9dbe5da0aacda97bf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Aug 2020 18:44:24 -0600 Subject: fs: RWF_NOWAIT should imply IOCB_NOIO With the change allowing read-ahead for IOCB_NOWAIT, we changed the RWF_NOWAIT semantics of only doing cached reads. Since we know have IOCB_NOIO to manage that specific side of it, just make RWF_NOWAIT imply IOCB_NOIO as well to restore the previous behavior. Fixes: 2e85abf053b9 ("mm: allow read-ahead with IOCB_NOWAIT set") Reported-by: Dave Chinner Reviewed-by: Dave Chinner Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index bd7ec3eaeed0..f1cca4bfdd7b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3293,7 +3293,7 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - kiocb_flags |= IOCB_NOWAIT; + kiocb_flags |= IOCB_NOWAIT | IOCB_NOIO; } if (flags & RWF_HIPRI) kiocb_flags |= IOCB_HIPRI; -- cgit v1.2.3 From f6ebbcf08f37b01827c51309a188e85165e498e7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Aug 2020 14:03:55 +0200 Subject: cpufreq: intel_pstate: Implement passive mode with HWP enabled Allow intel_pstate to work in the passive mode with HWP enabled and make it set the HWP minimum performance limit (HWP floor) to the P-state value given by the target frequency supplied by the cpufreq governor, so as to prevent the HWP algorithm and the CPU scheduler from working against each other, at least when the schedutil governor is in use, and update the intel_pstate documentation accordingly. Among other things, this allows utilization clamps to be taken into account, at least to a certain extent, when intel_pstate is in use and makes it more likely that sufficient capacity for deadline tasks will be provided. After this change, the resulting behavior of an HWP system with intel_pstate in the passive mode should be close to the behavior of the analogous non-HWP system with intel_pstate in the passive mode, except that the HWP algorithm is generally allowed to make the CPU run at a frequency above the floor P-state set by intel_pstate in the entire available range of P-states, while without HWP a CPU can run in a P-state above the requested one if the latter falls into the range of turbo P-states (referred to as the turbo range) or if the P-states of all CPUs in one package are coordinated with each other at the hardware level. [Note that in principle the HWP floor may not be taken into account by the processor if it falls into the turbo range, in which case the processor has a license to choose any P-state, either below or above the HWP floor, just like a non-HWP processor in the case when the target P-state falls into the turbo range.] With this change applied, intel_pstate in the passive mode assumes complete control over the HWP request MSR and concurrent changes of that MSR (eg. via the direct MSR access interface) are overridden by it. Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada Reviewed-by: Francisco Jerez --- include/linux/cpufreq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 58687a5bf9c8..8f141d4c859c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -576,6 +576,8 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); +int cpufreq_start_governor(struct cpufreq_policy *policy); +void cpufreq_stop_governor(struct cpufreq_policy *policy); #define cpufreq_governor_init(__governor) \ static int __init __governor##_init(void) \ -- cgit v1.2.3 From 772616b031f06e05846488b01dab46a7c832da13 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 11 Aug 2020 18:30:21 -0700 Subject: mm: memcg/percpu: per-memcg percpu memory statistics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Percpu memory can represent a noticeable chunk of the total memory consumption, especially on big machines with many CPUs. Let's track percpu memory usage for each memcg and display it in memory.stat. A percpu allocation is usually scattered over multiple pages (and nodes), and can be significantly smaller than a page. So let's add a byte-sized counter on the memcg level: MEMCG_PERCPU_B. Byte-sized vmstat infra created for slabs can be perfectly reused for percpu case. [guro@fb.com: v3] Link: http://lkml.kernel.org/r/20200623184515.4132564-4-guro@fb.com Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Dennis Zhou Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Tejun Heo Cc: Tobin C. Harding Cc: Vlastimil Babka Cc: Waiman Long Cc: Bixuan Cui Cc: Michal Koutný Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200608230819.832349-4-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1bb49b600310..2c2d301eac33 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,6 +32,7 @@ struct kmem_cache; enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, + MEMCG_PERCPU_B, MEMCG_NR_STAT, }; @@ -339,6 +340,13 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +static __always_inline bool memcg_stat_item_in_bytes(int idx) +{ + if (idx == MEMCG_PERCPU_B) + return true; + return vmstat_item_in_bytes(idx); +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return (memcg == root_mem_cgroup); -- cgit v1.2.3 From 8ca39e6874f812a393bb66d9fdbb7598d5f0451c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 11 Aug 2020 18:30:32 -0700 Subject: mm/hugetlb: add mempolicy check in the reservation routine In the reservation routine, we only check whether the cpuset meets the memory allocation requirements. But we ignore the mempolicy of MPOL_BIND case. If someone mmap hugetlb succeeds, but the subsequent memory allocation may fail due to mempolicy restrictions and receives the SIGBUS signal. This can be reproduced by the follow steps. 1) Compile the test case. cd tools/testing/selftests/vm/ gcc map_hugetlb.c -o map_hugetlb 2) Pre-allocate huge pages. Suppose there are 2 numa nodes in the system. Each node will pre-allocate one huge page. echo 2 > /proc/sys/vm/nr_hugepages 3) Run test case(mmap 4MB). We receive the SIGBUS signal. numactl --membind=3D0 ./map_hugetlb 4 With this patch applied, the mmap will fail in the step 3) and throw "mmap: Cannot allocate memory". [akpm@linux-foundation.org: include sched.h for `current'] Reported-by: Jianchao Guo Suggested-by: Michal Hocko Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Cc: David Rientjes Cc: Mel Gorman Cc: Michel Lespinasse Cc: Baoquan He Link: http://lkml.kernel.org/r/20200728034938.14993-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index ea9c15b60a96..5f1648c23e29 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -6,7 +6,7 @@ #ifndef _LINUX_MEMPOLICY_H #define _LINUX_MEMPOLICY_H 1 - +#include #include #include #include @@ -152,6 +152,15 @@ extern int huge_node(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_nodemask_intersects(struct task_struct *tsk, const nodemask_t *mask); +extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); + +static inline nodemask_t *policy_nodemask_current(gfp_t gfp) +{ + struct mempolicy *mpol = get_task_policy(current); + + return policy_nodemask(gfp, mpol); +} + extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; @@ -281,5 +290,10 @@ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, static inline void mpol_put_task_policy(struct task_struct *task) { } + +static inline nodemask_t *policy_nodemask_current(gfp_t gfp) +{ + return NULL; +} #endif /* CONFIG_NUMA */ #endif -- cgit v1.2.3 From b518154e59aab3ad0780a169c5cc84bd4ee4357e Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:40 -0700 Subject: mm/vmscan: protect the workingset on anonymous LRU In current implementation, newly created or swap-in anonymous page is started on active list. Growing active list results in rebalancing active/inactive list so old pages on active list are demoted to inactive list. Hence, the page on active list isn't protected at all. Following is an example of this situation. Assume that 50 hot pages on active list. Numbers denote the number of pages on active/inactive list (active | inactive). 1. 50 hot pages on active list 50(h) | 0 2. workload: 50 newly created (used-once) pages 50(uo) | 50(h) 3. workload: another 50 newly created (used-once) pages 50(uo) | 50(uo), swap-out 50(h) This patch tries to fix this issue. Like as file LRU, newly created or swap-in anonymous pages will be inserted to the inactive list. They are promoted to active list if enough reference happens. This simple modification changes the above example as following. 1. 50 hot pages on active list 50(h) | 0 2. workload: 50 newly created (used-once) pages 50(h) | 50(uo) 3. workload: another 50 newly created (used-once) pages 50(h) | 50(uo), swap-out 50(uo) As you can see, hot pages on active list would be protected. Note that, this implementation has a drawback that the page cannot be promoted and will be swapped-out if re-access interval is greater than the size of inactive list but less than the size of total(active+inactive). To solve this potential issue, following patch will apply workingset detection similar to the one that's already applied to file LRU. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Link: http://lkml.kernel.org/r/1595490560-15117-3-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7eb59bc552a5..51ec9cdb92c0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -352,7 +352,7 @@ extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); -extern void lru_cache_add_active_or_unevictable(struct page *page, +extern void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma); /* linux/mm/vmscan.c */ -- cgit v1.2.3 From 170b04b7ae49634df103810dad67b22cf8a99aa6 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:43 -0700 Subject: mm/workingset: prepare the workingset detection infrastructure for anon LRU To prepare the workingset detection for anon LRU, this patch splits workingset event counters for refault, activate and restore into anon and file variants, as well as the refaults counter in struct lruvec. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Link: http://lkml.kernel.org/r/1595490560-15117-4-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 635a96cd9b1f..efbd95dd3bbf 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -173,9 +173,15 @@ enum node_stat_item { NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_NODES, - WORKINGSET_REFAULT, - WORKINGSET_ACTIVATE, - WORKINGSET_RESTORE, + WORKINGSET_REFAULT_BASE, + WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE, + WORKINGSET_REFAULT_FILE, + WORKINGSET_ACTIVATE_BASE, + WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE, + WORKINGSET_ACTIVATE_FILE, + WORKINGSET_RESTORE_BASE, + WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE, + WORKINGSET_RESTORE_FILE, WORKINGSET_NODERECLAIM, NR_ANON_MAPPED, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. @@ -277,8 +283,8 @@ struct lruvec { unsigned long file_cost; /* Non-resident age, driven by LRU movement */ atomic_long_t nonresident_age; - /* Refaults at the time of last reclaim cycle */ - unsigned long refaults; + /* Refaults at the time of last reclaim cycle, anon=0, file=1 */ + unsigned long refaults[2]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; #ifdef CONFIG_MEMCG -- cgit v1.2.3 From 3852f6768ede542ed48b9077bedf482c7ecb6327 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:47 -0700 Subject: mm/swapcache: support to handle the shadow entries Workingset detection for anonymous page will be implemented in the following patch and it requires to store the shadow entries into the swapcache. This patch implements an infrastructure to store the shadow entry in the swapcache. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Vlastimil Babka Link: http://lkml.kernel.org/r/1595490560-15117-5-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/swap.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 51ec9cdb92c0..8a4c592698a9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -414,9 +414,13 @@ extern struct address_space *swapper_spaces[]; extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *page); -extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); -extern void __delete_from_swap_cache(struct page *, swp_entry_t entry); +extern int add_to_swap_cache(struct page *page, swp_entry_t entry, + gfp_t gfp, void **shadowp); +extern void __delete_from_swap_cache(struct page *page, + swp_entry_t entry, void *shadow); extern void delete_from_swap_cache(struct page *); +extern void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, @@ -570,13 +574,13 @@ static inline int add_to_swap(struct page *page) } static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, - gfp_t gfp_mask) + gfp_t gfp_mask, void **shadowp) { return -1; } static inline void __delete_from_swap_cache(struct page *page, - swp_entry_t entry) + swp_entry_t entry, void *shadow) { } @@ -584,6 +588,11 @@ static inline void delete_from_swap_cache(struct page *page) { } +static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end) +{ +} + static inline int page_swapcount(struct page *page) { return 0; -- cgit v1.2.3 From aae466b0052e1888edd1d7f473d4310d64936196 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:30:50 -0700 Subject: mm/swap: implement workingset detection for anonymous LRU This patch implements workingset detection for anonymous LRU. All the infrastructure is implemented by the previous patches so this patch just activates the workingset detection by installing/retrieving the shadow entry and adding refault calculation. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Acked-by: Vlastimil Babka Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Link: http://lkml.kernel.org/r/1595490560-15117-6-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 8a4c592698a9..661046994db4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -414,6 +414,7 @@ extern struct address_space *swapper_spaces[]; extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *page); +extern void *get_shadow_from_swap_cache(swp_entry_t entry); extern int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp, void **shadowp); extern void __delete_from_swap_cache(struct page *page, @@ -573,6 +574,11 @@ static inline int add_to_swap(struct page *page) return 0; } +static inline void *get_shadow_from_swap_cache(swp_entry_t entry) +{ + return NULL; +} + static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask, void **shadowp) { -- cgit v1.2.3 From facdaa917c4d5a376d09d25865f5a863f906234a Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Tue, 11 Aug 2020 18:31:00 -0700 Subject: mm: proactive compaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some applications, we need to allocate almost all memory as hugepages. However, on a running system, higher-order allocations can fail if the memory is fragmented. Linux kernel currently does on-demand compaction as we request more hugepages, but this style of compaction incurs very high latency. Experiments with one-time full memory compaction (followed by hugepage allocations) show that kernel is able to restore a highly fragmented memory state to a fairly compacted memory state within <1 sec for a 32G system. Such data suggests that a more proactive compaction can help us allocate a large fraction of memory as hugepages keeping allocation latencies low. For a more proactive compaction, the approach taken here is to define a new sysctl called 'vm.compaction_proactiveness' which dictates bounds for external fragmentation which kcompactd tries to maintain. The tunable takes a value in range [0, 100], with a default of 20. Note that a previous version of this patch [1] was found to introduce too many tunables (per-order extfrag{low, high}), but this one reduces them to just one sysctl. Also, the new tunable is an opaque value instead of asking for specific bounds of "external fragmentation", which would have been difficult to estimate. The internal interpretation of this opaque value allows for future fine-tuning. Currently, we use a simple translation from this tunable to [low, high] "fragmentation score" thresholds (low=100-proactiveness, high=low+10%). The score for a node is defined as weighted mean of per-zone external fragmentation. A zone's present_pages determines its weight. To periodically check per-node score, we reuse per-node kcompactd threads, which are woken up every 500 milliseconds to check the same. If a node's score exceeds its high threshold (as derived from user-provided proactiveness value), proactive compaction is started until its score reaches its low threshold value. By default, proactiveness is set to 20, which implies threshold values of low=80 and high=90. This patch is largely based on ideas from Michal Hocko [2]. See also the LWN article [3]. Performance data ================ System: x64_64, 1T RAM, 80 CPU threads. Kernel: 5.6.0-rc3 + this patch echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/defrag Before starting the driver, the system was fragmented from a userspace program that allocates all memory and then for each 2M aligned section, frees 3/4 of base pages using munmap. The workload is mainly anonymous userspace pages, which are easy to move around. I intentionally avoided unmovable pages in this test to see how much latency we incur when hugepage allocations hit direct compaction. 1. Kernel hugepage allocation latencies With the system in such a fragmented state, a kernel driver then allocates as many hugepages as possible and measures allocation latency: (all latency values are in microseconds) - With vanilla 5.6.0-rc3 percentile latency –––––––––– ––––––– 5 7894 10 9496 25 12561 30 15295 40 18244 50 21229 60 27556 75 30147 80 31047 90 32859 95 33799 Total 2M hugepages allocated = 383859 (749G worth of hugepages out of 762G total free => 98% of free memory could be allocated as hugepages) - With 5.6.0-rc3 + this patch, with proactiveness=20 sysctl -w vm.compaction_proactiveness=20 percentile latency –––––––––– ––––––– 5 2 10 2 25 3 30 3 40 3 50 4 60 4 75 4 80 4 90 5 95 429 Total 2M hugepages allocated = 384105 (750G worth of hugepages out of 762G total free => 98% of free memory could be allocated as hugepages) 2. JAVA heap allocation In this test, we first fragment memory using the same method as for (1). Then, we start a Java process with a heap size set to 700G and request the heap to be allocated with THP hugepages. We also set THP to madvise to allow hugepage backing of this heap. /usr/bin/time java -Xms700G -Xmx700G -XX:+UseTransparentHugePages -XX:+AlwaysPreTouch The above command allocates 700G of Java heap using hugepages. - With vanilla 5.6.0-rc3 17.39user 1666.48system 27:37.89elapsed - With 5.6.0-rc3 + this patch, with proactiveness=20 8.35user 194.58system 3:19.62elapsed Elapsed time remains around 3:15, as proactiveness is further increased. Note that proactive compaction happens throughout the runtime of these workloads. The situation of one-time compaction, sufficient to supply hugepages for following allocation stream, can probably happen for more extreme proactiveness values, like 80 or 90. In the above Java workload, proactiveness is set to 20. The test starts with a node's score of 80 or higher, depending on the delay between the fragmentation step and starting the benchmark, which gives more-or-less time for the initial round of compaction. As t he benchmark consumes hugepages, node's score quickly rises above the high threshold (90) and proactive compaction starts again, which brings down the score to the low threshold level (80). Repeat. bpftrace also confirms proactive compaction running 20+ times during the runtime of this Java benchmark. kcompactd threads consume 100% of one of the CPUs while it tries to bring a node's score within thresholds. Backoff behavior ================ Above workloads produce a memory state which is easy to compact. However, if memory is filled with unmovable pages, proactive compaction should essentially back off. To test this aspect: - Created a kernel driver that allocates almost all memory as hugepages followed by freeing first 3/4 of each hugepage. - Set proactiveness=40 - Note that proactive_compact_node() is deferred maximum number of times with HPAGE_FRAG_CHECK_INTERVAL_MSEC of wait between each check (=> ~30 seconds between retries). [1] https://patchwork.kernel.org/patch/11098289/ [2] https://lore.kernel.org/linux-mm/20161230131412.GI13301@dhcp22.suse.cz/ [3] https://lwn.net/Articles/817905/ Signed-off-by: Nitin Gupta Signed-off-by: Andrew Morton Tested-by: Oleksandr Natalenko Reviewed-by: Vlastimil Babka Reviewed-by: Khalid Aziz Reviewed-by: Oleksandr Natalenko Cc: Vlastimil Babka Cc: Khalid Aziz Cc: Michal Hocko Cc: Mel Gorman Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Joonsoo Kim Cc: David Rientjes Cc: Nitin Gupta Cc: Oleksandr Natalenko Link: http://lkml.kernel.org/r/20200616204527.19185-1-nigupta@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 6fa0eea3f530..7a242d46454e 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -85,11 +85,13 @@ static inline unsigned long compact_gap(unsigned int order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; +extern int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; +extern int extfrag_for_order(struct zone *zone, unsigned int order); extern int fragmentation_index(struct zone *zone, unsigned int order); extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, -- cgit v1.2.3 From d34c0a7599ea8c301bc471dfa1eb2bf2db6752d1 Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Tue, 11 Aug 2020 18:31:07 -0700 Subject: mm: use unsigned types for fragmentation score Proactive compaction uses per-node/zone "fragmentation score" which is always in range [0, 100], so use unsigned type of these scores as well as for related constants. Signed-off-by: Nitin Gupta Signed-off-by: Andrew Morton Reviewed-by: Baoquan He Cc: Luis Chamberlain Cc: Kees Cook Cc: Iurii Zaikin Cc: Vlastimil Babka Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200618010319.13159-1-nigupta@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 7a242d46454e..25a521d299c1 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -85,13 +85,13 @@ static inline unsigned long compact_gap(unsigned int order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; -extern int sysctl_compaction_proactiveness; +extern unsigned int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; -extern int extfrag_for_order(struct zone *zone, unsigned int order); +extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order); extern int fragmentation_index(struct zone *zone, unsigned int order); extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, -- cgit v1.2.3 From 860b32729a21e0565585a9e2ecea2e5244d65acd Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 11 Aug 2020 18:31:10 -0700 Subject: mm/compaction: correct the comments of compact_defer_shift There is no compact_defer_limit. It should be compact_defer_shift in use. and add compact_order_failed explanation. Signed-off-by: Alex Shi Signed-off-by: Andrew Morton Reviewed-by: Alexander Duyck Link: http://lkml.kernel.org/r/3bd60e1b-a74e-050d-ade4-6e8f54e00b92@linux.alibaba.com Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index efbd95dd3bbf..8379432f4f2f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -536,6 +536,7 @@ struct zone { * On compaction failure, 1< Date: Tue, 11 Aug 2020 18:31:19 -0700 Subject: include/linux/mempolicy.h: fix typo Change "interlave" to "interleave". Signed-off-by: Yanfei Xu Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/20200810063454.9357-1-yanfei.xu@windriver.com Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5f1648c23e29..5f1c74df264d 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -28,7 +28,7 @@ struct mm_struct; * the process policy is used. Interrupts ignore the memory policy * of the current process. * - * Locking policy for interlave: + * Locking policy for interleave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on * mmap_lock. -- cgit v1.2.3 From 9066e5cfb73cdbcdbb49e87999482ab615e9fc76 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 11 Aug 2020 18:31:22 -0700 Subject: mm, oom: make the calculation of oom badness more accurate Recently we found an issue on our production environment that when memcg oom is triggered the oom killer doesn't chose the process with largest resident memory but chose the first scanned process. Note that all processes in this memcg have the same oom_score_adj, so the oom killer should chose the process with largest resident memory. Bellow is part of the oom info, which is enough to analyze this issue. [7516987.983223] memory: usage 16777216kB, limit 16777216kB, failcnt 52843037 [7516987.983224] memory+swap: usage 16777216kB, limit 9007199254740988kB, failcnt 0 [7516987.983225] kmem: usage 301464kB, limit 9007199254740988kB, failcnt 0 [...] [7516987.983293] [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name [7516987.983510] [ 5740] 0 5740 257 1 32768 0 -998 pause [7516987.983574] [58804] 0 58804 4594 771 81920 0 -998 entry_point.bas [7516987.983577] [58908] 0 58908 7089 689 98304 0 -998 cron [7516987.983580] [58910] 0 58910 16235 5576 163840 0 -998 supervisord [7516987.983590] [59620] 0 59620 18074 1395 188416 0 -998 sshd [7516987.983594] [59622] 0 59622 18680 6679 188416 0 -998 python [7516987.983598] [59624] 0 59624 1859266 5161 548864 0 -998 odin-agent [7516987.983600] [59625] 0 59625 707223 9248 983040 0 -998 filebeat [7516987.983604] [59627] 0 59627 416433 64239 774144 0 -998 odin-log-agent [7516987.983607] [59631] 0 59631 180671 15012 385024 0 -998 python3 [7516987.983612] [61396] 0 61396 791287 3189 352256 0 -998 client [7516987.983615] [61641] 0 61641 1844642 29089 946176 0 -998 client [7516987.983765] [ 9236] 0 9236 2642 467 53248 0 -998 php_scanner [7516987.983911] [42898] 0 42898 15543 838 167936 0 -998 su [7516987.983915] [42900] 1000 42900 3673 867 77824 0 -998 exec_script_vr2 [7516987.983918] [42925] 1000 42925 36475 19033 335872 0 -998 python [7516987.983921] [57146] 1000 57146 3673 848 73728 0 -998 exec_script_J2p [7516987.983925] [57195] 1000 57195 186359 22958 491520 0 -998 python2 [7516987.983928] [58376] 1000 58376 275764 14402 290816 0 -998 rosmaster [7516987.983931] [58395] 1000 58395 155166 4449 245760 0 -998 rosout [7516987.983935] [58406] 1000 58406 18285584 3967322 37101568 0 -998 data_sim [7516987.984221] oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),cpuset=3aa16c9482ae3a6f6b78bda68a55d32c87c99b985e0f11331cddf05af6c4d753,mems_allowed=0-1,oom_memcg=/kubepods/podf1c273d3-9b36-11ea-b3df-246e9693c184,task_memcg=/kubepods/podf1c273d3-9b36-11ea-b3df-246e9693c184/1f246a3eeea8f70bf91141eeaf1805346a666e225f823906485ea0b6c37dfc3d,task=pause,pid=5740,uid=0 [7516987.984254] Memory cgroup out of memory: Killed process 5740 (pause) total-vm:1028kB, anon-rss:4kB, file-rss:0kB, shmem-rss:0kB [7516988.092344] oom_reaper: reaped process 5740 (pause), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB We can find that the first scanned process 5740 (pause) was killed, but its rss is only one page. That is because, when we calculate the oom badness in oom_badness(), we always ignore the negtive point and convert all of these negtive points to 1. Now as oom_score_adj of all the processes in this targeted memcg have the same value -998, the points of these processes are all negtive value. As a result, the first scanned process will be killed. The oom_socre_adj (-998) in this memcg is set by kubelet, because it is a a Guaranteed pod, which has higher priority to prevent from being killed by system oom. To fix this issue, we should make the calculation of oom point more accurate. We can achieve it by convert the chosen_point from 'unsigned long' to 'long'. [cai@lca.pw: reported a issue in the previous version] [mhocko@suse.com: fixed the issue reported by Cai] [mhocko@suse.com: add the comment in proc_oom_score()] [laoar.shao@gmail.com: v3] Link: http://lkml.kernel.org/r/1594396651-9931-1-git-send-email-laoar.shao@gmail.com Signed-off-by: Yafang Shao Signed-off-by: Andrew Morton Tested-by: Naresh Kamboju Acked-by: Michal Hocko Cc: David Rientjes Cc: Qian Cai Link: http://lkml.kernel.org/r/1594309987-9919-1-git-send-email-laoar.shao@gmail.com Signed-off-by: Linus Torvalds --- include/linux/oom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index c696c265f019..f022f581ac29 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -48,7 +48,7 @@ struct oom_control { /* Used by oom implementation, do not set */ unsigned long totalpages; struct task_struct *chosen; - unsigned long chosen_points; + long chosen_points; /* Used to print the constraint info. */ enum oom_constraint constraint; @@ -107,7 +107,7 @@ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) bool __oom_reap_task_mm(struct mm_struct *mm); -extern unsigned long oom_badness(struct task_struct *p, +long oom_badness(struct task_struct *p, unsigned long totalpages); extern bool out_of_memory(struct oom_control *oc); -- cgit v1.2.3 From 34ae204f18519f0920bd50a644abd6fefc8dbfcf Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 11 Aug 2020 18:31:38 -0700 Subject: hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem Commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") requires callers of huge_pte_alloc to hold i_mmap_rwsem in at least read mode. This is because the explicit locking in huge_pmd_share (called by huge_pte_alloc) was removed. When restructuring the code, the call to huge_pte_alloc in the else block at the beginning of hugetlb_fault was missed. Unfortunately, that else clause is exercised when there is no page table entry. This will likely lead to a call to huge_pmd_share. If huge_pmd_share thinks pmd sharing is possible, it will traverse the mapping tree (i_mmap) without holding i_mmap_rwsem. If someone else is modifying the tree, bad things such as addressing exceptions or worse could happen. Simply remove the else clause. It should have been removed previously. The code following the else will call huge_pte_alloc with the appropriate locking. To prevent this type of issue in the future, add routines to assert that i_mmap_rwsem is held, and call these routines in huge pmd sharing routines. Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") Suggested-by: Matthew Wilcox Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Cc: Michal Hocko Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: Andrea Arcangeli Cc: "Kirill A.Shutemov" Cc: Davidlohr Bueso Cc: Prakash Sangappa Cc: Link: http://lkml.kernel.org/r/e670f327-5cf9-1959-96e4-6dc7cc30d3d5@oracle.com Signed-off-by: Linus Torvalds --- include/linux/fs.h | 10 ++++++++++ include/linux/hugetlb.h | 8 +++++--- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 011af396aa17..7c69dd7c6160 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -518,6 +518,16 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) up_read(&mapping->i_mmap_rwsem); } +static inline void i_mmap_assert_locked(struct address_space *mapping) +{ + lockdep_assert_held(&mapping->i_mmap_rwsem); +} + +static inline void i_mmap_assert_write_locked(struct address_space *mapping) +{ + lockdep_assert_held_write(&mapping->i_mmap_rwsem); +} + /* * Might pages of this file be mapped into userspace? */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 50650d0d01b9..a520bf26e5d8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -164,7 +164,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); +int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, @@ -203,8 +204,9 @@ static inline struct address_space *hugetlb_page_mapping_lock_write( return NULL; } -static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, - pte_t *ptep) +static inline int huge_pmd_unshare(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { return 0; } -- cgit v1.2.3 From 4958e4d86ecb011a81f5d80ce2023ef9f10692d8 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 11 Aug 2020 18:31:48 -0700 Subject: mm: thp: remove debug_cow switch Since commit 3917c80280c93a7123f ("thp: change CoW semantics for anon-THP"), the CoW page fault of THP has been rewritten, debug_cow is not used anymore. So, just remove it. Signed-off-by: Yang Shi Signed-off-by: Andrew Morton Reviewed-by: Zi Yan Cc: Kirill A. Shutemov Link: http://lkml.kernel.org/r/1592270980-116062-1-git-send-email-yang.shi@linux.alibaba.com Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 17c4c4975145..467302056e17 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -181,13 +181,6 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1< Date: Tue, 11 Aug 2020 18:31:51 -0700 Subject: mm/vmstat: add events for THP migration without split Add following new vmstat events which will help in validating THP migration without split. Statistics reported through these new VM events will help in performance debugging. 1. THP_MIGRATION_SUCCESS 2. THP_MIGRATION_FAILURE 3. THP_MIGRATION_SPLIT In addition, these new events also update normal page migration statistics appropriately via PGMIGRATE_SUCCESS and PGMIGRATE_FAILURE. While here, this updates current trace event 'mm_migrate_pages' to accommodate now available THP statistics. [akpm@linux-foundation.org: s/hpage_nr_pages/thp_nr_pages/] [ziy@nvidia.com: v2] Link: http://lkml.kernel.org/r/C5E3C65C-8253-4638-9D3C-71A61858BB8B@nvidia.com [anshuman.khandual@arm.com: s/thp_nr_pages/hpage_nr_pages/] Link: http://lkml.kernel.org/r/1594287583-16568-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Signed-off-by: Zi Yan Signed-off-by: Andrew Morton Reviewed-by: Daniel Jordan Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Zi Yan Cc: John Hubbard Cc: Naoya Horiguchi Link: http://lkml.kernel.org/r/1594080415-27924-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 24fc7c3ae7d6..2e6ca53b9bbd 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -56,6 +56,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, + THP_MIGRATION_SUCCESS, + THP_MIGRATION_FAIL, + THP_MIGRATION_SPLIT, #endif #ifdef CONFIG_COMPACTION COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED, -- cgit v1.2.3 From af161bee93332a1ff10ba029f41936d21850ae82 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 11 Aug 2020 18:32:06 -0700 Subject: include/linux/sched/mm.h: optimize current_gfp_context() The current_gfp_context() converts a number of PF_MEMALLOC_* per-process flags into the corresponding GFP_* flags for memory allocation. In that function, current->flags is accessed 3 times. That may lead to duplicated access of the same memory location. This is not usually a problem with minimal debug config options on as the compiler can optimize away the duplicated memory accesses. With most of the debug config options on, however, that may not be the case. For example, the x86-64 object size of the __need_fs_reclaim() in a debug kernel that calls current_gfp_context() was 309 bytes. With this patch applied, the object size is reduced to 202 bytes. This is a saving of 107 bytes and will probably be slightly faster too. Use READ_ONCE() to access current->flags to prevent the compiler from possibly accessing current->flags multiple times. Signed-off-by: Waiman Long Signed-off-by: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mathieu Desnoyers Cc: Michel Lespinasse Link: http://lkml.kernel.org/r/20200618212936.9776-1-longman@redhat.com Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 85023ddc2dc2..f889e332912f 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -178,14 +178,16 @@ static inline bool in_vfork(struct task_struct *tsk) */ static inline gfp_t current_gfp_context(gfp_t flags) { - if (unlikely(current->flags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) { + unsigned int pflags = READ_ONCE(current->flags); + + if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) { /* * NOIO implies both NOIO and NOFS and it is a weaker context * so always make sure it makes precedence */ - if (current->flags & PF_MEMALLOC_NOIO) + if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); - else if (current->flags & PF_MEMALLOC_NOFS) + else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; } return flags; -- cgit v1.2.3 From 1067b261cc973d68c29de54ef0587c56786e6bd5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:27 -0700 Subject: mm: drop duplicated words in Drop the doubled words "used" and "by". Drop the repeated acronym "TLB" and make several other fixes around it. (capital letters, spellos) Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: SeongJae Park Link: http://lkml.kernel.org/r/2bb6e13e-44df-4920-52d9-4d3539945f73@infradead.org Signed-off-by: Linus Torvalds --- include/linux/pgtable.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 53e97da1e8e2..a124c21e3204 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -804,7 +804,7 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, /* * No-op macros that just return the current protection value. Defined here - * because these macros can be used used even if CONFIG_MMU is not defined. + * because these macros can be used even if CONFIG_MMU is not defined. */ #ifndef pgprot_nx @@ -1234,7 +1234,7 @@ static inline int pmd_trans_unstable(pmd_t *pmd) * Technically a PTE can be PROTNONE even when not doing NUMA balancing but * the only case the kernel cares is for NUMA balancing and is only ever set * when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked - * _PAGE_PROTNONE so by by default, implement the helper as "always no". It + * _PAGE_PROTNONE so by default, implement the helper as "always no". It * is the responsibility of the caller to distinguish between PROT_NONE * protections and NUMA hinting fault protections. */ @@ -1318,10 +1318,10 @@ static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) /* * ARCHes with special requirements for evicting THP backing TLB entries can * implement this. Otherwise also, it can help optimize normal TLB flush in - * THP regime. stock flush_tlb_range() typically has optimization to nuke the - * entire TLB TLB if flush span is greater than a threshold, which will - * likely be true for a single huge page. Thus a single thp flush will - * invalidate the entire TLB which is not desitable. + * THP regime. Stock flush_tlb_range() typically has optimization to nuke the + * entire TLB if flush span is greater than a threshold, which will + * likely be true for a single huge page. Thus a single THP flush will + * invalidate the entire TLB which is not desirable. * e.g. see arch/arc: flush_pmd_tlb_range */ #define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) -- cgit v1.2.3 From 11192337206d2211bce3ba4ec1575439b6045654 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:30 -0700 Subject: mm: drop duplicated words in Drop the doubled words "to" and "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: SeongJae Park Link: http://lkml.kernel.org/r/d9fae8d6-0d60-4d52-9385-3199ee98de49@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f6a82f9bccd7..f97b10117d44 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -479,7 +479,7 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags) { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } /* - * vm_fault is filled by the the pagefault handler and passed to the vma's + * vm_fault is filled by the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask * of VM_FAULT_xxx flags that give details about how the fault was handled. * @@ -2599,7 +2599,7 @@ extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -/* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */ +/* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ extern int expand_downwards(struct vm_area_struct *vma, unsigned long address); #if VM_GROWSUP -- cgit v1.2.3 From 3ecabd31341fb4307f156a2bf4467c3f8fa9101b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:33 -0700 Subject: include/linux/highmem.h: fix duplicated words in a comment Change the doubled word "is" in a comment to "it is". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/ad605959-0083-4794-8d31-6b073300dd6f@infradead.org Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index d6e82e3de027..14e6202ce47f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -73,7 +73,7 @@ static inline void kunmap(struct page *page) * no global lock is needed and because the kmap code must perform a global TLB * invalidation when the kmap pool wraps. * - * However when holding an atomic kmap is is not legal to sleep, so atomic + * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. * * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap -- cgit v1.2.3 From c82f16b52cb3b50ac1b47e6c590e3dddefc5a97c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:36 -0700 Subject: include/linux/frontswap.h: drop duplicated word in a comment Drop the doubled word "in" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/3af7ed91-ad62-8445-40a4-9e07a64b9523@infradead.org Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 6d775984905b..b07d88c92bb2 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -10,7 +10,7 @@ /* * Return code to denote that requested number of * frontswap pages are unused(moved to page cache). - * Used in in shmem_unuse and try_to_unuse. + * Used in shmem_unuse and try_to_unuse. */ #define FRONTSWAP_PAGES_UNUSED 2 -- cgit v1.2.3 From 0845f83122d93ae3bafa7ea10209de2148a3b9bc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:32:40 -0700 Subject: include/linux/memcontrol.h: drop duplicate word and fix spello Drop the doubled word "for" in a comment. Fix spello of "incremented". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Chris Down Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/b04aa2e4-7c95-12f0-599d-43d07fb28134@infradead.org Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2c2d301eac33..385237e4cb44 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -65,8 +65,8 @@ struct mem_cgroup_id { /* * Per memcg event counter is incremented at every pagein/pageout. With THP, - * it will be incremated by the number of pages. This counter is used for - * for trigger some periodic events. This is straightforward and better + * it will be incremented by the number of pages. This counter is used + * to trigger some periodic events. This is straightforward and better * than using jiffies etc. to handle periodic memcg event. */ enum mem_cgroup_events_target { -- cgit v1.2.3 From bfe00c5bbd9ee37b99c281429556c335271d027b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:33:34 -0700 Subject: syscalls: use uaccess_kernel in addr_limit_user_check Patch series "clean up address limit helpers", v2. In preparation for eventually phasing out direct use of set_fs(), this series removes the segment_eq() arch helper that is only used to implement or duplicate the uaccess_kernel() API, and then adds descriptive helpers to force the kernel address limit. This patch (of 6): Use the uaccess_kernel helper instead of duplicating it. [hch@lst.de: arm: don't call addr_limit_user_check for nommu] Link: http://lkml.kernel.org/r/20200721045834.GA9613@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Tested-by: Guenter Roeck Acked-by: Linus Torvalds Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20200714105505.935079-1-hch@lst.de Link: http://lkml.kernel.org/r/20200710135706.537715-1-hch@lst.de Link: http://lkml.kernel.org/r/20200710135706.537715-2-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a2429d336593..dc2b827c81e5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -263,7 +263,7 @@ static inline void addr_limit_user_check(void) return; #endif - if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS), + if (CHECK_DATA_CORRUPTION(uaccess_kernel(), "Invalid address limit on user-mode return")) force_sig(SIGKILL); -- cgit v1.2.3 From 428e2976a5bf7e7f5554286d7a5a33b8147b106a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:33:44 -0700 Subject: uaccess: remove segment_eq segment_eq is only used to implement uaccess_kernel. Just open code uaccess_kernel in the arch uaccess headers and remove one layer of indirection. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Acked-by: Greentime Hu Acked-by: Geert Uytterhoeven Cc: Nick Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Link: http://lkml.kernel.org/r/20200710135706.537715-5-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 0a76ddc07d59..5c62d0c6f15b 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -6,8 +6,6 @@ #include #include -#define uaccess_kernel() segment_eq(get_fs(), KERNEL_DS) - #include /* -- cgit v1.2.3 From 3d13f313ce4c34c524ccc37986fe77172f601ff3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:33:47 -0700 Subject: uaccess: add force_uaccess_{begin,end} helpers Add helpers to wrap the get_fs/set_fs magic for undoing any damange done by set_fs(KERNEL_DS). There is no real functional benefit, but this documents the intent of these calls better, and will allow stubbing the functions out easily for kernels builds that do not allow address space overrides in the future. [hch@lst.de: drop two incorrect hunks, fix a commit log typo] Link: http://lkml.kernel.org/r/20200714105505.935079-6-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Acked-by: Linus Torvalds Acked-by: Mark Rutland Acked-by: Greentime Hu Acked-by: Geert Uytterhoeven Cc: Nick Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Link: http://lkml.kernel.org/r/20200710135706.537715-6-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5c62d0c6f15b..94b285411659 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -8,6 +8,24 @@ #include +/* + * Force the uaccess routines to be wired up for actual userspace access, + * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone + * using force_uaccess_end below. + */ +static inline mm_segment_t force_uaccess_begin(void) +{ + mm_segment_t fs = get_fs(); + + set_fs(USER_DS); + return fs; +} + +static inline void force_uaccess_end(mm_segment_t oldfs) +{ + set_fs(oldfs); +} + /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and -- cgit v1.2.3 From c5f748e2f2ad970078de11bd6b12bcd81147c636 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:33:57 -0700 Subject: include/linux/compiler-clang.h: drop duplicated word in a comment Drop the doubled word "the" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Nathan Chancellor Link: http://lkml.kernel.org/r/6a18c301-3505-742f-4dd7-0f38d0e537b9@infradead.org Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 8a072d00e688..cee0c728d39a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -40,7 +40,7 @@ #endif /* - * Not all versions of clang implement the the type-generic versions + * Not all versions of clang implement the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements * __has_builtin allowing us to avoid awkward version * checks. Unfortunately, we don't know which version of gcc clang -- cgit v1.2.3 From cd1a406fa46f81b1a859ef874b8413a6fa6ac7e8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:34:00 -0700 Subject: include/linux/exportfs.h: drop duplicated word in a comment Drop the doubled word "a" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Alexander Viro Link: http://lkml.kernel.org/r/c61b707a-8fd8-5b1b-aab0-679122881543@infradead.org Signed-off-by: Linus Torvalds --- include/linux/exportfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d896b8657085..3ceb72b67a7a 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -178,7 +178,7 @@ struct fid { * get_name: * @get_name should find a name for the given @child in the given @parent * directory. The name should be stored in the @name (with the - * understanding that it is already pointing to a a %NAME_MAX+1 sized + * understanding that it is already pointing to a %NAME_MAX+1 sized * buffer. get_name() should return %0 on success, a negative error code * or error. @get_name will be called without @parent->i_mutex held. * -- cgit v1.2.3 From 121ae8da9cd49f7139bf80f26352337458e63fe1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:34:04 -0700 Subject: include/linux/async_tx.h: drop duplicated word in a comment Drop the doubled word "the" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Dan Williams Link: http://lkml.kernel.org/r/e85802f7-8f48-8b4c-29b3-ea237a2c7ae9@infradead.org Signed-off-by: Linus Torvalds --- include/linux/async_tx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 75e582b8d2d9..4c328fef403c 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -36,7 +36,7 @@ struct dma_chan_ref { /** * async_tx_flags - modifiers for the async_* calls * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where the - * the destination address is not a source. The asynchronous case handles this + * destination address is not a source. The asynchronous case handles this * implicitly, the synchronous case needs to zero the destination block. * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is * also one of the source addresses. In the synchronous case the destination -- cgit v1.2.3 From f48ff83e9c1a8fc2e8bfedb4855933eb1ed159b2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Aug 2020 18:34:07 -0700 Subject: include/linux/xz.h: drop duplicated word Drop the doubled word "than" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Cc: Lasse Collin Link: http://lkml.kernel.org/r/05ebba7a-c1e4-01ae-fc7b-15c081b33f3e@infradead.org Signed-off-by: Linus Torvalds --- include/linux/xz.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xz.h b/include/linux/xz.h index 64cffa6ddfce..24ad7d875977 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -28,7 +28,7 @@ * enum xz_mode - Operation mode * * @XZ_SINGLE: Single-call mode. This uses less RAM than - * than multi-call modes, because the LZMA2 + * multi-call modes, because the LZMA2 * dictionary doesn't need to be allocated as * part of the decoder state. All required data * structures are allocated at initialization, -- cgit v1.2.3 From 8043fc147a97ec2eefc582487f344f2cbe86d12e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:34:10 -0700 Subject: kernel: add a kernel_wait helper Add a helper that waits for a pid and stores the status in the passed in kernel pointer. Use it to fix the usage of kernel_wait4 in call_usermodehelper_exec_sync that only happens to work due to the implicit set_fs(KERNEL_DS) for kernel threads. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Acked-by: "Eric W. Biederman" Cc: Luis Chamberlain Link: http://lkml.kernel.org/r/20200721130449.5008-1-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/sched/task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ae3060f0b0c9..a98965007eef 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -88,6 +88,7 @@ struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); +int kernel_wait(pid_t pid, int *stat); extern void free_task(struct task_struct *tsk); -- cgit v1.2.3 From 376653435dacf84a8aca87e66aff94079a817cf2 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 11 Aug 2020 18:34:16 -0700 Subject: kernel.h: remove duplicate include of asm/div64.h This seems to have been added inadvertently in commit 72deb455b5ec ("block: remove CONFIG_LBDAF") Fixes: 72deb455b5ec ("block: remove CONFIG_LBDAF") Signed-off-by: Arvind Sankar Signed-off-by: Andrew Morton Reviewed-by: Christoph Hellwig Link: http://lkml.kernel.org/r/20200727034852.2813453-1-nivedita@alum.mit.edu Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7339a00c895e..e19c13616666 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -17,7 +17,6 @@ #include #include #include -#include #define STACK_MAGIC 0xdeadbeef -- cgit v1.2.3 From 7f317d34906c1033f0752fc137dda04e43979bb8 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Tue, 11 Aug 2020 18:34:19 -0700 Subject: include/: replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Signed-off-by: Alexander A. Klimov Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Link: http://lkml.kernel.org/r/20200726110117.16346-1-grandmaster@al2klimov.de Signed-off-by: Linus Torvalds --- include/linux/btree.h | 2 +- include/linux/delay.h | 2 +- include/linux/dma/k3-psil.h | 2 +- include/linux/dma/k3-udma-glue.h | 2 +- include/linux/dma/ti-cppi5.h | 2 +- include/linux/irqchip/irq-omap-intc.h | 2 +- include/linux/jhash.h | 2 +- include/linux/leds-ti-lmu-common.h | 2 +- include/linux/platform_data/davinci-cpufreq.h | 2 +- include/linux/platform_data/davinci_asp.h | 2 +- include/linux/platform_data/elm.h | 2 +- include/linux/platform_data/gpio-davinci.h | 2 +- include/linux/platform_data/gpmc-omap.h | 2 +- include/linux/platform_data/mtd-davinci-aemif.h | 2 +- include/linux/platform_data/omap-twl4030.h | 2 +- include/linux/platform_data/uio_pruss.h | 2 +- include/linux/platform_data/usb-omap.h | 2 +- include/linux/soc/ti/k3-ringacc.h | 2 +- include/linux/soc/ti/knav_qmss.h | 2 +- include/linux/soc/ti/ti-msgmgr.h | 2 +- include/linux/wkup_m3_ipc.h | 2 +- include/linux/xxhash.h | 2 +- include/linux/xz.h | 2 +- include/linux/zlib.h | 2 +- 24 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/btree.h b/include/linux/btree.h index 68f858c831b1..243ee544397a 100644 --- a/include/linux/btree.h +++ b/include/linux/btree.h @@ -10,7 +10,7 @@ * * A B+Tree is a data structure for looking up arbitrary (currently allowing * unsigned long, u32, u64 and 2 * u64) keys into pointers. The data structure - * is described at http://en.wikipedia.org/wiki/B-tree, we currently do not + * is described at https://en.wikipedia.org/wiki/B-tree, we currently do not * use binary search to find the key on lookups. * * Each B+Tree consists of a head, that contains bookkeeping information and diff --git a/include/linux/delay.h b/include/linux/delay.h index 5e016a4029d9..1d0e2ce6b6d9 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -16,7 +16,7 @@ * 3. CPU clock rate changes. * * Please see this thread: - * http://lists.openwall.net/linux-kernel/2011/01/09/56 + * https://lists.openwall.net/linux-kernel/2011/01/09/56 */ #include diff --git a/include/linux/dma/k3-psil.h b/include/linux/dma/k3-psil.h index 61d5cc0ad601..1962f75fa2d3 100644 --- a/include/linux/dma/k3-psil.h +++ b/include/linux/dma/k3-psil.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef K3_PSIL_H_ diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index caadbab1632a..5eb34ad973a7 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef K3_UDMA_GLUE_H_ diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h index 579356ae447e..5896441ee604 100644 --- a/include/linux/dma/ti-cppi5.h +++ b/include/linux/dma/ti-cppi5.h @@ -2,7 +2,7 @@ /* * CPPI5 descriptors interface * - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef __TI_CPPI5_H__ diff --git a/include/linux/irqchip/irq-omap-intc.h b/include/linux/irqchip/irq-omap-intc.h index 216e5adf80ce..dca379c0d7eb 100644 --- a/include/linux/irqchip/irq-omap-intc.h +++ b/include/linux/irqchip/irq-omap-intc.h @@ -2,7 +2,7 @@ /** * irq-omap-intc.h - INTC Idle Functions * - * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com * * Author: Felipe Balbi */ diff --git a/include/linux/jhash.h b/include/linux/jhash.h index ba2f6a9776b6..19ddd43aee68 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -5,7 +5,7 @@ * * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) * - * http://burtleburtle.net/bob/hash/ + * https://burtleburtle.net/bob/hash/ * * These are the credits from Bob's sources: * diff --git a/include/linux/leds-ti-lmu-common.h b/include/linux/leds-ti-lmu-common.h index 5eb111f38803..420b61e5a213 100644 --- a/include/linux/leds-ti-lmu-common.h +++ b/include/linux/leds-ti-lmu-common.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ // TI LMU Common Core -// Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/ +// Copyright (C) 2018 Texas Instruments Incorporated - https://www.ti.com/ #ifndef _TI_LMU_COMMON_H_ #define _TI_LMU_COMMON_H_ diff --git a/include/linux/platform_data/davinci-cpufreq.h b/include/linux/platform_data/davinci-cpufreq.h index 3fbf9f2793b5..bc208c64e3d7 100644 --- a/include/linux/platform_data/davinci-cpufreq.h +++ b/include/linux/platform_data/davinci-cpufreq.h @@ -2,7 +2,7 @@ /* * TI DaVinci CPUFreq platform support. * - * Copyright (C) 2009 Texas Instruments, Inc. http://www.ti.com/ + * Copyright (C) 2009 Texas Instruments, Inc. https://www.ti.com/ */ #ifndef _MACH_DAVINCI_CPUFREQ_H diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h index 7fe80f1c7e08..5d1fb0d78a22 100644 --- a/include/linux/platform_data/davinci_asp.h +++ b/include/linux/platform_data/davinci_asp.h @@ -1,7 +1,7 @@ /* * TI DaVinci Audio Serial Port support * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index 0f491d8abfdd..3cc78f0447b1 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -2,7 +2,7 @@ /* * BCH Error Location Module * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef __ELM_H diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index a93841bfb9f7..e182a46e609f 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -1,7 +1,7 @@ /* * DaVinci GPIO Platform Related Defines * - * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2013 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h index ef663e570552..c9cc4e32435d 100644 --- a/include/linux/platform_data/gpmc-omap.h +++ b/include/linux/platform_data/gpmc-omap.h @@ -2,7 +2,7 @@ /* * OMAP GPMC Platform data * - * Copyright (C) 2014 Texas Instruments, Inc. - http://www.ti.com + * Copyright (C) 2014 Texas Instruments, Inc. - https://www.ti.com * Roger Quadros */ diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h index a403dd51dacc..a49826214a39 100644 --- a/include/linux/platform_data/mtd-davinci-aemif.h +++ b/include/linux/platform_data/mtd-davinci-aemif.h @@ -1,7 +1,7 @@ /* * TI DaVinci AEMIF support * - * Copyright 2010 (C) Texas Instruments, Inc. http://www.ti.com/ + * Copyright 2010 (C) Texas Instruments, Inc. https://www.ti.com/ * * This file is licensed under the terms of the GNU General Public License * version 2. This program is licensed "as is" without any warranty of any diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h index 8419c8caf54e..0dd851ea1c72 100644 --- a/include/linux/platform_data/omap-twl4030.h +++ b/include/linux/platform_data/omap-twl4030.h @@ -3,7 +3,7 @@ * omap-twl4030.h - ASoC machine driver for TI SoC based boards with twl4030 * codec, header. * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com * All rights reserved. * * Author: Peter Ujfalusi diff --git a/include/linux/platform_data/uio_pruss.h b/include/linux/platform_data/uio_pruss.h index 3d47d219827f..31f2e22661bc 100644 --- a/include/linux/platform_data/uio_pruss.h +++ b/include/linux/platform_data/uio_pruss.h @@ -3,7 +3,7 @@ * * Platform data for uio_pruss driver * - * Copyright (C) 2010-11 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2010-11 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h index fa579b4c666b..5e70d667031c 100644 --- a/include/linux/platform_data/usb-omap.h +++ b/include/linux/platform_data/usb-omap.h @@ -1,7 +1,7 @@ /* * usb-omap.h - Platform data for the various OMAP USB IPs * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com * * This software is distributed under the terms of the GNU General Public * License ("GPL") version 2, as published by the Free Software Foundation. diff --git a/include/linux/soc/ti/k3-ringacc.h b/include/linux/soc/ti/k3-ringacc.h index 7ac115432fa1..5a472eca5ee4 100644 --- a/include/linux/soc/ti/k3-ringacc.h +++ b/include/linux/soc/ti/k3-ringacc.h @@ -2,7 +2,7 @@ /* * K3 Ring Accelerator (RA) subsystem interface * - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef __SOC_TI_K3_RINGACC_API_H_ diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h index 9745df6ed9d3..c75ef99c99ca 100644 --- a/include/linux/soc/ti/knav_qmss.h +++ b/include/linux/soc/ti/knav_qmss.h @@ -1,7 +1,7 @@ /* * Keystone Navigator Queue Management Sub-System header * - * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com * Author: Sandeep Nair * Cyril Chemparathy * Santosh Shilimkar diff --git a/include/linux/soc/ti/ti-msgmgr.h b/include/linux/soc/ti/ti-msgmgr.h index eac8e0c6fe11..1f6e76d423cf 100644 --- a/include/linux/soc/ti/ti-msgmgr.h +++ b/include/linux/soc/ti/ti-msgmgr.h @@ -1,7 +1,7 @@ /* * Texas Instruments' Message Manager * - * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015-2016 Texas Instruments Incorporated - https://www.ti.com/ * Nishanth Menon * * This program is free software; you can redistribute it and/or modify diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h index e497e621dbb7..3f496967b538 100644 --- a/include/linux/wkup_m3_ipc.h +++ b/include/linux/wkup_m3_ipc.h @@ -1,7 +1,7 @@ /* * TI Wakeup M3 for AMx3 SoCs Power Management Routines * - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Dave Gerlach * * This program is free software; you can redistribute it and/or diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index 52b073fea17f..df42511438d0 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -34,7 +34,7 @@ * ("BSD"). * * You can contact the author at: - * - xxHash homepage: http://cyan4973.github.io/xxHash/ + * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ diff --git a/include/linux/xz.h b/include/linux/xz.h index 24ad7d875977..9884c8440188 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -2,7 +2,7 @@ * XZ decompressor * * Authors: Lasse Collin - * Igor Pavlov + * Igor Pavlov * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/include/linux/zlib.h b/include/linux/zlib.h index c757d848a758..78ede944c082 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -23,7 +23,7 @@ The data format used by the zlib library is described by RFCs (Request for - Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt + Comments) 1950 to 1952 in the files https://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). */ -- cgit v1.2.3 From 9e58c5e2fcd8d8d8820ea277e0f577f563eed1f1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 11 Aug 2020 18:34:23 -0700 Subject: include/linux/poison.h: remove obsolete comment When the definition was changed, the comment became stale. Just remove it since there isn't anything useful to say here. Fixes: b8a0255db958 ("include/linux/poison.h: use POISON_POINTER_DELTA for poison pointers") Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Vasily Kulikov Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20200730174108.GJ23808@casper.infradead.org Signed-off-by: Linus Torvalds --- include/linux/poison.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index df34330b4e34..dc8ae5d8db03 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -24,10 +24,6 @@ #define LIST_POISON2 ((void *) 0x122 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ -/* - * Magic number "tsta" to indicate a static timer initializer - * for the object debugging code. - */ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) /********** mm/page_poison.c **********/ -- cgit v1.2.3 From 25fd529c34d063d1bef23742f2e8f8341c639dc3 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 11 Aug 2020 18:34:26 -0700 Subject: sparse: group the defines by functionality By popular demand, reorder the defines for sparse annotations and group them by functionality. Signed-off-by: Luc Van Oostenryck Signed-off-by: Andrew Morton Acked-by: Miguel Ojeda Cc: Geert Uytterhoeven Link: lore.kernel.org/r/CAMuHMdWQsirja-h3wBcZezk+H2Q_HShhAks8Hc8ps5fTAp=ObQ@mail.gmail.com Link: http://lkml.kernel.org/r/20200621143652.53798-1-luc.vanoostenryck@gmail.com Signed-off-by: Linus Torvalds --- include/linux/compiler_types.h | 44 ++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 2e231ba8fe3f..4b33cb385f96 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -5,48 +5,54 @@ #ifndef __ASSEMBLY__ #ifdef __CHECKER__ +/* address spaces */ # define __kernel __attribute__((address_space(0))) # define __user __attribute__((noderef, address_space(__user))) -# define __safe __attribute__((safe)) -# define __force __attribute__((force)) -# define __nocast __attribute__((nocast)) # define __iomem __attribute__((noderef, address_space(__iomem))) +# define __percpu __attribute__((noderef, address_space(__percpu))) +# define __rcu __attribute__((noderef, address_space(__rcu))) +extern void __chk_user_ptr(const volatile void __user *); +extern void __chk_io_ptr(const volatile void __iomem *); +/* context/locking */ # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) # define __releases(x) __attribute__((context(x,1,0))) # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) -# define __percpu __attribute__((noderef, address_space(__percpu))) -# define __rcu __attribute__((noderef, address_space(__rcu))) +/* other */ +# define __force __attribute__((force)) +# define __nocast __attribute__((nocast)) +# define __safe __attribute__((safe)) # define __private __attribute__((noderef)) -extern void __chk_user_ptr(const volatile void __user *); -extern void __chk_io_ptr(const volatile void __iomem *); # define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) #else /* __CHECKER__ */ +/* address spaces */ +# define __kernel # ifdef STRUCTLEAK_PLUGIN -# define __user __attribute__((user)) +# define __user __attribute__((user)) # else # define __user # endif -# define __kernel -# define __safe -# define __force -# define __nocast # define __iomem -# define __chk_user_ptr(x) (void)0 -# define __chk_io_ptr(x) (void)0 -# define __builtin_warning(x, y...) (1) +# define __percpu +# define __rcu +# define __chk_user_ptr(x) (void)0 +# define __chk_io_ptr(x) (void)0 +/* context/locking */ # define __must_hold(x) # define __acquires(x) # define __releases(x) -# define __acquire(x) (void)0 -# define __release(x) (void)0 +# define __acquire(x) (void)0 +# define __release(x) (void)0 # define __cond_lock(x,c) (c) -# define __percpu -# define __rcu +/* other */ +# define __force +# define __nocast +# define __safe # define __private # define ACCESS_PRIVATE(p, member) ((p)->member) +# define __builtin_warning(x, y...) (1) #endif /* __CHECKER__ */ /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ -- cgit v1.2.3 From 0a650e472d2039a5f768acd82f2a7068bf338dfd Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 11 Aug 2020 18:34:35 -0700 Subject: lib/generic-radix-tree.c: remove unneeded __rcu struct __genradix is defined as having its member 'root' annotated as __rcu. But in the corresponding API RCU is not used. Sparse reports this type mismatch as: lib/generic-radix-tree.c:56:35: warning: incorrect type in initializer (different address spaces) lib/generic-radix-tree.c:56:35: expected struct genradix_root *r lib/generic-radix-tree.c:56:35: got struct genradix_root [noderef] *__val with 6 other ones. So, correct root's type by removing this unneeded __rcu. Signed-off-by: Luc Van Oostenryck Signed-off-by: Andrew Morton Cc: Kent Overstreet Link: http://lkml.kernel.org/r/20200621161745.55396-1-luc.vanoostenryck@gmail.com Signed-off-by: Linus Torvalds --- include/linux/generic-radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 02393c0c98f9..bfd00320c7f3 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -44,7 +44,7 @@ struct genradix_root; struct __genradix { - struct genradix_root __rcu *root; + struct genradix_root *root; }; /* -- cgit v1.2.3 From b642e44e8ab335868b549fe5753b783ca47bf3a3 Mon Sep 17 00:00:00 2001 From: Kars Mulder Date: Tue, 11 Aug 2020 18:34:53 -0700 Subject: kstrto*: correct documentation references to simple_strto*() The documentation of the kstrto*() functions reference the simple_strtoull function by "used as a replacement for [the obsolete] simple_strtoull". All these functions describes themselves as replacements for the function simple_strtoull, even though a function like kstrtol() would be more aptly described as a replacement of simple_strtol(). Fix these references by making the documentation of kstrto*() reference the closest simple_strto*() equivalent available. The functions kstrto[u]int() do not have direct simple_strto[u]int() equivalences, so these are made to refer to simple_strto[u]l() instead. Furthermore, add parentheses after function names, as is standard in kernel documentation. Fixes: 4c925d6031f71 ("kstrto*: add documentation") Signed-off-by: Kars Mulder Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Cc: Eldad Zack Cc: Miguel Ojeda Cc: Geert Uytterhoeven Cc: Mans Rullgard Cc: Petr Mladek Link: http://lkml.kernel.org/r/1ee1-5f234c00-f3-165a6440@234394593 Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e19c13616666..47b1dad63ffc 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -346,7 +346,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtoull. Return code must be checked. + * Used as a replacement for the simple_strtoul(). Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { @@ -374,7 +374,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtoull. Return code must be checked. + * Used as a replacement for the simple_strtol(). Return code must be checked. */ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { -- cgit v1.2.3 From ef0f2685336bbc334e8b6997ce9b155e5f7edd31 Mon Sep 17 00:00:00 2001 From: Kars Mulder Date: Tue, 11 Aug 2020 18:34:56 -0700 Subject: kstrto*: do not describe simple_strto*() as obsolete/replaced The documentation of the kstrto*() functions describes kstrto*() as "replacements" of the "obsolete" simple_strto*() functions. Both of these terms are inaccurate: they're not replacements because they have different behaviour, and the simple_strto*() are not obsolete because there are cases where they have benefits over kstrto*(). Remove usage of the terms "replacement" and "obsolete" in reference to simple_strto*(), and instead use the term "preferred over". Fixes: 4c925d6031f71 ("kstrto*: add documentation") Fixes: 885e68e8b7b13 ("kernel.h: update comment about simple_strto() functions") Signed-off-by: Kars Mulder Signed-off-by: Andrew Morton Reviewed-by: Andy Shevchenko Cc: Eldad Zack Cc: Miguel Ojeda Cc: Geert Uytterhoeven Cc: Mans Rullgard Cc: Petr Mladek Link: http://lkml.kernel.org/r/29b9-5f234c80-13-4e3aa200@244003027 Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 47b1dad63ffc..92517ae53e7c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -346,7 +346,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtoul(). Return code must be checked. + * Preferred over simple_strtoul(). Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { @@ -374,7 +374,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtol(). Return code must be checked. + * Preferred over simple_strtol(). Return code must be checked. */ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { -- cgit v1.2.3 From 0935288c6e008c0682ab6171fb5605dcc049e7bd Mon Sep 17 00:00:00 2001 From: Vijay Balakrishna Date: Tue, 11 Aug 2020 18:36:33 -0700 Subject: kdump: append kernel build-id string to VMCOREINFO Make kernel GNU build-id available in VMCOREINFO. Having build-id in VMCOREINFO facilitates presenting appropriate kernel namelist image with debug information file to kernel crash dump analysis tools. Currently VMCOREINFO lacks uniquely identifiable key for crash analysis automation. Regarding if this patch is necessary or matching of linux_banner and OSRELEASE in VMCOREINFO employed by crash(8) meets the need -- IMO, build-id approach more foolproof, in most instances it is a cryptographic hash generated using internal code/ELF bits unlike kernel version string upon which linux_banner is based that is external to the code. I feel each is intended for a different purpose. Also OSRELEASE is not suitable when two different kernel builds from same version with different features enabled. Currently for most linux (and non-linux) systems build-id can be extracted using standard methods for file types such as user mode crash dumps, shared libraries, loadable kernel modules etc., This is an exception for linux kernel dump. Having build-id in VMCOREINFO brings some uniformity for automation tools. Tyler said: : I think this is a nice improvement over today's linux_banner approach for : correlating vmlinux to a kernel dump. : : The elf notes parsing in this patch lines up with what is described in in : the "Notes (Nhdr)" section of the elf(5) man page. : : BUILD_ID_MAX is sufficient to hold a sha1 build-id, which is the default : build-id type today in GNU ld(2). It is also sufficient to hold the : "fast" build-id, which is the default build-id type today in LLVM lld(2). Signed-off-by: Vijay Balakrishna Signed-off-by: Andrew Morton Reviewed-by: Tyler Hicks Acked-by: Baoquan He Cc: Dave Young Cc: Vivek Goyal Link: http://lkml.kernel.org/r/1591849672-34104-1-git-send-email-vijayb@linux.microsoft.com Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 525510a9f965..6594dbc34a37 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -38,6 +38,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); #define VMCOREINFO_OSRELEASE(value) \ vmcoreinfo_append_str("OSRELEASE=%s\n", value) +#define VMCOREINFO_BUILD_ID(value) \ + vmcoreinfo_append_str("BUILD-ID=%s\n", value) #define VMCOREINFO_PAGESIZE(value) \ vmcoreinfo_append_str("PAGESIZE=%ld\n", value) #define VMCOREINFO_SYMBOL(name) \ @@ -64,6 +66,10 @@ extern unsigned char *vmcoreinfo_data; extern size_t vmcoreinfo_size; extern u32 *vmcoreinfo_note; +/* raw contents of kernel .notes section */ +extern const void __start_notes __weak; +extern const void __stop_notes __weak; + Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); -- cgit v1.2.3 From 79076e1241bb3bf02d0aac7d39120d8161fe07b1 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Aug 2020 18:36:46 -0700 Subject: kernel/panic.c: make oops_may_print() return bool The return value of oops_may_print() is true or false, so change its type to reflect that. Signed-off-by: Tiezhu Yang Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Cc: Xuefeng Li Link: http://lkml.kernel.org/r/1591103358-32087-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 92517ae53e7c..211005163682 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -322,7 +322,7 @@ void nmi_panic(struct pt_regs *regs, const char *msg); extern void oops_enter(void); extern void oops_exit(void); void print_oops_end_marker(void); -extern int oops_may_print(void); +extern bool oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; -- cgit v1.2.3 From 63037f74725ddd8a767ed2ad0369e60a3bf1f2ce Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Tue, 11 Aug 2020 18:36:53 -0700 Subject: panic: make print_oops_end_marker() static Since print_oops_end_marker() is not used externally, also remove it in kernel.h at the same time. Signed-off-by: Yue Hu Signed-off-by: Andrew Morton Cc: Kees Cook Link: http://lkml.kernel.org/r/20200724011516.12756-1-zbestahu@gmail.com Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 211005163682..500def620d8f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -321,7 +321,6 @@ void panic(const char *fmt, ...) __noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); extern void oops_enter(void); extern void oops_exit(void); -void print_oops_end_marker(void); extern bool oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; -- cgit v1.2.3 From b4b382238ed2f94f0d3860f9120b66404fa99463 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:14 -0700 Subject: mm/migrate: move migration helper from .h to .c It's not performance sensitive function. Move it to .c. This is a preparation step for future change. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Acked-by: Mike Kravetz Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1594622517-20681-3-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 540998d9810b..abeb4b15b297 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -31,34 +31,6 @@ enum migrate_reason { /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ extern const char *migrate_reason_names[MR_TYPES]; -static inline struct page *new_page_nodemask(struct page *page, - int preferred_nid, nodemask_t *nodemask) -{ - gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; - unsigned int order = 0; - struct page *new_page = NULL; - - if (PageHuge(page)) - return alloc_huge_page_nodemask(page_hstate(compound_head(page)), - preferred_nid, nodemask); - - if (PageTransHuge(page)) { - gfp_mask |= GFP_TRANSHUGE; - order = HPAGE_PMD_ORDER; - } - - if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) - gfp_mask |= __GFP_HIGHMEM; - - new_page = __alloc_pages_nodemask(gfp_mask, order, - preferred_nid, nodemask); - - if (new_page && PageTransHuge(new_page)) - prep_transhuge_page(new_page); - - return new_page; -} - #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); @@ -67,6 +39,8 @@ extern int migrate_page(struct address_space *mapping, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); +extern struct page *new_page_nodemask(struct page *page, + int preferred_nid, nodemask_t *nodemask); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); @@ -85,6 +59,9 @@ static inline int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason) { return -ENOSYS; } +static inline struct page *new_page_nodemask(struct page *page, + int preferred_nid, nodemask_t *nodemask) + { return NULL; } static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } -- cgit v1.2.3 From d92bbc2719bd2be237ee336113b63492a6baca3b Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:17 -0700 Subject: mm/hugetlb: unify migration callbacks There is no difference between two migration callback functions, alloc_huge_page_node() and alloc_huge_page_nodemask(), except __GFP_THISNODE handling. It's redundant to have two almost similar functions in order to handle this flag. So, this patch tries to remove one by introducing a new argument, gfp_mask, to alloc_huge_page_nodemask(). After introducing gfp_mask argument, it's caller's job to provide correct gfp_mask. So, every callsites for alloc_huge_page_nodemask() are changed to provide gfp_mask. Note that it's safe to remove a node id check in alloc_huge_page_node() since there is no caller passing NUMA_NO_NODE as a node id. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Reviewed-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1594622517-20681-4-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a520bf26e5d8..3517edde681e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -10,6 +10,7 @@ #include #include #include +#include struct ctl_table; struct user_struct; @@ -506,9 +507,8 @@ struct huge_bootmem_page { struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, - nodemask_t *nmask); + nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, @@ -694,6 +694,15 @@ static inline bool hugepage_movable_supported(struct hstate *h) return true; } +/* Movability of hugepages depends on migration support. */ +static inline gfp_t htlb_alloc_mask(struct hstate *h) +{ + if (hugepage_movable_supported(h)) + return GFP_HIGHUSER_MOVABLE; + else + return GFP_HIGHUSER; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { @@ -761,13 +770,9 @@ static inline struct page *alloc_huge_page(struct vm_area_struct *vma, return NULL; } -static inline struct page *alloc_huge_page_node(struct hstate *h, int nid) -{ - return NULL; -} - static inline struct page * -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask) +alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask) { return NULL; } @@ -880,6 +885,11 @@ static inline bool hugepage_movable_supported(struct hstate *h) return false; } +static inline gfp_t htlb_alloc_mask(struct hstate *h) +{ + return 0; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { -- cgit v1.2.3 From 19fc7bed252c16ace29491e4cfa2bafb264eb505 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:25 -0700 Subject: mm/migrate: introduce a standard migration target allocation function There are some similar functions for migration target allocation. Since there is no fundamental difference, it's better to keep just one rather than keeping all variants. This patch implements base migration target allocation function. In the following patches, variants will be converted to use this function. Changes should be mechanical, but, unfortunately, there are some differences. First, some callers' nodemask is assgined to NULL since NULL nodemask will be considered as all available nodes, that is, &node_states[N_MEMORY]. Second, for hugetlb page allocation, gfp_mask is redefined as regular hugetlb allocation gfp_mask plus __GFP_THISNODE if user provided gfp_mask has it. This is because future caller of this function requires to set this node constaint. Lastly, if provided nodeid is NUMA_NO_NODE, nodeid is set up to the node where migration source lives. It helps to remove simple wrappers for setting up the nodeid. Note that PageHighmem() call in previous function is changed to open-code "is_highmem_idx()" since it provides more readability. [akpm@linux-foundation.org: tweak patch title, per Vlastimil] [akpm@linux-foundation.org: fix typo in comment] Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1594622517-20681-6-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 15 +++++++++++++++ include/linux/migrate.h | 9 +++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3517edde681e..30e1f14119c8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -703,6 +703,16 @@ static inline gfp_t htlb_alloc_mask(struct hstate *h) return GFP_HIGHUSER; } +static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) +{ + gfp_t modified_mask = htlb_alloc_mask(h); + + /* Some callers might want to enforce node */ + modified_mask |= (gfp_mask & __GFP_THISNODE); + + return modified_mask; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { @@ -890,6 +900,11 @@ static inline gfp_t htlb_alloc_mask(struct hstate *h) return 0; } +static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) +{ + return 0; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/include/linux/migrate.h b/include/linux/migrate.h index abeb4b15b297..0f8d1583fa8e 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -10,6 +10,8 @@ typedef struct page *new_page_t(struct page *page, unsigned long private); typedef void free_page_t(struct page *page, unsigned long private); +struct migration_target_control; + /* * Return values from addresss_space_operations.migratepage(): * - negative errno on page migration failure; @@ -39,8 +41,7 @@ extern int migrate_page(struct address_space *mapping, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); -extern struct page *new_page_nodemask(struct page *page, - int preferred_nid, nodemask_t *nodemask); +extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); @@ -59,8 +60,8 @@ static inline int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason) { return -ENOSYS; } -static inline struct page *new_page_nodemask(struct page *page, - int preferred_nid, nodemask_t *nodemask) +static inline struct page *alloc_migration_target(struct page *page, + unsigned long private) { return NULL; } static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } -- cgit v1.2.3 From 41b4dc14ee807cb1bd15e67cad287534046f92dc Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:34 -0700 Subject: mm/gup: restrict CMA region by using allocation scope API We have well defined scope API to exclude CMA region. Use it rather than manipulating gfp_mask manually. With this change, we can now restore __GFP_MOVABLE for gfp_mask like as usual migration target allocation. It would result in that the ZONE_MOVABLE is also searched by page allocator. For hugetlb, gfp_mask is redefined since it has a regular allocation mask filter for migration target. __GPF_NOWARN is added to hugetlb gfp_mask filter since a new user for gfp_mask filter, gup, want to be silent when allocation fails. Note that this can be considered as a fix for the commit 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region"). However, "Fixes" tag isn't added here since it is just suboptimal but it doesn't cause any problem. Suggested-by: Michal Hocko Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Christoph Hellwig Cc: Roman Gushchin Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: "Aneesh Kumar K . V" Link: http://lkml.kernel.org/r/1596180906-8442-1-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 30e1f14119c8..d86c82749836 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -710,6 +710,8 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) /* Some callers might want to enforce node */ modified_mask |= (gfp_mask & __GFP_THISNODE); + modified_mask |= (gfp_mask & __GFP_NOWARN); + return modified_mask; } -- cgit v1.2.3 From bbe88753bd42b1faf1458dde8f58ff1239990436 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 11 Aug 2020 18:37:38 -0700 Subject: mm/hugetlb: make hugetlb migration callback CMA aware new_non_cma_page() in gup.c requires to allocate the new page that is not on the CMA area. new_non_cma_page() implements it by using allocation scope APIs. However, there is a work-around for hugetlb. Normal hugetlb page allocation API for migration is alloc_huge_page_nodemask(). It consists of two steps. First is dequeing from the pool. Second is, if there is no available page on the queue, allocating by using the page allocator. new_non_cma_page() can't use this API since first step (deque) isn't aware of scope API to exclude CMA area. So, new_non_cma_page() exports hugetlb internal function for the second step, alloc_migrate_huge_page(), to global scope and uses it directly. This is suboptimal since hugetlb pages on the queue cannot be utilized. This patch tries to fix this situation by making the deque function on hugetlb CMA aware. In the deque function, CMA memory is skipped if PF_MEMALLOC_NOCMA flag is found. Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Mike Kravetz Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: "Aneesh Kumar K . V" Cc: Christoph Hellwig Cc: Naoya Horiguchi Cc: Roman Gushchin Link: http://lkml.kernel.org/r/1596180906-8442-2-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d86c82749836..d5cc5f802dd4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -511,8 +511,6 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, - int nid, nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); -- cgit v1.2.3 From bce617edecada007aee8610fbe2c14d10b8de2f6 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:37:44 -0700 Subject: mm: do page fault accounting in handle_mm_fault Patch series "mm: Page fault accounting cleanups", v5. This is v5 of the pf accounting cleanup series. It originates from Gerald Schaefer's report on an issue a week ago regarding to incorrect page fault accountings for retried page fault after commit 4064b9827063 ("mm: allow VM_FAULT_RETRY for multiple times"): https://lore.kernel.org/lkml/20200610174811.44b94525@thinkpad/ What this series did: - Correct page fault accounting: we do accounting for a page fault (no matter whether it's from #PF handling, or gup, or anything else) only with the one that completed the fault. For example, page fault retries should not be counted in page fault counters. Same to the perf events. - Unify definition of PERF_COUNT_SW_PAGE_FAULTS: currently this perf event is used in an adhoc way across different archs. Case (1): for many archs it's done at the entry of a page fault handler, so that it will also cover e.g. errornous faults. Case (2): for some other archs, it is only accounted when the page fault is resolved successfully. Case (3): there're still quite some archs that have not enabled this perf event. Since this series will touch merely all the archs, we unify this perf event to always follow case (1), which is the one that makes most sense. And since we moved the accounting into handle_mm_fault, the other two MAJ/MIN perf events are well taken care of naturally. - Unify definition of "major faults": the definition of "major fault" is slightly changed when used in accounting (not VM_FAULT_MAJOR). More information in patch 1. - Always account the page fault onto the one that triggered the page fault. This does not matter much for #PF handlings, but mostly for gup. More information on this in patch 25. Patchset layout: Patch 1: Introduced the accounting in handle_mm_fault(), not enabled. Patch 2-23: Enable the new accounting for arch #PF handlers one by one. Patch 24: Enable the new accounting for the rest outliers (gup, iommu, etc.) Patch 25: Cleanup GUP task_struct pointer since it's not needed any more This patch (of 25): This is a preparation patch to move page fault accountings into the general code in handle_mm_fault(). This includes both the per task flt_maj/flt_min counters, and the major/minor page fault perf events. To do this, the pt_regs pointer is passed into handle_mm_fault(). PERF_COUNT_SW_PAGE_FAULTS should still be kept in per-arch page fault handlers. So far, all the pt_regs pointer that passed into handle_mm_fault() is NULL, which means this patch should have no intented functional change. Suggested-by: Linus Torvalds Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Albert Ou Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Brian Cain Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Chris Zankel Cc: Dave Hansen Cc: David S. Miller Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Greentime Hu Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James E.J. Bottomley Cc: John Hubbard Cc: Jonas Bonn Cc: Ley Foon Tan Cc: "Luck, Tony" Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Nick Hu Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vincent Chen Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200707225021.200906-1-peterx@redhat.com Link: http://lkml.kernel.org/r/20200707225021.200906-2-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f97b10117d44..ec0ffb423769 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -38,6 +38,7 @@ struct file_ra_state; struct user_struct; struct writeback_control; struct bdi_writeback; +struct pt_regs; void init_mm_internals(void); @@ -1658,7 +1659,8 @@ int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, - unsigned long address, unsigned int flags); + unsigned long address, unsigned int flags, + struct pt_regs *regs); extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); @@ -1668,7 +1670,8 @@ void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); #else static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) + unsigned long address, unsigned int flags, + struct pt_regs *regs) { /* should never happen if there's no MMU */ BUG(); -- cgit v1.2.3 From 64019a2e467a288a16b65ab55ddcbf58c1b00187 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 11 Aug 2020 18:39:01 -0700 Subject: mm/gup: remove task_struct pointer for all gup code After the cleanup of page fault accounting, gup does not need to pass task_struct around any more. Remove that parameter in the whole gup stack. Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Reviewed-by: John Hubbard Link: http://lkml.kernel.org/r/20200707225021.200906-26-peterx@redhat.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ec0ffb423769..e7602a3bcef1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1661,7 +1661,7 @@ int invalidate_inode_page(struct page *page); extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs); -extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, +extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); void unmap_mapping_pages(struct address_space *mapping, @@ -1677,8 +1677,7 @@ static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma, BUG(); return VM_FAULT_SIGBUS; } -static inline int fixup_user_fault(struct task_struct *tsk, - struct mm_struct *mm, unsigned long address, +static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked) { /* should never happen if there's no MMU */ @@ -1704,11 +1703,11 @@ extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked); -long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked); -- cgit v1.2.3 From 466a62d7642f02f36d37d9b30c19a725538a01ca Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Jun 2020 07:35:33 +0100 Subject: mfd: core: Make a best effort attempt to match devices with the correct of_nodes Currently, when a child platform device (sometimes referred to as a sub-device) is registered via the Multi-Functional Device (MFD) API, the framework attempts to match the newly registered platform device with its associated Device Tree (OF) node. Until now, the device has been allocated the first node found with an identical OF compatible string. Unfortunately, if there are, say for example '3' devices which are to be handled by the same driver and therefore have the same compatible string, each of them will be allocated a pointer to the *first* node. An example Device Tree entry might look like this: mfd_of_test { compatible = "mfd,of-test-parent"; #address-cells = <0x02>; #size-cells = <0x02>; child@aaaaaaaaaaaaaaaa { compatible = "mfd,of-test-child"; reg = <0xaaaaaaaa 0xaaaaaaaa 0 0x11>, <0xbbbbbbbb 0xbbbbbbbb 0 0x22>; }; child@cccccccc { compatible = "mfd,of-test-child"; reg = <0x00000000 0xcccccccc 0 0x33>; }; child@dddddddd00000000 { compatible = "mfd,of-test-child"; reg = <0xdddddddd 0x00000000 0 0x44>; }; }; When used with example sub-device registration like this: static const struct mfd_cell mfd_of_test_cell[] = { OF_MFD_CELL("mfd-of-test-child", NULL, NULL, 0, 0, "mfd,of-test-child"), OF_MFD_CELL("mfd-of-test-child", NULL, NULL, 0, 1, "mfd,of-test-child"), OF_MFD_CELL("mfd-of-test-child", NULL, NULL, 0, 2, "mfd,of-test-child") }; ... the current implementation will result in all devices being allocated the first OF node found containing a matching compatible string: [0.712511] mfd-of-test-child mfd-of-test-child.0: Probing platform device: 0 [0.712710] mfd-of-test-child mfd-of-test-child.0: Using OF node: child@aaaaaaaaaaaaaaaa [0.713033] mfd-of-test-child mfd-of-test-child.1: Probing platform device: 1 [0.713381] mfd-of-test-child mfd-of-test-child.1: Using OF node: child@aaaaaaaaaaaaaaaa [0.713691] mfd-of-test-child mfd-of-test-child.2: Probing platform device: 2 [0.713889] mfd-of-test-child mfd-of-test-child.2: Using OF node: child@aaaaaaaaaaaaaaaa After this patch each device will be allocated a unique OF node: [0.712511] mfd-of-test-child mfd-of-test-child.0: Probing platform device: 0 [0.712710] mfd-of-test-child mfd-of-test-child.0: Using OF node: child@aaaaaaaaaaaaaaaa [0.713033] mfd-of-test-child mfd-of-test-child.1: Probing platform device: 1 [0.713381] mfd-of-test-child mfd-of-test-child.1: Using OF node: child@cccccccc [0.713691] mfd-of-test-child mfd-of-test-child.2: Probing platform device: 2 [0.713889] mfd-of-test-child mfd-of-test-child.2: Using OF node: child@dddddddd00000000 Which is fine if all OF nodes are identical. However if we wish to apply an attribute to particular device, we really need to ensure the correct OF node will be associated with the device containing the correct address. We accomplish this by matching the device's address expressed in DT with one provided during sub-device registration. Like this: static const struct mfd_cell mfd_of_test_cell[] = { OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 1, "mfd,of-test-child", 0xdddddddd00000000), OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 2, "mfd,of-test-child", 0xaaaaaaaaaaaaaaaa), OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 3, "mfd,of-test-child", 0x00000000cccccccc) }; This will ensure a specific device (designated here using the platform_ids; 1, 2 and 3) is matched with a particular OF node: [0.712511] mfd-of-test-child mfd-of-test-child.0: Probing platform device: 0 [0.712710] mfd-of-test-child mfd-of-test-child.0: Using OF node: child@dddddddd00000000 [0.713033] mfd-of-test-child mfd-of-test-child.1: Probing platform device: 1 [0.713381] mfd-of-test-child mfd-of-test-child.1: Using OF node: child@aaaaaaaaaaaaaaaa [0.713691] mfd-of-test-child mfd-of-test-child.2: Probing platform device: 2 [0.713889] mfd-of-test-child mfd-of-test-child.2: Using OF node: child@cccccccc This implementation is still not infallible, hence the mention of "best effort" in the commit subject. Since we have not *insisted* on the existence of 'reg' properties (in some scenarios they just do not make sense) and no device currently uses the new 'of_reg' attribute, we have to make an on-the-fly judgement call whether to associate the OF node anyway. Which we do in cases where parent drivers haven't specified a particular OF node to match to. So there is a *slight* possibility of the following result (note: the implementation here is convoluted, but it shows you one means by which this process can still break): /* * First entry will match to the first OF node with matching compatible * Second will fail, since the first took its OF node and is no longer available * Third will succeed */ static const struct mfd_cell mfd_of_test_cell[] = { OF_MFD_CELL("mfd-of-test-child", NULL, NULL, 0, 1, "mfd,of-test-child"), OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 2, "mfd,of-test-child", 0xaaaaaaaaaaaaaaaa), OF_MFD_CELL_REG("mfd-of-test-child", NULL, NULL, 0, 3, "mfd,of-test-child", 0x00000000cccccccc) }; The result: [0.753869] mfd-of-test-parent mfd_of_test: Registering 3 devices [0.756597] mfd-of-test-child: Failed to locate of_node [id: 2] [0.759999] mfd-of-test-child mfd-of-test-child.1: Probing platform device: 1 [0.760314] mfd-of-test-child mfd-of-test-child.1: Using OF node: child@aaaaaaaaaaaaaaaa [0.760908] mfd-of-test-child mfd-of-test-child.2: Probing platform device: 2 [0.761183] mfd-of-test-child mfd-of-test-child.2: No OF node associated with this device [0.761621] mfd-of-test-child mfd-of-test-child.3: Probing platform device: 3 [0.761899] mfd-of-test-child mfd-of-test-child.3: Using OF node: child@cccccccc We could code around this with some pre-parsing semantics, but the added complexity required to cover each and every corner-case is not justified. Merely patching the current failing (via this patch) is already working with some pretty small corner-cases. Other issues should be patched in the parent drivers which can be achieved simply by implementing OF_MFD_CELL_REG(). Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index ab76cdd06199..c437a73b43a3 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -78,6 +78,16 @@ struct mfd_cell { */ const char *of_compatible; + /* + * Address as defined in Device Tree. Used to compement 'of_compatible' + * (above) when matching OF nodes with devices that have identical + * compatible strings + */ + const u64 of_reg; + + /* Set to 'true' to use 'of_reg' (above) - allows for of_reg=0 */ + bool use_of_reg; + /* Matches ACPI */ const struct mfd_cell_acpi_match *acpi_match; -- cgit v1.2.3 From d097965bb6682afe1f8481923b16c033f708923b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Jun 2020 10:18:43 +0100 Subject: mfd: core: Fix formatting of MFD helpers Remove unnecessary '\'s and leading tabs. This will help to clean-up future diffs when subsequent changes are made. Hint: The aforementioned changes follow this patch. Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index c437a73b43a3..da9b066e1594 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -26,20 +26,20 @@ .id = (_id), \ } -#define OF_MFD_CELL(_name, _res, _pdata, _pdsize,_id, _compat) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, NULL) \ +#define OF_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _compat) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, NULL) -#define ACPI_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _match) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, _match) \ +#define ACPI_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _match) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, _match) -#define MFD_CELL_BASIC(_name, _res, _pdata, _pdsize, _id) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, NULL) \ +#define MFD_CELL_BASIC(_name, _res, _pdata, _pdsize, _id) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, NULL) -#define MFD_CELL_RES(_name, _res) \ - MFD_CELL_ALL(_name, _res, NULL, 0, 0, NULL, NULL) \ +#define MFD_CELL_RES(_name, _res) \ + MFD_CELL_ALL(_name, _res, NULL, 0, 0, NULL, NULL) -#define MFD_CELL_NAME(_name) \ - MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, NULL) \ +#define MFD_CELL_NAME(_name) \ + MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, NULL) struct irq_domain; struct property_entry; -- cgit v1.2.3 From 44e6171ed04a0cd0378e2503f03a444ebdd4e8e3 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Jun 2020 10:12:05 +0100 Subject: mfd: core: Add OF_MFD_CELL_REG() helper Extend current list of helpers to provide support for parent drivers wishing to match specific child devices to particular OF nodes. Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index da9b066e1594..6d68f44a26a1 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -14,7 +14,7 @@ #define MFD_RES_SIZE(arr) (sizeof(arr) / sizeof(struct resource)) -#define MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, _match)\ +#define MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, _of_reg, _use_of_reg, _match) \ { \ .name = (_name), \ .resources = (_res), \ @@ -22,24 +22,29 @@ .platform_data = (_pdata), \ .pdata_size = (_pdsize), \ .of_compatible = (_compat), \ + .of_reg = (_of_reg), \ + .use_of_reg = (_use_of_reg), \ .acpi_match = (_match), \ .id = (_id), \ } +#define OF_MFD_CELL_REG(_name, _res, _pdata, _pdsize, _id, _compat, _of_reg) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, _of_reg, true, NULL) + #define OF_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _compat) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, NULL) + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, 0, false, NULL) #define ACPI_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _match) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, _match) + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, 0, false, _match) #define MFD_CELL_BASIC(_name, _res, _pdata, _pdsize, _id) \ - MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, NULL) + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, 0, false, NULL) #define MFD_CELL_RES(_name, _res) \ - MFD_CELL_ALL(_name, _res, NULL, 0, 0, NULL, NULL) + MFD_CELL_ALL(_name, _res, NULL, 0, 0, NULL, 0, false, NULL) #define MFD_CELL_NAME(_name) \ - MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, NULL) + MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, 0, false, NULL) struct irq_domain; struct property_entry; -- cgit v1.2.3 From 7d2594cd1fa0b03b2746ce811926ee150a3a14fa Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 1 Jul 2020 23:23:48 +0200 Subject: mfd: smsc-ece1099: Remove driver This MFD driver has no user. The keypad driver of this device never made it into the kernel. Therefore, this driver is useless. Remove it. Signed-off-by: Michael Walle Cc: Sourav Poddar Signed-off-by: Lee Jones --- include/linux/mfd/smsc.h | 104 ----------------------------------------------- 1 file changed, 104 deletions(-) delete mode 100644 include/linux/mfd/smsc.h (limited to 'include/linux') diff --git a/include/linux/mfd/smsc.h b/include/linux/mfd/smsc.h deleted file mode 100644 index 83944124e886..000000000000 --- a/include/linux/mfd/smsc.h +++ /dev/null @@ -1,104 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * SMSC ECE1099 - * - * Copyright 2012 Texas Instruments Inc. - * - * Author: Sourav Poddar - */ - -#ifndef __LINUX_MFD_SMSC_H -#define __LINUX_MFD_SMSC_H - -#include - -#define SMSC_ID_ECE1099 1 -#define SMSC_NUM_CLIENTS 2 - -#define SMSC_BASE_ADDR 0x38 -#define OMAP_GPIO_SMSC_IRQ 151 - -#define SMSC_MAXGPIO 32 -#define SMSC_BANK(offs) ((offs) >> 3) -#define SMSC_BIT(offs) (1u << ((offs) & 0x7)) - -struct smsc { - struct device *dev; - struct i2c_client *i2c_clients[SMSC_NUM_CLIENTS]; - struct regmap *regmap; - int clk; - /* Stored chip id */ - int id; -}; - -struct smsc_gpio; -struct smsc_keypad; - -static inline int smsc_read(struct device *child, unsigned int reg, - unsigned int *dest) -{ - struct smsc *smsc = dev_get_drvdata(child->parent); - - return regmap_read(smsc->regmap, reg, dest); -} - -static inline int smsc_write(struct device *child, unsigned int reg, - unsigned int value) -{ - struct smsc *smsc = dev_get_drvdata(child->parent); - - return regmap_write(smsc->regmap, reg, value); -} - -/* Registers for SMSC */ -#define SMSC_RESET 0xF5 -#define SMSC_GRP_INT 0xF9 -#define SMSC_CLK_CTRL 0xFA -#define SMSC_WKUP_CTRL 0xFB -#define SMSC_DEV_ID 0xFC -#define SMSC_DEV_REV 0xFD -#define SMSC_VEN_ID_L 0xFE -#define SMSC_VEN_ID_H 0xFF - -/* CLK VALUE */ -#define SMSC_CLK_VALUE 0x13 - -/* Registers for function GPIO INPUT */ -#define SMSC_GPIO_DATA_IN_START 0x00 - -/* Registers for function GPIO OUPUT */ -#define SMSC_GPIO_DATA_OUT_START 0x05 - -/* Definitions for SMSC GPIO CONFIGURATION REGISTER*/ -#define SMSC_GPIO_INPUT_LOW 0x01 -#define SMSC_GPIO_INPUT_RISING 0x09 -#define SMSC_GPIO_INPUT_FALLING 0x11 -#define SMSC_GPIO_INPUT_BOTH_EDGE 0x19 -#define SMSC_GPIO_OUTPUT_PP 0x21 -#define SMSC_GPIO_OUTPUT_OP 0x31 - -#define GRP_INT_STAT 0xf9 -#define SMSC_GPI_INT 0x0f -#define SMSC_CFG_START 0x0A - -/* Registers for SMSC GPIO INTERRUPT STATUS REGISTER*/ -#define SMSC_GPIO_INT_STAT_START 0x32 - -/* Registers for SMSC GPIO INTERRUPT MASK REGISTER*/ -#define SMSC_GPIO_INT_MASK_START 0x37 - -/* Registers for SMSC function KEYPAD*/ -#define SMSC_KP_OUT 0x40 -#define SMSC_KP_IN 0x41 -#define SMSC_KP_INT_STAT 0x42 -#define SMSC_KP_INT_MASK 0x43 - -/* Definitions for keypad */ -#define SMSC_KP_KSO 0x70 -#define SMSC_KP_KSI 0x51 -#define SMSC_KSO_ALL_LOW 0x20 -#define SMSC_KP_SET_LOW_PWR 0x0B -#define SMSC_KP_SET_HIGH 0xFF -#define SMSC_KSO_EVAL 0x00 - -#endif /* __LINUX_MFD_SMSC_H */ -- cgit v1.2.3 From 091c6110862bce4e2380e353cb062dcb6a56bcb6 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Mon, 13 Jul 2020 10:38:57 +0100 Subject: mfd: da9063: Fix revision handling to correctly select reg tables The current implementation performs checking in the i2c_probe() function of the variant_code but does this immediately after the containing struct has been initialised as all zero. This means the check for variant code will always default to using the BB tables and will never select AD. The variant code is subsequently set by device_init() and later used by the RTC so really it's a little fortunate this mismatch works. This update adds raw I2C read access functionality to read the chip and variant/revision information (common to all revisions) so that it can subsequently correctly choose the proper regmap tables for real initialisation. Signed-off-by: Adam Thomson Signed-off-by: Lee Jones --- include/linux/mfd/da9063/registers.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h index ba706b0e28c2..1dbabf1b3cb8 100644 --- a/include/linux/mfd/da9063/registers.h +++ b/include/linux/mfd/da9063/registers.h @@ -292,8 +292,10 @@ #define DA9063_BB_REG_GP_ID_19 0x134 /* Chip ID and variant */ -#define DA9063_REG_CHIP_ID 0x181 -#define DA9063_REG_CHIP_VARIANT 0x182 +#define DA9063_REG_DEVICE_ID 0x181 +#define DA9063_REG_VARIANT_ID 0x182 +#define DA9063_REG_CUSTOMER_ID 0x183 +#define DA9063_REG_CONFIG_ID 0x184 /* * PMIC registers bits @@ -929,9 +931,6 @@ #define DA9063_RTC_CLOCK 0x40 #define DA9063_OUT_32K_EN 0x80 -/* DA9063_REG_CHIP_VARIANT */ -#define DA9063_CHIP_VARIANT_SHIFT 4 - /* DA9063_REG_BUCK_ILIM_A (addr=0x9A) */ #define DA9063_BIO_ILIM_MASK 0x0F #define DA9063_BMEM_ILIM_MASK 0xF0 @@ -1065,4 +1064,10 @@ #define DA9063_MON_A10_IDX_LDO9 0x04 #define DA9063_MON_A10_IDX_LDO10 0x05 +/* DA9063_REG_VARIANT_ID (addr=0x182) */ +#define DA9063_VARIANT_ID_VRC_SHIFT 0 +#define DA9063_VARIANT_ID_VRC_MASK 0x0F +#define DA9063_VARIANT_ID_MRC_SHIFT 4 +#define DA9063_VARIANT_ID_MRC_MASK 0xF0 + #endif /* _DA9063_REG_H */ -- cgit v1.2.3 From 9ece3601aed46f7b460b79cd7d60908b47b2b0d4 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Mon, 13 Jul 2020 10:38:59 +0100 Subject: mfd: da9063: Add support for latest DA silicon revision This update adds new regmap tables to support the latest DA silicon which will automatically be selected based on the chip and variant information read from the device. Signed-off-by: Adam Thomson Signed-off-by: Lee Jones --- include/linux/mfd/da9063/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index 5cd06ab26352..fa7a43f02f27 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -35,6 +35,7 @@ enum da9063_variant_codes { PMIC_DA9063_AD = 0x3, PMIC_DA9063_BB = 0x5, PMIC_DA9063_CA = 0x6, + PMIC_DA9063_DA = 0x7, }; /* Interrupts */ -- cgit v1.2.3 From 23ef2b642b85f03a109fbf5958134a5e40193dbd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Jul 2020 17:29:17 -0700 Subject: mfd: da9055: pdata.h: Drop a duplicated word Drop the repeated word "that" in a comment. Signed-off-by: Randy Dunlap Acked-by: Adam Thomson Signed-off-by: Lee Jones --- include/linux/mfd/da9055/pdata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h index eac48e483190..d3f126990ad0 100644 --- a/include/linux/mfd/da9055/pdata.h +++ b/include/linux/mfd/da9055/pdata.h @@ -35,7 +35,7 @@ struct da9055_pdata { int *gpio_rsel; /* * Regulator mode control bits value (GPI offset) that - * that controls the regulator state, 0 if not available. + * controls the regulator state, 0 if not available. */ enum gpio_select *reg_ren; /* -- cgit v1.2.3 From e7b85500885f2a70129f5d3a72153e23b37d0fe5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Jul 2020 17:29:31 -0700 Subject: mfd: max77693-private: Drop a duplicated word Drop the repeated word "in" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Lee Jones --- include/linux/mfd/max77693-private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index e798c81aec31..311f7d3d2323 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -131,7 +131,7 @@ enum max77693_pmic_reg { #define FLASH_INT_FLED1_SHORT BIT(3) #define FLASH_INT_OVER_CURRENT BIT(4) -/* Fast charge timer in in hours */ +/* Fast charge timer in hours */ #define DEFAULT_FAST_CHARGE_TIMER 4 /* microamps */ #define DEFAULT_TOP_OFF_THRESHOLD_CURRENT 150000 -- cgit v1.2.3 From 114294d276279d6cda81f9c685452239ea89cdb8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Jul 2020 11:54:58 +0100 Subject: mfd: mfd-core: Add mechanism for removal of a subset of children Currently, the only way to remove MFD children is with a call to mfd_remove_devices, which will remove all the children. Under some circumstances it is useful to remove only a subset of the child devices. For example if some additional clean up is required between removal of certain child devices. To accomplish this a level field is added to mfd_cell, the normal mfd_remove_devices is modified to not remove devices that are set to a higher level and a corresponding mfd_remove_devices_late function is added to remove those children. See further discussion at: https://lore.kernel.org/lkml/20200616075834.GF2608702@dell/ Suggested-by: Lee Jones Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 6d68f44a26a1..4b35baa14d30 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -46,6 +46,9 @@ #define MFD_CELL_NAME(_name) \ MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, 0, false, NULL) +#define MFD_DEP_LEVEL_NORMAL 0 +#define MFD_DEP_LEVEL_HIGH 1 + struct irq_domain; struct property_entry; @@ -63,6 +66,7 @@ struct mfd_cell_acpi_match { struct mfd_cell { const char *name; int id; + int level; int (*enable)(struct platform_device *dev); int (*disable)(struct platform_device *dev); @@ -150,6 +154,7 @@ static inline int mfd_add_hotplug_devices(struct device *parent, } extern void mfd_remove_devices(struct device *parent); +extern void mfd_remove_devices_late(struct device *parent); extern int devm_mfd_add_devices(struct device *dev, int id, const struct mfd_cell *cells, int n_devs, -- cgit v1.2.3 From 4f4ed4543e2096dc0158ff79bd6d8bc09e27fa93 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 22 Jul 2020 21:24:54 +0200 Subject: mfd: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Acked-by: Rob Herring Signed-off-by: Lee Jones --- include/linux/mfd/hi6421-pmic.h | 2 +- include/linux/mfd/lp873x.h | 2 +- include/linux/mfd/lp87565.h | 2 +- include/linux/mfd/ti_am335x_tscadc.h | 2 +- include/linux/mfd/tps65086.h | 2 +- include/linux/mfd/tps65217.h | 2 +- include/linux/mfd/tps65218.h | 2 +- include/linux/mfd/tps65912.h | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/hi6421-pmic.h b/include/linux/mfd/hi6421-pmic.h index bbc64484c021..2cadf8897c64 100644 --- a/include/linux/mfd/hi6421-pmic.h +++ b/include/linux/mfd/hi6421-pmic.h @@ -5,7 +5,7 @@ * Copyright (c) <2011-2014> HiSilicon Technologies Co., Ltd. * http://www.hisilicon.com * Copyright (c) <2013-2014> Linaro Ltd. - * http://www.linaro.org + * https://www.linaro.org * * Author: Guodong Xu */ diff --git a/include/linux/mfd/lp873x.h b/include/linux/mfd/lp873x.h index edbec8350a49..5546688c7da7 100644 --- a/include/linux/mfd/lp873x.h +++ b/include/linux/mfd/lp873x.h @@ -1,7 +1,7 @@ /* * Functions to access LP873X power management chip. * - * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2016 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/mfd/lp87565.h b/include/linux/mfd/lp87565.h index ce965354bbad..43716aca46fa 100644 --- a/include/linux/mfd/lp87565.h +++ b/include/linux/mfd/lp87565.h @@ -2,7 +2,7 @@ /* * Functions to access LP87565 power management chip. * - * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef __LINUX_MFD_LP87565_H diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 483168403ae5..ffc091b77633 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -4,7 +4,7 @@ /* * TI Touch Screen / ADC MFD driver * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/mfd/tps65086.h b/include/linux/mfd/tps65086.h index a228ae4c88d9..e0a417e53766 100644 --- a/include/linux/mfd/tps65086.h +++ b/include/linux/mfd/tps65086.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * * This program is free software; you can redistribute it and/or diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h index b5dd108421c8..db7091824ed0 100644 --- a/include/linux/mfd/tps65217.h +++ b/include/linux/mfd/tps65217.h @@ -3,7 +3,7 @@ * * Functions to access TPS65217 power management chip. * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2011 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index b0470c35162d..f4ca367e3473 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -3,7 +3,7 @@ * * Functions to access TPS65219 power management chip. * - * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 as diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h index b25d0297ba88..7943e413deae 100644 --- a/include/linux/mfd/tps65912.h +++ b/include/linux/mfd/tps65912.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Andrew F. Davis * * This program is free software; you can redistribute it and/or -- cgit v1.2.3 From 5848dc5b1b76d83599dcec1b39f188a7cbdca7e2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 14 Aug 2020 15:22:43 -0700 Subject: dma-debug: remove debug_dma_assert_idle() function This remoes the code from the COW path to call debug_dma_assert_idle(), which was added many years ago. Google shows that it hasn't caught anything in the 6+ years we've had it apart from a false positive, and Hugh just noticed how it had a very unfortunate spinlock serialization in the COW path. He fixed that issue the previous commit (a85ffd59bd36: "dma-debug: fix debug_dma_assert_idle(), use rcu_read_lock()"), but let's see if anybody even notices when we remove this function entirely. NOTE! We keep the dma tracking infrastructure that was added by the commit that introduced it. Partly to make it easier to resurrect this debug code if we ever deside to, and partly because that tracking by pfn and offset looks quite reasonable. The problem with this debug code was simply that it was expensive and didn't seem worth it, not that it was wrong per se. Acked-by: Dan Williams Acked-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/linux/dma-debug.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 4208f94d93f7..7b3b04ba60f3 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -67,8 +67,6 @@ extern void debug_dma_sync_sg_for_device(struct device *dev, extern void debug_dma_dump_mappings(struct device *dev); -extern void debug_dma_assert_idle(struct page *page); - #else /* CONFIG_DMA_API_DEBUG */ static inline void dma_debug_add_bus(struct bus_type *bus) @@ -157,10 +155,6 @@ static inline void debug_dma_dump_mappings(struct device *dev) { } -static inline void debug_dma_assert_idle(struct page *page) -{ -} - #endif /* CONFIG_DMA_API_DEBUG */ #endif /* __DMA_DEBUG_H */ -- cgit v1.2.3 From 1378a5ee451a5e87d0d8dd6356a0b7844db231f6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:23 -0700 Subject: mm: store compound_nr as well as compound_order Patch series "THP prep patches". These are some generic cleanups and improvements, which I would like merged into mmotm soon. The first one should be a performance improvement for all users of compound pages, and the others are aimed at getting code to compile away when CONFIG_TRANSPARENT_HUGEPAGE is disabled (ie small systems). Also better documented / less confusing than the current prefix mixture of compound, hpage and thp. This patch (of 7): This removes a few instructions from functions which need to know how many pages are in a compound page. The storage used is either page->mapping on 64-bit or page->index on 32-bit. Both of these are fine to overlay on tail pages. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-1-willy@infradead.org Link: http://lkml.kernel.org/r/20200629151959.15779-2-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 ++++- include/linux/mm_types.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index e7602a3bcef1..10b1e6e5f885 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -922,12 +922,15 @@ static inline int compound_pincount(struct page *page) static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; + page[1].compound_nr = 1U << order; } /* Returns the number of pages in this potentially compound page. */ static inline unsigned long compound_nr(struct page *page) { - return 1UL << compound_order(page); + if (!PageHead(page)) + return 1; + return page[1].compound_nr; } /* Returns the number of bytes in this potentially compound page. */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0277fbab7c93..496c3ff97cce 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -134,6 +134,7 @@ struct page { unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; + unsigned int compound_nr; /* 1 << compound_order */ }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ -- cgit v1.2.3 From 419015675fef6c4b28689bd2ace559564c2e106c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:26 -0700 Subject: mm: move page-flags include to top of file Give up on the notion that we can remove page-flags.h from mm.h. There are currently 14 inline functions which use a PageFoo function. Also, two of the files directly included by mm.h include page-flags.h themselves, and there are probably more indirect inclusions. So just include it at the top like any other header file. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-3-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 10b1e6e5f885..fd0cd4e93029 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -668,11 +669,6 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; -/* - * FIXME: take this include out, include page-flags.h in - * files which need it (119 of them) - */ -#include #include /* -- cgit v1.2.3 From 6ffbb45826f5d9ae09aa60cd88594b7816c96190 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:30 -0700 Subject: mm: add thp_order This function returns the order of a transparent huge page. It compiles to 0 if CONFIG_TRANSPARENT_HUGEPAGE is disabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-4-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 467302056e17..9521cfdf18ca 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -258,6 +258,19 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, else return NULL; } + +/** + * thp_order - Order of a transparent huge page. + * @page: Head page of a transparent huge page. + */ +static inline unsigned int thp_order(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + if (PageHead(page)) + return HPAGE_PMD_ORDER; + return 0; +} + static inline int hpage_nr_pages(struct page *page) { if (unlikely(PageTransHuge(page))) @@ -317,6 +330,12 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) +static inline unsigned int thp_order(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return 0; +} + static inline int hpage_nr_pages(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); -- cgit v1.2.3 From af3bbc12df80e8c279b94c752b6edca29841f4f5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:33 -0700 Subject: mm: add thp_size This function returns the number of bytes in a THP. It is like page_size(), but compiles to just PAGE_SIZE if CONFIG_TRANSPARENT_HUGEPAGE is disabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-5-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9521cfdf18ca..9b33ac774fdd 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -462,4 +462,15 @@ static inline bool thp_migration_supported(void) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +/** + * thp_size - Size of a transparent huge page. + * @page: Head page of a transparent huge page. + * + * Return: Number of bytes in this page. + */ +static inline unsigned long thp_size(struct page *page) +{ + return PAGE_SIZE << thp_order(page); +} + #endif /* _LINUX_HUGE_MM_H */ -- cgit v1.2.3 From 6c357848b44b4016ca422178aa368a7472245f6f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:37 -0700 Subject: mm: replace hpage_nr_pages with thp_nr_pages The thp prefix is more frequently used than hpage and we should be consistent between the various functions. [akpm@linux-foundation.org: fix mm/migrate.c] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: Mike Kravetz Cc: David Hildenbrand Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-6-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 13 +++++++++---- include/linux/mm_inline.h | 6 +++--- include/linux/pagemap.h | 6 +++--- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9b33ac774fdd..229f986d535a 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -271,9 +271,14 @@ static inline unsigned int thp_order(struct page *page) return 0; } -static inline int hpage_nr_pages(struct page *page) +/** + * thp_nr_pages - The number of regular pages in this huge page. + * @page: The head page of a huge page. + */ +static inline int thp_nr_pages(struct page *page) { - if (unlikely(PageTransHuge(page))) + VM_BUG_ON_PGFLAGS(PageTail(page), page); + if (PageHead(page)) return HPAGE_PMD_NR; return 1; } @@ -336,9 +341,9 @@ static inline unsigned int thp_order(struct page *page) return 0; } -static inline int hpage_nr_pages(struct page *page) +static inline int thp_nr_pages(struct page *page) { - VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PGFLAGS(PageTail(page), page); return 1; } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 219bef41d87c..8fc71e9d7bb0 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -48,14 +48,14 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, static __always_inline void add_page_to_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { - update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add(&page->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list_tail(struct page *page, struct lruvec *lruvec, enum lru_list lru) { - update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add_tail(&page->lru, &lruvec->lists[lru]); } @@ -63,7 +63,7 @@ static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { list_del(&page->lru); - update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page)); + update_lru_size(lruvec, lru, page_zonenum(page), -thp_nr_pages(page)); } /** diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d1f4eff605ad..7de11dcd534d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -381,7 +381,7 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) if (PageHuge(head)) return head; - return head + (index & (hpage_nr_pages(head) - 1)); + return head + (index & (thp_nr_pages(head) - 1)); } struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); @@ -773,7 +773,7 @@ static inline struct page *readahead_page(struct readahead_control *rac) page = xa_load(&rac->mapping->i_pages, rac->_index); VM_BUG_ON_PAGE(!PageLocked(page), page); - rac->_batch_count = hpage_nr_pages(page); + rac->_batch_count = thp_nr_pages(page); return page; } @@ -796,7 +796,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; - rac->_batch_count += hpage_nr_pages(page); + rac->_batch_count += thp_nr_pages(page); /* * The page cache isn't using multi-index entries yet, -- cgit v1.2.3 From 2be1d71841b7ecfb01ce4c59f6e1d082c3c18a8a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:40 -0700 Subject: mm: add thp_head This is like compound_head() but compiles away when CONFIG_TRANSPARENT_HUGEPAGE is not enabled. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-7-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 229f986d535a..8a8bc46a2432 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -259,6 +259,15 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, return NULL; } +/** + * thp_head - Head page of a transparent huge page. + * @page: Any page (tail, head or regular) found in the page cache. + */ +static inline struct page *thp_head(struct page *page) +{ + return compound_head(page); +} + /** * thp_order - Order of a transparent huge page. * @page: Head page of a transparent huge page. @@ -335,6 +344,12 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) +static inline struct page *thp_head(struct page *page) +{ + VM_BUG_ON_PGFLAGS(PageTail(page), page); + return page; +} + static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); -- cgit v1.2.3 From ee6c400f5c05459b8c5f2884a176a1287ce2f68f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 Aug 2020 17:30:43 -0700 Subject: mm: introduce offset_in_thp Mirroring offset_in_page(), this gives you the offset within this particular page, no matter what size page it is. It optimises down to offset_in_page() if CONFIG_TRANSPARENT_HUGEPAGE is not set. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: William Kucharski Reviewed-by: Zi Yan Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20200629151959.15779-8-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fd0cd4e93029..2e7ec3ee34cf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1594,6 +1594,7 @@ static inline void clear_page_pfmemalloc(struct page *page) extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) +#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) /* * Flags passed to show_mem() and show_free_areas() to suppress output in -- cgit v1.2.3 From 88db0aa2421666d2f73486d15b239a4521983d55 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 14 Aug 2020 17:31:07 -0700 Subject: all arch: remove system call sys_sysctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"), sys_sysctl is actually unavailable: any input can only return an error. We have been warning about people using the sysctl system call for years and believe there are no more users. Even if there are users of this interface if they have not complained or fixed their code by now they probably are not going to, so there is no point in warning them any longer. So completely remove sys_sysctl on all architectures. [nixiaoming@huawei.com: s390: fix build error for sys_call_table_emu] Link: http://lkml.kernel.org/r/20200618141426.16884-1-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Signed-off-by: Andrew Morton Acked-by: Will Deacon [arm/arm64] Acked-by: "Eric W. Biederman" Cc: Aleksa Sarai Cc: Alexander Shishkin Cc: Al Viro Cc: Andi Kleen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bin Meng Cc: Borislav Petkov Cc: Brian Gerst Cc: Catalin Marinas Cc: chenzefeng Cc: Christian Borntraeger Cc: Christian Brauner Cc: Chris Zankel Cc: David Howells Cc: David S. Miller Cc: Diego Elio Pettenò Cc: Dmitry Vyukov Cc: Dominik Brodowski Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Iurii Zaikin Cc: Ivan Kokshaysky Cc: James Bottomley Cc: Jens Axboe Cc: Jiri Olsa Cc: Kars de Jong Cc: Kees Cook Cc: Krzysztof Kozlowski Cc: Luis Chamberlain Cc: Marco Elver Cc: Mark Rutland Cc: Martin K. Petersen Cc: Masahiro Yamada Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Miklos Szeredi Cc: Minchan Kim Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Nick Piggin Cc: Oleg Nesterov Cc: Olof Johansson Cc: Paul Burton Cc: "Paul E. McKenney" Cc: Paul Mackerras Cc: Peter Zijlstra (Intel) Cc: Randy Dunlap Cc: Ravi Bangoria Cc: Richard Henderson Cc: Rich Felker Cc: Russell King Cc: Sami Tolvanen Cc: Sargun Dhillon Cc: Stephen Rothwell Cc: Sudeep Holla Cc: Sven Schnelle Cc: Thiago Jung Bauermann Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Vlastimil Babka Cc: Yoshinori Sato Cc: Zhou Yanjie Link: http://lkml.kernel.org/r/20200616030734.87257-1-nixiaoming@huawei.com Signed-off-by: Linus Torvalds --- include/linux/compat.h | 1 - include/linux/syscalls.h | 2 -- include/linux/sysctl.h | 6 ++---- 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index c4255d8a4a8a..d38c4d7e83bd 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -851,7 +851,6 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len, unsigned flags); -asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); /* obsolete: fs/readdir.c */ asmlinkage long compat_sys_old_readdir(unsigned int fd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index dc2b827c81e5..75ac7f8ae93c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -47,7 +47,6 @@ struct stat64; struct statfs; struct statfs64; struct statx; -struct __sysctl_args; struct sysinfo; struct timespec; struct __kernel_old_timeval; @@ -1117,7 +1116,6 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned); asmlinkage long sys_bdflush(int func, long data); asmlinkage long sys_oldumount(char __user *name); asmlinkage long sys_uselib(const char __user *library); -asmlinkage long sys_sysctl(struct __sysctl_args __user *args); asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2); asmlinkage long sys_fork(void); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 50bb7f383a1b..51298a4f4623 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -74,15 +74,13 @@ int proc_do_static_key(struct ctl_table *table, int write, void *buffer, * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. * - * The table's mode will be honoured both for sys_sysctl(2) and - * proc-fs access. + * The table's mode will be honoured for proc-fs access. * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. A * null procname disables /proc mirroring at this node. * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table + * The data and maxlen fields of the ctl_table * struct enable minimal validation of the values being written to be * performed, and the mode field allows minimal authentication. * -- cgit v1.2.3 From e0e3f42fd96cf830c61aa720f0abb4eef41c5ce3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:37 -0700 Subject: mm/memcontrol: fix a data race in scan count struct mem_cgroup_per_node mz.lru_zone_size[zone_idx][lru] could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in lruvec_lru_size / mem_cgroup_update_lru_size write to 0xffff9c804ca285f8 of 8 bytes by task 50951 on cpu 12: mem_cgroup_update_lru_size+0x11c/0x1d0 mem_cgroup_update_lru_size at mm/memcontrol.c:1266 isolate_lru_pages+0x6a9/0xf30 shrink_active_list+0x123/0xcc0 shrink_lruvec+0x8fd/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_vma+0x8a/0x2c0 do_anonymous_page+0x170/0x700 __handle_mm_fault+0xc9f/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 read to 0xffff9c804ca285f8 of 8 bytes by task 50964 on cpu 95: lruvec_lru_size+0xbb/0x270 mem_cgroup_get_zone_lru_size at include/linux/memcontrol.h:536 (inlined by) lruvec_lru_size at mm/vmscan.c:326 shrink_lruvec+0x1d0/0x1380 shrink_node+0x317/0xd80 do_try_to_free_pages+0x1f7/0xa10 try_to_free_pages+0x26c/0x5e0 __alloc_pages_slowpath+0x458/0x1290 __alloc_pages_nodemask+0x3bb/0x450 alloc_pages_current+0xa6/0x120 alloc_slab_page+0x3b1/0x540 allocate_slab+0x70/0x660 new_slab+0x46/0x70 ___slab_alloc+0x4ad/0x7d0 __slab_alloc+0x43/0x70 kmem_cache_alloc+0x2c3/0x420 getname_flags+0x4c/0x230 getname+0x22/0x30 do_sys_openat2+0x205/0x3b0 do_sys_open+0x9a/0xf0 __x64_sys_openat+0x62/0x80 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported by Kernel Concurrency Sanitizer on: CPU: 95 PID: 50964 Comm: cc1 Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 The write is under lru_lock, but the read is done as lockless. The scan count is used to determine how aggressively the anon and file LRU lists should be scanned. Load tearing could generate an inefficient heuristic, so fix it by adding READ_ONCE() for the read. Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20200206034945.2481-1-cai@lca.pw Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 385237e4cb44..d0b036123c6a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -630,7 +630,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, struct mem_cgroup_per_node *mz; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return mz->lru_zone_size[zone_idx][lru]; + return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } void mem_cgroup_handle_over_high(void); -- cgit v1.2.3 From c403f6a3a792a6601185497c12b0bdf4be880439 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 14 Aug 2020 17:31:53 -0700 Subject: mm: annotate a data race in page_zonenum() BUG: KCSAN: data-race in page_cpupid_xchg_last / put_page write (marked) to 0xfffffc0d48ec1a00 of 8 bytes by task 91442 on cpu 3: page_cpupid_xchg_last+0x51/0x80 page_cpupid_xchg_last at mm/mmzone.c:109 (discriminator 11) wp_page_reuse+0x3e/0xc0 wp_page_reuse at mm/memory.c:2453 do_wp_page+0x472/0x7b0 do_wp_page at mm/memory.c:2798 __handle_mm_fault+0xcb0/0xd00 handle_pte_fault at mm/memory.c:4049 (inlined by) __handle_mm_fault at mm/memory.c:4163 handle_mm_fault+0xfc/0x2f0 handle_mm_fault at mm/memory.c:4200 do_page_fault+0x263/0x6f9 do_user_addr_fault at arch/x86/mm/fault.c:1465 (inlined by) do_page_fault at arch/x86/mm/fault.c:1539 page_fault+0x34/0x40 read to 0xfffffc0d48ec1a00 of 8 bytes by task 94817 on cpu 69: put_page+0x15a/0x1f0 page_zonenum at include/linux/mm.h:923 (inlined by) is_zone_device_page at include/linux/mm.h:929 (inlined by) page_is_devmap_managed at include/linux/mm.h:948 (inlined by) put_page at include/linux/mm.h:1023 wp_page_copy+0x571/0x930 wp_page_copy at mm/memory.c:2615 do_wp_page+0x107/0x7b0 __handle_mm_fault+0xcb0/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40 Reported by Kernel Concurrency Sanitizer on: CPU: 69 PID: 94817 Comm: systemd-udevd Tainted: G W O L 5.5.0-next-20200204+ #6 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 A page never changes its zone number. The zone number happens to be stored in the same word as other bits which are modified, but the zone number bits will never be modified by any other write, so it can accept a reload of the zone bits after an intervening write and it don't need to use READ_ONCE(). Thus, annotate this data race using ASSERT_EXCLUSIVE_BITS() to also assert that there are no concurrent writes to it. Suggested-by: Marco Elver Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Cc: Paul E. McKenney Cc: David Hildenbrand Cc: Jan Kara Cc: John Hubbard Cc: Ira Weiny Cc: Dan Williams Link: http://lkml.kernel.org/r/1581619089-14472-1-git-send-email-cai@lca.pw Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e7ec3ee34cf..1983e08f5906 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1067,6 +1067,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); static inline enum zone_type page_zonenum(const struct page *page) { + ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } -- cgit v1.2.3 From 8f28ca6bd8211214faf717677bbffe375c2a6072 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 14 Aug 2020 17:32:07 -0700 Subject: iomap: constify ioreadX() iomem argument (as in generic implementation) Patch series "iomap: Constify ioreadX() iomem argument", v3. The ioread8/16/32() and others have inconsistent interface among the architectures: some taking address as const, some not. It seems there is nothing really stopping all of them to take pointer to const. This patch (of 4): The ioreadX() and ioreadX_rep() helpers have inconsistent interface. On some architectures void *__iomem address argument is a pointer to const, on some not. Implementations of ioreadX() do not modify the memory under the address so they can be converted to a "const" version for const-safety and consistency among architectures. [krzk@kernel.org: sh: clk: fix assignment from incompatible pointer type for ioreadX()] Link: http://lkml.kernel.org/r/20200723082017.24053-1-krzk@kernel.org [akpm@linux-foundation.org: fix drivers/mailbox/bcm-pdc-mailbox.c] Link: http://lkml.kernel.org/r/202007132209.Rxmv4QyS%25lkp@intel.com Suggested-by: Geert Uytterhoeven Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Reviewed-by: Geert Uytterhoeven Reviewed-by: Arnd Bergmann Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Yoshinori Sato Cc: Rich Felker Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Dave Jiang Cc: Jon Mason Cc: Allen Hubbe Cc: "Michael S. Tsirkin" Cc: Jason Wang Link: http://lkml.kernel.org/r/20200709072837.5869-1-krzk@kernel.org Link: http://lkml.kernel.org/r/20200709072837.5869-2-krzk@kernel.org Signed-off-by: Linus Torvalds --- include/linux/io-64-nonatomic-hi-lo.h | 4 ++-- include/linux/io-64-nonatomic-lo-hi.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index ae21b72cce85..f32522bb3aa5 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -57,7 +57,7 @@ static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr) #ifndef ioread64_hi_lo #define ioread64_hi_lo ioread64_hi_lo -static inline u64 ioread64_hi_lo(void __iomem *addr) +static inline u64 ioread64_hi_lo(const void __iomem *addr) { u32 low, high; @@ -79,7 +79,7 @@ static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) #ifndef ioread64be_hi_lo #define ioread64be_hi_lo ioread64be_hi_lo -static inline u64 ioread64be_hi_lo(void __iomem *addr) +static inline u64 ioread64be_hi_lo(const void __iomem *addr) { u32 low, high; diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index faaa842dbdb9..448a21435dba 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -57,7 +57,7 @@ static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr) #ifndef ioread64_lo_hi #define ioread64_lo_hi ioread64_lo_hi -static inline u64 ioread64_lo_hi(void __iomem *addr) +static inline u64 ioread64_lo_hi(const void __iomem *addr) { u32 low, high; @@ -79,7 +79,7 @@ static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) #ifndef ioread64be_lo_hi #define ioread64be_lo_hi ioread64be_lo_hi -static inline u64 ioread64be_lo_hi(void __iomem *addr) +static inline u64 ioread64be_lo_hi(const void __iomem *addr) { u32 low, high; -- cgit v1.2.3 From 53bf2b0e4e4c1ff0a957474237f9dcd20036ca54 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:16 +0530 Subject: firmware: ti_sci: Add support for getting resource with subtype With SYSFW ABI 3.0 changes, interrupts coming out of an interrupt controller is identified by a type and it is consistent across SoCs. Similarly global events for Interrupt aggregator. So add an API to get resource range using a resource type. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-4-lokeshvutla@ti.com --- include/linux/soc/ti/ti_sci_protocol.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 49c5d29cd33c..cf27b080e148 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -220,6 +220,9 @@ struct ti_sci_rm_core_ops { u16 *range_start, u16 *range_num); }; +#define TI_SCI_RESASG_SUBTYPE_IR_OUTPUT 0 +#define TI_SCI_RESASG_SUBTYPE_IA_VINT 0xa +#define TI_SCI_RESASG_SUBTYPE_GLOBAL_EVENT_SEVT 0xd /** * struct ti_sci_rm_irq_ops: IRQ management operations * @set_irq: Set an IRQ route between the requested source @@ -556,6 +559,9 @@ u32 ti_sci_get_num_resources(struct ti_sci_resource *res); struct ti_sci_resource * devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, struct device *dev, u32 dev_id, char *of_prop); +struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type); #else /* CONFIG_TI_SCI_PROTOCOL */ @@ -609,6 +615,13 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, { return ERR_PTR(-EINVAL); } + +static inline struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type); +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TI_SCI_PROTOCOL */ #endif /* __TISCI_PROTOCOL_H */ -- cgit v1.2.3