From a85857633b04d57f4524cca0a2bfaf87b2543f9f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 25 Apr 2018 13:26:23 -0400
Subject: nfsd4: support change_attr_type attribute

The change attribute is what is used by clients to revalidate their
caches.  Our server may use i_version or ctime for that purpose.  Those
choices behave slightly differently, and it may be useful to the client
to know which we're using.  This attribute tells the client that.  The
Linux client doesn't yet use this attribute yet, though.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfs4.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 57ffaa20d564..0877ed333733 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -374,6 +374,13 @@ enum lock_type4 {
 	NFS4_WRITEW_LT = 4
 };
 
+enum change_attr_type4 {
+	NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+	NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
+	NFS4_CHANGE_TYPE_IS_UNDEFINED = 4
+};
 
 /* Mandatory Attributes */
 #define FATTR4_WORD0_SUPPORTED_ATTRS    (1UL << 0)
@@ -441,6 +448,7 @@ enum lock_type4 {
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_CLONE_BLKSIZE	(1UL << 13)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE	(1UL << 15)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
 #define FATTR4_WORD2_MODE_UMASK		(1UL << 17)
 
-- 
cgit v1.2.3


From f2b1d2f94af887c91bb8a0cfb495e546331bc5ed Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 30 May 2018 17:25:13 +0200
Subject: soc: renesas: rcar-sysc: Provide helpers to power up/down CPUs

Provide helpers to control CPU power areas from platform code, taking
just a CPU index.  This will avoid having to pass full CPU power area
parameter blocks, and thus duplicating information already provided by
SoC-specific SYSC drivers.

This will be used on R-Car H1 only.
Later R-Car generations rely on APMU/RST for CPU power area control.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 include/linux/soc/renesas/rcar-sysc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h
index 8a6086d2e9c3..9020da2111fd 100644
--- a/include/linux/soc/renesas/rcar-sysc.h
+++ b/include/linux/soc/renesas/rcar-sysc.h
@@ -13,5 +13,7 @@ struct rcar_sysc_ch {
 int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch);
 int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch);
 void rcar_sysc_init(phys_addr_t base, u32 syscier);
+int rcar_sysc_power_down_cpu(unsigned int cpu);
+int rcar_sysc_power_up_cpu(unsigned int cpu);
 
 #endif /* __LINUX_SOC_RENESAS_RCAR_SYSC_H__ */
-- 
cgit v1.2.3


From 7e8a50df26f4e7003d09f7e8d1e57fbbb7ebb750 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 30 May 2018 17:25:16 +0200
Subject: soc: renesas: rcar-sysc: Drop legacy handling

Now the R-Car platform code no longer supports DTBs lacking a SYSC
device node in DT, all legacy handling can be dropped from the R-Car
SYSC driver:
  - Make rcar_sysc_ch private to the driver,
  - Make rcar_sysc_power_{down,up}() static (they have been replaced by
    rcar_sysc_power_{down,up}_cpu()),
  - Remove the legacy wrapper rcar_sysc_init(), and the check for double
    initialization (only the early_initcall is left).

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 include/linux/soc/renesas/rcar-sysc.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h
index 9020da2111fd..00fae6fd234d 100644
--- a/include/linux/soc/renesas/rcar-sysc.h
+++ b/include/linux/soc/renesas/rcar-sysc.h
@@ -2,17 +2,6 @@
 #ifndef __LINUX_SOC_RENESAS_RCAR_SYSC_H__
 #define __LINUX_SOC_RENESAS_RCAR_SYSC_H__
 
-#include <linux/types.h>
-
-struct rcar_sysc_ch {
-	u16 chan_offs;
-	u8 chan_bit;
-	u8 isr_bit;
-};
-
-int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch);
-int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch);
-void rcar_sysc_init(phys_addr_t base, u32 syscier);
 int rcar_sysc_power_down_cpu(unsigned int cpu);
 int rcar_sysc_power_up_cpu(unsigned int cpu);
 
-- 
cgit v1.2.3


From aece27a2f01be4bb7683790f69cd1bed3a0929a2 Mon Sep 17 00:00:00 2001
From: Samuel Morris <samorris@lexmark.com>
Date: Tue, 29 May 2018 10:06:12 +0000
Subject: ata: ahci_platform: allow disabling of hotplug to save power

A number of resources remain powered to support hotplug. On platforms
I've worked with, allowing the ahci_platform to suspend saves about
150mW. This patch enables rpm and allows the device to be auto-suspended
through sysfs.

Signed-off-by: Samuel Morris <samorris@lexmark.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/ahci_platform.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 1b0a17b22cd3..6396e6982103 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -42,5 +42,7 @@ int ahci_platform_suspend_host(struct device *dev);
 int ahci_platform_resume_host(struct device *dev);
 int ahci_platform_suspend(struct device *dev);
 int ahci_platform_resume(struct device *dev);
+int ahci_platform_runtime_suspend(struct device *dev);
+int ahci_platform_runtime_resume(struct device *dev);
 
 #endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From 77bc8c28ddac90287bc42c129002eb703288d550 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 18 Jun 2018 17:32:30 +0200
Subject: ARM: mvebu: convert secondary CPU clock sync to hotplug state

The current call site in boot_secondary is causing sleep in invalid context
warnings, as this part of the code is running with interrrupts disabled and
some of the calls into the clock framework might sleep on a mutex.

Convert the secondary CPU clock sync to a hotplug state, which allows to
call it from a sleepable context.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba387152..fa54e5fbea38 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -143,6 +143,7 @@ enum cpuhp_state {
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
 	CPUHP_AP_IRQ_AFFINITY_ONLINE,
+	CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
 	CPUHP_AP_PERF_ONLINE,
 	CPUHP_AP_PERF_X86_ONLINE,
 	CPUHP_AP_PERF_X86_UNCORE_ONLINE,
-- 
cgit v1.2.3


From dc8fbeb0ffde1f2395449006019e2c89c177df50 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Fri, 22 Jun 2018 00:41:26 +0200
Subject: ARM: OMAP1: Get rid of <mach/ams-delta-fiq.h>

Split the header file into two parts and move them to directories where
they belong.

Information on internal structure of FIQ buffer is moved to
<linux/platform_data/ams-delta-fiq.h> for ams-delta-serio driver use.

Other information used by ams-delta board init file and FIQ code is
made local to mach-omap1 root directory.

Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/platform_data/ams-delta-fiq.h | 62 +++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 include/linux/platform_data/ams-delta-fiq.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/ams-delta-fiq.h b/include/linux/platform_data/ams-delta-fiq.h
new file mode 100644
index 000000000000..dc0f835ea918
--- /dev/null
+++ b/include/linux/platform_data/ams-delta-fiq.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * include/linux/platform_data/ams-delta-fiq.h
+ *
+ * Taken from the original Amstrad modifications to fiq.h
+ *
+ * Copyright (c) 2004 Amstrad Plc
+ * Copyright (c) 2006 Matt Callow
+ * Copyright (c) 2010 Janusz Krzysztofik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_PLATFORM_DATA_AMS_DELTA_FIQ_H
+#define __LINUX_PLATFORM_DATA_AMS_DELTA_FIQ_H
+
+/*
+ * These are the offsets from the beginning of the fiq_buffer. They are put here
+ * since the buffer and header need to be accessed by drivers servicing devices
+ * which generate GPIO interrupts - e.g. keyboard, modem, hook switch.
+ */
+#define FIQ_MASK		 0
+#define FIQ_STATE		 1
+#define FIQ_KEYS_CNT		 2
+#define FIQ_TAIL_OFFSET		 3
+#define FIQ_HEAD_OFFSET		 4
+#define FIQ_BUF_LEN		 5
+#define FIQ_KEY			 6
+#define FIQ_MISSED_KEYS		 7
+#define FIQ_BUFFER_START	 8
+#define FIQ_GPIO_INT_MASK	 9
+#define FIQ_KEYS_HICNT		10
+#define FIQ_IRQ_PEND		11
+#define FIQ_SIR_CODE_L1		12
+#define IRQ_SIR_CODE_L2		13
+
+#define FIQ_CNT_INT_00		14
+#define FIQ_CNT_INT_KEY		15
+#define FIQ_CNT_INT_MDM		16
+#define FIQ_CNT_INT_03		17
+#define FIQ_CNT_INT_HSW		18
+#define FIQ_CNT_INT_05		19
+#define FIQ_CNT_INT_06		20
+#define FIQ_CNT_INT_07		21
+#define FIQ_CNT_INT_08		22
+#define FIQ_CNT_INT_09		23
+#define FIQ_CNT_INT_10		24
+#define FIQ_CNT_INT_11		25
+#define FIQ_CNT_INT_12		26
+#define FIQ_CNT_INT_13		27
+#define FIQ_CNT_INT_14		28
+#define FIQ_CNT_INT_15		29
+
+#define FIQ_CIRC_BUFF		30      /*Start of circular buffer */
+
+#ifndef __ASSEMBLER__
+extern unsigned int fiq_buffer[];
+#endif
+
+#endif
-- 
cgit v1.2.3


From 5f73861fae087df19f7337620da65c99e4260c72 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Fri, 22 Jun 2018 00:41:28 +0200
Subject: Input: ams_delta_serio: Get FIQ buffer from platform_data

Instead of exporting the FIQ buffer symbol to be used in
ams-delta-serio driver, pass it to the driver as platform_data.

Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/platform_data/ams-delta-fiq.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/ams-delta-fiq.h b/include/linux/platform_data/ams-delta-fiq.h
index dc0f835ea918..cf4589ccb720 100644
--- a/include/linux/platform_data/ams-delta-fiq.h
+++ b/include/linux/platform_data/ams-delta-fiq.h
@@ -55,8 +55,4 @@
 
 #define FIQ_CIRC_BUFF		30      /*Start of circular buffer */
 
-#ifndef __ASSEMBLER__
-extern unsigned int fiq_buffer[];
-#endif
-
 #endif
-- 
cgit v1.2.3


From d082852f40de5cf55a7a689bf582fced39f5443e Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Fri, 22 Jun 2018 13:32:48 +0800
Subject: ARM: imx: enable bus auto clock gating function for i.mx6sll

i.MX6SLL has HW bus auto clock gating function, enable
it by default to save VDD_SOC_IN power, about 5% ~ 20%
saved depends on different use cases.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index e06f5f79eaef..6c1ad160ed87 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -457,4 +457,7 @@
 #define MCLK_DIR(x) (x == 1 ? IMX6UL_GPR1_SAI1_MCLK_DIR : x == 2 ? \
 		     IMX6UL_GPR1_SAI2_MCLK_DIR : IMX6UL_GPR1_SAI3_MCLK_DIR)
 
+/* For imx6sll iomux gpr register field define */
+#define IMX6SLL_GPR5_AFCG_X_BYPASS_MASK		(0x1f << 11)
+
 #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */
-- 
cgit v1.2.3


From 63453b59e41173241c4efe9335815f6432fa8586 Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Wed, 20 Jun 2018 10:51:53 +0200
Subject: i2c: smbus: add unlocked __i2c_smbus_xfer variant

Removes all locking from i2c_smbus_xfer and renames it to __i2c_smbus_xfer,
then adds a new i2c_smbus_xfer function that simply grabs the lock while
calling the unlocked variant.

This is not perfectly equivalent, since i2c_smbus_xfer was callable from
atomic/irq context if you happened to end up emulating SMBus with an I2C
transfer, and that is no longer the case with this patch. It is unknown
(to me) if anything depends on that quirk, but it seems fragile enough to
simply break those cases and require them to call i2c_transfer directly
instead.

While at it, for consistency rename the 2nd to last argument (size) of
the i2c_smbus_xfer declaration to protocol and remove the surplus extern
marker.

Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 254cd34eeae2..465afb092fa7 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -140,9 +140,14 @@ extern int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
    and probably just as fast.
    Note that we use i2c_adapter here, because you do not need a specific
    smbus adapter to call this function. */
-extern s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
-			  unsigned short flags, char read_write, u8 command,
-			  int size, union i2c_smbus_data *data);
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		   unsigned short flags, char read_write, u8 command,
+		   int protocol, union i2c_smbus_data *data);
+
+/* Unlocked flavor */
+s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		     unsigned short flags, char read_write, u8 command,
+		     int protocol, union i2c_smbus_data *data);
 
 /* Now follow the 'nice' access routines. These also document the calling
    conventions of i2c_smbus_xfer. */
-- 
cgit v1.2.3


From 7a872b6fb7fdc4213e9bb4e1c83a65e6b8af7ebd Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Wed, 4 Jul 2018 20:19:06 -0700
Subject: soc: ti: wkup_m3_ipc: Add rtc_only with ddr in self refresh mode
 support

Adds rtc_only support. This needs resume function to shutdown and
reboot the m3.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
---
 include/linux/wkup_m3_ipc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h
index d6ba7d39a62f..d639df15e8ba 100644
--- a/include/linux/wkup_m3_ipc.h
+++ b/include/linux/wkup_m3_ipc.h
@@ -40,6 +40,7 @@ struct wkup_m3_ipc {
 	struct mbox_chan *mbox;
 
 	struct wkup_m3_ipc_ops *ops;
+	int is_rtc_only;
 };
 
 struct wkup_m3_ipc_ops {
@@ -48,8 +49,10 @@ struct wkup_m3_ipc_ops {
 	int (*prepare_low_power)(struct wkup_m3_ipc *m3_ipc, int state);
 	int (*finish_low_power)(struct wkup_m3_ipc *m3_ipc);
 	int (*request_pm_status)(struct wkup_m3_ipc *m3_ipc);
+	void (*set_rtc_only)(struct wkup_m3_ipc *m3_ipc);
 };
 
 struct wkup_m3_ipc *wkup_m3_ipc_get(void);
 void wkup_m3_ipc_put(struct wkup_m3_ipc *m3_ipc);
+void wkup_m3_set_rtc_only_mode(void);
 #endif /* _LINUX_WKUP_M3_IPC_H */
-- 
cgit v1.2.3


From ec93b62fec9c7138d2b75334d192ecc12376f885 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Wed, 4 Jul 2018 20:19:06 -0700
Subject: soc: ti: wkup_m3_ipc: Add wkup_m3_request_wake_src

Add wkup_m3_request_wake_src to allow users to get the name of
the wakeup source after a DeepSleep or Standby transition.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
---
 include/linux/wkup_m3_ipc.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h
index d639df15e8ba..e497e621dbb7 100644
--- a/include/linux/wkup_m3_ipc.h
+++ b/include/linux/wkup_m3_ipc.h
@@ -43,12 +43,18 @@ struct wkup_m3_ipc {
 	int is_rtc_only;
 };
 
+struct wkup_m3_wakeup_src {
+	int irq_nr;
+	char src[10];
+};
+
 struct wkup_m3_ipc_ops {
 	void (*set_mem_type)(struct wkup_m3_ipc *m3_ipc, int mem_type);
 	void (*set_resume_address)(struct wkup_m3_ipc *m3_ipc, void *addr);
 	int (*prepare_low_power)(struct wkup_m3_ipc *m3_ipc, int state);
 	int (*finish_low_power)(struct wkup_m3_ipc *m3_ipc);
 	int (*request_pm_status)(struct wkup_m3_ipc *m3_ipc);
+	const char *(*request_wake_src)(struct wkup_m3_ipc *m3_ipc);
 	void (*set_rtc_only)(struct wkup_m3_ipc *m3_ipc);
 };
 
-- 
cgit v1.2.3


From 0f725561e168485eff7277d683405c05b192f537 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Thu, 7 Jun 2018 09:56:59 -0700
Subject: iommu/vt-d: Add definitions for PFSID

When SRIOV VF device IOTLB is invalidated, we need to provide
the PF source ID such that IOMMU hardware can gauge the depth
of invalidation queue which is shared among VFs. This is needed
when device invalidation throttle (DIT) capability is supported.

This patch adds bit definitions for checking and tracking PFSID.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: "Ashok Raj" <ashok.raj@intel.com>
Cc: "Lu Baolu" <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1df940196ab2..3b1c37155572 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -114,6 +114,7 @@
  * Extended Capability Register
  */
 
+#define ecap_dit(e)		((e >> 41) & 0x1)
 #define ecap_pasid(e)		((e >> 40) & 0x1)
 #define ecap_pss(e)		((e >> 35) & 0x1f)
 #define ecap_eafs(e)		((e >> 34) & 0x1)
@@ -283,6 +284,7 @@ enum {
 #define QI_DEV_IOTLB_SID(sid)	((u64)((sid) & 0xffff) << 32)
 #define QI_DEV_IOTLB_QDEP(qdep)	(((qdep) & 0x1f) << 16)
 #define QI_DEV_IOTLB_ADDR(addr)	((u64)(addr) & VTD_PAGE_MASK)
+#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
 #define QI_DEV_IOTLB_SIZE	1
 #define QI_DEV_IOTLB_MAX_INVS	32
 
@@ -307,6 +309,7 @@ enum {
 #define QI_DEV_EIOTLB_PASID(p)	(((u64)p) << 32)
 #define QI_DEV_EIOTLB_SID(sid)	((u64)((sid) & 0xffff) << 16)
 #define QI_DEV_EIOTLB_QDEP(qd)	((u64)((qd) & 0x1f) << 4)
+#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
 #define QI_DEV_EIOTLB_MAX_INVS	32
 
 #define QI_PGRP_IDX(idx)	(((u64)(idx)) << 55)
-- 
cgit v1.2.3


From 1c48db44924298ad0cb5a6386b88017539be8822 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Thu, 7 Jun 2018 09:57:00 -0700
Subject: iommu/vt-d: Fix dev iotlb pfsid use

PFSID should be used in the invalidation descriptor for flushing
device IOTLBs on SRIOV VFs.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: "Ashok Raj" <ashok.raj@intel.com>
Cc: "Lu Baolu" <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3b1c37155572..6692b40ca814 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -455,9 +455,8 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
 			     u8 fm, u64 type);
 extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 			  unsigned int size_order, u64 type);
-extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
-			       u64 addr, unsigned mask);
-
+extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+			u16 qdep, u64 addr, unsigned mask);
 extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
 extern int dmar_ir_support(void);
-- 
cgit v1.2.3


From bad614b24293ae463e74d2465685f0e4e229baca Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 12 Jun 2018 16:41:21 -0500
Subject: iommu: Enable debugfs exposure of IOMMU driver internals

Provide base enablement for using debugfs to expose internal data of an
IOMMU driver. When called, create the /sys/kernel/debug/iommu directory.

Emit a strong warning at boot time to indicate that this feature is
enabled.

This function is called from iommu_init, and creates the initial DebugFS
directory. Drivers may then call iommu_debugfs_new_driver_dir() to
instantiate a device-specific directory to expose internal data.
It will return a pointer to the new dentry structure created in
/sys/kernel/debug/iommu, or NULL in the event of a failure.

Since the IOMMU driver can not be removed from the running system, there
is no need for an "off" function.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 19938ee6eb31..7447b0b0579a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -698,4 +698,11 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 
 #endif /* CONFIG_IOMMU_API */
 
+#ifdef CONFIG_IOMMU_DEBUGFS
+extern	struct dentry *iommu_debugfs_dir;
+void iommu_debugfs_setup(void);
+#else
+static inline void iommu_debugfs_setup(void) {}
+#endif
+
 #endif /* __LINUX_IOMMU_H */
-- 
cgit v1.2.3


From 818b7587b4d34e989ea6c042eeb8d50ffa5be13e Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Wed, 27 Jun 2018 10:31:20 -0500
Subject: x86: irq_remapping: Move irq remapping mode enum

The enum is currently defined in Intel-specific DMAR header file,
but it is also used by APIC common code. Therefore, move it to
a more appropriate interrupt-remapping common header file.
This will also be used by subsequent patches.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/dmar.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e2433bc50210..843a41ba7e28 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -265,11 +265,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src)
 #define PDA_LOW_BIT    26
 #define PDA_HIGH_BIT   32
 
-enum {
-	IRQ_REMAP_XAPIC_MODE,
-	IRQ_REMAP_X2APIC_MODE,
-};
-
 /* Can't use the common MSI interrupt functions
  * since DMAR is not a pci device
  */
-- 
cgit v1.2.3


From 3ffa6583e24e1ad1abab836d24bfc9d2308074e5 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Mon, 25 Jun 2018 09:51:48 +0200
Subject: power: remove possible deadlock when unregistering power_supply

If a device gets removed right after having registered a power_supply node,
we might enter in a deadlock between the remove call (that has a lock on
the parent device) and the deferred register work.

Allow the deferred register work to exit without taking the lock when
we are in the remove state.

Stack trace on a Ubuntu 16.04:

[16072.109121] INFO: task kworker/u16:2:1180 blocked for more than 120 seconds.
[16072.109127]       Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu
[16072.109129] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[16072.109132] kworker/u16:2   D    0  1180      2 0x80000000
[16072.109142] Workqueue: events_power_efficient power_supply_deferred_register_work
[16072.109144] Call Trace:
[16072.109152]  __schedule+0x3d6/0x8b0
[16072.109155]  schedule+0x36/0x80
[16072.109158]  schedule_preempt_disabled+0xe/0x10
[16072.109161]  __mutex_lock.isra.2+0x2ab/0x4e0
[16072.109166]  __mutex_lock_slowpath+0x13/0x20
[16072.109168]  ? __mutex_lock_slowpath+0x13/0x20
[16072.109171]  mutex_lock+0x2f/0x40
[16072.109174]  power_supply_deferred_register_work+0x2b/0x50
[16072.109179]  process_one_work+0x15b/0x410
[16072.109182]  worker_thread+0x4b/0x460
[16072.109186]  kthread+0x10c/0x140
[16072.109189]  ? process_one_work+0x410/0x410
[16072.109191]  ? kthread_create_on_node+0x70/0x70
[16072.109194]  ret_from_fork+0x35/0x40
[16072.109199] INFO: task test:2257 blocked for more than 120 seconds.
[16072.109202]       Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu
[16072.109204] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[16072.109206] test            D    0  2257   2256 0x00000004
[16072.109208] Call Trace:
[16072.109211]  __schedule+0x3d6/0x8b0
[16072.109215]  schedule+0x36/0x80
[16072.109218]  schedule_timeout+0x1f3/0x360
[16072.109221]  ? check_preempt_curr+0x5a/0xa0
[16072.109224]  ? ttwu_do_wakeup+0x1e/0x150
[16072.109227]  wait_for_completion+0xb4/0x140
[16072.109230]  ? wait_for_completion+0xb4/0x140
[16072.109233]  ? wake_up_q+0x70/0x70
[16072.109236]  flush_work+0x129/0x1e0
[16072.109240]  ? worker_detach_from_pool+0xb0/0xb0
[16072.109243]  __cancel_work_timer+0x10f/0x190
[16072.109247]  ? device_del+0x264/0x310
[16072.109250]  ? __wake_up+0x44/0x50
[16072.109253]  cancel_delayed_work_sync+0x13/0x20
[16072.109257]  power_supply_unregister+0x37/0xb0
[16072.109260]  devm_power_supply_release+0x11/0x20
[16072.109263]  release_nodes+0x110/0x200
[16072.109266]  devres_release_group+0x7c/0xb0
[16072.109274]  wacom_remove+0xc2/0x110 [wacom]
[16072.109279]  hid_device_remove+0x6e/0xd0 [hid]
[16072.109284]  device_release_driver_internal+0x158/0x210
[16072.109288]  device_release_driver+0x12/0x20
[16072.109291]  bus_remove_device+0xec/0x160
[16072.109293]  device_del+0x1de/0x310
[16072.109298]  hid_destroy_device+0x27/0x60 [hid]
[16072.109303]  usbhid_disconnect+0x51/0x70 [usbhid]
[16072.109308]  usb_unbind_interface+0x77/0x270
[16072.109311]  device_release_driver_internal+0x158/0x210
[16072.109315]  device_release_driver+0x12/0x20
[16072.109318]  usb_driver_release_interface+0x77/0x80
[16072.109321]  proc_ioctl+0x20f/0x250
[16072.109325]  usbdev_do_ioctl+0x57f/0x1140
[16072.109327]  ? __wake_up+0x44/0x50
[16072.109331]  usbdev_ioctl+0xe/0x20
[16072.109336]  do_vfs_ioctl+0xa4/0x600
[16072.109339]  ? vfs_write+0x15a/0x1b0
[16072.109343]  SyS_ioctl+0x79/0x90
[16072.109347]  entry_SYSCALL_64_fastpath+0x24/0xab
[16072.109349] RIP: 0033:0x7f20da807f47
[16072.109351] RSP: 002b:00007ffc422ae398 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[16072.109353] RAX: ffffffffffffffda RBX: 00000000010b8560 RCX: 00007f20da807f47
[16072.109355] RDX: 00007ffc422ae3a0 RSI: 00000000c0105512 RDI: 0000000000000009
[16072.109356] RBP: 0000000000000000 R08: 00007ffc422ae3e0 R09: 0000000000000010
[16072.109357] R10: 00000000000000a6 R11: 0000000000000246 R12: 0000000000000000
[16072.109359] R13: 00000000010b8560 R14: 00007ffc422ae2e0 R15: 0000000000000000

Reported-and-tested-by: Richard Hughes <rhughes@redhat.com>
Tested-by: Aaron Skomra <Aaron.Skomra@wacom.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Fixes: 7f1a57fdd6cb ("power_supply: Fix possible NULL pointer dereference on early uevent")
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 include/linux/power_supply.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index b21c4bd96b84..f80769175c56 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -269,6 +269,7 @@ struct power_supply {
 	spinlock_t changed_lock;
 	bool changed;
 	bool initialized;
+	bool removing;
 	atomic_t use_cnt;
 #ifdef CONFIG_THERMAL
 	struct thermal_zone_device *tzd;
-- 
cgit v1.2.3


From 7efe25a70c37395b41b034335345d6855926c2a6 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 7 Jun 2018 19:44:56 +0000
Subject: iommu/shmobile: Remove unused include/linux/platform_data/sh_ipmmu.h
 header

include/linux/platform_data/sh_ipmmu.h is unused since commit

	ae50dc4874c5 ("iommu/shmobile: Remove unused Renesas IPMMU/IPMMUI")

Let's remove it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/platform_data/sh_ipmmu.h | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 include/linux/platform_data/sh_ipmmu.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/sh_ipmmu.h b/include/linux/platform_data/sh_ipmmu.h
deleted file mode 100644
index 39f7405cdac5..000000000000
--- a/include/linux/platform_data/sh_ipmmu.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* sh_ipmmu.h
- *
- * Copyright (C) 2012  Hideki EIRAKU
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- */
-
-#ifndef __SH_IPMMU_H__
-#define __SH_IPMMU_H__
-
-struct shmobile_ipmmu_platform_data {
-	const char * const *dev_names;
-	unsigned int num_dev_names;
-};
-
-#endif /* __SH_IPMMU_H__ */
-- 
cgit v1.2.3


From 5740c99e9d30b81fcc478797e7215c61e241f44e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 6 Jul 2018 23:57:03 +0200
Subject: vfs: dedupe: return int

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..b81c4b7e339f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1749,7 +1749,7 @@ struct file_operations {
 			loff_t, size_t, unsigned int);
 	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
-	ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
+	int (*dedupe_file_range)(struct file *, u64, u64, struct file *,
 			u64);
 } __randomize_layout;
 
-- 
cgit v1.2.3


From 87eb5eb2423213ac0e7315ce5d275f1ff80e0263 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 6 Jul 2018 23:57:03 +0200
Subject: vfs: dedupe: rationalize args

Clean up f_op->dedupe_file_range() interface.

1) Use loff_t for offsets and length instead of u64
2) Order the arguments the same way as {copy|clone}_file_range().

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b81c4b7e339f..a8fee2f44981 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1749,7 +1749,7 @@ struct file_operations {
 			loff_t, size_t, unsigned int);
 	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
-	int (*dedupe_file_range)(struct file *, u64, u64, struct file *,
+	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
 } __randomize_layout;
 
-- 
cgit v1.2.3


From 7f35e63dbfcb627bd30bac45702ffdf1ddde1516 Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas@ti.com>
Date: Mon, 9 Jul 2018 22:18:38 +0530
Subject: bus: ti-sysc: Add support for using ti-sysc for MCAN on dra76x

The dra76x MCAN generic interconnect module has a its own
format for the bits in the control registers.

Therefore add a new module type, new regbits and new capabilities
specific to the MCAN module.

Acked-by: Rob Herring <robh@kernel.org>
CC: Tony Lindgren <tony@atomide.com>
Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/platform_data/ti-sysc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 990aad477458..2efa3470a451 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -14,6 +14,7 @@ enum ti_sysc_module_type {
 	TI_SYSC_OMAP4_SR,
 	TI_SYSC_OMAP4_MCASP,
 	TI_SYSC_OMAP4_USB_HOST_FS,
+	TI_SYSC_DRA7_MCAN,
 };
 
 struct ti_sysc_cookie {
-- 
cgit v1.2.3


From 74655749a58405e259eaaba66bfc391fdbe1e34e Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Mon, 9 Jul 2018 13:03:16 +0530
Subject: ARM: OMAP2+: sleep33/43xx: Make sleep actions configurable

Add an argument to the sleep33xx and sleep43xx code to allow us to set
flags to determine which portions of the code get called in order to use
the same code for multiple power saving modes. This patch allows us to
decide whether or not we flush and disable caches, save EMIF context,
put the memory into self refresh and disable the EMIF, and/or invoke
the wkup_m3 when entering into WFI.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/platform_data/pm33xx.h | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h
index f9bed2a0af9d..d231265c135c 100644
--- a/include/linux/platform_data/pm33xx.h
+++ b/include/linux/platform_data/pm33xx.h
@@ -12,6 +12,29 @@
 #include <linux/kbuild.h>
 #include <linux/types.h>
 
+/*
+ * WFI Flags for sleep code control
+ *
+ * These flags allow PM code to exclude certain operations from happening
+ * in the low level ASM code found in sleep33xx.S and sleep43xx.S
+ *
+ * WFI_FLAG_FLUSH_CACHE: Flush the ARM caches and disable caching. Only
+ *			 needed when MPU will lose context.
+ * WFI_FLAG_SELF_REFRESH: Let EMIF place DDR memory into self-refresh and
+ *			  disable EMIF.
+ * WFI_FLAG_SAVE_EMIF: Save context of all EMIF registers and restore in
+ *		       resume path. Only needed if PER domain loses context
+ *		       and must also have WFI_FLAG_SELF_REFRESH set.
+ * WFI_FLAG_WAKE_M3: Disable MPU clock or clockdomain to cause wkup_m3 to
+ *		     execute when WFI instruction executes.
+ * WFI_FLAG_RTC_ONLY: Configure the RTC to enter RTC+DDR mode.
+ */
+#define WFI_FLAG_FLUSH_CACHE		BIT(0)
+#define WFI_FLAG_SELF_REFRESH		BIT(1)
+#define WFI_FLAG_SAVE_EMIF		BIT(2)
+#define WFI_FLAG_WAKE_M3		BIT(3)
+#define WFI_FLAG_RTC_ONLY		BIT(4)
+
 #ifndef __ASSEMBLER__
 struct am33xx_pm_sram_addr {
 	void (*do_wfi)(void);
@@ -23,7 +46,8 @@ struct am33xx_pm_sram_addr {
 
 struct am33xx_pm_platform_data {
 	int	(*init)(void);
-	int	(*soc_suspend)(unsigned int state, int (*fn)(unsigned long));
+	int	(*soc_suspend)(unsigned int state, int (*fn)(unsigned long),
+			       unsigned long args);
 	struct  am33xx_pm_sram_addr *(*get_sram_addrs)(void);
 };
 
-- 
cgit v1.2.3


From 8c5a916f4c8815196cc8a86b9582ca89422aac25 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Mon, 9 Jul 2018 13:03:17 +0530
Subject: ARM: OMAP2+: sleep33/43xx: Add RTC-Mode support

Add support for RTC mode to low level suspend code. This includes
providing the rtc base address for the assembly code to configuring the
PMIC_PWR_EN line late in suspend to enter RTC+DDR mode.

Note: This patch also fold in left out space parameter for
am33xx_emif_sram_table and am43xx_emif_sram_table

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/platform_data/pm33xx.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h
index d231265c135c..fbf5ed73c7cc 100644
--- a/include/linux/platform_data/pm33xx.h
+++ b/include/linux/platform_data/pm33xx.h
@@ -42,6 +42,7 @@ struct am33xx_pm_sram_addr {
 	unsigned long *resume_offset;
 	unsigned long *emif_sram_table;
 	unsigned long *ro_sram_data;
+	unsigned long resume_address;
 };
 
 struct am33xx_pm_platform_data {
@@ -49,6 +50,7 @@ struct am33xx_pm_platform_data {
 	int	(*soc_suspend)(unsigned int state, int (*fn)(unsigned long),
 			       unsigned long args);
 	struct  am33xx_pm_sram_addr *(*get_sram_addrs)(void);
+	void __iomem *(*get_rtc_base_addr)(void);
 };
 
 struct am33xx_pm_sram_data {
@@ -60,6 +62,7 @@ struct am33xx_pm_sram_data {
 struct am33xx_pm_ro_sram_data {
 	u32 amx3_pm_sram_data_virt;
 	u32 amx3_pm_sram_data_phys;
+	void __iomem *rtc_base_virt;
 } __packed __aligned(8);
 
 #endif /* __ASSEMBLER__ */
-- 
cgit v1.2.3


From 7f69ae7fadd9dbebe563bfc28b993a2bc9c8acf9 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 28 Jun 2018 11:57:48 +0200
Subject: ARM: davinci: unduplicate aemif support

All users now register platform devices using the ti-aemif driver.
Remove the handcrafted aemif API.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 include/linux/platform_data/mtd-davinci-aemif.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h
index 97948ac2bb9b..a403dd51dacc 100644
--- a/include/linux/platform_data/mtd-davinci-aemif.h
+++ b/include/linux/platform_data/mtd-davinci-aemif.h
@@ -33,5 +33,4 @@ struct davinci_aemif_timing {
 	u8	ta;
 };
 
-int davinci_aemif_setup(struct platform_device *pdev);
 #endif
-- 
cgit v1.2.3


From fae68031f7fbc8b6db58d87830ba7ed1d696fbb1 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Fri, 6 Jul 2018 07:35:50 +0200
Subject: w1: core: match sub-nodes of bus masters in devicetree

Once a new slave device is detected, match it against all sub-nodes of the
master bus controller. If a match is found, set the slave device's of_node
pointer.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 include/linux/w1.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/w1.h b/include/linux/w1.h
index 694101f744c7..3111585c371f 100644
--- a/include/linux/w1.h
+++ b/include/linux/w1.h
@@ -274,6 +274,8 @@ struct w1_family {
 
 	struct w1_family_ops	*fops;
 
+	const struct of_device_id *of_match_table;
+
 	atomic_t		refcnt;
 };
 
-- 
cgit v1.2.3


From 23ebda2fc715534d383d59ae6740d4e3ebd43798 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 11 Jul 2018 17:21:05 +0200
Subject: libata: remove ata_sff_data_xfer_noirq()

ata_sff_data_xfer_noirq() is invoked via the ->sff_data_xfer hook. The
latter is invoked by ata_pio_sector(), atapi_send_cdb() and
__atapi_pio_bytes() which in turn is invoked by ata_sff_hsm_move().
The latter function requires that the "ap->lock" lock is held which
needs to be taken with disabled interrupts.

There is no need have to have ata_sff_data_xfer_noirq() which invokes
ata_sff_data_xfer32() with disabled interrupts because at this point the
interrupts are already disabled.
Remove the function and its references to it and replace all callers
with ata_sff_data_xfer32().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8b8946dd63b9..aa8583655a18 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1832,8 +1832,6 @@ extern unsigned int ata_sff_data_xfer(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
 extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
-extern unsigned int ata_sff_data_xfer_noirq(struct ata_queued_cmd *qc,
-			unsigned char *buf, unsigned int buflen, int rw);
 extern void ata_sff_irq_on(struct ata_port *ap);
 extern void ata_sff_irq_clear(struct ata_port *ap);
 extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
-- 
cgit v1.2.3


From 2abc77af89e17582db9039293c8ac881c8c96d79 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 12 Jul 2018 11:18:42 -0400
Subject: new helper: open_with_fake_path()

open a file by given inode, faking ->f_path.  Use with shitloads
of caution - at the very least you'd damn better make sure that
some dentry alias of that inode is pinned down by the path in
question.  Again, this is no general-purpose interface and I hope
it will eventually go away.  Right now overlayfs wants something
like that, but nothing else should.

Any out-of-tree code with bright idea of using this one *will*
eventually get hurt, with zero notice and great delight on my part.
I refuse to use EXPORT_SYMBOL_GPL(), especially in situations when
it's really EXPORT_SYMBOL_DONT_USE_IT(), but don't take that export
as "you are welcome to use it".

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 05f34726e29c..4ff7b7012186 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2424,6 +2424,8 @@ extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
+extern struct file * open_with_fake_path(const struct path *, int,
+					 struct inode*, const struct cred *);
 static inline struct file *file_clone_open(struct file *file)
 {
 	return dentry_open(&file->f_path, file->f_flags, file->f_cred);
-- 
cgit v1.2.3


From 3f3a89e1d7c31558c070692241e3d6146d2cf1bf Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Wed, 20 Jun 2018 07:18:03 +0200
Subject: i2c: remove i2c_lock_adapter and use i2c_lock_bus directly

The i2c_lock_adapter name is ambiguous since it is unclear if it
refers to the root adapter or the adapter you name in the argument.
The natural interpretation is the adapter you name in the argument,
but there are historical reasons for that not being the case; it
in fact locks the root adapter. Just remove the function and force
users to spell out the I2C_LOCK_ROOT_ADAPTER name to indicate what
is really going on. Also remove i2c_unlock_adapter, of course.

This patch was generated with

git grep -l 'i2c_\(un\)\?lock_adapter' \
| xargs sed -i 's/i2c_\(un\)\?lock_adapter(\([^)]*\))/'\
'i2c_\1lock_bus(\2, I2C_LOCK_ROOT_ADAPTER)/g'

followed by white-space touch-up.

Signed-off-by: Peter Rosin <peda@axentia.se>
Acked-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 254cd34eeae2..795e3a860afe 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -754,18 +754,6 @@ i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags)
 	adapter->lock_ops->unlock_bus(adapter, flags);
 }
 
-static inline void
-i2c_lock_adapter(struct i2c_adapter *adapter)
-{
-	i2c_lock_bus(adapter, I2C_LOCK_ROOT_ADAPTER);
-}
-
-static inline void
-i2c_unlock_adapter(struct i2c_adapter *adapter)
-{
-	i2c_unlock_bus(adapter, I2C_LOCK_ROOT_ADAPTER);
-}
-
 /*flags for the client struct: */
 #define I2C_CLIENT_PEC		0x04	/* Use Packet Error Checking */
 #define I2C_CLIENT_TEN		0x10	/* we have a ten bit chip address */
-- 
cgit v1.2.3


From ffc59c496bf8498657321c59433f55bbcf2d9c38 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 10 Jul 2018 23:42:16 +0200
Subject: i2c: recovery: require either get_sda or set_sda

For bus recovery, we either need to bail out early if we can read SDA or
we need to send STOP after every pulse. Otherwise recovery might be
misinterpreted as an unwanted write. So, require one of those SDA
handling functions to avoid this problem.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 24acd69c8874..4c4360f94786 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -581,12 +581,12 @@ struct i2c_timings {
  *      recovery. Populated internally for generic GPIO recovery.
  * @set_scl: This sets/clears the SCL line. Mandatory for generic SCL recovery.
  *      Populated internally for generic GPIO recovery.
- * @get_sda: This gets current value of SDA line. Optional for generic SCL
- *      recovery. Populated internally, if sda_gpio is a valid GPIO, for generic
- *      GPIO recovery.
- * @set_sda: This sets/clears the SDA line. Optional for generic SCL recovery.
- *	Populated internally, if sda_gpio is a valid GPIO, for generic GPIO
- *	recovery.
+ * @get_sda: This gets current value of SDA line. This or set_sda() is mandatory
+ *	for generic SCL recovery. Populated internally, if sda_gpio is a valid
+ *	GPIO, for generic GPIO recovery.
+ * @set_sda: This sets/clears the SDA line. This or get_sda() is mandatory for
+ *	generic SCL recovery. Populated internally, if sda_gpio is a valid GPIO,
+ *	for generic GPIO recovery.
  * @prepare_recovery: This will be called before starting recovery. Platform may
  *	configure padmux here for SDA/SCL line or something else they want.
  * @unprepare_recovery: This will be called after completing recovery. Platform
-- 
cgit v1.2.3


From 7ca5f6be7900ca753ed01c0202dc5f998a41f4ee Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 11 Jul 2018 00:24:22 +0200
Subject: i2c: recovery: add get_bus_free callback

Some IP cores have an internal 'bus free' logic which may be more
advanced than just checking if SDA is high. Add a separate callback to
get this status. Filling it is optional.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 4c4360f94786..bc8d42f8544f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -587,6 +587,8 @@ struct i2c_timings {
  * @set_sda: This sets/clears the SDA line. This or get_sda() is mandatory for
  *	generic SCL recovery. Populated internally, if sda_gpio is a valid GPIO,
  *	for generic GPIO recovery.
+ * @get_bus_free: Returns the bus free state as seen from the IP core in case it
+ *	has a more complex internal logic than just reading SDA. Optional.
  * @prepare_recovery: This will be called before starting recovery. Platform may
  *	configure padmux here for SDA/SCL line or something else they want.
  * @unprepare_recovery: This will be called after completing recovery. Platform
@@ -601,6 +603,7 @@ struct i2c_bus_recovery_info {
 	void (*set_scl)(struct i2c_adapter *adap, int val);
 	int (*get_sda)(struct i2c_adapter *adap);
 	void (*set_sda)(struct i2c_adapter *adap, int val);
+	int (*get_bus_free)(struct i2c_adapter *adap);
 
 	void (*prepare_recovery)(struct i2c_adapter *adap);
 	void (*unprepare_recovery)(struct i2c_adapter *adap);
-- 
cgit v1.2.3


From 5b56c182edb1224bc1a97a1c74003eaa0eb59daf Mon Sep 17 00:00:00 2001
From: Wenyou Yang <wenyou.yang@atmel.com>
Date: Tue, 17 Jul 2018 11:26:55 +0300
Subject: ARM: at91: pm: Add ULP1 mode support

In the ULP1 mode, in order to achieve the lowest power consumption
with the system in retention mode and be able to resume on the wake
up events, all the clocks are shut off, inclusive the embedded 12MHz
RC oscillator, and the number of wake up sources is limited as well.
When the wake up event is asserted, the embedded 12MHz RC oscillator
restarts automatically.

The ULP1 (Ultra Low-power mode 1) is introduced by SAMA5D2.

The previous size of pm_suspend.o was 2148 bytes. With the addition of
ULP1 mode the new size of pm_suspend.o raised at 2456 bytes.

Signed-off-by: Wenyou Yang <wenyou.yang@atmel.com>
Signed-off-by: Ludovic Desroches <ludovic.desroches@microchip.com>
[claudiu.beznea@microchip.com: aligned with 4.18-rc1]
Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/clk/at91_pmc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h
index 6aca5ce8a99a..4ea2cbf9b50d 100644
--- a/include/linux/clk/at91_pmc.h
+++ b/include/linux/clk/at91_pmc.h
@@ -47,8 +47,10 @@
 #define	AT91_CKGR_MOR		0x20			/* Main Oscillator Register [not on SAM9RL] */
 #define		AT91_PMC_MOSCEN		(1    <<  0)		/* Main Oscillator Enable */
 #define		AT91_PMC_OSCBYPASS	(1    <<  1)		/* Oscillator Bypass */
+#define		AT91_PMC_WAITMODE	(1    <<  2)		/* Wait Mode Command */
 #define		AT91_PMC_MOSCRCEN	(1    <<  3)		/* Main On-Chip RC Oscillator Enable [some SAM9] */
 #define		AT91_PMC_OSCOUNT	(0xff <<  8)		/* Main Oscillator Start-up Time */
+#define		AT91_PMC_KEY_MASK	(0xff << 16)
 #define		AT91_PMC_KEY		(0x37 << 16)		/* MOR Writing Key */
 #define		AT91_PMC_MOSCSEL	(1    << 24)		/* Main Oscillator Selection [some SAM9] */
 #define		AT91_PMC_CFDEN		(1    << 25)		/* Clock Failure Detector Enable [some SAM9] */
-- 
cgit v1.2.3


From 3abd729aa468d7346f12d7dfc8f81aba653f6c88 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Tue, 17 Jul 2018 11:26:56 +0300
Subject: ARM: at91: pm: add PMC fast startup registers defines

Add PMC fast startup registers defines.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/clk/at91_pmc.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h
index 4ea2cbf9b50d..931ab05f771d 100644
--- a/include/linux/clk/at91_pmc.h
+++ b/include/linux/clk/at91_pmc.h
@@ -157,6 +157,19 @@
 #define		AT91_PMC_GCKRDY		(1 << 24)		/* Generated Clocks */
 #define	AT91_PMC_IMR		0x6c			/* Interrupt Mask Register */
 
+#define AT91_PMC_FSMR		0x70		/* Fast Startup Mode Register */
+#define AT91_PMC_FSTT(n)	BIT(n)
+#define AT91_PMC_RTCAL		BIT(17)		/* RTC Alarm Enable */
+#define AT91_PMC_USBAL		BIT(18)		/* USB Resume Enable */
+#define AT91_PMC_SDMMC_CD	BIT(19)		/* SDMMC Card Detect Enable */
+#define AT91_PMC_LPM		BIT(20)		/* Low-power Mode */
+#define AT91_PMC_RXLP_MCE	BIT(24)		/* Backup UART Receive Enable */
+#define AT91_PMC_ACC_CE		BIT(25)		/* ACC Enable */
+
+#define AT91_PMC_FSPR		0x74		/* Fast Startup Polarity Reg */
+
+#define AT91_PMC_FS_INPUT_MASK  0x7ff
+
 #define AT91_PMC_PLLICPR	0x80			/* PLL Charge Pump Current Register */
 
 #define AT91_PMC_PROT		0xe4			/* Write Protect Mode Register [some SAM9] */
-- 
cgit v1.2.3


From d3b1084dfd629ef89bc1c4bab95e5cb87e7d08c2 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:40 +0200
Subject: vfs: make open_with_fake_path() not contribute to nr_files

Stacking file operations in overlay will store an extra open file for each
overlay file opened.

The overhead is just that of "struct file" which is about 256bytes, because
overlay already pins an extra dentry and inode when the file is open, which
add up to a much larger overhead.

For fear of breaking working setups, don't start accounting the extra file.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5ce2b413abc6..e1884840d556 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -156,6 +156,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File is capable of returning -EAGAIN if I/O will block */
 #define FMODE_NOWAIT	((__force fmode_t)0x8000000)
 
+/* File does not contribute to nr_files count */
+#define FMODE_NOACCOUNT	((__force fmode_t)0x20000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
-- 
cgit v1.2.3


From 9df6702ad0e85901450fe48a7b5f0f8975353eeb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:40 +0200
Subject: vfs: export vfs_ioctl() to modules

This is needed by the stacked ioctl implementation in overlayfs.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e1884840d556..019817a083a0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1634,6 +1634,8 @@ int vfs_mkobj(struct dentry *, umode_t,
 		int (*f)(struct dentry *, umode_t, void *),
 		void *);
 
+extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
 /*
  * VFS file helper functions.
  */
-- 
cgit v1.2.3


From f182536684d876afaf4627c36a16c4e15ea8a2b8 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:40 +0200
Subject: vfs: export vfs_dedupe_file_range_one() to modules

This is needed by the stacked dedupe implementation in overlayfs.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 019817a083a0..b67209948f1b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1829,6 +1829,10 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 loff_t len, bool *is_same);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
+extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+				     struct file *dst_file, loff_t dst_pos,
+				     u64 len);
+
 
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
-- 
cgit v1.2.3


From 88059de155d4db817a3a78ba899cb3b7f4de0fb0 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Revert "ovl: fix relatime for directories"

This reverts commit cd91304e7190b4c4802f8e413ab2214b233e0260.

Overlayfs no longer relies on the vfs correct atime handling.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/dcache.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 66c6e17e61e5..ddae4103d324 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -564,9 +564,6 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
 	return upper;
 }
 
-/* d_real() flags */
-#define D_REAL_UPPER	0x2	/* return upper dentry or NULL if non-upper */
-
 /**
  * d_real - Return the real dentry
  * @dentry: the dentry to query
-- 
cgit v1.2.3


From c6718543463dbb78486ad259f884cb800df802b5 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Revert "vfs: update ovl inode before relatime check"

This reverts commit 598e3c8f72f5b77c84d2cb26cfd936ffb3cfdbaa.

Overlayfs no longer relies on the vfs correct atime handling.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b67209948f1b..8f8c9ac1c9d5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2088,6 +2088,7 @@ enum file_time_flags {
 	S_VERSION = 8,
 };
 
+extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
 static inline void file_accessed(struct file *file)
 {
-- 
cgit v1.2.3


From 4ab30319fd7c691a1b3165325c647a5cd6d282ac Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Revert "vfs: add flags to d_real()"

This reverts commit 495e642939114478a5237a7d91661ba93b76f15a.

No user of "flags" argument of d_real() remain.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/dcache.h | 11 +++++------
 include/linux/fs.h     |  2 +-
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ddae4103d324..8fe4efa94af6 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -146,7 +146,7 @@ struct dentry_operations {
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int, unsigned int);
+				 unsigned int);
 } ____cacheline_aligned;
 
 /*
@@ -568,8 +568,7 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * d_real - Return the real dentry
  * @dentry: the dentry to query
  * @inode: inode to select the dentry from multiple layers (can be NULL)
- * @open_flags: open flags to control copy-up behavior
- * @flags: flags to control what is returned by this function
+ * @flags: open flags to control copy-up behavior
  *
  * If dentry is on a union/overlay, then return the underlying, real dentry.
  * Otherwise return the dentry itself.
@@ -578,10 +577,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  */
 static inline struct dentry *d_real(struct dentry *dentry,
 				    const struct inode *inode,
-				    unsigned int open_flags, unsigned int flags)
+				    unsigned int flags)
 {
 	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
-		return dentry->d_op->d_real(dentry, inode, open_flags, flags);
+		return dentry->d_op->d_real(dentry, inode, flags);
 	else
 		return dentry;
 }
@@ -596,7 +595,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
 static inline struct inode *d_real_inode(const struct dentry *dentry)
 {
 	/* This usage of d_real() results in const dentry */
-	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0, 0));
+	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0));
 }
 
 struct name_snapshot {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8f8c9ac1c9d5..16e2741cec3c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1249,7 +1249,7 @@ static inline struct inode *file_inode(const struct file *f)
 
 static inline struct dentry *file_dentry(const struct file *file)
 {
-	return d_real(file->f_path.dentry, file_inode(file), 0, 0);
+	return d_real(file->f_path.dentry, file_inode(file), 0);
 }
 
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
-- 
cgit v1.2.3


From de2a4a501e716bbf5ff691ba16faf59a35320228 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Partially revert "locks: fix file locking on overlayfs"

This partially reverts commit c568d68341be7030f5647def68851e469b21ca11.

Overlayfs files will now automatically get the correct locks, no need to
hack overlay support in VFS.

It is a partial revert, because it leaves the locks_inode() calls in place
and defines locks_inode() to file_inode().  We could revert those as well,
but it would be unnecessary code churn and it makes sense to document that
we are getting the inode for locking purposes.

Don't revert MS_NOREMOTELOCK yet since that has been part of the userspace
API for some time (though not in a useful way).  Will try to remove
internal flags later when the dust around the new mount API settles.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
---
 include/linux/fs.h | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 16e2741cec3c..1cbcf37c45e1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1054,17 +1054,7 @@ struct file_lock_context {
 
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
-/*
- * Return the inode to use for locking
- *
- * For overlayfs this should be the overlay inode, not the real inode returned
- * by file_inode().  For any other fs file_inode(filp) and locks_inode(filp) are
- * equal.
- */
-static inline struct inode *locks_inode(const struct file *f)
-{
-	return f->f_path.dentry->d_inode;
-}
+#define locks_inode(f) file_inode(f)
 
 #ifdef CONFIG_FILE_LOCKING
 extern int fcntl_getlk(struct file *, unsigned int, struct flock *);
@@ -1305,7 +1295,6 @@ extern int send_sigurg(struct fown_struct *fown);
 
 /* These sb flags are internal to the kernel */
 #define SB_SUBMOUNT     (1<<26)
-#define SB_NOREMOTELOCK	(1<<27)
 #define SB_NOSEC	(1<<28)
 #define SB_BORN		(1<<29)
 #define SB_ACTIVE	(1<<30)
-- 
cgit v1.2.3


From 573e1784817ca1f13d76a0df636929e983e5de3c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:44 +0200
Subject: Revert "fsnotify: support overlayfs"

This reverts commit f3fbbb079263bd29ae592478de6808db7e708267.

Overlayfs now works correctly without adding hacks to fsnotify.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fsnotify.h | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index bdaf22582f6e..fd1ce10553bf 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -30,11 +30,7 @@ static inline int fsnotify_parent(const struct path *path, struct dentry *dentry
 static inline int fsnotify_perm(struct file *file, int mask)
 {
 	const struct path *path = &file->f_path;
-	/*
-	 * Do not use file_inode() here or anywhere in this file to get the
-	 * inode.  That would break *notity on overlayfs.
-	 */
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 fsnotify_mask = 0;
 	int ret;
 
@@ -178,7 +174,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 static inline void fsnotify_access(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 mask = FS_ACCESS;
 
 	if (S_ISDIR(inode->i_mode))
@@ -196,7 +192,7 @@ static inline void fsnotify_access(struct file *file)
 static inline void fsnotify_modify(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 mask = FS_MODIFY;
 
 	if (S_ISDIR(inode->i_mode))
@@ -214,7 +210,7 @@ static inline void fsnotify_modify(struct file *file)
 static inline void fsnotify_open(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 mask = FS_OPEN;
 
 	if (S_ISDIR(inode->i_mode))
@@ -230,7 +226,7 @@ static inline void fsnotify_open(struct file *file)
 static inline void fsnotify_close(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	fmode_t mode = file->f_mode;
 	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
 
-- 
cgit v1.2.3


From fb16043b46831a75c9b076a7262ae035290b0409 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:44 +0200
Subject: vfs: remove open_flags from d_real()

Opening regular files on overlayfs is now handled via ovl_open().  Remove
the now unused "open_flags" argument from d_op->d_real() and the d_real()
helper.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/dcache.h | 11 ++++-------
 include/linux/fs.h     |  2 +-
 2 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 8fe4efa94af6..78cea80423a3 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -145,8 +145,7 @@ struct dentry_operations {
 	char *(*d_dname)(struct dentry *, char *, int);
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
-	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int);
+	struct dentry *(*d_real)(struct dentry *, const struct inode *);
 } ____cacheline_aligned;
 
 /*
@@ -568,7 +567,6 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * d_real - Return the real dentry
  * @dentry: the dentry to query
  * @inode: inode to select the dentry from multiple layers (can be NULL)
- * @flags: open flags to control copy-up behavior
  *
  * If dentry is on a union/overlay, then return the underlying, real dentry.
  * Otherwise return the dentry itself.
@@ -576,11 +574,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * See also: Documentation/filesystems/vfs.txt
  */
 static inline struct dentry *d_real(struct dentry *dentry,
-				    const struct inode *inode,
-				    unsigned int flags)
+				    const struct inode *inode)
 {
 	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
-		return dentry->d_op->d_real(dentry, inode, flags);
+		return dentry->d_op->d_real(dentry, inode);
 	else
 		return dentry;
 }
@@ -595,7 +592,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
 static inline struct inode *d_real_inode(const struct dentry *dentry)
 {
 	/* This usage of d_real() results in const dentry */
-	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0));
+	return d_backing_inode(d_real((struct dentry *) dentry, NULL));
 }
 
 struct name_snapshot {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1cbcf37c45e1..1fa63d184d1f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1239,7 +1239,7 @@ static inline struct inode *file_inode(const struct file *f)
 
 static inline struct dentry *file_dentry(const struct file *file)
 {
-	return d_real(file->f_path.dentry, file_inode(file), 0);
+	return d_real(file->f_path.dentry, file_inode(file));
 }
 
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
-- 
cgit v1.2.3


From 51261aac51a05c791ef880a100ac2ceed201ef72 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:55 +0800
Subject: iommu/vt-d: Avoid using idr_for_each_entry()

idr_for_each_entry() is used to iteratte over idr elements
of a given type. It isn't suitable for the globle pasid idr
since the pasid idr consumer could specify different types
of pointers to bind with a pasid.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 6692b40ca814..e1e193855581 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -487,6 +487,7 @@ struct intel_svm {
 	int flags;
 	int pasid;
 	struct list_head devs;
+	struct list_head list;
 };
 
 extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
-- 
cgit v1.2.3


From af39507305fb83a5d3c475c2851f4d59545d8a18 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:56 +0800
Subject: iommu/vt-d: Apply global PASID in SVA

This patch applies the global pasid name space in the shared
virtual address (SVA) implementation.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e1e193855581..2ff15195f73d 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -420,7 +420,6 @@ struct intel_iommu {
 	struct pasid_state_entry *pasid_state_table;
 	struct page_req_dsc *prq;
 	unsigned char prq_name[16];    /* Name for PRQ interrupt */
-	struct idr pasid_idr;
 	u32 pasid_max;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
-- 
cgit v1.2.3


From 9ddbfb42138d84bb326023616c40a3dc30ea2837 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:57 +0800
Subject: iommu/vt-d: Move device_domain_info to header

This allows the per device iommu data and some helpers to be
used in other files.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 61 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 2ff15195f73d..2e1fbde020ca 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/iommu.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmar.h>
 
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -387,6 +388,42 @@ struct pasid_entry;
 struct pasid_state_entry;
 struct page_req_dsc;
 
+struct dmar_domain {
+	int	nid;			/* node id */
+
+	unsigned	iommu_refcnt[DMAR_UNITS_SUPPORTED];
+					/* Refcount of devices per iommu */
+
+
+	u16		iommu_did[DMAR_UNITS_SUPPORTED];
+					/* Domain ids per IOMMU. Use u16 since
+					 * domain ids are 16 bit wide according
+					 * to VT-d spec, section 9.3 */
+
+	bool has_iotlb_device;
+	struct list_head devices;	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+	int		flags;		/* flags to find out type of domain */
+
+	int		iommu_coherency;/* indicate coherency of iommu access */
+	int		iommu_snooping; /* indicate snooping control feature*/
+	int		iommu_count;	/* reference count of iommu */
+	int		iommu_superpage;/* Level of superpages supported:
+					   0 == 4KiB (no superpages), 1 == 2MiB,
+					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
+	u64		max_addr;	/* maximum mapped address */
+
+	struct iommu_domain domain;	/* generic domain data structure for
+					   iommu core */
+};
+
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64 		reg_phys; /* physical address of hw register set */
@@ -435,6 +472,25 @@ struct intel_iommu {
 	u32		flags;      /* Software defined flags */
 };
 
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus number */
+	u8 devfn;		/* PCI devfn number */
+	u16 pfsid;		/* SRIOV physical function source ID */
+	u8 pasid_supported:3;
+	u8 pasid_enabled:1;
+	u8 pri_supported:1;
+	u8 pri_enabled:1;
+	u8 ats_supported:1;
+	u8 ats_enabled:1;
+	u8 ats_qdep;
+	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
+	struct intel_iommu *iommu; /* IOMMU used by this device */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
 static inline void __iommu_flush_cache(
 	struct intel_iommu *iommu, void *addr, int size)
 {
@@ -460,6 +516,11 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
 extern int dmar_ir_support(void);
 
+struct dmar_domain *get_valid_domain_for_dev(struct device *dev);
+void *alloc_pgtable_page(int node);
+void free_pgtable_page(void *vaddr);
+struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
+
 #ifdef CONFIG_INTEL_IOMMU_SVM
 extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
 extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu);
-- 
cgit v1.2.3


From 85319dcc8955f8f31828dc8bafff29f6aa011d93 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:58 +0800
Subject: iommu/vt-d: Add for_each_device_domain() helper

This adds a helper named for_each_device_domain() to iterate
over the elements in device_domain_list and invoke a callback
against each element. This allows to search the device_domain
list in other source files.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 2e1fbde020ca..4fd4c6fee93e 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -520,6 +520,8 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev);
 void *alloc_pgtable_page(int node);
 void free_pgtable_page(void *vaddr);
 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
+int for_each_device_domain(int (*fn)(struct device_domain_info *info,
+				     void *data), void *data);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
-- 
cgit v1.2.3


From cc580e41260dbf1a46269235f1f2b572137d9d03 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:59 +0800
Subject: iommu/vt-d: Per PCI device pasid table interfaces

This patch adds the interfaces for per PCI device pasid
table management. Currently we allocate one pasid table
for all PCI devices under the scope of an IOMMU. It's
insecure in some cases where multiple devices under one
single IOMMU unit support PASID features. With per PCI
device pasid table, we can achieve finer protection and
isolation granularity.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Suggested-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4fd4c6fee93e..e7901d402337 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -476,6 +476,7 @@ struct intel_iommu {
 struct device_domain_info {
 	struct list_head link;	/* link to domain siblings */
 	struct list_head global; /* link to global list */
+	struct list_head table;	/* link to pasid table */
 	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */
 	u16 pfsid;		/* SRIOV physical function source ID */
@@ -489,6 +490,7 @@ struct device_domain_info {
 	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
 	struct intel_iommu *iommu; /* IOMMU used by this device */
 	struct dmar_domain *domain; /* pointer to domain */
+	struct pasid_table *pasid_table; /* pasid table */
 };
 
 static inline void __iommu_flush_cache(
-- 
cgit v1.2.3


From d9737953d85131436b09668b5e8d3389c37c1f28 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:47:02 +0800
Subject: iommu/vt-d: Remove the obsolete per iommu pasid tables

The obsolete per iommu pasid tables are no longer used. Hence,
clean up them.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e7901d402337..3c43882d3b77 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -453,7 +453,6 @@ struct intel_iommu {
 	 * devices away to userspace processes (e.g. for DPDK) and don't
 	 * want to trust that userspace will use *only* the PASID it was
 	 * told to. But while it's all driver-arbitrated, we're fine. */
-	struct pasid_entry *pasid_table;
 	struct pasid_state_entry *pasid_state_table;
 	struct page_req_dsc *prq;
 	unsigned char prq_name[16];    /* Name for PRQ interrupt */
@@ -526,8 +525,8 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info,
 				     void *data), void *data);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
-extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
-extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu);
+int intel_svm_init(struct intel_iommu *iommu);
+int intel_svm_exit(struct intel_iommu *iommu);
 extern int intel_svm_enable_prq(struct intel_iommu *iommu);
 extern int intel_svm_finish_prq(struct intel_iommu *iommu);
 
-- 
cgit v1.2.3


From 226ab561075f6f8f3cd5f7b3b7544f3997aab51f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:49:34 -0700
Subject: device-dax: Convert to vmf_insert_mixed and vm_fault_t

Use new return type vm_fault_t for fault and huge_fault handler. For
now, this is just documenting that the function returns a VM_FAULT value
rather than an errno.  Once all instances are converted, vm_fault_t will
become a distinct type.

Commit 1c8f422059ae ("mm: change return type to vm_fault_t")

Previously vm_insert_mixed() returned an error code which driver mapped into
VM_FAULT_* type. The new function vmf_insert_mixed() will replace this
inefficiency by returning VM_FAULT_* type.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/huge_mm.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a8a126259bc4..d3bbf6bea9e9 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -3,6 +3,7 @@
 #define _LINUX_HUGE_MM_H
 
 #include <linux/sched/coredump.h>
+#include <linux/mm_types.h>
 
 #include <linux/fs.h> /* only for vma_is_dax() */
 
@@ -46,9 +47,9 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);
-int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 			pmd_t *pmd, pfn_t pfn, bool write);
-int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 			pud_t *pud, pfn_t pfn, bool write);
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
-- 
cgit v1.2.3


From a2dca217dae29c4ff6420e8c78d56b3f61ae0797 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 16 Jul 2018 15:06:18 +0200
Subject: KVM: arm/arm64: vgic: Define GICD_IIDR fields for GICv2 and GIv3

Instead of hardcoding the shifts and masks in the GICD_IIDR register
emulation, let's add the definition of these fields to the GIC header
files and use them.

This will make things more obvious when we're going to bump the revision
in the IIDR when we'll make guest-visible changes to the implementation.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 10 ++++++++++
 include/linux/irqchip/arm-gic.h    | 10 ++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index cbb872c1b607..b22f9dfa61af 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -61,6 +61,16 @@
 #define GICD_CTLR_ENABLE_G1A		(1U << 1)
 #define GICD_CTLR_ENABLE_G1		(1U << 0)
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT	0
+#define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT	12
+#define GICD_IIDR_REVISION_MASK		(0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT		16
+#define GICD_IIDR_VARIANT_MASK		(0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT	24
+#define GICD_IIDR_PRODUCT_ID_MASK	(0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 /*
  * In systems with a single security state (what we emulate in KVM)
  * the meaning of the interrupt group enable bits is slightly different
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 68d8b1f73682..484f5bfa9f3d 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -71,6 +71,16 @@
 					(GICD_INT_DEF_PRI << 8) |\
 					GICD_INT_DEF_PRI)
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT	0
+#define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT	12
+#define GICD_IIDR_REVISION_MASK		(0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT		16
+#define GICD_IIDR_VARIANT_MASK		(0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT	24
+#define GICD_IIDR_PRODUCT_ID_MASK	(0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 #define GICH_HCR			0x0
 #define GICH_VTR			0x4
 #define GICH_VMCR			0x8
-- 
cgit v1.2.3


From 87322099052b6a534cecd3d303fc097d4560b7d0 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 16 Jul 2018 15:06:22 +0200
Subject: KVM: arm/arm64: vgic: Signal IRQs using their configured group

Now when we have a group configuration on the struct IRQ, use this state
when populating the LR and signaling interrupts as either group 0 or
group 1 to the VM.  Depending on the model of the emulated GIC, and the
guest's configuration of the VMCR, interrupts may be signaled as IRQs or
FIQs to the VM.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 484f5bfa9f3d..6c4aaf04046c 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -104,6 +104,7 @@
 #define GICH_LR_PENDING_BIT		(1 << 28)
 #define GICH_LR_ACTIVE_BIT		(1 << 29)
 #define GICH_LR_EOI			(1 << 19)
+#define GICH_LR_GROUP1			(1 << 30)
 #define GICH_LR_HW			(1 << 31)
 
 #define GICH_VMCR_ENABLE_GRP0_SHIFT	0
-- 
cgit v1.2.3


From 1fb53567a3633740aac8761eb7023dc5671f0edb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 5 May 2017 13:45:14 -0500
Subject: pids: Move task_pid_type into sched/signal.h

The function is general and inline so there is no need
to hide it inside of exit.c

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 113d1ad1ced7..d8ef0a3d2e7e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -556,6 +556,14 @@ extern bool current_is_single_threaded(void);
 typedef int (*proc_visitor)(struct task_struct *p, void *data);
 void walk_process_tree(struct task_struct *top, proc_visitor, void *);
 
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+{
+	if (type != PIDTYPE_PID)
+		task = task->group_leader;
+	return task->pids[type].pid;
+}
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
-- 
cgit v1.2.3


From 7a36094d61bfe9843de5484ff0140227983ac5d5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 26 Sep 2017 12:45:33 -0500
Subject: pids: Compute task_tgid using signal->leader_pid

The cost is the the same and this removes the need
to worry about complications that come from de_thread
and group_leader changing.

__task_pid_nr_ns has been updated to take advantage of this change.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched.h        | 5 -----
 include/linux/sched/signal.h | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 87bf02d93a27..a461ff89a3af 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1202,11 +1202,6 @@ static inline struct pid *task_pid(struct task_struct *task)
 	return task->pids[PIDTYPE_PID].pid;
 }
 
-static inline struct pid *task_tgid(struct task_struct *task)
-{
-	return task->group_leader->pids[PIDTYPE_PID].pid;
-}
-
 /*
  * Without tasklist or RCU lock it is not safe to dereference
  * the result of task_pgrp/task_session even if task == current,
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d8ef0a3d2e7e..b95a272c1ab5 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -564,6 +564,11 @@ struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 	return task->pids[type].pid;
 }
 
+static inline struct pid *task_tgid(struct task_struct *task)
+{
+	return task->signal->leader_pid;
+}
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
-- 
cgit v1.2.3


From 2c4704756cab7cfa031ada4dab361562f0e357c0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 26 Sep 2017 13:06:43 -0500
Subject: pids: Move the pgrp and session pid pointers from task_struct to
 signal_struct

To access these fields the code always has to go to group leader so
going to signal struct is no loss and is actually a fundamental simplification.

This saves a little bit of memory by only allocating the pid pointer array
once instead of once for every thread, and even better this removes a
few potential races caused by the fact that group_leader can be changed
by de_thread, while signal_struct can not.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/init_task.h    |  9 ---------
 include/linux/pid.h          |  8 +-------
 include/linux/sched.h        | 22 ++++------------------
 include/linux/sched/signal.h | 26 +++++++++++++++++++++++---
 4 files changed, 28 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a454b8aeb938..a7083a45a26c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -46,15 +46,6 @@ extern struct cred init_cred;
 #define INIT_CPU_TIMERS(s)
 #endif
 
-#define INIT_PID_LINK(type) 					\
-{								\
-	.node = {						\
-		.next = NULL,					\
-		.pprev = NULL,					\
-	},							\
-	.pid = &init_struct_pid,				\
-}
-
 #define INIT_TASK_COMM "swapper"
 
 /* Attach to the init_task data structure for proper alignment */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 7633d55d9a24..3d4c504dcc8c 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -67,12 +67,6 @@ struct pid
 
 extern struct pid init_struct_pid;
 
-struct pid_link
-{
-	struct hlist_node node;
-	struct pid *pid;
-};
-
 static inline struct pid *get_pid(struct pid *pid)
 {
 	if (pid)
@@ -177,7 +171,7 @@ pid_t pid_vnr(struct pid *pid);
 	do {								\
 		if ((pid) != NULL)					\
 			hlist_for_each_entry_rcu((task),		\
-				&(pid)->tasks[type], pids[type].node) {
+				&(pid)->tasks[type], pid_links[type]) {
 
 			/*
 			 * Both old and new leaders may be attached to
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a461ff89a3af..445bdf5b1f64 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -775,7 +775,8 @@ struct task_struct {
 	struct list_head		ptrace_entry;
 
 	/* PID/PID hash table linkage. */
-	struct pid_link			pids[PIDTYPE_MAX];
+	struct pid			*thread_pid;
+	struct hlist_node		pid_links[PIDTYPE_MAX];
 	struct list_head		thread_group;
 	struct list_head		thread_node;
 
@@ -1199,22 +1200,7 @@ struct task_struct {
 
 static inline struct pid *task_pid(struct task_struct *task)
 {
-	return task->pids[PIDTYPE_PID].pid;
-}
-
-/*
- * Without tasklist or RCU lock it is not safe to dereference
- * the result of task_pgrp/task_session even if task == current,
- * we can race with another thread doing sys_setsid/sys_setpgid.
- */
-static inline struct pid *task_pgrp(struct task_struct *task)
-{
-	return task->group_leader->pids[PIDTYPE_PGID].pid;
-}
-
-static inline struct pid *task_session(struct task_struct *task)
-{
-	return task->group_leader->pids[PIDTYPE_SID].pid;
+	return task->thread_pid;
 }
 
 /*
@@ -1263,7 +1249,7 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk)
  */
 static inline int pid_alive(const struct task_struct *p)
 {
-	return p->pids[PIDTYPE_PID].pid != NULL;
+	return p->thread_pid != NULL;
 }
 
 static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b95a272c1ab5..2dcded16eb1e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -146,7 +146,9 @@ struct signal_struct {
 
 #endif
 
+	/* PID/PID hash table linkage. */
 	struct pid *leader_pid;
+	struct pid *pids[PIDTYPE_MAX];
 
 #ifdef CONFIG_NO_HZ_FULL
 	atomic_t tick_dep_mask;
@@ -559,9 +561,12 @@ void walk_process_tree(struct task_struct *top, proc_visitor, void *);
 static inline
 struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
-	if (type != PIDTYPE_PID)
-		task = task->group_leader;
-	return task->pids[type].pid;
+	struct pid *pid;
+	if (type == PIDTYPE_PID)
+		pid = task_pid(task);
+	else
+		pid = task->signal->pids[type];
+	return pid;
 }
 
 static inline struct pid *task_tgid(struct task_struct *task)
@@ -569,6 +574,21 @@ static inline struct pid *task_tgid(struct task_struct *task)
 	return task->signal->leader_pid;
 }
 
+/*
+ * Without tasklist or RCU lock it is not safe to dereference
+ * the result of task_pgrp/task_session even if task == current,
+ * we can race with another thread doing sys_setsid/sys_setpgid.
+ */
+static inline struct pid *task_pgrp(struct task_struct *task)
+{
+	return task->signal->pids[PIDTYPE_PGID];
+}
+
+static inline struct pid *task_session(struct task_struct *task)
+{
+	return task->signal->pids[PIDTYPE_SID];
+}
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
-- 
cgit v1.2.3


From 6883f81aac6f44e7df70a6af189b3689ff52cbfb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 4 Jun 2017 04:32:13 -0500
Subject: pid: Implement PIDTYPE_TGID

Everywhere except in the pid array we distinguish between a tasks pid and
a tasks tgid (thread group id).  Even in the enumeration we want that
distinction sometimes so we have added __PIDTYPE_TGID.  With leader_pid
we almost have an implementation of PIDTYPE_TGID in struct signal_struct.

Add PIDTYPE_TGID as a first class member of the pid_type enumeration and
into the pids array.  Then remove the __PIDTYPE_TGID special case and the
leader_pid in signal_struct.

The net size increase is just an extra pointer added to struct pid and
an extra pair of pointers of an hlist_node added to task_struct.

The effect on code maintenance is the removal of a number of special
cases today and the potential to remove many more special cases as
PIDTYPE_TGID gets used to it's fullest.  The long term potential
is allowing zombie thread group leaders to exit, which will remove
a lot more special cases in the code.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/pid.h          | 3 +--
 include/linux/sched.h        | 4 ++--
 include/linux/sched/signal.h | 5 ++---
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 3d4c504dcc8c..14a9a39da9c7 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -7,11 +7,10 @@
 enum pid_type
 {
 	PIDTYPE_PID,
+	PIDTYPE_TGID,
 	PIDTYPE_PGID,
 	PIDTYPE_SID,
 	PIDTYPE_MAX,
-	/* only valid to __task_pid_nr_ns() */
-	__PIDTYPE_TGID
 };
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 445bdf5b1f64..06b4e3bda93a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1275,12 +1275,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk)
 
 static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 {
-	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns);
+	return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
 }
 
 static inline pid_t task_tgid_vnr(struct task_struct *tsk)
 {
-	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL);
+	return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL);
 }
 
 static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 2dcded16eb1e..ee30a5ba475f 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -147,7 +147,6 @@ struct signal_struct {
 #endif
 
 	/* PID/PID hash table linkage. */
-	struct pid *leader_pid;
 	struct pid *pids[PIDTYPE_MAX];
 
 #ifdef CONFIG_NO_HZ_FULL
@@ -571,7 +570,7 @@ struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 
 static inline struct pid *task_tgid(struct task_struct *task)
 {
-	return task->signal->leader_pid;
+	return task->signal->pids[PIDTYPE_TGID];
 }
 
 /*
@@ -607,7 +606,7 @@ static inline bool thread_group_leader(struct task_struct *p)
  */
 static inline bool has_group_leader_pid(struct task_struct *p)
 {
-	return task_pid(p) == p->signal->leader_pid;
+	return task_pid(p) == task_tgid(p);
 }
 
 static inline
-- 
cgit v1.2.3


From 24122c7f4969adeeaeca3fb1656a31569e9aa59b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 20 Jul 2018 14:30:23 -0500
Subject: signal: Pass pid and pid type into send_sigqueue

Make the code more maintainable by performing more of the signal
related work in send_sigqueue.

A quick inspection of do_timer_create will show that this code path
does not lookup a thread group by a thread's pid.  Making it safe
to find the task pointed to by it_pid with "pid_task(it_pid, type)";

This supports the changes needed in fork to tell if a signal was sent
to a single process or a group of processes.

Having the pid to task transition in signal.c will also make it easier
to sort out races with de_thread and and the thread group leader
exiting when it comes time to address that.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ee30a5ba475f..94558ffa82ab 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -330,7 +330,7 @@ extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
-extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
+extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 
 static inline int restart_syscall(void)
-- 
cgit v1.2.3


From 0102498083d58d8b17759642c602b525215e1a54 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 13 Jul 2018 18:40:57 -0500
Subject: signal: Pass pid type into group_send_sig_info

This passes the information we already have at the call sight
into group_send_sig_info.  Ultimatelly allowing for to better handle
signals sent to a group of processes.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/signal.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3c5200137b24..d8f2bf3d41e6 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -254,11 +254,13 @@ static inline int valid_signal(unsigned long sig)
 
 struct timespec;
 struct pt_regs;
+enum pid_type;
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int do_send_sig_info(int sig, struct siginfo *info,
 				struct task_struct *p, bool group);
-extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
+extern int group_send_sig_info(int sig, struct siginfo *info,
+			       struct task_struct *p, enum pid_type type);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern void set_current_blocked(sigset_t *);
-- 
cgit v1.2.3


From 40b3b02535621027f56d248139e0e467573c3098 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 21 Jul 2018 10:45:15 -0500
Subject: signal: Pass pid type into do_send_sig_info

This passes the information we already have at the call sight into
do_send_sig_info.  Ultimately allowing for better handling of signals
sent to a group of processes during fork.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/signal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index d8f2bf3d41e6..fe125b0335f7 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -258,7 +258,7 @@ enum pid_type;
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int do_send_sig_info(int sig, struct siginfo *info,
-				struct task_struct *p, bool group);
+				struct task_struct *p, enum pid_type type);
 extern int group_send_sig_info(int sig, struct siginfo *info,
 			       struct task_struct *p, enum pid_type type);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
-- 
cgit v1.2.3


From c2a7d2a115525d3501d38e23d24875a79a07e15e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:16 -0700
Subject: filesystem-dax: Introduce dax_lock_mapping_entry()

In preparation for implementing support for memory poison (media error)
handling via dax mappings, implement a lock_page() equivalent. Poison
error handling requires rmap and needs guarantees that the page->mapping
association is maintained / valid (inode not freed) for the duration of
the lookup.

In the device-dax case it is sufficient to simply hold a dev_pagemap
reference. In the filesystem-dax case we need to use the entry lock.

Export the entry lock via dax_lock_mapping_entry() that uses
rcu_read_lock() to protect against the inode being freed, and
revalidates the page->mapping association under xa_lock().

Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/dax.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index deb0f663252f..450b28db9533 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+bool dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool dax_lock_mapping_entry(struct page *page)
+{
+	if (IS_DAX(page->mapping->host))
+		return true;
+	return false;
+}
+
+static inline void dax_unlock_mapping_entry(struct page *page)
+{
+}
 #endif
 
 int dax_read_lock(void);
-- 
cgit v1.2.3


From 6100e34b2526e1dc3dbcc47fea2677974d6aaea5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:21 -0700
Subject: mm, memory_failure: Teach memory_failure() about dev_pagemap pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mce: Uncorrected hardware memory error in user-access at af34214200
    {1}[Hardware Error]: It has been corrected by h/w and requires no further action
    mce: [Hardware Error]: Machine check events logged
    {1}[Hardware Error]: event severity: corrected
    Memory failure: 0xaf34214: reserved kernel page still referenced by 1 users
    [..]
    Memory failure: 0xaf34214: recovery action for reserved kernel page: Failed
    mce: Memory error not recovered

In contrast to typical memory, dev_pagemap pages may be dax mapped. With
dax there is no possibility to map in another page dynamically since dax
establishes 1:1 physical address to file offset associations. Also
dev_pagemap pages associated with NVDIMM / persistent memory devices can
internal remap/repair addresses with poison. While memory_failure()
assumes that it can discard typical poisoned pages and keep them
unmapped indefinitely, dev_pagemap pages may be returned to service
after the error is cleared.

Teach memory_failure() to detect and handle MEMORY_DEVICE_HOST
dev_pagemap pages that have poison consumed by userspace. Mark the
memory as UC instead of unmapping it completely to allow ongoing access
via the device driver (nd_pmem). Later, nd_pmem will grow support for
marking the page back to WB when the error is cleared.

Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a0fbb9ffe380..374e5e9284f7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2725,6 +2725,7 @@ enum mf_action_page_type {
 	MF_MSG_TRUNCATED_LRU,
 	MF_MSG_BUDDY,
 	MF_MSG_BUDDY_2ND,
+	MF_MSG_DAX,
 	MF_MSG_UNKNOWN,
 };
 
-- 
cgit v1.2.3


From 10ac86884b4d7642a235f9da61367c3f8d2ab2ff Mon Sep 17 00:00:00 2001
From: Yisheng Xie <ysxie@foxmail.com>
Date: Tue, 24 Jul 2018 19:11:26 +0200
Subject: fbcon: introduce for_each_registered_fb() helper

Following pattern is often used:

 for (i = 0; i < FB_MAX; i++) {
        if (registered_fb[i]) {
                ...
        }
 }

Therefore, as Andy's suggestion, for_each_registered_fb() helper can
be introduced to make the code easier to read and write by reducing
indentation level. It also saves few lines of code in each occurrence.

This patch convert all part here at the same time.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Yisheng Xie <ysxie@foxmail.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: David Lechner <david@lechnology.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/linux/fb.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index aa74a228bb92..fd31e6f24b8d 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -650,6 +650,10 @@ extern struct fb_info *registered_fb[FB_MAX];
 extern int num_registered_fb;
 extern struct class *fb_class;
 
+#define for_each_registered_fb(i)		\
+	for (i = 0; i < FB_MAX; i++)		\
+		if (!registered_fb[i]) {} else
+
 extern int lock_fb_info(struct fb_info *info);
 
 static inline void unlock_fb_info(struct fb_info *info)
-- 
cgit v1.2.3


From 3d910ef71732d15f251ca9b679da634adca72f5a Mon Sep 17 00:00:00 2001
From: Yisheng Xie <ysxie@foxmail.com>
Date: Tue, 24 Jul 2018 19:11:26 +0200
Subject: fbdev: fix typo in comment

Change beeng to being and occured to occurred.

Signed-off-by: Yisheng Xie <ysxie@foxmail.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: David Lechner <david@lechnology.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/linux/fb.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index fd31e6f24b8d..3e7e75383d32 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -126,7 +126,7 @@ struct fb_cursor_user {
 
 /*	The resolution of the passed in fb_info about to change */ 
 #define FB_EVENT_MODE_CHANGE		0x01
-/*	The display on this fb_info is beeing suspended, no access to the
+/*	The display on this fb_info is being suspended, no access to the
  *	framebuffer is allowed any more after that call returns
  */
 #define FB_EVENT_SUSPEND		0x02
@@ -159,9 +159,9 @@ struct fb_cursor_user {
 #define FB_EVENT_FB_UNBIND              0x0E
 /*      CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */
 #define FB_EVENT_REMAP_ALL_CONSOLE      0x0F
-/*      A hardware display blank early change occured */
+/*      A hardware display blank early change occurred */
 #define FB_EARLY_EVENT_BLANK		0x10
-/*      A hardware display blank revert early change occured */
+/*      A hardware display blank revert early change occurred */
 #define FB_R_EARLY_EVENT_BLANK		0x11
 
 struct fb_event {
-- 
cgit v1.2.3


From 1560d0848a1a84db6c1d9b17c14273c0dae41828 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 25 Jul 2018 14:34:29 +0200
Subject: rtc: remove rtc_irq_register/rtc_irq_unregister

The rtc_irq_* interface is not used from outside the RTC subsytem since
2016.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/rtc.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 6268208760e9..f868d6b619ab 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -204,10 +204,6 @@ extern void rtc_update_irq(struct rtc_device *rtc,
 extern struct rtc_device *rtc_class_open(const char *name);
 extern void rtc_class_close(struct rtc_device *rtc);
 
-extern int rtc_irq_register(struct rtc_device *rtc,
-				struct rtc_task *task);
-extern void rtc_irq_unregister(struct rtc_device *rtc,
-				struct rtc_task *task);
 extern int rtc_irq_set_state(struct rtc_device *rtc,
 				struct rtc_task *task, int enabled);
 extern int rtc_irq_set_freq(struct rtc_device *rtc,
-- 
cgit v1.2.3


From acecb3ad8b21a519ce4ad728106d45d4e978bb56 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 25 Jul 2018 14:58:10 +0200
Subject: rtc: remove irq_task and irq_task_lock

There is no way to set a periodic task anymore, remove task pointer and
lock.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/rtc.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index f868d6b619ab..8cc23fdfd4ee 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -121,8 +121,6 @@ struct rtc_device {
 	wait_queue_head_t irq_queue;
 	struct fasync_struct *async_queue;
 
-	struct rtc_task *irq_task;
-	spinlock_t irq_task_lock;
 	int irq_freq;
 	int max_user_freq;
 
-- 
cgit v1.2.3


From 8719d3c9188b38db462a77ecd8c7a8e25e7e8c4c Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 25 Jul 2018 15:07:09 +0200
Subject: rtc: simplify rtc_irq_set_state/rtc_irq_set_freq

The PIE doesn't handle tasks anymore, remove the pointer from the
interface.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/rtc.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 8cc23fdfd4ee..bf4d375025d1 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -202,10 +202,8 @@ extern void rtc_update_irq(struct rtc_device *rtc,
 extern struct rtc_device *rtc_class_open(const char *name);
 extern void rtc_class_close(struct rtc_device *rtc);
 
-extern int rtc_irq_set_state(struct rtc_device *rtc,
-				struct rtc_task *task, int enabled);
-extern int rtc_irq_set_freq(struct rtc_device *rtc,
-				struct rtc_task *task, int freq);
+extern int rtc_irq_set_state(struct rtc_device *rtc, int enabled);
+extern int rtc_irq_set_freq(struct rtc_device *rtc, int freq);
 extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled);
 extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
 extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
-- 
cgit v1.2.3


From 1f45a4db3657c5ecafb7d3331536c987e6d18311 Mon Sep 17 00:00:00 2001
From: Paul McKenney <paulmck@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2018 11:21:43 -0700
Subject: srcu: Add notrace variants of srcu_read_{lock,unlock}

This is needed for a future tracepoint patch that uses srcu, and to make
sure it doesn't call into lockdep.

tracepoint code already calls notrace variants for rcu_read_lock_sched
so this patch does the same for srcu which will be used in a later
patch. Keeps it consistent with rcu-sched.

[Joel: Added commit message]
Link: http://lkml.kernel.org/r/20180628182149.226164-2-joel@joelfernandes.org

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Paul McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcu.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 91494d7e8e41..3e72a291c401 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -195,6 +195,16 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 	return retval;
 }
 
+/* Used by tracing, cannot be traced and cannot invoke lockdep. */
+static inline notrace int
+srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp)
+{
+	int retval;
+
+	retval = __srcu_read_lock(sp);
+	return retval;
+}
+
 /**
  * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
  * @sp: srcu_struct in which to unregister the old reader.
@@ -209,6 +219,13 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__srcu_read_unlock(sp, idx);
 }
 
+/* Used by tracing, cannot be traced and cannot call lockdep. */
+static inline notrace void
+srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp)
+{
+	__srcu_read_unlock(sp, idx);
+}
+
 /**
  * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
  *
-- 
cgit v1.2.3


From 0b764a6e4e19dc254caca636002eaa47b1d0b0af Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Thu, 28 Jun 2018 11:21:44 -0700
Subject: srcu: Add notrace variant of srcu_dereference

In the last patch in this series, we are making lockdep register hooks
onto the irq_{disable,enable} tracepoints. These tracepoints use the
_rcuidle tracepoint variant. In this series we switch the _rcuidle
tracepoint callers to use SRCU instead of sched-RCU. Inorder to
dereference the pointer to the probe functions, we could call
srcu_dereference, however this API will call back into lockdep to check
if the lock is held *before* the lockdep probe hooks have a chance to
run and annotate the IRQ enabled/disabled state.

For this reason we need a notrace variant of srcu_dereference since
otherwise we get lockdep splats. This patch adds the needed
srcu_dereference_notrace variant.

Link: http://lkml.kernel.org/r/20180628182149.226164-3-joel@joelfernandes.org

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcu.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 3e72a291c401..67135d4a8a30 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -169,6 +169,11 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp)
  */
 #define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0)
 
+/**
+ * srcu_dereference_notrace - no tracing and no lockdep calls from here
+ */
+#define srcu_dereference_notrace(p, sp) srcu_dereference_check((p), (sp), 1)
+
 /**
  * srcu_read_lock - register a new reader for an SRCU-protected structure.
  * @sp: srcu_struct in which to register the new reader.
-- 
cgit v1.2.3


From 72809cbf6748830ae4a59a45bcb2367a6c24d74d Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 26 Jul 2018 21:44:32 +0900
Subject: tracing: Remove orphaned function ftrace_nr_registered_ops()

Remove ftrace_nr_registered_ops() because it is no longer used.

ftrace_nr_registered_ops() has been introduced by commit ea701f11da44
("ftrace: Add selftest to test function trace recursion protection"), but
its caller has been removed by commit 05cbbf643b8e ("tracing: Fix selftest
function recursion accounting"). So it is not called anymore.

Link: http://lkml.kernel.org/r/153260907227.12474.5234899025934963683.stgit@devbox

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ebb77674be90..63af5eb0ff46 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -234,10 +234,6 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1,
  */
 #define register_ftrace_function(ops) ({ 0; })
 #define unregister_ftrace_function(ops) ({ 0; })
-static inline int ftrace_nr_registered_ops(void)
-{
-	return 0;
-}
 static inline void ftrace_kill(void) { }
 static inline void ftrace_free_init_mem(void) { }
 static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { }
@@ -328,8 +324,6 @@ struct seq_file;
 
 extern int ftrace_text_reserved(const void *start, const void *end);
 
-extern int ftrace_nr_registered_ops(void);
-
 struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr);
 
 bool is_ftrace_trampoline(unsigned long addr);
-- 
cgit v1.2.3


From af9b6d7570ca9afbbc6076ab7920d8f00f7e55c1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 29 Jun 2018 12:45:53 -0400
Subject: pNFS: Parse the results of layoutget on open even if permissions
 checks fail

Even if the results of the permissions checks failed, we should parse
the results of the layout on open call so that we can return the
layout if required.
Note that we also want to ignore the sequence counter for whether or not
a layout recall occurred. If the recall pertained to our OPEN, then the
callback will know, and will attempt to wait for us to finih processing
anyway.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/nfs_fs_sb.h | 1 -
 include/linux/nfs_xdr.h   | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 74ae3e1d19a0..2c18d618604e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -28,7 +28,6 @@ struct nfs41_impl_id;
 struct nfs_client {
 	refcount_t		cl_count;
 	atomic_t		cl_mds_count;
-	seqcount_t		cl_callback_count;
 	int			cl_cons_state;	/* current construction state (-ve: init error) */
 #define NFS_CS_READY		0		/* ready to be used */
 #define NFS_CS_INITING		1		/* busy initialising */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 712eed156d09..3b7325cfb291 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -271,7 +271,6 @@ struct nfs4_layoutget {
 	struct nfs4_layoutget_args args;
 	struct nfs4_layoutget_res res;
 	struct rpc_cred *cred;
-	unsigned callback_count;
 	gfp_t gfp_flags;
 };
 
-- 
cgit v1.2.3


From e15d54d5009688ccb2a5312f3b70d631615329c9 Mon Sep 17 00:00:00 2001
From: Yunlei He <heyunlei@huawei.com>
Date: Wed, 27 Jun 2018 14:46:21 +0800
Subject: f2fs: Allocate and stat mem used by free nid bitmap more accurately

This patch used f2fs_bitmap_size macro to calculate mem used by
free nid bitmap, and stat used mem including aligned part.

Signed-off-by: Yunlei He <heyunlei@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index aa5db8b5521a..f70f8ac9c4f4 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -304,11 +304,6 @@ struct f2fs_node {
  * For NAT entries
  */
 #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
-#define NAT_ENTRY_BITMAP_SIZE	((NAT_ENTRY_PER_BLOCK + 7) / 8)
-#define NAT_ENTRY_BITMAP_SIZE_ALIGNED				\
-	((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) /		\
-	BITS_PER_LONG * BITS_PER_LONG)
-
 
 struct f2fs_nat_entry {
 	__u8 version;		/* latest version of cached nat entry */
-- 
cgit v1.2.3


From a5ec5a7bfd1f28d1905499641c9f589be36808c1 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 27 Jul 2018 13:47:02 -0700
Subject: ata: ahci: Support state with min power but Partial low power state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently when min_power policy is selected, the partial low power state
is not entered and link will try aggressively enter to only slumber state.
Add a new policy which still enable DEVSLP but also try to enter partial
low power state. This policy is presented as "min_power_with_partial".

For information the difference between partial and slumber
Partial – PHY logic is powered up, and in a reduced power state. The link
PM exit latency to active state maximum is 10 ns.
Slumber – PHY logic is powered up, and in a reduced power state. The link
PM exit latency to active state maximum is 10 ms.
Devslp – PHY logic is powered down. The link PM exit latency from this
state to active state maximum is 20 ms, unless otherwise specified by
DETO.

Suggested-and-reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index aa8583655a18..80c2bd202367 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -522,7 +522,8 @@ enum ata_lpm_policy {
 	ATA_LPM_MAX_POWER,
 	ATA_LPM_MED_POWER,
 	ATA_LPM_MED_POWER_WITH_DIPM, /* Med power + DIPM as win IRST does */
-	ATA_LPM_MIN_POWER,
+	ATA_LPM_MIN_POWER_WITH_PARTIAL, /* Min Power + partial and slumber */
+	ATA_LPM_MIN_POWER, /* Min power + no partial (slumber only) */
 };
 
 enum ata_lpm_hints {
-- 
cgit v1.2.3


From 82b98ca566ca2af170eb0ab50cef09dd7335fa55 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Thu, 5 Jul 2018 16:48:50 +0000
Subject: net/sunrpc: Make rpc_auth_create_args a const

This turns rpc_auth_create_args into a const as it gets passed through the
auth stack.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/auth.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index d9af474a857d..58a6765c1c5e 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -125,7 +125,8 @@ struct rpc_authops {
 	struct module		*owner;
 	rpc_authflavor_t	au_flavor;	/* flavor (RPC_AUTH_*) */
 	char *			au_name;
-	struct rpc_auth *	(*create)(struct rpc_auth_create_args *, struct rpc_clnt *);
+	struct rpc_auth *	(*create)(const struct rpc_auth_create_args *,
+					  struct rpc_clnt *);
 	void			(*destroy)(struct rpc_auth *);
 
 	int			(*hash_cred)(struct auth_cred *, unsigned int);
@@ -174,7 +175,7 @@ struct rpc_cred *	rpc_lookup_generic_cred(struct auth_cred *, int, gfp_t);
 struct rpc_cred *	rpc_lookup_machine_cred(const char *service_name);
 int			rpcauth_register(const struct rpc_authops *);
 int			rpcauth_unregister(const struct rpc_authops *);
-struct rpc_auth *	rpcauth_create(struct rpc_auth_create_args *,
+struct rpc_auth *	rpcauth_create(const struct rpc_auth_create_args *,
 				struct rpc_clnt *);
 void			rpcauth_release(struct rpc_auth *);
 rpc_authflavor_t	rpcauth_get_pseudoflavor(rpc_authflavor_t,
-- 
cgit v1.2.3


From e6753f23d961d601dbae50a2fc2a3975c9715b14 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Mon, 30 Jul 2018 15:24:22 -0700
Subject: tracepoint: Make rcuidle tracepoint callers use SRCU

In recent tests with IRQ on/off tracepoints, a large performance
overhead ~10% is noticed when running hackbench. This is root caused to
calls to rcu_irq_enter_irqson and rcu_irq_exit_irqson from the
tracepoint code. Following a long discussion on the list [1] about this,
we concluded that srcu is a better alternative for use during rcu idle.
Although it does involve extra barriers, its lighter than the sched-rcu
version which has to do additional RCU calls to notify RCU idle about
entry into RCU sections.

In this patch, we change the underlying implementation of the
trace_*_rcuidle API to use SRCU. This has shown to improve performance
alot for the high frequency irq enable/disable tracepoints.

Test: Tested idle and preempt/irq tracepoints.

Here are some performance numbers:

With a run of the following 30 times on a single core x86 Qemu instance
with 1GB memory:
hackbench -g 4 -f 2 -l 3000

Completion times in seconds. CONFIG_PROVE_LOCKING=y.

No patches (without this series)
Mean: 3.048
Median: 3.025
Std Dev: 0.064

With Lockdep using irq tracepoints with RCU implementation:
Mean: 3.451   (-11.66 %)
Median: 3.447 (-12.22%)
Std Dev: 0.049

With Lockdep using irq tracepoints with SRCU implementation (this series):
Mean: 3.020   (I would consider the improvement against the "without
	       this series" case as just noise).
Median: 3.013
Std Dev: 0.033

[1] https://patchwork.kernel.org/patch/10344297/

[remove rcu_read_lock_sched_notrace as its the equivalent of
preempt_disable_notrace and is unnecessary to call in tracepoint code]
Link: http://lkml.kernel.org/r/20180730222423.196630-3-joel@joelfernandes.org

Cleaned-up-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
[ Simplified WARN_ON_ONCE() ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 19a690b559ca..d9a084c72541 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -15,6 +15,7 @@
  */
 
 #include <linux/smp.h>
+#include <linux/srcu.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/cpumask.h>
@@ -33,6 +34,8 @@ struct trace_eval_map {
 
 #define TRACEPOINT_DEFAULT_PRIO	10
 
+extern struct srcu_struct tracepoint_srcu;
+
 extern int
 tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
 extern int
@@ -75,10 +78,16 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
  * probe unregistration and the end of module exit to make sure there is no
  * caller executing a probe when it is freed.
  */
+#ifdef CONFIG_TRACEPOINTS
 static inline void tracepoint_synchronize_unregister(void)
 {
+	synchronize_srcu(&tracepoint_srcu);
 	synchronize_sched();
 }
+#else
+static inline void tracepoint_synchronize_unregister(void)
+{ }
+#endif
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
 extern int syscall_regfunc(void);
@@ -129,18 +138,31 @@ extern void syscall_unregfunc(void);
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args, cond, rcucheck)			\
+#define __DO_TRACE(tp, proto, args, cond, rcuidle)			\
 	do {								\
 		struct tracepoint_func *it_func_ptr;			\
 		void *it_func;						\
 		void *__data;						\
+		int __maybe_unused idx = 0;				\
 									\
 		if (!(cond))						\
 			return;						\
-		if (rcucheck)						\
-			rcu_irq_enter_irqson();				\
-		rcu_read_lock_sched_notrace();				\
-		it_func_ptr = rcu_dereference_sched((tp)->funcs);	\
+									\
+		/* srcu can't be used from NMI */			\
+		WARN_ON_ONCE(rcuidle && in_nmi());			\
+									\
+		/* keep srcu and sched-rcu usage consistent */		\
+		preempt_disable_notrace();				\
+									\
+		/*							\
+		 * For rcuidle callers, use srcu since sched-rcu	\
+		 * doesn't work from the idle path.			\
+		 */							\
+		if (rcuidle)						\
+			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
+									\
+		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
+									\
 		if (it_func_ptr) {					\
 			do {						\
 				it_func = (it_func_ptr)->func;		\
@@ -148,9 +170,11 @@ extern void syscall_unregfunc(void);
 				((void(*)(proto))(it_func))(args);	\
 			} while ((++it_func_ptr)->func);		\
 		}							\
-		rcu_read_unlock_sched_notrace();			\
-		if (rcucheck)						\
-			rcu_irq_exit_irqson();				\
+									\
+		if (rcuidle)						\
+			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
+									\
+		preempt_enable_notrace();				\
 	} while (0)
 
 #ifndef MODULE
-- 
cgit v1.2.3


From 56e6c104e4f151e19eb410004405ec52b4f8605a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 31 Jul 2018 13:06:57 +0200
Subject: console: Replace #if 0 with atomic var 'ignore_console_lock_warning'

The macro WARN_CONSOLE_UNLOCKED prints a warning when a thread enters
the console's critical section without having acquired the console
lock. The console lock can be ignored when debugging the console using
printk, but this makes WARN_CONSOLE_UNLOCKED generate unnecessary
warnings.

The variable ignore_console_lock_warning temporarily disables
WARN_CONSOLE_UNLOCKED. Developers interested in debugging the console's
critical sections should increment it before entering the CS and
decrement it after leaving the CS. Setting ignore_console_lock_warning
is only for debugging. Regular operation should not manipulate it.

Acknoledgements: This patch is based on an earlier version by Steven
Rostedt. The use of atomic increment/decrement was suggested by Petr
Mladek.

Link: http://lkml.kernel.org/r/717e6337-e7a6-7a92-1c1b-8929a25696b5@suse.de
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
[b.zolnierkie: use <linux/atomic.h>]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/linux/console.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/console.h b/include/linux/console.h
index f59f3dbca65c..ec9bdb3d7bab 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -14,6 +14,7 @@
 #ifndef _LINUX_CONSOLE_H_
 #define _LINUX_CONSOLE_H_ 1
 
+#include <linux/atomic.h>
 #include <linux/types.h>
 
 struct vc_data;
@@ -201,11 +202,14 @@ void vcs_make_sysfs(int index);
 void vcs_remove_sysfs(int index);
 
 /* Some debug stub to catch some of the obvious races in the VT code */
-#if 1
-#define WARN_CONSOLE_UNLOCKED()	WARN_ON(!is_console_locked() && !oops_in_progress)
-#else
-#define WARN_CONSOLE_UNLOCKED()
-#endif
+#define WARN_CONSOLE_UNLOCKED()						\
+	WARN_ON(!atomic_read(&ignore_console_lock_warning) &&		\
+		!is_console_locked() && !oops_in_progress)
+/*
+ * Increment ignore_console_lock_warning if you need to quiet
+ * WARN_CONSOLE_UNLOCKED() for debugging purposes.
+ */
+extern atomic_t ignore_console_lock_warning;
 
 /* VESA Blanking Levels */
 #define VESA_NO_BLANKING        0
-- 
cgit v1.2.3


From c3bc8fd637a9623f5c507bd18f9677effbddf584 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Mon, 30 Jul 2018 15:24:23 -0700
Subject: tracing: Centralize preemptirq tracepoints and unify their usage

This patch detaches the preemptirq tracepoints from the tracers and
keeps it separate.

Advantages:
* Lockdep and irqsoff event can now run in parallel since they no longer
have their own calls.

* This unifies the usecase of adding hooks to an irqsoff and irqson
event, and a preemptoff and preempton event.
  3 users of the events exist:
  - Lockdep
  - irqsoff and preemptoff tracers
  - irqs and preempt trace events

The unification cleans up several ifdefs and makes the code in preempt
tracer and irqsoff tracers simpler. It gets rid of all the horrific
ifdeferry around PROVE_LOCKING and makes configuration of the different
users of the tracepoints more easy and understandable. It also gets rid
of the time_* function calls from the lockdep hooks used to call into
the preemptirq tracer which is not needed anymore. The negative delta in
lines of code in this patch is quite large too.

In the patch we introduce a new CONFIG option PREEMPTIRQ_TRACEPOINTS
as a single point for registering probes onto the tracepoints. With
this,
the web of config options for preempt/irq toggle tracepoints and its
users becomes:

 PREEMPT_TRACER   PREEMPTIRQ_EVENTS  IRQSOFF_TRACER PROVE_LOCKING
       |                 |     \         |           |
       \    (selects)    /      \        \ (selects) /
      TRACE_PREEMPT_TOGGLE       ----> TRACE_IRQFLAGS
                      \                  /
                       \ (depends on)   /
                     PREEMPTIRQ_TRACEPOINTS

Other than the performance tests mentioned in the previous patch, I also
ran the locking API test suite. I verified that all tests cases are
passing.

I also injected issues by not registering lockdep probes onto the
tracepoints and I see failures to confirm that the probes are indeed
working.

This series + lockdep probes not registered (just to inject errors):
[    0.000000]      hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]      soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/12:FAILED|FAILED|  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/21:FAILED|FAILED|  ok  |
[    0.000000]          hard-safe-A + irqs-on/12:FAILED|FAILED|  ok  |
[    0.000000]          soft-safe-A + irqs-on/12:FAILED|FAILED|  ok  |
[    0.000000]          hard-safe-A + irqs-on/21:FAILED|FAILED|  ok  |
[    0.000000]          soft-safe-A + irqs-on/21:FAILED|FAILED|  ok  |
[    0.000000]     hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
[    0.000000]     soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |

With this series + lockdep probes registered, all locking tests pass:

[    0.000000]      hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]      soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/12:  ok  |  ok  |  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/21:  ok  |  ok  |  ok  |
[    0.000000]          hard-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
[    0.000000]          soft-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
[    0.000000]          hard-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
[    0.000000]          soft-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
[    0.000000]     hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
[    0.000000]     soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |

Link: http://lkml.kernel.org/r/20180730222423.196630-4-joel@joelfernandes.org

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h   | 11 +----------
 include/linux/irqflags.h | 11 ++++++++---
 include/linux/lockdep.h  |  8 +++++---
 include/linux/preempt.h  |  2 +-
 4 files changed, 15 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 63af5eb0ff46..a397907e8d72 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -701,16 +701,7 @@ static inline unsigned long get_lock_parent_ip(void)
 	return CALLER_ADDR2;
 }
 
-#ifdef CONFIG_IRQSOFF_TRACER
-  extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
-  extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
-#else
-  static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { }
-  static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { }
-#endif
-
-#if defined(CONFIG_PREEMPT_TRACER) || \
-	(defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
+#ifdef CONFIG_TRACE_PREEMPT_TOGGLE
   extern void trace_preempt_on(unsigned long a0, unsigned long a1);
   extern void trace_preempt_off(unsigned long a0, unsigned long a1);
 #else
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 9700f00bbc04..50edb9cbbd26 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -15,9 +15,16 @@
 #include <linux/typecheck.h>
 #include <asm/irqflags.h>
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+/* Currently trace_softirqs_on/off is used only by lockdep */
+#ifdef CONFIG_PROVE_LOCKING
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
+#else
+# define trace_softirqs_on(ip)	do { } while (0)
+# define trace_softirqs_off(ip)	do { } while (0)
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
   extern void trace_hardirqs_on(void);
   extern void trace_hardirqs_off(void);
 # define trace_hardirq_context(p)	((p)->hardirq_context)
@@ -43,8 +50,6 @@ do {						\
 #else
 # define trace_hardirqs_on()		do { } while (0)
 # define trace_hardirqs_off()		do { } while (0)
-# define trace_softirqs_on(ip)		do { } while (0)
-# define trace_softirqs_off(ip)		do { } while (0)
 # define trace_hardirq_context(p)	0
 # define trace_softirq_context(p)	0
 # define trace_hardirqs_enabled(p)	0
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6fc77d4dbdcd..a8113357ceeb 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -266,7 +266,8 @@ struct held_lock {
 /*
  * Initialization, self-test and debugging-output methods:
  */
-extern void lockdep_info(void);
+extern void lockdep_init(void);
+extern void lockdep_init_early(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
@@ -406,7 +407,8 @@ static inline void lockdep_on(void)
 # define lock_downgrade(l, i)			do { } while (0)
 # define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
-# define lockdep_info()				do { } while (0)
+# define lockdep_init()				do { } while (0)
+# define lockdep_init_early()			do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
 # define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
@@ -532,7 +534,7 @@ do {								\
 
 #endif /* CONFIG_LOCKDEP */
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_PROVE_LOCKING
 extern void print_irqtrace_events(struct task_struct *curr);
 #else
 static inline void print_irqtrace_events(struct task_struct *curr)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 5bd3f151da78..c01813c3fbe9 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -150,7 +150,7 @@
  */
 #define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)
 
-#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
 extern void preempt_count_add(int val);
 extern void preempt_count_sub(int val);
 #define preempt_count_dec_and_test() \
-- 
cgit v1.2.3


From 016583d7030cec9b69e0d55269a5967f4ee871d2 Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Tue, 31 Jul 2018 10:10:51 -0400
Subject: sunrpc: Change rpc_print_iostats to rpc_clnt_show_stats and handle
 rpc_clnt clones

The existing rpc_print_iostats has a few shortcomings.  First, the naming
is not consistent with other functions in the kernel that display stats.
Second, it is really displaying stats for an rpc_clnt structure as it
displays both xprt stats and per-op stats.  Third, it does not handle
rpc_clnt clones, which is important for the one in-kernel tree caller
of this function, the NFS client's nfs_show_stats function.

Fix all of the above by renaming the rpc_print_iostats to
rpc_clnt_show_stats and looping through any rpc_clnt clones via
cl_parent.

Once this interface is fixed, this addresses a problem with NFSv4.
Before this patch, the /proc/self/mountstats always showed incorrect
counts for NFSv4 lease and session related opcodes such as SEQUENCE,
RENEW, SETCLIENTID, CREATE_SESSION, etc.  These counts were always 0
even though many ops would go over the wire.  The reason for this is
there are multiple rpc_clnt structures allocated for any given NFSv4
mount, and inside nfs_show_stats() we callled into rpc_print_iostats()
which only handled one of them, nfs_server->client.  Fix these counts
by calling sunrpc's new rpc_clnt_show_stats() function, which handles
cloned rpc_clnt structs and prints the stats together.

Note that one side-effect of the above is that multiple mounts from
the same NFS server will show identical counts in the above ops due
to the fact the one rpc_clnt (representing the NFSv4 client state)
is shared across mounts.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/metrics.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
index 9baed7b355b2..1b3751327575 100644
--- a/include/linux/sunrpc/metrics.h
+++ b/include/linux/sunrpc/metrics.h
@@ -82,7 +82,7 @@ void			rpc_count_iostats(const struct rpc_task *,
 					  struct rpc_iostats *);
 void			rpc_count_iostats_metrics(const struct rpc_task *,
 					  struct rpc_iostats *);
-void			rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
+void			rpc_clnt_show_stats(struct seq_file *, struct rpc_clnt *);
 void			rpc_free_iostats(struct rpc_iostats *);
 
 #else  /*  CONFIG_PROC_FS  */
@@ -95,7 +95,7 @@ static inline void rpc_count_iostats_metrics(const struct rpc_task *task,
 {
 }
 
-static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {}
+static inline void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) {}
 static inline void rpc_free_iostats(struct rpc_iostats *stats) {}
 
 #endif  /*  CONFIG_PROC_FS  */
-- 
cgit v1.2.3


From 0f90be132cbf1537d87a6a8b9e80867adac892f6 Mon Sep 17 00:00:00 2001
From: Bill Baker <Bill.Baker@Oracle.com>
Date: Tue, 19 Jun 2018 16:24:58 -0500
Subject: NFSv4 client live hangs after live data migration recovery

After a live data migration event at the NFS server, the client may send
I/O requests to the wrong server, causing a live hang due to repeated
recovery events.  On the wire, this will appear as an I/O request failing
with NFS4ERR_BADSESSION, followed by successful CREATE_SESSION, repeatedly.
NFS4ERR_BADSSESSION is returned because the session ID being used was
issued by the other server and is not valid at the old server.

The failure is caused by async worker threads having cached the transport
(xprt) in the rpc_task structure.  After the migration recovery completes,
the task is redispatched and the task resends the request to the wrong
server based on the old value still present in tk_xprt.

The solution is to recompute the tk_xprt field of the rpc_task structure
so that the request goes to the correct server.

Signed-off-by: Bill Baker <bill.baker@oracle.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Helen Chao <helen.chao@oracle.com>
Fixes: fb43d17210ba ("SUNRPC: Use the multipath iterator to assign a ...")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/clnt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 9b11b6a0978c..73d5c4a870fa 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -156,6 +156,7 @@ int		rpc_switch_client_transport(struct rpc_clnt *,
 
 void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
+void		rpc_task_release_transport(struct rpc_task *);
 void		rpc_task_release_client(struct rpc_task *);
 
 int		rpcb_create_local(struct net *);
-- 
cgit v1.2.3


From 3ebea280d7e9b610fa3d31c9cfd556b1705eeedf Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 1 Aug 2018 21:08:30 -0400
Subject: ring-buffer: Make ring_buffer_record_is_on() return bool

The value of ring_buffer_record_is_on() is either true or false, so have its
return value be bool.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 003d09ab308d..5176124fc4ba 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -164,7 +164,7 @@ void ring_buffer_record_disable(struct ring_buffer *buffer);
 void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
-int ring_buffer_record_is_on(struct ring_buffer *buffer);
+bool ring_buffer_record_is_on(struct ring_buffer *buffer);
 int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
-- 
cgit v1.2.3


From d7224c0e128c7337c0b0f66ac20921fbbf4efc14 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 1 Aug 2018 21:09:50 -0400
Subject: ring-buffer: Make ring_buffer_record_is_set_on() return bool

The value of ring_buffer_record_is_set_on() is either true or false, so have
its return value be bool.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 5176124fc4ba..0940fda59872 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -165,7 +165,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
 bool ring_buffer_record_is_on(struct ring_buffer *buffer);
-int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
+bool ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
-- 
cgit v1.2.3


From 5a5ba10f44fa1cd081cec38389e1b47f438fe25b Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 26 Jul 2018 15:40:56 +0200
Subject: rtc: remove struct rtc_task

Include rtc_task members directly in rtc_timer member.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/rtc.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index bf4d375025d1..6aedc30003e7 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -87,16 +87,11 @@ struct rtc_class_ops {
 	int (*set_offset)(struct device *, long offset);
 };
 
-typedef struct rtc_task {
-	void (*func)(void *private_data);
-	void *private_data;
-} rtc_task_t;
-
-
 struct rtc_timer {
-	struct rtc_task	task;
 	struct timerqueue_node node;
 	ktime_t period;
+	void (*func)(void *private_data);
+	void *private_data;
 	int enabled;
 };
 
-- 
cgit v1.2.3


From 6d54228fd1f293d00576ab2c3d2e4992c7cce12f Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 25 Jun 2018 17:26:55 +0200
Subject: libceph: make ceph_osdc_notify{,_ack}() payload_len u32

The wire format dictates that payload_len fits into 4 bytes.  Take u32
instead of size_t to reflect that.

All callers pass a small integer, so no changes required.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 0d6ee04b4c41..d6fe7e4df8cf 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -528,12 +528,12 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
 			 u64 notify_id,
 			 u64 cookie,
 			 void *payload,
-			 size_t payload_len);
+			 u32 payload_len);
 int ceph_osdc_notify(struct ceph_osd_client *osdc,
 		     struct ceph_object_id *oid,
 		     struct ceph_object_locator *oloc,
 		     void *payload,
-		     size_t payload_len,
+		     u32 payload_len,
 		     u32 timeout,
 		     struct page ***preply_pages,
 		     size_t *preply_len);
-- 
cgit v1.2.3


From c9ed51c9123ab5e8f79b7d53a9afd786b43d4fe6 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 27 Jun 2018 16:42:51 +0200
Subject: libceph: change ceph_pagelist_encode_string() to take u32

The wire format dictates that the length of string fits into 4 bytes.
Take u32 instead of size_t to reflect that.

We were already truncating len in ceph_pagelist_encode_32() -- this
just pushes that truncation one level up.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/pagelist.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index 7edcded07641..d0223364349f 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -68,7 +68,7 @@ static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
 	return ceph_pagelist_append(pl, &v, 1);
 }
 static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
-					      char *s, size_t len)
+					      char *s, u32 len)
 {
 	int ret = ceph_pagelist_encode_32(pl, len);
 	if (ret)
-- 
cgit v1.2.3


From 473bd2d780d1699d81b25f57c0ec4de633a28eb8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 22:18:34 +0200
Subject: libceph: use timespec64 in for keepalive2 and ticket validity

ceph_con_keepalive_expired() is the last user of timespec_add() and some
of the last uses of ktime_get_real_ts().  Replacing this with timespec64
based interfaces  lets us remove that deprecated API.

I'm introducing new ceph_encode_timespec64()/ceph_decode_timespec64()
here that take timespec64 structures and convert to/from ceph_timespec,
which is defined to have an unsigned 32-bit tv_sec member. This extends
the range of valid times to year 2106, avoiding the year 2038 overflow.

The ceph file system portion still uses the old functions for inode
timestamps, this will be done separately after the VFS layer is converted.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/decode.h    | 20 +++++++++++++++++++-
 include/linux/ceph/messenger.h |  2 +-
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index d143ac8879c6..094b9b4a34f3 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -194,8 +194,26 @@ ceph_decode_skip_n(p, end, sizeof(u8), bad)
 	} while (0)
 
 /*
- * struct ceph_timespec <-> struct timespec
+ * struct ceph_timespec <-> struct timespec64
  */
+static inline void ceph_decode_timespec64(struct timespec64 *ts,
+					  const struct ceph_timespec *tv)
+{
+	/*
+	 * This will still overflow in year 2106.  We could extend
+	 * the protocol to steal two more bits from tv_nsec to
+	 * add three more 136 year epochs after that the way ext4
+	 * does if necessary.
+	 */
+	ts->tv_sec = (time64_t)le32_to_cpu(tv->tv_sec);
+	ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
+}
+static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
+					  const struct timespec64 *ts)
+{
+	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
+	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
+}
 static inline void ceph_decode_timespec(struct timespec *ts,
 					const struct ceph_timespec *tv)
 {
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c7dfcb8a1fb2..a718b877c597 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -330,7 +330,7 @@ struct ceph_connection {
 	int in_base_pos;     /* bytes read */
 	__le64 in_temp_ack;  /* for reading an ack */
 
-	struct timespec last_keepalive_ack; /* keepalive2 ack stamp */
+	struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
 
 	struct delayed_work work;	    /* send|recv work */
 	unsigned long       delay;          /* current delay interval */
-- 
cgit v1.2.3


From fac02ddf910814c24f5d9d969dfdab5227f6f3eb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 22:18:37 +0200
Subject: libceph: use timespec64 for r_mtime

The request mtime field is used all over ceph, and is currently
represented as a 'timespec' structure in Linux. This changes it to
timespec64 to allow times beyond 2038, modifying all users at the
same time.

[ Remove now redundant ts variable in writepage_nounlock(). ]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index d6fe7e4df8cf..02096da01845 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -199,7 +199,7 @@ struct ceph_osd_request {
 	/* set by submitter */
 	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
 	struct ceph_snap_context *r_snapc;    /* for writes */
-	struct timespec r_mtime;              /* ditto */
+	struct timespec64 r_mtime;            /* ditto */
 	u64 r_data_offset;                    /* ditto */
 	bool r_linger;                        /* don't resend on failure */
 
@@ -253,7 +253,7 @@ struct ceph_osd_linger_request {
 	struct ceph_osd_request_target t;
 	u32 map_dne_bound;
 
-	struct timespec mtime;
+	struct timespec64 mtime;
 
 	struct kref kref;
 	struct mutex lock;
@@ -508,7 +508,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
 				struct ceph_snap_context *sc,
 				u64 off, u64 len,
 				u32 truncate_seq, u64 truncate_size,
-				struct timespec *mtime,
+				struct timespec64 *mtime,
 				struct page **pages, int nr_pages);
 
 /* watch/notify */
-- 
cgit v1.2.3


From f7e52d8efe8588c5d4b4c78eb33da81d89486c1a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 23 Jul 2018 14:11:40 +0200
Subject: libceph: remove now unused ceph_{en,de}code_timespec()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/decode.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 094b9b4a34f3..a6c2a48d42e0 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -214,18 +214,6 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
 	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
 	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
 }
-static inline void ceph_decode_timespec(struct timespec *ts,
-					const struct ceph_timespec *tv)
-{
-	ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec);
-	ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
-}
-static inline void ceph_encode_timespec(struct ceph_timespec *tv,
-					const struct timespec *ts)
-{
-	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
-	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
-}
 
 /*
  * sockaddr_storage <-> ceph_sockaddr
-- 
cgit v1.2.3


From 262614c4294d33b1f19e0d18c0091d9c329b544a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 26 Jul 2018 15:17:46 +0200
Subject: libceph: store ceph_auth_handshake pointer in ceph_connection

We already copy authorizer_reply_buf and authorizer_reply_buf_len into
ceph_connection.  Factoring out __prepare_write_connect() requires two
more: authorizer_buf and authorizer_buf_len.  Store the pointer to the
handshake in con->auth rather than piling on.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/messenger.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index a718b877c597..021718570b50 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -286,9 +286,8 @@ struct ceph_connection {
 				 attempt for this connection, client */
 	u32 peer_global_seq;  /* peer's global seq for this connection */
 
+	struct ceph_auth_handshake *auth;
 	int auth_retry;       /* true if we need a newer authorizer */
-	void *auth_reply_buf;   /* where to put the authorizer reply */
-	int auth_reply_buf_len;
 
 	struct mutex mutex;
 
-- 
cgit v1.2.3


From 6daca13d2e72bedaaacfc08f873114c9307d5aea Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 27 Jul 2018 19:18:34 +0200
Subject: libceph: add authorizer challenge

When a client authenticates with a service, an authorizer is sent with
a nonce to the service (ceph_x_authorize_[ab]) and the service responds
with a mutation of that nonce (ceph_x_authorize_reply).  This lets the
client verify the service is who it says it is but it doesn't protect
against a replay: someone can trivially capture the exchange and reuse
the same authorizer to authenticate themselves.

Allow the service to reject an initial authorizer with a random
challenge (ceph_x_authorize_challenge).  The client then has to respond
with an updated authorizer proving they are able to decrypt the
service's challenge and that the new authorizer was produced for this
specific connection instance.

The accepting side requires this challenge and response unconditionally
if the client side advertises they have CEPHX_V2 feature bit.

This addresses CVE-2018-1128.

Link: http://tracker.ceph.com/issues/24836
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/auth.h      | 8 ++++++++
 include/linux/ceph/messenger.h | 3 +++
 include/linux/ceph/msgr.h      | 2 +-
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index e931da8424a4..6728c2ee0205 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -64,6 +64,10 @@ struct ceph_auth_client_ops {
 	/* ensure that an existing authorizer is up to date */
 	int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
 				 struct ceph_auth_handshake *auth);
+	int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
+					struct ceph_authorizer *a,
+					void *challenge_buf,
+					int challenge_buf_len);
 	int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
 				       struct ceph_authorizer *a);
 	void (*invalidate_authorizer)(struct ceph_auth_client *ac,
@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
 extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 				       int peer_type,
 				       struct ceph_auth_handshake *a);
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
+				       struct ceph_authorizer *a,
+				       void *challenge_buf,
+				       int challenge_buf_len);
 extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
 					     struct ceph_authorizer *a);
 extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 021718570b50..fc2b4491ee0a 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -31,6 +31,9 @@ struct ceph_connection_operations {
 	struct ceph_auth_handshake *(*get_authorizer) (
 				struct ceph_connection *con,
 			       int *proto, int force_new);
+	int (*add_authorizer_challenge)(struct ceph_connection *con,
+					void *challenge_buf,
+					int challenge_buf_len);
 	int (*verify_authorizer_reply) (struct ceph_connection *con);
 	int (*invalidate_authorizer)(struct ceph_connection *con);
 
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 73ae2a926548..9e50aede46c8 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -91,7 +91,7 @@ struct ceph_entity_inst {
 #define CEPH_MSGR_TAG_SEQ           13 /* 64-bit int follows with seen seq number */
 #define CEPH_MSGR_TAG_KEEPALIVE2    14 /* keepalive2 byte + ceph_timespec */
 #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
-
+#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16  /* cephx v2 doing server challenge */
 
 /*
  * connection negotiation
-- 
cgit v1.2.3


From cc255c76c70f7a87d97939621eae04b600d9f4a1 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 27 Jul 2018 19:25:32 +0200
Subject: libceph: implement CEPHX_V2 calculation mode

Derive the signature from the entire buffer (both AES cipher blocks)
instead of using just the first half of the first block, leaving out
data_crc entirely.

This addresses CVE-2018-1129.

Link: http://tracker.ceph.com/issues/24837
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 3901927cf6a0..6b92b3395fa9 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
 DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
 DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
 DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
-DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING)  // *do not share this bit*
+DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
+DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2)             // *do not share this bit*
 
-DEFINE_CEPH_FEATURE(61, 1, RESERVED2)          // unused, but slow down!
 DEFINE_CEPH_FEATURE(62, 1, RESERVED)           // do not use; used as a sentinal
 DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
 
@@ -210,7 +210,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_SERVER_JEWEL |		\
 	 CEPH_FEATURE_MON_STATEFUL_SUB |	\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
-	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
+	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING |	\
+	 CEPH_FEATURE_CEPHX_V2)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
-- 
cgit v1.2.3


From 088fe47ce952542389e604e83f533811750aaf7c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jul 2018 17:26:49 -0500
Subject: signal: Add calculate_sigpending()

Add a function calculate_sigpending to test to see if any signals are
pending for a new task immediately following fork.  Signals have to
happen either before or after fork.  Today our practice is to push
all of the signals to before the fork, but that has the downside that
frequent or periodic signals can make fork take much much longer than
normal or prevent fork from completing entirely.

So we need move signals that we can after the fork to prevent that.

This updates the code to set TIF_SIGPENDING on a new task if there
are signals or other activities that have moved so that they appear
to happen after the fork.

As the code today restarts if it sees any such activity this won't
immediately have an effect, as there will be no reason for it
to set TIF_SIGPENDING immediately after the fork.

Adding calculate_sigpending means the code in fork can safely be
changed to not always restart if a signal is pending.

The new calculate_sigpending function sets sigpending if there
are pending bits in jobctl, pending signals, the freezer needs
to freeze the new task or the live kernel patching framework
need the new thread to take the slow path to userspace.

I have verified that setting TIF_SIGPENDING does make a new process
take the slow path to userspace before it executes it's first userspace
instruction.

I have looked at the callers of signal_wake_up and the code paths
setting TIF_SIGPENDING and I don't see anything else that needs to be
handled.  The code probably doesn't need to set TIF_SIGPENDING for the
kernel live patching as it uses a separate thread flag as well.  But
at this point it seems safer reuse the recalc_sigpending logic and get
the kernel live patching folks to sort out their story later.

V2: I have moved the test into schedule_tail where siglock can
    be grabbed and recalc_sigpending can be reused directly.
    Further as the last action of setting up a new task this
    guarantees that TIF_SIGPENDING will be properly set in the
    new process.

    The helper calculate_sigpending takes the siglock and
    uncontitionally sets TIF_SIGPENDING and let's recalc_sigpending
    clear TIF_SIGPENDING if it is unnecessary.  This allows reusing
    the existing code and keeps maintenance of the conditions simple.

    Oleg Nesterov <oleg@redhat.com>  suggested the movement
    and pointed out the need to take siglock if this code
    was going to be called while the new task is discoverable.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 94558ffa82ab..b55fd293c1e5 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -372,6 +372,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
  */
 extern void recalc_sigpending_and_wake(struct task_struct *t);
 extern void recalc_sigpending(void);
+extern void calculate_sigpending(void);
 
 extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
 
-- 
cgit v1.2.3


From 4390e9eadbbb6774b7ba03fde0a0fdf3f07db4cd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 3 Aug 2018 20:10:54 -0500
Subject: fork: Skip setting TIF_SIGPENDING in ptrace_init_task

The code in calculate_sigpending will now handle this so
it is just redundant and possibly a little confusing
to continue setting TIF_SIGPENDING in ptrace_init_task.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/ptrace.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 037bf0ef1ae9..4f36431c380b 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -214,8 +214,6 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
 		else
 			sigaddset(&child->pending.signal, SIGSTOP);
-
-		set_tsk_thread_flag(child, TIF_SIGPENDING);
 	}
 	else
 		child->ptracer_cred = NULL;
-- 
cgit v1.2.3


From 924de3b8c9410c404c6eda7abffd282b97b3ff7f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jul 2018 13:38:00 -0500
Subject: fork: Have new threads join on-going signal group stops

There are only two signals that are delivered to every member of a
signal group: SIGSTOP and SIGKILL.  Signal delivery requires every
signal appear to be delivered either before or after a clone syscall.
SIGKILL terminates the clone so does not need to be considered.  Which
leaves only SIGSTOP that needs to be considered when creating new
threads.

Today in the event of a group stop TIF_SIGPENDING will get set and the
fork will restart ensuring the fork syscall participates in the group
stop.

A fork (especially of a process with a lot of memory) is one of the
most expensive system so we really only want to restart a fork when
necessary.

It is easy so check to see if a SIGSTOP is ongoing and have the new
thread join it immediate after the clone completes.  Making it appear
the clone completed happened just before the SIGSTOP.

The calculate_sigpending function will see the bits set in jobctl and
set TIF_SIGPENDING to ensure the new task takes the slow path to userspace.

V2: The call to task_join_group_stop was moved before the new task is
    added to the thread group list.  This should not matter as
    sighand->siglock is held over both the addition of the threads,
    the call to task_join_group_stop and do_signal_stop.  But the change
    is trivial and it is one less thing to worry about when reading
    the code.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b55fd293c1e5..ae2b0b81be25 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -385,6 +385,8 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
 	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
 }
 
+void task_join_group_stop(struct task_struct *task);
+
 #ifdef TIF_RESTORE_SIGMASK
 /*
  * Legacy restore_sigmask accessors.  These are inefficient on
-- 
cgit v1.2.3


From d9cfe2ce246845b9cca0ec1b881e826965893c58 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 23 Jul 2018 22:26:05 +0200
Subject: i2c: quirks: add zero length checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some adapters do not support a message length of 0. Add this as a quirk
so drivers don't have to open code it.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index bc8d42f8544f..2a98d0886d2e 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -661,6 +661,10 @@ struct i2c_adapter_quirks {
 					 I2C_AQ_COMB_READ_SECOND | I2C_AQ_COMB_SAME_ADDR)
 /* clock stretching is not supported */
 #define I2C_AQ_NO_CLK_STRETCH		BIT(4)
+/* message cannot have length of 0 */
+#define I2C_AQ_NO_ZERO_LEN_READ		BIT(5)
+#define I2C_AQ_NO_ZERO_LEN_WRITE	BIT(6)
+#define I2C_AQ_NO_ZERO_LEN		(I2C_AQ_NO_ZERO_LEN_READ | I2C_AQ_NO_ZERO_LEN_WRITE)
 
 /*
  * i2c_adapter is the structure used to identify a physical i2c bus along
-- 
cgit v1.2.3


From eac7e072d7e99fad1b6e817c608b03c48205241e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 6 Aug 2018 10:22:35 -0700
Subject: Revert "ata: ahci_platform: allow disabling of hotplug to save power"

This reverts commit aece27a2f01be4bb7683790f69cd1bed3a0929a2.

Causes boot failure on some devices.

 http://lore.kernel.org/r/CA+G9fYuKW_jCFZPqG4tz=QY9ROfHO38KiCp9XTA+KaDOFVtcqQ@mail.gmail.com

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/ahci_platform.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 6396e6982103..1b0a17b22cd3 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -42,7 +42,5 @@ int ahci_platform_suspend_host(struct device *dev);
 int ahci_platform_resume_host(struct device *dev);
 int ahci_platform_suspend(struct device *dev);
 int ahci_platform_resume(struct device *dev);
-int ahci_platform_runtime_suspend(struct device *dev);
-int ahci_platform_runtime_resume(struct device *dev);
 
 #endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From d88e61faad526a5850e9330c846641b91cf971e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:36:26 +0200
Subject: iommu: Remove the ->map_sg indirection

All iommu drivers use the default_iommu_map_sg implementation, and there
is no good reason to ever override it.  Just expose it as iommu_map_sg
directly and remove the indirection, specially in our post-spectre world
where indirect calls are horribly expensive.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7447b0b0579a..87994c265bf5 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -166,8 +166,6 @@ struct iommu_resv_region {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
- * @map_sg: map a scatter-gather list of physically contiguous memory chunks
- *          to an iommu domain
  * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain
  * @tlb_range_add: Add a given iova range to the flush queue for this domain
  * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -201,8 +199,6 @@ struct iommu_ops {
 		   phys_addr_t paddr, size_t size, int prot);
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 		     size_t size);
-	size_t (*map_sg)(struct iommu_domain *domain, unsigned long iova,
-			 struct scatterlist *sg, unsigned int nents, int prot);
 	void (*flush_iotlb_all)(struct iommu_domain *domain);
 	void (*iotlb_range_add)(struct iommu_domain *domain,
 				unsigned long iova, size_t size);
@@ -303,9 +299,8 @@ extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 			  size_t size);
 extern size_t iommu_unmap_fast(struct iommu_domain *domain,
 			       unsigned long iova, size_t size);
-extern size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-				struct scatterlist *sg,unsigned int nents,
-				int prot);
+extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+			   struct scatterlist *sg,unsigned int nents, int prot);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
@@ -378,13 +373,6 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain)
 		domain->ops->iotlb_sync(domain);
 }
 
-static inline size_t iommu_map_sg(struct iommu_domain *domain,
-				  unsigned long iova, struct scatterlist *sg,
-				  unsigned int nents, int prot)
-{
-	return domain->ops->map_sg(domain, iova, sg, nents, prot);
-}
-
 /* PCI device grouping function */
 extern struct iommu_group *pci_device_group(struct device *dev);
 /* Generic device grouping function */
-- 
cgit v1.2.3


From 4717be73c2843a3d6d8546872177a19358f6b7b5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 25 Jul 2018 17:39:25 +0300
Subject: i2c: core: Parse SDA hold time from firmware

There are two drivers already using the SDA hold time setting.
It might be more in the future, thus, make I2C core to parse the setting
for us if provided by firmware.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 2a98d0886d2e..36f357ecdf67 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -564,6 +564,7 @@ struct i2c_lock_operations {
  * @scl_fall_ns: time SCL signal takes to fall in ns; t(f) in the I2C specification
  * @scl_int_delay_ns: time IP core additionally needs to setup SCL in ns
  * @sda_fall_ns: time SDA signal takes to fall in ns; t(f) in the I2C specification
+ * @sda_hold_ns: time IP core additionally needs to hold SDA in ns
  */
 struct i2c_timings {
 	u32 bus_freq_hz;
@@ -571,6 +572,7 @@ struct i2c_timings {
 	u32 scl_fall_ns;
 	u32 scl_int_delay_ns;
 	u32 sda_fall_ns;
+	u32 sda_hold_ns;
 };
 
 /**
-- 
cgit v1.2.3


From cb95deea0b4aa5c7c7423f4e075a3ddcd59e710b Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:29 -0400
Subject: NFS OFFLOAD_CANCEL xdr

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs4.h      |  1 +
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   | 12 ++++++++++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 57ffaa20d564..c44b87293229 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -527,6 +527,7 @@ enum {
 	NFSPROC4_CLNT_LAYOUTSTATS,
 	NFSPROC4_CLNT_CLONE,
 	NFSPROC4_CLNT_COPY,
+	NFSPROC4_CLNT_OFFLOAD_CANCEL,
 
 	NFSPROC4_CLNT_LOOKUPP,
 };
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 2c18d618604e..fbc735f08d7e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -255,5 +255,6 @@ struct nfs_server {
 #define NFS_CAP_LAYOUTSTATS	(1U << 22)
 #define NFS_CAP_CLONE		(1U << 23)
 #define NFS_CAP_COPY		(1U << 24)
+#define NFS_CAP_OFFLOAD_CANCEL	(1U << 25)
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3b7325cfb291..85e928a56cef 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1403,6 +1403,18 @@ struct nfs42_copy_res {
 	struct nfs_commitres		commit_res;
 };
 
+struct nfs42_offload_status_args {
+	struct nfs4_sequence_args	osa_seq_args;
+	struct nfs_fh			*osa_src_fh;
+	nfs4_stateid			osa_stateid;
+};
+
+struct nfs42_offload_status_res {
+	struct nfs4_sequence_res	osr_seq_res;
+	uint64_t			osr_count;
+	int				osr_status;
+};
+
 struct nfs42_seek_args {
 	struct nfs4_sequence_args	seq_args;
 
-- 
cgit v1.2.3


From 67aa7444c4beb40aafedd8d2c60bbcc54987adda Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:30 -0400
Subject: NFS COPY xdr handle async reply

If server returns async reply, it must include a callback stateid,
wr_callback_id in the write_response4.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 85e928a56cef..06ddfa31cbef 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1391,6 +1391,7 @@ struct nfs42_copy_args {
 };
 
 struct nfs42_write_res {
+	nfs4_stateid		stateid;
 	u64			count;
 	struct nfs_writeverf	verifier;
 };
-- 
cgit v1.2.3


From 62164f317972fcd36590578888f33a1994dda519 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:31 -0400
Subject: NFS add support for asynchronous COPY

Change xdr to always send COPY asynchronously.

Keep the list copies send in a list under a server structure.
Once copy is sent, it waits on a completion structure that will
be signalled by the callback thread that receives CB_OFFLOAD.

If CB_OFFLOAD returned an error and even if it returned partial
bytes, ignore them (as we can't commit without a verifier to
match) and return an error.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_fs.h    | 9 +++++++++
 include/linux/nfs_fs_sb.h | 1 +
 include/linux/nfs_xdr.h   | 1 +
 3 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2f129bbfaae8..645ad8e342f6 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -185,6 +185,15 @@ struct nfs_inode {
 	struct inode		vfs_inode;
 };
 
+struct nfs4_copy_state {
+	struct list_head	copies;
+	nfs4_stateid		stateid;
+	struct completion	completion;
+	uint64_t		count;
+	struct nfs_writeverf	verf;
+	int			error;
+};
+
 /*
  * Access bit flags
  */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fbc735f08d7e..f88952d7b9fb 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -208,6 +208,7 @@ struct nfs_server {
 	struct list_head	state_owners_lru;
 	struct list_head	layouts;
 	struct list_head	delegations;
+	struct list_head	ss_copies;
 
 	unsigned long		mig_gen;
 	unsigned long		mig_status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 06ddfa31cbef..bd1c889a9ed9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1388,6 +1388,7 @@ struct nfs42_copy_args {
 	u64				dst_pos;
 
 	u64				count;
+	bool				sync;
 };
 
 struct nfs42_write_res {
-- 
cgit v1.2.3


From bc0c9079b48ddcf1f8a6e1aaa277288b263c78d8 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:32 -0400
Subject: NFS handle COPY reply CB_OFFLOAD call race

It's possible that server replies back with CB_OFFLOAD call and
COPY reply at the same time such that client will process
CB_OFFLOAD before reply to COPY. For that keep a list of pending
callback stateids received and then before waiting on completion
check the pending list.

Cleanup any pending copies on the client shutdown.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index f88952d7b9fb..bf39d9c92201 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -121,6 +121,7 @@ struct nfs_client {
 #endif
 
 	struct net		*cl_net;
+	struct list_head	pending_cb_stateids;
 };
 
 /*
-- 
cgit v1.2.3


From c3ad2c3b02e953ead2b8d52a0c9e70312930c3d0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jul 2018 15:20:37 -0500
Subject: signal: Don't restart fork when signals come in.

Wen Yang <wen.yang99@zte.com.cn> and majiang <ma.jiang@zte.com.cn>
report that a periodic signal received during fork can cause fork to
continually restart preventing an application from making progress.

The code was being overly pessimistic.  Fork needs to guarantee that a
signal sent to multiple processes is logically delivered before the
fork and just to the forking process or logically delivered after the
fork to both the forking process and it's newly spawned child.  For
signals like periodic timers that are always delivered to a single
process fork can safely complete and let them appear to logically
delivered after the fork().

While examining this issue I also discovered that fork today will miss
signals delivered to multiple processes during the fork and handled by
another thread.  Similarly the current code will also miss blocked
signals that are delivered to multiple process, as those signals will
not appear pending during fork.

Add a list of each thread that is currently forking, and keep on that
list a signal set that records all of the signals sent to multiple
processes.  When fork completes initialize the new processes
shared_pending signal set with it.  The calculate_sigpending function
will see those signals and set TIF_SIGPENDING causing the new task to
take the slow path to userspace to handle those signals.  Making it
appear as if those signals were received immediately after the fork.

It is not possible to send real time signals to multiple processes and
exceptions don't go to multiple processes, which means that that are
no signals sent to multiple processes that require siginfo.  This
means it is safe to not bother collecting siginfo on signals sent
during fork.

The sigaction of a child of fork is initially the same as the
sigaction of the parent process.  So a signal the parent ignores the
child will also initially ignore.  Therefore it is safe to ignore
signals sent to multiple processes and ignored by the forking process.

Signals sent to only a single process or only a single thread and delivered
during fork are treated as if they are received after the fork, and generally
not dealt with.  They won't cause any problems.

V2: Added removal from the multiprocess list on failure.
V3: Use -ERESTARTNOINTR directly
V4: - Don't queue both SIGCONT and SIGSTOP
    - Initialize signal_struct.multiprocess in init_task
    - Move setting of shared_pending to before the new task
      is visible to signals.  This prevents signals from comming
      in before shared_pending.signal is set to delayed.signal
      and being lost.
V5: - rework list add and delete to account for idle threads
v6: - Use sigdelsetmask when removing stop signals

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200447
Reported-by: Wen Yang <wen.yang99@zte.com.cn> and
Reported-by: majiang <ma.jiang@zte.com.cn>
Fixes: 4a2c7a7837da ("[PATCH] make fork() atomic wrt pgrp/session signals")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ae2b0b81be25..4e9b77fb702d 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -69,6 +69,11 @@ struct thread_group_cputimer {
 	bool checking_timer;
 };
 
+struct multiprocess_signals {
+	sigset_t signal;
+	struct hlist_node node;
+};
+
 /*
  * NOTE! "signal_struct" does not have its own
  * locking, because a shared signal_struct always
@@ -90,6 +95,9 @@ struct signal_struct {
 	/* shared signal handling: */
 	struct sigpending	shared_pending;
 
+	/* For collecting multiprocess signals during fork */
+	struct hlist_head	multiprocess;
+
 	/* thread group exit support */
 	int			group_exit_code;
 	/* overloaded:
-- 
cgit v1.2.3


From 64bed6cbe38bc95689fb9399872d9ce250192f90 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 13 Jul 2018 17:22:24 +0300
Subject: nfsd: fix leaked file lock with nfs exported overlayfs

nfsd and lockd call vfs_lock_file() to lock/unlock the inode
returned by locks_inode(file).

Many places in nfsd/lockd code use the inode returned by
file_inode(file) for lock manipulation. With Overlayfs, file_inode()
(the underlying inode) is not the same object as locks_inode() (the
overlay inode). This can result in "Leaked POSIX lock" messages
and eventually to a kernel crash as reported by Eddie Horng:
https://marc.info/?l=linux-unionfs&m=153086643202072&w=2

Fix all the call sites in nfsd/lockd that should use locks_inode().
This is a correctness bug that manifested when overlayfs gained
NFS export support in v4.16.

Reported-by: Eddie Horng <eddiehorng.tw@gmail.com>
Tested-by: Eddie Horng <eddiehorng.tw@gmail.com>
Cc: Jeff Layton <jlayton@kernel.org>
Fixes: 8383f1748829 ("ovl: wire up NFS export operations")
Cc: stable@vger.kernel.org
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/lockd/lockd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 4fd95dbeb52f..b065ef406770 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -299,7 +299,7 @@ int           nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
 
 static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
 {
-	return file_inode(file->f_file);
+	return locks_inode(file->f_file);
 }
 
 static inline int __nlm_privileged_request4(const struct sockaddr *sap)
@@ -359,7 +359,7 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
 static inline int nlm_compare_locks(const struct file_lock *fl1,
 				    const struct file_lock *fl2)
 {
-	return file_inode(fl1->fl_file) == file_inode(fl2->fl_file)
+	return locks_inode(fl1->fl_file) == locks_inode(fl2->fl_file)
 	     && fl1->fl_pid   == fl2->fl_pid
 	     && fl1->fl_owner == fl2->fl_owner
 	     && fl1->fl_start == fl2->fl_start
-- 
cgit v1.2.3


From 3fd9557aec919e2db99365ad5a2c00d04ae8893c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Jul 2018 11:19:05 -0400
Subject: NFSD: Refactor the generic write vector fill helper

fill_in_write_vector() is nearly the same logic as
svc_fill_write_vector(), but there are a few differences so that
the former can handle multiple WRITE payloads in a single COMPOUND.

svc_fill_write_vector() can be adjusted so that it can be used in
the NFSv4 WRITE code path too. Instead of assuming the pages are
coming from rq_args.pages, have the caller pass in the page list.

The immediate benefit is a reduction of code duplication. It also
prevents the NFSv4 WRITE decoder from passing an empty vector
element when the transport has provided the payload in the xdr_buf's
page array.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 574368e8a16f..43f88bd7b601 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -496,6 +496,7 @@ void		   svc_reserve(struct svc_rqst *rqstp, int space);
 struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
 char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
 unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
+					 struct page **pages,
 					 struct kvec *first, size_t total);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
 					     struct kvec *first, size_t total);
-- 
cgit v1.2.3


From 11b4d66ea3313d9b03a83b80458ddee64990e3c3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Jul 2018 11:19:10 -0400
Subject: NFSD: Handle full-length symlinks

I've given up on the idea of zero-copy handling of SYMLINK on the
server side. This is because the Linux VFS symlink API requires the
symlink pathname to be in a NUL-terminated kmalloc'd buffer. The
NUL-termination is going to be problematic (watching out for
landing on a page boundary and dealing with a 4096-byte pathname).

I don't believe that SYMLINK creation is on a performance path or is
requested frequently enough that it will cause noticeable CPU cache
pollution due to data copies.

There will be two places where a transport callout will be necessary
to fill in the rqstp: one will be in the svc_fill_symlink_pathname()
helper that is used by NFSv2 and NFSv3, and the other will be in
nfsd4_decode_create().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 43f88bd7b601..73e130a840ce 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -499,7 +499,8 @@ unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
 					 struct page **pages,
 					 struct kvec *first, size_t total);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
-					     struct kvec *first, size_t total);
+					     struct kvec *first, void *p,
+					     size_t total);
 
 #define	RPC_MAX_ADDRBUFLEN	(63U)
 
-- 
cgit v1.2.3


From bff1b208a5d1dbb2355822ef859edcb9be0379e4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 6 Aug 2018 15:50:58 -0400
Subject: tracing: Partial revert of "tracing: Centralize preemptirq
 tracepoints and unify their usage"

Joel Fernandes created a nice patch that cleaned up the duplicate hooks used
by lockdep and irqsoff latency tracer. It made both use tracepoints. But it
caused lockdep to trigger several false positives. We have not figured out
why yet, but removing lockdep from using the trace event hooks and just call
its helper functions directly (like it use to), makes the problem go away.

This is a partial revert of the clean up patch c3bc8fd637a9 ("tracing:
Centralize preemptirq tracepoints and unify their usage") that adds direct
calls for lockdep, but also keeps most of the clean up done to get rid of
the horrible preprocessor if statements.

Link: http://lkml.kernel.org/r/20180806155058.5ee875f4@gandalf.local.home

Cc: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Fixes: c3bc8fd637a9 ("tracing: Centralize preemptirq tracepoints and unify their usage")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/irqflags.h | 8 ++++++--
 include/linux/lockdep.h  | 2 --
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 50edb9cbbd26..21619c92c377 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -19,9 +19,13 @@
 #ifdef CONFIG_PROVE_LOCKING
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
+  extern void lockdep_hardirqs_on(unsigned long ip);
+  extern void lockdep_hardirqs_off(unsigned long ip);
 #else
-# define trace_softirqs_on(ip)	do { } while (0)
-# define trace_softirqs_off(ip)	do { } while (0)
+  static inline void trace_softirqs_on(unsigned long ip) { }
+  static inline void trace_softirqs_off(unsigned long ip) { }
+  static inline void lockdep_hardirqs_on(unsigned long ip) { }
+  static inline void lockdep_hardirqs_off(unsigned long ip) { }
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index a8113357ceeb..b0d0b51c4d85 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -267,7 +267,6 @@ struct held_lock {
  * Initialization, self-test and debugging-output methods:
  */
 extern void lockdep_init(void);
-extern void lockdep_init_early(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
@@ -408,7 +407,6 @@ static inline void lockdep_on(void)
 # define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
 # define lockdep_init()				do { } while (0)
-# define lockdep_init_early()			do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
 # define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
-- 
cgit v1.2.3


From e4648aa4f98a87cf0a83f73a5864cede073053a0 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 13 Aug 2018 15:33:01 -0400
Subject: NFS recover from destination server reboot for copies

Mark the destination state to indicate a server-side copy is
happening. On detecting a reboot and recovering open state check
if any state is engaged in a server-side copy, if so, find the
copy and mark it and then signal the waiting thread. Upon wakeup,
if copy was marked then propage EAGAIN to the nfsd_copy_file_range
and restart the copy from scratch.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 645ad8e342f6..a0831e9d19c9 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -192,6 +192,8 @@ struct nfs4_copy_state {
 	uint64_t		count;
 	struct nfs_writeverf	verf;
 	int			error;
+	int			flags;
+	struct nfs4_state	*parent_state;
 };
 
 /*
-- 
cgit v1.2.3


From 6d43743e9079ac0531b60cde7eadd0f042873344 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Thu, 9 Aug 2018 09:48:52 +0530
Subject: Uprobe: Additional argument arch_uprobe to uprobe_write_opcode()

Add addition argument 'arch_uprobe' to uprobe_write_opcode().
We need this in later set of patches.

Link: http://lkml.kernel.org/r/20180809041856.1547-3-ravi.bangoria@linux.ibm.com

Reviewed-by: Song Liu <songliubraving@fb.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/uprobes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 0a294e950df8..bb9d2084af03 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -121,7 +121,7 @@ extern bool is_swbp_insn(uprobe_opcode_t *insn);
 extern bool is_trap_insn(uprobe_opcode_t *insn);
 extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
 extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
-extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
+extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
-- 
cgit v1.2.3


From 6855dc41b24619c3d1de3dbd27dd0546b0e45272 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 6 Jun 2018 15:54:11 +0300
Subject: x86: Add entry trampolines to kcore

Without program headers for PTI entry trampoline pages, the trampoline
virtual addresses do not map to anything.

Example before:

 sudo gdb --quiet vmlinux /proc/kcore
 Reading symbols from vmlinux...done.
 [New process 1]
 Core was generated by `BOOT_IMAGE=/boot/vmlinuz-4.16.0 root=UUID=a6096b83-b763-4101-807e-f33daff63233'.
 #0  0x0000000000000000 in irq_stack_union ()
 (gdb) x /21ib 0xfffffe0000006000
    0xfffffe0000006000:  Cannot access memory at address 0xfffffe0000006000
 (gdb) quit

After:

 sudo gdb --quiet vmlinux /proc/kcore
 [sudo] password for ahunter:
 Reading symbols from vmlinux...done.
 [New process 1]
 Core was generated by `BOOT_IMAGE=/boot/vmlinuz-4.16.0-fix-4-00005-gd6e65a8b4072 root=UUID=a6096b83-b7'.
 #0  0x0000000000000000 in irq_stack_union ()
 (gdb) x /21ib 0xfffffe0000006000
    0xfffffe0000006000:  swapgs
    0xfffffe0000006003:  mov    %rsp,-0x3e12(%rip)        # 0xfffffe00000021f8
    0xfffffe000000600a:  xchg   %ax,%ax
    0xfffffe000000600c:  mov    %cr3,%rsp
    0xfffffe000000600f:  bts    $0x3f,%rsp
    0xfffffe0000006014:  and    $0xffffffffffffe7ff,%rsp
    0xfffffe000000601b:  mov    %rsp,%cr3
    0xfffffe000000601e:  mov    -0x3019(%rip),%rsp        # 0xfffffe000000300c
    0xfffffe0000006025:  pushq  $0x2b
    0xfffffe0000006027:  pushq  -0x3e35(%rip)        # 0xfffffe00000021f8
    0xfffffe000000602d:  push   %r11
    0xfffffe000000602f:  pushq  $0x33
    0xfffffe0000006031:  push   %rcx
    0xfffffe0000006032:  push   %rdi
    0xfffffe0000006033:  mov    $0xffffffff91a00010,%rdi
    0xfffffe000000603a:  callq  0xfffffe0000006046
    0xfffffe000000603f:  pause
    0xfffffe0000006041:  lfence
    0xfffffe0000006044:  jmp    0xfffffe000000603f
    0xfffffe0000006046:  mov    %rdi,(%rsp)
    0xfffffe000000604a:  retq
 (gdb) quit

In addition, entry trampolines all map to the same page.  Represent that
by giving the corresponding program headers in kcore the same offset.

This has the benefit that, when perf tools uses /proc/kcore as a source
for kernel object code, samples from different CPU trampolines are
aggregated together.  Note, such aggregation is normal for profiling
i.e. people want to profile the object code, not every different virtual
address the object code might be mapped to (across different processes
for example).

Notes by PeterZ:

This also adds the KCORE_REMAP functionality.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Link: http://lkml.kernel.org/r/1528289651-4113-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/kcore.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8de55e4b5ee9..bc088ef96358 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -12,11 +12,13 @@ enum kcore_type {
 	KCORE_VMEMMAP,
 	KCORE_USER,
 	KCORE_OTHER,
+	KCORE_REMAP,
 };
 
 struct kcore_list {
 	struct list_head list;
 	unsigned long addr;
+	unsigned long vaddr;
 	size_t size;
 	int type;
 };
@@ -36,11 +38,22 @@ struct vmcoredd_node {
 
 #ifdef CONFIG_PROC_KCORE
 extern void kclist_add(struct kcore_list *, void *, size_t, int type);
+static inline
+void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
+{
+	m->vaddr = (unsigned long)vaddr;
+	kclist_add(m, addr, sz, KCORE_REMAP);
+}
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 }
+
+static inline
+void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
+{
+}
 #endif
 
 #endif /* _LINUX_KCORE_H */
-- 
cgit v1.2.3


From 284ce4011ba60d6c487b668eea729b6294930806 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:32 -0700
Subject: x86/memory_failure: Introduce {set, clear}_mce_nospec()

Currently memory_failure() returns zero if the error was handled. On
that result mce_unmap_kpfn() is called to zap the page out of the kernel
linear mapping to prevent speculative fetches of potentially poisoned
memory. However, in the case of dax mapped devmap pages the page may be
in active permanent use by the device driver, so it cannot be unmapped
from the kernel.

Instead of marking the page not present, marking the page UC should
be sufficient for preventing poison from being pre-fetched into the
cache. Convert mce_unmap_pfn() to set_mce_nospec() remapping the page as
UC, to hide it from speculative accesses.

Given that that persistent memory errors can be cleared by the driver,
include a facility to restore the page to cacheable operation,
clear_mce_nospec().

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: <linux-edac@vger.kernel.org>
Cc: <x86@kernel.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/set_memory.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index da5178216da5..2a986d282a97 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -17,6 +17,20 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+#ifndef set_mce_nospec
+static inline int set_mce_nospec(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
+#ifndef clear_mce_nospec
+static inline int clear_mce_nospec(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
 #ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
 static inline int set_memory_encrypted(unsigned long addr, int numpages)
 {
-- 
cgit v1.2.3


From 04f264d3a8b0eb25d378127bd78c3c9a0261c828 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Mon, 20 Aug 2018 15:36:17 -0700
Subject: compiler.h: Allow arch-specific asm/compiler.h

We have a need to override the definition of
barrier_before_unreachable() for MIPS, which means we either need to add
architecture-specific code into linux/compiler-gcc.h or we need to allow
the architecture to provide a header that can define the macro before
the generic definition. The latter seems like the better approach.

A straightforward approach to the per-arch header is to make use of
asm-generic to provide a default empty header & adjust architectures
which don't need anything specific to make use of that by adding the
header to generic-y. Unfortunately this doesn't work so well due to
commit 28128c61e08e ("kconfig.h: Include compiler types to avoid missed
struct attributes") which caused linux/compiler_types.h to be included
in the compilation of every C file via the -include linux/kconfig.h flag
in c_flags.

Because the -include flag is present for all C files we compile, we need
the architecture-provided header to be present before any C files are
compiled. If any C files can be compiled prior to the asm-generic header
wrappers being generated then we hit a build failure due to missing
header. Such cases do exist - one pointed out by the kbuild test robot
is the compilation of arch/ia64/kernel/nr-irqs.c, which occurs as part
of the archprepare target [1].

This leaves us with a few options:

  1) Use generic-y & fix any build failures we find by enforcing
     ordering such that the asm-generic target occurs before any C
     compilation, such that linux/compiler_types.h can always include
     the generated asm-generic wrapper which in turn includes the empty
     asm-generic header. This would rely on us finding all the
     problematic cases - I don't know for sure that the ia64 issue is
     the only one.

  2) Add an actual empty header to each architecture, so that we don't
     need the generated asm-generic wrapper. This seems messy.

  3) Give up & add #ifdef CONFIG_MIPS or similar to
     linux/compiler_types.h. This seems messy too.

  4) Include the arch header only when it's actually needed, removing
     the need for the asm-generic wrapper for all other architectures.

This patch allows us to use approach 4, by including an asm/compiler.h
header from linux/compiler_types.h after the inclusion of the
compiler-specific linux/compiler-*.h header(s). We do this
conditionally, only when CONFIG_HAVE_ARCH_COMPILER_H is selected, in
order to avoid the need for asm-generic wrappers & the associated build
ordering issue described above. The asm/compiler.h header is included
after the generic linux/compiler-*.h header(s) for consistency with the
way linux/compiler-intel.h & linux/compiler-clang.h are included after
the linux/compiler-gcc.h header that they override.

[1] https://lists.01.org/pipermail/kbuild-all/2018-August/051175.html

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Patchwork: https://patchwork.linux-mips.org/patch/20269/
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: James Hogan <jhogan@kernel.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-arch@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: linux-mips@linux-mips.org
---
 include/linux/compiler_types.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 6b79a9bba9a7..4be464a07612 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -78,6 +78,18 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 #include <linux/compiler-clang.h>
 #endif
 
+/*
+ * Some architectures need to provide custom definitions of macros provided
+ * by linux/compiler-*.h, and can do so using asm/compiler.h. We include that
+ * conditionally rather than using an asm-generic wrapper in order to avoid
+ * build failures if any C compilation, which will include this file via an
+ * -include argument in c_flags, occurs prior to the asm-generic wrappers being
+ * generated.
+ */
+#ifdef CONFIG_HAVE_ARCH_COMPILER_H
+#include <asm/compiler.h>
+#endif
+
 /*
  * Generic compiler-dependent macros required for kernel
  * build go below this comment. Actual compiler/compiler version
-- 
cgit v1.2.3


From 5ade60dda43c8906d4554374226c2eb11cc2ffba Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 20 Mar 2018 17:07:11 -0400
Subject: ida: Add new API

Add ida_alloc(), ida_alloc_min(), ida_alloc_max(), ida_alloc_range()
and ida_free().  The ida_alloc_max() and ida_alloc_range() functions
differ from ida_simple_get() in that they take an inclusive 'max'
parameter instead of an exclusive 'end' parameter.  Callers are about
evenly split whether they'd like inclusive or exclusive parameters and
'max' is easier to document than 'end'.

Change the IDA allocation to first attempt to allocate a bit using
existing memory, and only allocate memory afterwards.  Also change the
behaviour of 'min' > INT_MAX from being a BUG() to returning -ENOSPC.

Leave compatibility wrappers in place for ida_simple_get() and
ida_simple_remove() to avoid changing all callers.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/idr.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e0ab35..cd339da0b1aa 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -230,15 +230,68 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
 void ida_remove(struct ida *ida, int id);
 void ida_destroy(struct ida *ida);
 
-int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
-		   gfp_t gfp_mask);
-void ida_simple_remove(struct ida *ida, unsigned int id);
+int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t);
+void ida_free(struct ida *, unsigned int id);
+
+/**
+ * ida_alloc() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between 0 and %INT_MAX, inclusive.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+static inline int ida_alloc(struct ida *ida, gfp_t gfp)
+{
+	return ida_alloc_range(ida, 0, ~0, gfp);
+}
+
+/**
+ * ida_alloc_min() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @min: Lowest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between @min and %INT_MAX, inclusive.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp)
+{
+	return ida_alloc_range(ida, min, ~0, gfp);
+}
+
+/**
+ * ida_alloc_max() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @max: Highest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between 0 and @max, inclusive.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp)
+{
+	return ida_alloc_range(ida, 0, max, gfp);
+}
 
 static inline void ida_init(struct ida *ida)
 {
 	INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
 }
 
+#define ida_simple_get(ida, start, end, gfp)	\
+			ida_alloc_range(ida, start, (end) - 1, gfp)
+#define ida_simple_remove(ida, id)	ida_free(ida, id)
+
 /**
  * ida_get_new - allocate new ID
  * @ida:	idr handle
-- 
cgit v1.2.3


From b03f8e43c9261878bf29d8cc1c3ba458cc98287e Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 18 Jun 2018 19:02:48 -0400
Subject: ida: Remove old API

Delete ida_pre_get(), ida_get_new(), ida_get_new_above() and ida_remove()
from the public API.  Some of these functions still exist as internal
helpers, but they should not be called by consumers.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/idr.h | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index cd339da0b1aa..2e1db0e36486 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -225,13 +225,9 @@ struct ida {
 }
 #define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
-int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
-int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
-void ida_remove(struct ida *ida, int id);
-void ida_destroy(struct ida *ida);
-
 int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t);
 void ida_free(struct ida *, unsigned int id);
+void ida_destroy(struct ida *ida);
 
 /**
  * ida_alloc() - Allocate an unused ID.
@@ -292,20 +288,11 @@ static inline void ida_init(struct ida *ida)
 			ida_alloc_range(ida, start, (end) - 1, gfp)
 #define ida_simple_remove(ida, id)	ida_free(ida, id)
 
-/**
- * ida_get_new - allocate new ID
- * @ida:	idr handle
- * @p_id:	pointer to the allocated handle
- *
- * Simple wrapper around ida_get_new_above() w/ @starting_id of zero.
- */
-static inline int ida_get_new(struct ida *ida, int *p_id)
-{
-	return ida_get_new_above(ida, 0, p_id);
-}
-
 static inline bool ida_is_empty(const struct ida *ida)
 {
 	return radix_tree_empty(&ida->ida_rt);
 }
+
+/* in lib/radix-tree.c */
+int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 #endif /* __IDR_H__ */
-- 
cgit v1.2.3


From fd991a23c8f6ef30692f77409602ccf3614353b2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 21 Aug 2018 22:33:00 +0200
Subject: y2038: Provide aliases for compat helpers

As part of the system call rework for 64-bit time_t, we are restructuring
the way that compat syscalls deal with 32-bit time_t, reusing the
implementation for 32-bit architectures. Christoph Hellwig suggested a
rename of the associated types and interfaces to avoid the confusing usage
of the 'compat' prefix for 32-bit architectures.

To prepare for doing that in linux-4.20, add a set of macros that allows to
convert subsystems separately to the new names and avoids some of the
nastier merge conflicts.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: y2038@lists.linaro.org
Cc: John Stultz <john.stultz@linaro.org>
Cc: Deepa Dinamani <deepa.kernel@gmail.com>
Link: https://lkml.kernel.org/r/20180821203329.2089473-1-arnd@arndb.de
---
 include/linux/time32.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/time32.h b/include/linux/time32.h
index 0b14f936100a..d1ae43c13e25 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -207,4 +207,19 @@ static inline s64 timeval_to_ns(const struct timeval *tv)
 extern struct timeval ns_to_timeval(const s64 nsec);
 extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec);
 
+/*
+ * New aliases for compat time functions. These will be used to replace
+ * the compat code so it can be shared between 32-bit and 64-bit builds
+ * both of which provide compatibility with old 32-bit tasks.
+ */
+#define old_time32_t		compat_time_t
+#define old_timeval32		compat_timeval
+#define old_timespec32		compat_timespec
+#define old_itimerspec32	compat_itimerspec
+#define ns_to_old_timeval32	ns_to_compat_timeval
+#define get_old_itimerspec32	get_compat_itimerspec64
+#define put_old_itimerspec32	put_compat_itimerspec64
+#define get_old_timespec32	compat_get_timespec64
+#define put_old_timespec32	compat_put_timespec64
+
 #endif
-- 
cgit v1.2.3


From c4df32c80d04987023844c1fb13734a872c8f2e2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 19 Aug 2018 07:34:47 +0900
Subject: export.h: remove VMLINUX_SYMBOL() and VMLINUX_SYMBOL_STR()

With the special case handling for Blackfin and Metag was removed by
commit 94e58e0ac312 ("export.h: remove code for prefixing symbols with
underscore"), VMLINUX_SYMBOL() is no-op.

Replace the remaining usages, then remove the definition of
VMLINUX_SYMBOL() and VMLINUX_SYMBOL_STR().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 include/linux/export.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/export.h b/include/linux/export.h
index b768d6dd3c90..c363bde21bbe 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -10,13 +10,6 @@
  * hackers place grumpy comments in header files.
  */
 
-#define __VMLINUX_SYMBOL(x) x
-#define __VMLINUX_SYMBOL_STR(x) #x
-
-/* Indirect, so macros are expanded before pasting. */
-#define VMLINUX_SYMBOL(x) __VMLINUX_SYMBOL(x)
-#define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
-
 #ifndef __ASSEMBLY__
 struct kernel_symbol
 {
-- 
cgit v1.2.3


From 16af2d65842d343c2f95733c3993a0b5baab08f9 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Wed, 22 Aug 2018 21:13:01 +0900
Subject: ata: add an extra argument to ahci_platform_get_resources()

Add an extra argument to ahci_platform_get_resources(), that is
for the bitmap representing the resource to get in this function.

Currently there is no resources to be defined, so all the callers set
'0' to the argument.

Suggested-by: Hans de Goede <hdegoede@redhat.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/ahci_platform.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 1b0a17b22cd3..6490be1f8a16 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -30,7 +30,7 @@ void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv);
 int ahci_platform_enable_resources(struct ahci_host_priv *hpriv);
 void ahci_platform_disable_resources(struct ahci_host_priv *hpriv);
 struct ahci_host_priv *ahci_platform_get_resources(
-	struct platform_device *pdev);
+	struct platform_device *pdev, unsigned int flags);
 int ahci_platform_init_host(struct platform_device *pdev,
 			    struct ahci_host_priv *hpriv,
 			    const struct ata_port_info *pi_template,
-- 
cgit v1.2.3


From 9d2ab99573970838108add835442a03e23f8577b Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Wed, 22 Aug 2018 21:13:02 +0900
Subject: ata: libahci_platform: add reset control support

Add support to get and control a list of resets for the device
as optional and shared. These resets must be kept de-asserted until
the device is enabled.

This is specified as shared because some SoCs like UniPhier series
have common reset controls with all ahci controller instances.

However, according to Thierry's view,
https://www.spinics.net/lists/linux-ide/msg55357.html
some hardware-specific drivers already use their own resets,
and the common reset make a path to occur double controls of resets.

The ahci_platform_get_resources() can get and control the reset
only when the second argument includes AHCI_PLATFORM_GET_RESETS bit.

Suggested-by: Hans de Goede <hdegoede@redhat.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/ahci_platform.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 6490be1f8a16..eaedca5fe6fc 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -43,4 +43,6 @@ int ahci_platform_resume_host(struct device *dev);
 int ahci_platform_suspend(struct device *dev);
 int ahci_platform_resume(struct device *dev);
 
+#define AHCI_PLATFORM_GET_RESETS	0x01
+
 #endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From 64d9d13828c6c8e188bba63794eee923df3d69a9 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Tue, 31 Jul 2018 01:37:30 +0000
Subject: fs/quota: Replace XQM_MAXQUOTAS usage with MAXQUOTAS

XQM_MAXQUOTAS and MAXQUOTAS are, it appears, equivalent. Replace all
usage of XQM_MAXQUOTAS and remove it along with the unused XQM_*QUOTA
definitions.

Signed-off-by: Jeremy Cline <jcline@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/quota.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/quota.h b/include/linux/quota.h
index ca9772c8e48b..f32dd270b8e3 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -408,13 +408,7 @@ struct qc_type_state {
 
 struct qc_state {
 	unsigned int s_incoredqs;	/* Number of dquots in core */
-	/*
-	 * Per quota type information. The array should really have
-	 * max(MAXQUOTAS, XQM_MAXQUOTAS) entries. BUILD_BUG_ON in
-	 * quota_getinfo() makes sure XQM_MAXQUOTAS is large enough.  Once VFS
-	 * supports project quotas, this can be changed to MAXQUOTAS
-	 */
-	struct qc_type_state s_state[XQM_MAXQUOTAS];
+	struct qc_type_state s_state[MAXQUOTAS];  /* Per quota type information */
 };
 
 /* Structure for communicating via ->set_info */
-- 
cgit v1.2.3


From 92be775a3db343889ab159c44d55315c5ee0be4e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 21 Aug 2018 21:51:53 -0700
Subject: mm: struct shrink_control: keep int fields together

Patch series "Reorderings in struct shrinker and struct shrink_control".

These structures are intensively used during reclaim and, displace other
data in cache, so there is no a reason they have int fields not grouped
together.

This patch (of 2):

gfp_t is of unsigned type, so let's move nid to keep them together.

Link: http://lkml.kernel.org/r/153199747930.21131.861043607301997810.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shrinker.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index b154fd2b084c..d58aaaed34a4 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -12,6 +12,9 @@
 struct shrink_control {
 	gfp_t gfp_mask;
 
+	/* current node being shrunk (for NUMA aware shrinkers) */
+	int nid;
+
 	/*
 	 * How many objects scan_objects should scan and try to reclaim.
 	 * This is reset before every call, so it is safe for callees
@@ -26,9 +29,6 @@ struct shrink_control {
 	 */
 	unsigned long nr_scanned;
 
-	/* current node being shrunk (for NUMA aware shrinkers) */
-	int nid;
-
 	/* current memcg being shrunk (for memcg aware shrinkers) */
 	struct mem_cgroup *memcg;
 };
-- 
cgit v1.2.3


From e50ef89b0f58a2cb0e7d496a97b851e0dced62a6 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 21 Aug 2018 21:51:57 -0700
Subject: mm: struct shrinker: make flags of unsigned type

Currently, there are two flags only, so unsigned is more then enough.
Also, move int seeks to keep these fields together.

Link: http://lkml.kernel.org/r/153199748720.21131.6476256940113102483.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shrinker.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index d58aaaed34a4..9443cafd1969 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -63,9 +63,9 @@ struct shrinker {
 	unsigned long (*scan_objects)(struct shrinker *,
 				      struct shrink_control *sc);
 
-	int seeks;	/* seeks to recreate an obj */
 	long batch;	/* reclaim batch size, 0 = default */
-	unsigned long flags;
+	int seeks;	/* seeks to recreate an obj */
+	unsigned flags;
 
 	/* These are for internal use */
 	struct list_head list;
-- 
cgit v1.2.3


From 5d5e8f19544a35ae1609bd96ae6b28c9fcd1baf6 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Tue, 21 Aug 2018 21:52:20 -0700
Subject: mm, swap, get_swap_pages: use entry_size instead of cluster in
 parameter

As suggested by Matthew Wilcox, it is better to use "int entry_size"
instead of "bool cluster" as parameter to specify whether to operate for
huge or normal swap entries.  Because this improve the flexibility to
support other swap entry size.  And Dave Hansen thinks that this
improves code readability too.

So in this patch, the "bool cluster" parameter of get_swap_pages() is
replaced by "int entry_size".

And nr_swap_entries() trick is used to reduce the binary size when
!CONFIG_TRANSPARENT_HUGE_PAGE.

       text	   data	    bss	    dec	    hex	filename
base  24215	   2028	    340	  26583	   67d7	mm/swapfile.o
head  24123	   2004	    340	  26467	   6763	mm/swapfile.o

Link: http://lkml.kernel.org/r/20180720071845.17920-7-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1a8bd05a335e..8e2c11e692ba 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -447,7 +447,7 @@ extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(struct page *page);
 extern void put_swap_page(struct page *page, swp_entry_t entry);
 extern swp_entry_t get_swap_page_of_type(int);
-extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
-- 
cgit v1.2.3


From 93065ac753e4443840a057bfef4be71ec766fde9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 21 Aug 2018 21:52:33 -0700
Subject: mm, oom: distinguish blockable mode for mmu notifiers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are several blockable mmu notifiers which might sleep in
mmu_notifier_invalidate_range_start and that is a problem for the
oom_reaper because it needs to guarantee a forward progress so it cannot
depend on any sleepable locks.

Currently we simply back off and mark an oom victim with blockable mmu
notifiers as done after a short sleep.  That can result in selecting a new
oom victim prematurely because the previous one still hasn't torn its
memory down yet.

We can do much better though.  Even if mmu notifiers use sleepable locks
there is no reason to automatically assume those locks are held.  Moreover
majority of notifiers only care about a portion of the address space and
there is absolutely zero reason to fail when we are unmapping an unrelated
range.  Many notifiers do really block and wait for HW which is harder to
handle and we have to bail out though.

This patch handles the low hanging fruit.
__mmu_notifier_invalidate_range_start gets a blockable flag and callbacks
are not allowed to sleep if the flag is set to false.  This is achieved by
using trylock instead of the sleepable lock for most callbacks and
continue as long as we do not block down the call chain.

I think we can improve that even further because there is a common pattern
to do a range lookup first and then do something about that.  The first
part can be done without a sleeping lock in most cases AFAICS.

The oom_reaper end then simply retries if there is at least one notifier
which couldn't make any progress in !blockable mode.  A retry loop is
already implemented to wait for the mmap_sem and this is basically the
same thing.

The simplest way for driver developers to test this code path is to wrap
userspace code which uses these notifiers into a memcg and set the hard
limit to hit the oom.  This can be done e.g.  after the test faults in all
the mmu notifier managed memory and set the hard limit to something really
small.  Then we are looking for a proper process tear down.

[akpm@linux-foundation.org: coding style fixes]
[akpm@linux-foundation.org: minor code simplification]
Link: http://lkml.kernel.org/r/20180716115058.5559-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Christian König <christian.koenig@amd.com> # AMD notifiers
Acked-by: Leon Romanovsky <leonro@mellanox.com> # mlx and umem_odp
Reported-by: David Rientjes <rientjes@google.com>
Cc: "David (ChunMing) Zhou" <David1.Zhou@amd.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Mike Marciniszyn <mike.marciniszyn@intel.com>
Cc: Dennis Dalessandro <dennis.dalessandro@intel.com>
Cc: Sudeep Dutt <sudeep.dutt@intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kvm_host.h     |  4 ++--
 include/linux/mmu_notifier.h | 33 +++++++++++++++++++++++++--------
 include/linux/oom.h          |  2 +-
 3 files changed, 28 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7c7362dd2faa..0205aee44ded 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1289,8 +1289,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-		unsigned long start, unsigned long end);
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end, bool blockable);
 
 #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
 int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 392e6af82701..133ba78820ee 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -151,13 +151,15 @@ struct mmu_notifier_ops {
 	 * address space but may still be referenced by sptes until
 	 * the last refcount is dropped.
 	 *
-	 * If both of these callbacks cannot block, and invalidate_range
-	 * cannot block, mmu_notifier_ops.flags should have
-	 * MMU_INVALIDATE_DOES_NOT_BLOCK set.
+	 * If blockable argument is set to false then the callback cannot
+	 * sleep and has to return with -EAGAIN. 0 should be returned
+	 * otherwise.
+	 *
 	 */
-	void (*invalidate_range_start)(struct mmu_notifier *mn,
+	int (*invalidate_range_start)(struct mmu_notifier *mn,
 				       struct mm_struct *mm,
-				       unsigned long start, unsigned long end);
+				       unsigned long start, unsigned long end,
+				       bool blockable);
 	void (*invalidate_range_end)(struct mmu_notifier *mn,
 				     struct mm_struct *mm,
 				     unsigned long start, unsigned long end);
@@ -229,8 +231,9 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
 				     unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
 				      unsigned long address, pte_t pte);
-extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
-				  unsigned long start, unsigned long end);
+extern int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+				  unsigned long start, unsigned long end,
+				  bool blockable);
 extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  unsigned long start, unsigned long end,
 				  bool only_end);
@@ -281,7 +284,15 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
 	if (mm_has_notifiers(mm))
-		__mmu_notifier_invalidate_range_start(mm, start, end);
+		__mmu_notifier_invalidate_range_start(mm, start, end, true);
+}
+
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	if (mm_has_notifiers(mm))
+		return __mmu_notifier_invalidate_range_start(mm, start, end, false);
+	return 0;
 }
 
 static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -461,6 +472,12 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 {
 }
 
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	return 0;
+}
+
 static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 6adac113e96d..92f70e4c6252 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -95,7 +95,7 @@ static inline int check_stable_address_space(struct mm_struct *mm)
 	return 0;
 }
 
-void __oom_reap_task_mm(struct mm_struct *mm);
+bool __oom_reap_task_mm(struct mm_struct *mm);
 
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
-- 
cgit v1.2.3


From a670468f5e0b5fad4db6e4d195f15915dc2a35c1 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 21 Aug 2018 21:53:06 -0700
Subject: mm: zero out the vma in vma_init()

Rather than in vm_area_alloc().  To ensure that the various oddball
stack-based vmas are in a good state.  Some of the callers were zeroing
them out, others were not.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a3cae495f9ce..3a4b87d1a59a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -456,6 +456,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
 	static const struct vm_operations_struct dummy_vm_ops = {};
 
+	memset(vma, 0, sizeof(*vma));
 	vma->vm_mm = mm;
 	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
-- 
cgit v1.2.3


From 89696701ea847786f88ccc1c580fb4c3c20c4a3a Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 21 Aug 2018 21:53:24 -0700
Subject: mm: remove zone_id() and make use of zone_idx() in is_dev_zone()

is_dev_zone() is using zone_id() to check if the zone is ZONE_DEVICE.
zone_id() looks pretty much the same as zone_idx(), and while the use of
zone_idx() is quite spread in the kernel, zone_id() is only being used by
is_dev_zone().

This patch removes zone_id() and makes is_dev_zone() use zone_idx() to
check the zone, so we do not have two things with the same functionality
around.

Link: http://lkml.kernel.org/r/20180730133718.28683-1-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 32699b2dc52a..3d4577a3ae7e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -755,25 +755,6 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
 	return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
 }
 
-static inline int zone_id(const struct zone *zone)
-{
-	struct pglist_data *pgdat = zone->zone_pgdat;
-
-	return zone - pgdat->node_zones;
-}
-
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_dev_zone(const struct zone *zone)
-{
-	return zone_id(zone) == ZONE_DEVICE;
-}
-#else
-static inline bool is_dev_zone(const struct zone *zone)
-{
-	return false;
-}
-#endif
-
 #include <linux/memory_hotplug.h>
 
 void build_all_zonelists(pg_data_t *pgdat);
@@ -824,6 +805,18 @@ static inline int local_memory_node(int node_id) { return node_id; };
  */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return zone_idx(zone) == ZONE_DEVICE;
+}
+#else
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return false;
+}
+#endif
+
 /*
  * Returns true if a zone has pages managed by the buddy allocator.
  * All the reclaim decisions have to use this function rather than
-- 
cgit v1.2.3


From c1093b746c0576ed81c4d568d1e39cab651d37e6 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Tue, 21 Aug 2018 21:53:32 -0700
Subject: mm: access zone->node via zone_to_nid() and zone_set_nid()

zone->node is configured only when CONFIG_NUMA=y, so it is a good idea to
have inline functions to access this field in order to avoid ifdef's in c
files.

Link: http://lkml.kernel.org/r/20180730101757.28058-3-osalvador@techadventures.net
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h     |  9 ---------
 include/linux/mmzone.h | 26 ++++++++++++++++++++------
 2 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3a4b87d1a59a..a55f5389c491 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -960,15 +960,6 @@ static inline int page_zone_id(struct page *page)
 	return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
 }
 
-static inline int zone_to_nid(struct zone *zone)
-{
-#ifdef CONFIG_NUMA
-	return zone->node;
-#else
-	return 0;
-#endif
-}
-
 #ifdef NODE_NOT_IN_PAGE_FLAGS
 extern int page_to_nid(const struct page *page);
 #else
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3d4577a3ae7e..1e22d96734e0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -834,6 +834,25 @@ static inline bool populated_zone(struct zone *zone)
 	return zone->present_pages;
 }
 
+#ifdef CONFIG_NUMA
+static inline int zone_to_nid(struct zone *zone)
+{
+	return zone->node;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid)
+{
+	zone->node = nid;
+}
+#else
+static inline int zone_to_nid(struct zone *zone)
+{
+	return 0;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid) {}
+#endif
+
 extern int movable_zone;
 
 #ifdef CONFIG_HIGHMEM
@@ -949,12 +968,7 @@ static inline int zonelist_zone_idx(struct zoneref *zoneref)
 
 static inline int zonelist_node_idx(struct zoneref *zoneref)
 {
-#ifdef CONFIG_NUMA
-	/* zone_to_nid not available in this context */
-	return zoneref->zone->node;
-#else
-	return 0;
-#endif /* CONFIG_NUMA */
+	return zone_to_nid(zoneref->zone);
 }
 
 struct zoneref *__next_zones_zonelist(struct zoneref *z,
-- 
cgit v1.2.3


From 03e85f9d5f1f8c74f127c5f7a87575d74a78d248 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 21 Aug 2018 21:53:43 -0700
Subject: mm/page_alloc: Introduce free_area_init_core_hotplug

Currently, whenever a new node is created/re-used from the memhotplug
path, we call free_area_init_node()->free_area_init_core().  But there is
some code that we do not really need to run when we are coming from such
path.

free_area_init_core() performs the following actions:

1) Initializes pgdat internals, such as spinlock, waitqueues and more.
2) Account # nr_all_pages and # nr_kernel_pages. These values are used later on
   when creating hash tables.
3) Account number of managed_pages per zone, substracting dma_reserved and
   memmap pages.
4) Initializes some fields of the zone structure data
5) Calls init_currently_empty_zone to initialize all the freelists
6) Calls memmap_init to initialize all pages belonging to certain zone

When called from memhotplug path, free_area_init_core() only performs
actions #1 and #4.

Action #2 is pointless as the zones do not have any pages since either the
node was freed, or we are re-using it, eitherway all zones belonging to
this node should have 0 pages.  For the same reason, action #3 results
always in manages_pages being 0.

Action #5 and #6 are performed later on when onlining the pages:
 online_pages()->move_pfn_range_to_zone()->init_currently_empty_zone()
 online_pages()->move_pfn_range_to_zone()->memmap_init_zone()

This patch does two things:

First, moves the node/zone initializtion to their own function, so it
allows us to create a small version of free_area_init_core, where we only
perform:

1) Initialization of pgdat internals, such as spinlock, waitqueues and more
4) Initialization of some fields of the zone structure data

These two functions are: pgdat_init_internals() and zone_init_internals().

The second thing this patch does, is to introduce
free_area_init_core_hotplug(), the memhotplug version of
free_area_init_core():

Currently, we call free_area_init_node() from the memhotplug path.  In
there, we set some pgdat's fields, and call calculate_node_totalpages().
calculate_node_totalpages() calculates the # of pages the node has.

Since the node is either new, or we are re-using it, the zones belonging
to this node should not have any pages, so there is no point to calculate
this now.

Actually, we re-set these values to 0 later on with the calls to:

reset_node_managed_pages()
reset_node_present_pages()

The # of pages per node and the # of pages per zone will be calculated when
onlining the pages:

online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_zone_range()
online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_pgdat_range()

Also, since free_area_init_core/free_area_init_node will now only get called during early init, let us replace
__paginginit with __init, so their code gets freed up.

[osalvador@techadventures.net: fix section usage]
  Link: http://lkml.kernel.org/r/20180731101752.GA473@techadventures.net
[osalvador@suse.de: v6]
  Link: http://lkml.kernel.org/r/20180801122348.21588-6-osalvador@techadventures.net
Link: http://lkml.kernel.org/r/20180730101757.28058-5-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 1 +
 include/linux/mm.h             | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4e9828cda7a2..34a28227068d 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -319,6 +319,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 static inline void remove_memory(int nid, u64 start, u64 size) {}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
+extern void __ref free_area_init_core_hotplug(int nid);
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a55f5389c491..a9e733b5fb76 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2015,7 +2015,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
 
 extern void __init pagecache_init(void);
 extern void free_area_init(unsigned long * zones_size);
-extern void free_area_init_node(int nid, unsigned long * zones_size,
+extern void __init free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
 extern void free_initmem(void);
 
-- 
cgit v1.2.3


From 3d8b38eb81cac81395f6a823f6bf401b327268e6 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 21 Aug 2018 21:53:54 -0700
Subject: mm, oom: introduce memory.oom.group

For some workloads an intervention from the OOM killer can be painful.
Killing a random task can bring the workload into an inconsistent state.

Historically, there are two common solutions for this
problem:
1) enabling panic_on_oom,
2) using a userspace daemon to monitor OOMs and kill
   all outstanding processes.

Both approaches have their downsides: rebooting on each OOM is an obvious
waste of capacity, and handling all in userspace is tricky and requires a
userspace agent, which will monitor all cgroups for OOMs.

In most cases an in-kernel after-OOM cleaning-up mechanism can eliminate
the necessity of enabling panic_on_oom.  Also, it can simplify the cgroup
management for userspace applications.

This commit introduces a new knob for cgroup v2 memory controller:
memory.oom.group.  The knob determines whether the cgroup should be
treated as an indivisible workload by the OOM killer.  If set, all tasks
belonging to the cgroup or to its descendants (if the memory cgroup is not
a leaf cgroup) are killed together or not at all.

To determine which cgroup has to be killed, we do traverse the cgroup
hierarchy from the victim task's cgroup up to the OOMing cgroup (or root)
and looking for the highest-level cgroup with memory.oom.group set.

Tasks with the OOM protection (oom_score_adj set to -1000) are treated as
an exception and are never killed.

This patch doesn't change the OOM victim selection algorithm.

Link: http://lkml.kernel.org/r/20180802003201.817-4-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0e6c515fb698..652f602167df 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -225,6 +225,11 @@ struct mem_cgroup {
 	 */
 	bool use_hierarchy;
 
+	/*
+	 * Should the OOM killer kill all belonging tasks, had it kill one?
+	 */
+	bool oom_group;
+
 	/* protected by memcg_oom_lock */
 	bool		oom_lock;
 	int		under_oom;
@@ -542,6 +547,9 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 }
 
 bool mem_cgroup_oom_synchronize(bool wait);
+struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
+					    struct mem_cgroup *oom_domain);
+void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
@@ -1001,6 +1009,16 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
 	return false;
 }
 
+static inline struct mem_cgroup *mem_cgroup_get_oom_group(
+	struct task_struct *victim, struct mem_cgroup *oom_domain)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
+{
+}
+
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 					     int idx)
 {
-- 
cgit v1.2.3


From 7e8a6304d5419cbf056a59de92939e5eef039c57 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 21 Aug 2018 21:53:58 -0700
Subject: /proc/meminfo: add percpu populated pages count

Currently, percpu memory only exposes allocation and utilization
information via debugfs.  This more or less is only really useful for
understanding the fragmentation and allocation information at a per-chunk
level with a few global counters.  This is also gated behind a config.
BPF and cgroup, for example, have seen an increase in use causing
increased use of percpu memory.  Let's make it easier for someone to
identify how much memory is being used.

This patch adds the "Percpu" stat to meminfo to more easily look up how
much percpu memory is in use.  This number includes the cost for all
allocated backing pages and not just insight at the per a unit, per chunk
level.  Metadata is excluded.  I think excluding metadata is fair because
the backing memory scales with the numbere of cpus and can quickly
outweigh the metadata.  It also makes this calculation light.

Link: http://lkml.kernel.org/r/20180807184723.74919-1-dennisszhou@gmail.com
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 296bbe49d5d1..70b7123f38c7 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -149,4 +149,6 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
 	(typeof(type) __percpu *)__alloc_percpu(sizeof(type),		\
 						__alignof__(type))
 
+extern unsigned long pcpu_nr_pages(void);
+
 #endif /* __LINUX_PERCPU_H */
-- 
cgit v1.2.3


From 5df66d306ec9b1952d3d183fe4e2ced1a7b2bddb Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 21 Aug 2018 21:54:06 -0700
Subject: mm: fix comment for NODEMASK_ALLOC

Currently, NODEMASK_ALLOC allocates a nodemask_t with kmalloc when
NODES_SHIFT is higher than 8, otherwise it declares it within the stack.

The comment says that the reasoning behind this, is that nodemask_t will
be 256 bytes when NODES_SHIFT is higher than 8, but this is not true.  For
example, NODES_SHIFT = 9 will give us a 64 bytes nodemask_t.  Let us fix
up the comment for that.

Another thing is that it might make sense to let values lower than
128bytes be allocated in the stack.  Although this all depends on the
depth of the stack (and this changes from function to function), I think
that 64 bytes is something we can easily afford.  So we could even bump
the limit by 1 (from > 8 to > 9).

Link: http://lkml.kernel.org/r/20180820085516.9687-1-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 1fbde8a880d9..5a30ad594ccc 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -518,7 +518,7 @@ static inline int node_random(const nodemask_t *mask)
  * NODEMASK_ALLOC(type, name) allocates an object with a specified type and
  * name.
  */
-#if NODES_SHIFT > 8 /* nodemask_t > 256 bytes */
+#if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */
 #define NODEMASK_ALLOC(type, name, gfp_flags)	\
 			type *name = kmalloc(sizeof(*name), gfp_flags)
 #define NODEMASK_FREE(m)			kfree(m)
-- 
cgit v1.2.3


From 891ae71dc4fbd2454a3fa569e115a7ca86630949 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 21 Aug 2018 21:54:37 -0700
Subject: proc: spread "const" a bit

Link: http://lkml.kernel.org/r/20180627200614.GB18434@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/proc_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 626fc65c4336..d0e1f1522a78 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -129,7 +129,7 @@ int open_related_ns(struct ns_common *ns,
 		   struct ns_common *(*get_ns)(struct ns_common *ns));
 
 /* get the associated pid namespace for a file in procfs */
-static inline struct pid_namespace *proc_pid_ns(struct inode *inode)
+static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
 {
 	return inode->i_sb->s_fs_info;
 }
-- 
cgit v1.2.3


From a8dd9c4df18edc873d244790d163564a5d17626b Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 21 Aug 2018 21:54:51 -0700
Subject: proc/kcore: don't grab lock for kclist_add()

Patch series "/proc/kcore improvements", v4.

This series makes a few improvements to /proc/kcore.  It fixes a couple of
small issues in v3 but is otherwise the same.  Patches 1, 2, and 3 are
prep patches.  Patch 4 is a fix/cleanup.  Patch 5 is another prep patch.
Patches 6 and 7 are optimizations to ->read().  Patch 8 makes it possible
to enable CRASH_CORE on any architecture, which is needed for patch 9.
Patch 9 adds vmcoreinfo to /proc/kcore.

This patch (of 9):

kclist_add() is only called at init time, so there's no point in grabbing
any locks.  We're also going to replace the rwlock with a rwsem, which we
don't want to try grabbing during early boot.

While we're here, mark kclist_add() with __init so that we'll get a
warning if it's called from non-init code.

Link: http://lkml.kernel.org/r/98208db1faf167aa8b08eebfa968d95c70527739.1531953780.git.osandov@fb.com
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Bhupesh Sharma <bhsharma@redhat.com>
Tested-by: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kcore.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8de55e4b5ee9..c20f296438fb 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -35,7 +35,7 @@ struct vmcoredd_node {
 };
 
 #ifdef CONFIG_PROC_KCORE
-extern void kclist_add(struct kcore_list *, void *, size_t, int type);
+void __init kclist_add(struct kcore_list *, void *, size_t, int type);
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
-- 
cgit v1.2.3


From 23c85094fe1895caefdd19ef624ee687ec5f4507 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 21 Aug 2018 21:55:20 -0700
Subject: proc/kcore: add vmcoreinfo note to /proc/kcore

The vmcoreinfo information is useful for runtime debugging tools, not just
for crash dumps.  A lot of this information can be determined by other
means, but this is much more convenient, and it only adds a page at most
to the file.

Link: http://lkml.kernel.org/r/fddbcd08eed76344863303878b12de1c1e2a04b6.1531953780.git.osandov@fb.com
Signed-off-by: Omar Sandoval <osandov@fb.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crash_core.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index b511f6d24b42..525510a9f965 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -60,6 +60,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
+extern unsigned char *vmcoreinfo_data;
+extern size_t vmcoreinfo_size;
 extern u32 *vmcoreinfo_note;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
-- 
cgit v1.2.3


From cedc5b6aab493f6b1b1d381dccc0cc082da7d3d8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 21 Aug 2018 21:55:28 -0700
Subject: kernel.h: documentation for roundup() vs round_up()

Things like 3619dec5103d ("dh key: fix rounding up KDF output length")
expose the lack of explicit documentation for roundup() vs round_up().  At
least we can try to document it better if anyone goes looking.

Link: http://lkml.kernel.org/r/20180703041950.GA43464@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 941dc0a5a877..d6aac75b51ba 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -85,7 +85,23 @@
  * arguments just once each.
  */
 #define __round_mask(x, y) ((__typeof__(x))((y)-1))
+/**
+ * round_up - round up to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round up to (must be a power of 2)
+ *
+ * Rounds @x up to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding up, use roundup() below.
+ */
 #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+/**
+ * round_down - round down to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round down to (must be a power of 2)
+ *
+ * Rounds @x down to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding down, use rounddown() below.
+ */
 #define round_down(x, y) ((x) & ~__round_mask(x, y))
 
 /**
@@ -110,13 +126,30 @@
 # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d)
 #endif
 
-/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
+/**
+ * roundup - round up to the next specified multiple
+ * @x: the value to up
+ * @y: multiple to round up to
+ *
+ * Rounds @x up to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_up().
+ *
+ * The `const' here prevents gcc-3.3 from calling __divdi3
+ */
 #define roundup(x, y) (					\
 {							\
 	const typeof(y) __y = y;			\
 	(((x) + (__y - 1)) / __y) * __y;		\
 }							\
 )
+/**
+ * rounddown - round down to next specified multiple
+ * @x: the value to round
+ * @y: multiple to round down to
+ *
+ * Rounds @x down to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_down().
+ */
 #define rounddown(x, y) (				\
 {							\
 	typeof(x) __x = (x);				\
-- 
cgit v1.2.3


From e58dd0de5eadf145895b13451a1fef8ef03946eb Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 21 Aug 2018 21:55:31 -0700
Subject: bdi: use refcount_t for reference counting instead atomic_t

refcount_t type and corresponding API should be used instead of atomic_t
when the variable is used as a reference counter.  This permits avoiding
accidental refcounter overflows that might lead to use-after-free
situations.

Link: http://lkml.kernel.org/r/20180703200141.28415-4-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev-defs.h | 3 ++-
 include/linux/backing-dev.h      | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 24251762c20c..9a6bc0951cfa 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -12,6 +12,7 @@
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/kref.h>
+#include <linux/refcount.h>
 
 struct page;
 struct device;
@@ -75,7 +76,7 @@ enum wb_reason {
  */
 struct bdi_writeback_congested {
 	unsigned long state;		/* WB_[a]sync_congested flags */
-	atomic_t refcnt;		/* nr of attached wb's and blkg */
+	refcount_t refcnt;		/* nr of attached wb's and blkg */
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct backing_dev_info *__bdi;	/* the associated bdi, set to NULL
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 72ca0f3d39f3..c28a47cbe355 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -404,13 +404,13 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
 static inline struct bdi_writeback_congested *
 wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
 {
-	atomic_inc(&bdi->wb_congested->refcnt);
+	refcount_inc(&bdi->wb_congested->refcnt);
 	return bdi->wb_congested;
 }
 
 static inline void wb_congested_put(struct bdi_writeback_congested *congested)
 {
-	if (atomic_dec_and_test(&congested->refcnt))
+	if (refcount_dec_and_test(&congested->refcnt))
 		kfree(congested);
 }
 
-- 
cgit v1.2.3


From fc37191272a972d449bab3d8247cf52cadf5d4e6 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 21 Aug 2018 21:55:38 -0700
Subject: userns: use refcount_t for reference counting instead atomic_t

refcount_t type and corresponding API should be used instead of atomic_t
wh en the variable is used as a reference counter.  This avoids accidental
refcounter overflows that might lead to use-after-free situations.

Link: http://lkml.kernel.org/r/20180703200141.28415-6-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/user.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 96fe289c4c6e..39ad98c09c58 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
 
 #include <linux/uidgid.h>
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/ratelimit.h>
 
 struct key;
@@ -12,7 +13,7 @@ struct key;
  * Some day this will be a full-fledged user tracking system..
  */
 struct user_struct {
-	atomic_t __count;	/* reference count */
+	refcount_t __count;	/* reference count */
 	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
@@ -59,7 +60,7 @@ extern struct user_struct root_user;
 extern struct user_struct * alloc_uid(kuid_t);
 static inline struct user_struct *get_uid(struct user_struct *u)
 {
-	atomic_inc(&u->__count);
+	refcount_inc(&u->__count);
 	return u;
 }
 extern void free_uid(struct user_struct *);
-- 
cgit v1.2.3


From 203583990cb675aeddff6d7623f68268068c078c Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 21 Aug 2018 21:55:45 -0700
Subject: linux/compiler.h: don't use bool

Appararently, it's possible to have a non-trivial TU include a few
headers, including linux/build_bug.h, without ending up with
linux/types.h.  So the 0day bot sent me

config: um-x86_64_defconfig (attached as .config)

>> include/linux/compiler.h:316:3: error: unknown type name 'bool'; did you mean '_Bool'?
      bool __cond = !(condition);    \

for something I'm working on.

Rather than contributing to the #include madness and including
linux/types.h from compiler.h, just use int.

Link: http://lkml.kernel.org/r/20180817101036.20969-1-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Christopher Li <sparse@chrisli.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 42506e4d1f53..c8eab637a2a7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -313,7 +313,7 @@ unsigned long read_word_at_a_time(const void *addr)
 #ifdef __OPTIMIZE__
 # define __compiletime_assert(condition, msg, prefix, suffix)		\
 	do {								\
-		bool __cond = !(condition);				\
+		int __cond = !(condition);				\
 		extern void prefix ## suffix(void) __compiletime_error(msg); \
 		if (__cond)						\
 			prefix ## suffix();				\
-- 
cgit v1.2.3


From a2e514453861dd39b53b7a50b6771bd3f9852078 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Tue, 21 Aug 2018 21:55:52 -0700
Subject: kernel/hung_task.c: allow to set checking interval separately from
 timeout

Currently task hung checking interval is equal to timeout, as the result
hung is detected anywhere between timeout and 2*timeout.  This is fine for
most interactive environments, but this hurts automated testing setups
(syzbot).  In an automated setup we need to strictly order CPU lockup <
RCU stall < workqueue lockup < task hung < silent loss, so that RCU stall
is not detected as task hung and task hung is not detected as silent
machine loss.  The large variance in task hung detection timeout requires
setting silent machine loss timeout to a very large value (e.g.  if task
hung is 3 mins, then silent loss need to be set to ~7 mins).  The
additional 3 minutes significantly reduce testing efficiency because
usually we crash kernel within a minute, and this can add hours to bug
localization process as it needs to do dozens of tests.

Allow setting checking interval separately from timeout.  This allows to
set timeout to, say, 3 minutes, but checking interval to 10 secs.

The interval is controlled via a new hung_task_check_interval_secs sysctl,
similar to the existing hung_task_timeout_secs sysctl.  The default value
of 0 results in the current behavior: checking interval is equal to
timeout.

[akpm@linux-foundation.org: update hung_task_timeout_max's comment]
Link: http://lkml.kernel.org/r/20180611111004.203513-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h        | 1 +
 include/linux/sched/sysctl.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 789923fbee3a..58eb3a2bc695 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -853,6 +853,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 	unsigned long			last_switch_count;
+	unsigned long			last_switch_time;
 #endif
 	/* Filesystem information: */
 	struct fs_struct		*fs;
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 913488d828cb..a9c32daeb9d8 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -10,6 +10,7 @@ struct ctl_table;
 extern int	     sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
 extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_check_interval_secs;
 extern int sysctl_hung_task_warnings;
 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 					 void __user *buffer,
-- 
cgit v1.2.3


From f922c4abdf7648523589abee9460c87f51630d2f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:04 -0700
Subject: module: allow symbol exports to be disabled

To allow existing C code to be incorporated into the decompressor or the
UEFI stub, introduce a CPP macro that turns all EXPORT_SYMBOL_xxx
declarations into nops, and #define it in places where such exports are
undesirable.  Note that this gets rid of a rather dodgy redefine of
linux/export.h's header guard.

Link: http://lkml.kernel.org/r/20180704083651.24360-3-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/export.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/export.h b/include/linux/export.h
index b768d6dd3c90..ea7df303d68d 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -66,7 +66,16 @@ extern struct module __this_module;
 	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
 	= { (unsigned long)&sym, __kstrtab_##sym }
 
-#if defined(__KSYM_DEPS__)
+#if defined(__DISABLE_EXPORTS)
+
+/*
+ * Allow symbol exports to be disabled completely so that C code may
+ * be reused in other execution contexts such as the UEFI stub or the
+ * decompressor.
+ */
+#define __EXPORT_SYMBOL(sym, sec)
+
+#elif defined(__KSYM_DEPS__)
 
 /*
  * For fine grained build dependencies, we want to tell the build system
-- 
cgit v1.2.3


From 7290d58095712a89f845e1bca05334796dd49ed2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:09 -0700
Subject: module: use relative references for __ksymtab entries

An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab entries,
each consisting of two 64-bit fields containing absolute references, to
the symbol itself and to a char array containing its name, respectively.

When we build the same configuration with KASLR enabled, we end up with an
additional ~192 KB of relocations in the .init section, i.e., one 24 byte
entry for each absolute reference, which all need to be processed at boot
time.

Given how the struct kernel_symbol that describes each entry is completely
local to module.c (except for the references emitted by EXPORT_SYMBOL()
itself), we can easily modify it to contain two 32-bit relative references
instead.  This reduces the size of the __ksymtab section by 50% for all
64-bit architectures, and gets rid of the runtime relocations entirely for
architectures implementing KASLR, either via standard PIE linking (arm64)
or using custom host tools (x86).

Note that the binary search involving __ksymtab contents relies on each
section being sorted by symbol name.  This is implemented based on the
input section names, not the names in the ksymtab entries, so this patch
does not interfere with that.

Given that the use of place-relative relocations requires support both in
the toolchain and in the module loader, we cannot enable this feature for
all architectures.  So make it dependent on whether
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined.

Link: http://lkml.kernel.org/r/20180704083651.24360-4-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jessica Yu <jeyu@kernel.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 19 +++++++++++++++++++
 include/linux/export.h   | 46 +++++++++++++++++++++++++++++++++++-----------
 2 files changed, 54 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c8eab637a2a7..681d866efb1e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -280,6 +280,25 @@ unsigned long read_word_at_a_time(const void *addr)
 
 #endif /* __KERNEL__ */
 
+/*
+ * Force the compiler to emit 'sym' as a symbol, so that we can reference
+ * it from inline assembler. Necessary in case 'sym' could be inlined
+ * otherwise, or eliminated entirely due to lack of references that are
+ * visible to the compiler.
+ */
+#define __ADDRESSABLE(sym) \
+	static void * __attribute__((section(".discard.addressable"), used)) \
+		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
+
+/**
+ * offset_to_ptr - convert a relative memory offset to an absolute pointer
+ * @off:	the address of the 32-bit offset value
+ */
+static inline void *offset_to_ptr(const int *off)
+{
+	return (void *)((unsigned long)off + *off);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #ifndef __optimize
diff --git a/include/linux/export.h b/include/linux/export.h
index ea7df303d68d..ae072bc5aacf 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -18,12 +18,6 @@
 #define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
 
 #ifndef __ASSEMBLY__
-struct kernel_symbol
-{
-	unsigned long value;
-	const char *name;
-};
-
 #ifdef MODULE
 extern struct module __this_module;
 #define THIS_MODULE (&__this_module)
@@ -54,17 +48,47 @@ extern struct module __this_module;
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#include <linux/compiler.h>
+/*
+ * Emit the ksymtab entry as a pair of relative references: this reduces
+ * the size by half on 64-bit architectures, and eliminates the need for
+ * absolute relocations that require runtime processing on relocatable
+ * kernels.
+ */
+#define __KSYMTAB_ENTRY(sym, sec)					\
+	__ADDRESSABLE(sym)						\
+	asm("	.section \"___ksymtab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.balign	8					\n"	\
+	    "__ksymtab_" #sym ":				\n"	\
+	    "	.long	" #sym "- .				\n"	\
+	    "	.long	__kstrtab_" #sym "- .			\n"	\
+	    "	.previous					\n")
+
+struct kernel_symbol {
+	int value_offset;
+	int name_offset;
+};
+#else
+#define __KSYMTAB_ENTRY(sym, sec)					\
+	static const struct kernel_symbol __ksymtab_##sym		\
+	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
+	= { (unsigned long)&sym, __kstrtab_##sym }
+
+struct kernel_symbol {
+	unsigned long value;
+	const char *name;
+};
+#endif
+
 /* For every exported symbol, place a struct in the __ksymtab section */
 #define ___EXPORT_SYMBOL(sym, sec)					\
 	extern typeof(sym) sym;						\
 	__CRC_SYMBOL(sym, sec)						\
 	static const char __kstrtab_##sym[]				\
-	__attribute__((section("__ksymtab_strings"), aligned(1)))	\
+	__attribute__((section("__ksymtab_strings"), used, aligned(1)))	\
 	= #sym;								\
-	static const struct kernel_symbol __ksymtab_##sym		\
-	__used								\
-	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
-	= { (unsigned long)&sym, __kstrtab_##sym }
+	__KSYMTAB_ENTRY(sym, sec)
 
 #if defined(__DISABLE_EXPORTS)
 
-- 
cgit v1.2.3


From 1b1eeca7e4c19fa76d409d4c7b338dba21f2df45 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:13 -0700
Subject: init: allow initcall tables to be emitted using relative references

Allow the initcall tables to be emitted using relative references that
are only half the size on 64-bit architectures and don't require fixups
at runtime on relocatable kernels.

Link: http://lkml.kernel.org/r/20180704083651.24360-5-ard.biesheuvel@linaro.org
Acked-by: James Morris <james.morris@microsoft.com>
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Petr Mladek <pmladek@suse.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init.h | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index bc27cf03c41e..2538d176dd1f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -116,8 +116,24 @@
 typedef int (*initcall_t)(void);
 typedef void (*exitcall_t)(void);
 
-extern initcall_t __con_initcall_start[], __con_initcall_end[];
-extern initcall_t __security_initcall_start[], __security_initcall_end[];
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef int initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+	return offset_to_ptr(entry);
+}
+#else
+typedef initcall_t initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+	return *entry;
+}
+#endif
+
+extern initcall_entry_t __con_initcall_start[], __con_initcall_end[];
+extern initcall_entry_t __security_initcall_start[], __security_initcall_end[];
 
 /* Used for contructor calls. */
 typedef void (*ctor_fn_t)(void);
@@ -167,9 +183,20 @@ extern bool initcall_debug;
  * as KEEP() in the linker script.
  */
 
-#define __define_initcall(fn, id) \
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define ___define_initcall(fn, id, __sec)			\
+	__ADDRESSABLE(fn)					\
+	asm(".section	\"" #__sec ".init\", \"a\"	\n"	\
+	"__initcall_" #fn #id ":			\n"	\
+	    ".long	" #fn " - .			\n"	\
+	    ".previous					\n");
+#else
+#define ___define_initcall(fn, id, __sec) \
 	static initcall_t __initcall_##fn##id __used \
-	__attribute__((__section__(".initcall" #id ".init"))) = fn;
+		__attribute__((__section__(#__sec ".init"))) = fn;
+#endif
+
+#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
 
 /*
  * Early initcalls run before initializing SMP.
@@ -208,13 +235,8 @@ extern bool initcall_debug;
 #define __exitcall(fn)						\
 	static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define console_initcall(fn)					\
-	static initcall_t __initcall_##fn			\
-	__used __section(.con_initcall.init) = fn
-
-#define security_initcall(fn)					\
-	static initcall_t __initcall_##fn			\
-	__used __section(.security_initcall.init) = fn
+#define console_initcall(fn)	___define_initcall(fn,, .con_initcall)
+#define security_initcall(fn)	___define_initcall(fn,, .security_initcall)
 
 struct obs_kernel_param {
 	const char *str;
-- 
cgit v1.2.3


From c9d8b55fa0191623fccb9ed67d2ff8f9159e9a89 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:18 -0700
Subject: PCI: Add support for relative addressing in quirk tables

Allow the PCI quirk tables to be emitted in a way that avoids absolute
references to the hook functions. This reduces the size of the entries,
and, more importantly, makes them invariant under runtime relocation
(e.g., for KASLR)

Link: http://lkml.kernel.org/r/20180704083651.24360-6-ard.biesheuvel@linaro.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9b87f1936906..e72ca8dd6241 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1809,7 +1809,11 @@ struct pci_fixup {
 	u16 device;			/* Or PCI_ANY_ID */
 	u32 class;			/* Or PCI_ANY_ID */
 	unsigned int class_shift;	/* should be 0, 8, 16 */
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	int hook_offset;
+#else
 	void (*hook)(struct pci_dev *dev);
+#endif
 };
 
 enum pci_fixup_pass {
@@ -1823,12 +1827,28 @@ enum pci_fixup_pass {
 	pci_fixup_suspend_late,	/* pci_device_suspend_late() */
 };
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,	\
+				    class_shift, hook)			\
+	__ADDRESSABLE(hook)						\
+	asm(".section "	#sec ", \"a\"				\n"	\
+	    ".balign	16					\n"	\
+	    ".short "	#vendor ", " #device "			\n"	\
+	    ".long "	#class ", " #class_shift "		\n"	\
+	    ".long "	#hook " - .				\n"	\
+	    ".previous						\n");
+#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,	\
+				  class_shift, hook)			\
+	__DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,	\
+				  class_shift, hook)
+#else
 /* Anonymous variables would be nice... */
 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,	\
 				  class_shift, hook)			\
 	static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) __used	\
 	__attribute__((__section__(#section), aligned((sizeof(void *)))))    \
 		= { vendor, device, class, class_shift, hook };
+#endif
 
 #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class,		\
 					 class_shift, hook)		\
-- 
cgit v1.2.3


From 46e0c9be206fa7b11aca75da2d6b8535d0139752 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:22 -0700
Subject: kernel: tracepoints: add support for relative references

To avoid the need for relocating absolute references to tracepoint
structures at boot time when running relocatable kernels (which may
take a disproportionate amount of space), add the option to emit
these tables as relative references instead.

Link: http://lkml.kernel.org/r/20180704083651.24360-7-ard.biesheuvel@linaro.org
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tracepoint.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d9a084c72541..7f2e16e76ac4 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -249,6 +249,19 @@ extern void syscall_unregfunc(void);
 		return static_key_false(&__tracepoint_##name.key);	\
 	}
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __TRACEPOINT_ENTRY(name)					\
+	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
+	    "	.balign 4					\n"	\
+	    "	.long 	__tracepoint_" #name " - .		\n"	\
+	    "	.previous					\n")
+#else
+#define __TRACEPOINT_ENTRY(name)					 \
+	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
+	__attribute__((section("__tracepoints_ptrs"))) =		 \
+		&__tracepoint_##name
+#endif
+
 /*
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for iteration
@@ -258,11 +271,9 @@ extern void syscall_unregfunc(void);
 	static const char __tpstrtab_##name[]				 \
 	__attribute__((section("__tracepoints_strings"))) = #name;	 \
 	struct tracepoint __tracepoint_##name				 \
-	__attribute__((section("__tracepoints"))) =			 \
+	__attribute__((section("__tracepoints"), used)) =		 \
 		{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
-	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
-	__attribute__((section("__tracepoints_ptrs"))) =		 \
-		&__tracepoint_##name;
+	__TRACEPOINT_ENTRY(name);
 
 #define DEFINE_TRACE(name)						\
 	DEFINE_TRACE_FN(name, NULL, NULL);
-- 
cgit v1.2.3


From 9144d75e22cad3c89e6b2ccab551db9ee28d250a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 21 Aug 2018 21:57:03 -0700
Subject: include/linux/bitops.h: introduce BITS_PER_TYPE

net_dim.h has a rather useful extension to BITS_PER_BYTE to compute the
number of bits in a type (BITS_PER_BYTE * sizeof(T)), so promote the macro
to bitops.h, alongside BITS_PER_BYTE, for wider usage.

Link: http://lkml.kernel.org/r/20180706094458.14116-1-chris@chris-wilson.co.uk
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Andy Gospodarek <gospo@broadcom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitops.h  | 3 ++-
 include/linux/net_dim.h | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index af419012d77d..7ddb1349394d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -4,7 +4,8 @@
 #include <asm/types.h>
 #include <linux/bits.h>
 
-#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
 
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index db99240d00bd..c79e859408e6 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -363,7 +363,6 @@ static inline void net_dim_sample(u16 event_ctr,
 }
 
 #define NET_DIM_NEVENTS 64
-#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
 #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
 
 static inline void net_dim_calc_stats(struct net_dim_sample *start,
-- 
cgit v1.2.3


From feba04fd2cf8f6a74865338df2e3e1e94d6cfd13 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 21 Aug 2018 21:57:11 -0700
Subject: lib: add crc64 calculation routines

Patch series "add crc64 calculation as kernel library", v5.

This patchset adds basic implementation of crc64 calculation as a Linux
kernel library.  Since bcache already does crc64 by itself, this patchset
also modifies bcache code to use the new crc64 library routine.

Currently bcache is the only user of crc64 calculation, another potential
user is bcachefs which is on the way to be in mainline kernel.  Therefore
it makes sense to make crc64 calculation to be a public library.

bcache uses crc64 as storage checksum, if a change of crc lib routines
results an inconsistent result, the unmatched checksum may make bcache
'think' the on-disk is corrupted, such a change should be avoided or
detected as early as possible.  Therefore a patch is being prepared which
adds a crc test framework, to check consistency of different calculations.

This patch (of 2):

Add the re-write crc64 calculation routines for Linux kernel.  The CRC64
polynomical arithmetic follows ECMA-182 specification, inspired by CRC
paper of Dr.  Ross N.  Williams (see
http://www.ross.net/crc/download/crc_v3.txt) and other public domain
implementations.

All the changes work in this way,
- When Linux kernel is built, host program lib/gen_crc64table.c will be
  compiled to lib/gen_crc64table and executed.
- The output of gen_crc64table execution is an array called as lookup
  table (a.k.a POLY 0x42f0e1eba9ea369) which contain 256 64-bit long
  numbers, this table is dumped into header file lib/crc64table.h.
- Then the header file is included by lib/crc64.c for normal 64bit crc
  calculation.
- Function declaration of the crc64 calculation routines is placed in
  include/linux/crc64.h

Currently bcache is the only user of crc64_be(), another potential user is
bcachefs which is on the way to be in mainline kernel.  Therefore it makes
sense to move crc64 calculation into lib/crc64.c as public code.

[colyli@suse.de: fix review comments from v4]
  Link: http://lkml.kernel.org/r/20180726053352.2781-2-colyli@suse.de
Link: http://lkml.kernel.org/r/20180718165545.1622-2-colyli@suse.de
Signed-off-by: Coly Li <colyli@suse.de>
Co-developed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Michael Lyle <mlyle@lyle.org>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Noah Massey <noah.massey@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crc64.h | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 include/linux/crc64.h

(limited to 'include/linux')

diff --git a/include/linux/crc64.h b/include/linux/crc64.h
new file mode 100644
index 000000000000..c756e65a1b58
--- /dev/null
+++ b/include/linux/crc64.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * See lib/crc64.c for the related specification and polynomial arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+u64 __pure crc64_be(u64 crc, const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
-- 
cgit v1.2.3


From 52cba1a274818c3ee6299c1a1b476e7bc5037a2f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 21 Aug 2018 21:59:51 -0700
Subject: signal: make force_sigsegv() void

Patch series "signal: refactor some functions", v3.

This series refactors a bunch of functions in signal.c to simplify parts
of the code.

The greatest single change is declaring the static do_sigpending() helper
as void which makes it possible to remove a bunch of unnecessary checks in
the syscalls later on.

This patch (of 17):

force_sigsegv() returned 0 unconditionally so it doesn't make sense to have
it return at all. In addition, there are no callers that check
force_sigsegv()'s return value.

Link: http://lkml.kernel.org/r/20180602103653.18181-2-christian@brauner.io
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morris <james.morris@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/signal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 113d1ad1ced7..e138ac16c650 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -314,7 +314,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey);
 int force_sig_ptrace_errno_trap(int errno, void __user *addr);
 
 extern int send_sig_info(int, struct siginfo *, struct task_struct *);
-extern int force_sigsegv(int, struct task_struct *);
+extern void force_sigsegv(int sig, struct task_struct *p);
 extern int force_sig_info(int, struct siginfo *, struct task_struct *);
 extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
 extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
-- 
cgit v1.2.3


From 67a48a24478841525ba73ec6d64caaf079cf3b7c Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 21 Aug 2018 22:00:34 -0700
Subject: signal: make unhandled_signal() return bool

unhandled_signal() already behaves like a boolean function.  Let's
actually declare it as such too.  All callers treat it as such too.

Link: http://lkml.kernel.org/r/20180602103653.18181-13-christian@brauner.io
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morris <james.morris@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3c5200137b24..fa23cae66758 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -287,7 +287,7 @@ static inline void disallow_signal(int sig)
 
 extern struct kmem_cache *sighand_cachep;
 
-int unhandled_signal(struct task_struct *tsk, int sig);
+extern bool unhandled_signal(struct task_struct *tsk, int sig);
 
 /*
  * In POSIX a signal is sent either to a specific thread (Linux task)
-- 
cgit v1.2.3


From 20ab7218d2507b2dbec9c053c2f1b96054e266b6 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 21 Aug 2018 22:00:54 -0700
Subject: signal: make get_signal() return bool

make get_signal() already behaves like a boolean function.  Let's actually
declare it as such too.

Link: http://lkml.kernel.org/r/20180602103653.18181-18-christian@brauner.io
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morris <james.morris@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index fa23cae66758..e3d9e0640e6e 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -265,7 +265,7 @@ extern void set_current_blocked(sigset_t *);
 extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 
-extern int get_signal(struct ksignal *ksig);
+extern bool get_signal(struct ksignal *ksig);
 extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 extern void kernel_sigaction(int, __sighandler_t);
-- 
cgit v1.2.3


From dc2c8c84def6ce450c63529e08c1db100020994e Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Tue, 21 Aug 2018 22:01:52 -0700
Subject: ipc: get rid of ids->tables_initialized hack

In sysvipc we have an ids->tables_initialized regarding the rhashtable,
introduced in 0cfb6aee70bd ("ipc: optimize semget/shmget/msgget for lots
of keys")

It's there, specifically, to prevent nil pointer dereferences, from using
an uninitialized api.  Considering how rhashtable_init() can fail
(probably due to ENOMEM, if anything), this made the overall ipc
initialization capable of failure as well.  That alone is ugly, but fine,
however I've spotted a few issues regarding the semantics of
tables_initialized (however unlikely they may be):

- There is inconsistency in what we return to userspace: ipc_addid()
  returns ENOSPC which is certainly _wrong_, while ipc_obtain_object_idr()
  returns EINVAL.

- After we started using rhashtables, ipc_findkey() can return nil upon
  !tables_initialized, but the caller expects nil for when the ipc
  structure isn't found, and can therefore call into ipcget() callbacks.

Now that rhashtable initialization cannot fail, we can properly get rid of
the hack altogether.

[manfred@colorfullife.com: commit id extended to 12 digits]
Link: http://lkml.kernel.org/r/20180712185241.4017-10-manfred@colorfullife.com
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6cea726612b7..e9ccdfdb1928 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -16,7 +16,6 @@ struct user_namespace;
 struct ipc_ids {
 	int in_use;
 	unsigned short seq;
-	bool tables_initialized;
 	struct rw_semaphore rwsem;
 	struct idr ipcs_idr;
 	int max_id;
-- 
cgit v1.2.3


From 27c331a174614208d0b539019583990967ad9479 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Tue, 21 Aug 2018 22:02:00 -0700
Subject: ipc/util.c: further variable name cleanups

The varable names got a mess, thus standardize them again:

id: user space id. Called semid, shmid, msgid if the type is known.
    Most functions use "id" already.
idx: "index" for the idr lookup
    Right now, some functions use lid, ipc_addid() already uses idx as
    the variable name.
seq: sequence number, to avoid quick collisions of the user space id
key: user space key, used for the rhash tree

Link: http://lkml.kernel.org/r/20180712185241.4017-12-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e9ccdfdb1928..6ab8c1bada3f 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -18,7 +18,7 @@ struct ipc_ids {
 	unsigned short seq;
 	struct rw_semaphore rwsem;
 	struct idr ipcs_idr;
-	int max_id;
+	int max_idx;
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	int next_id;
 #endif
-- 
cgit v1.2.3


From 9abdda5ddab8a899ca8c4b859ef0a7710f40e0dd Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 16 Aug 2018 12:05:59 -0400
Subject: sunrpc: Extract target name into svc_cred

NFSv4.0 callback needs to know the GSS target name the client used
when it established its lease. That information is available from
the GSS context created by gssproxy. Make it available in each
svc_cred.

Note this will also give us access to the real target service
principal name (which is typically "nfs", but spec does not require
that).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcauth.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 7c3656505847..04e404a07882 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -31,6 +31,7 @@ struct svc_cred {
 	/* name of form servicetype@hostname, passed down by
 	 * rpc.svcgssd, or computed from the above: */
 	char			*cr_principal;
+	char			*cr_targ_princ;
 	struct gss_api_mech	*cr_gss_mech;
 };
 
@@ -39,6 +40,7 @@ static inline void init_svc_cred(struct svc_cred *cred)
 	cred->cr_group_info = NULL;
 	cred->cr_raw_principal = NULL;
 	cred->cr_principal = NULL;
+	cred->cr_targ_princ = NULL;
 	cred->cr_gss_mech = NULL;
 }
 
@@ -48,6 +50,7 @@ static inline void free_svc_cred(struct svc_cred *cred)
 		put_group_info(cred->cr_group_info);
 	kfree(cred->cr_raw_principal);
 	kfree(cred->cr_principal);
+	kfree(cred->cr_targ_princ);
 	gss_mech_put(cred->cr_gss_mech);
 	init_svc_cred(cred);
 }
-- 
cgit v1.2.3


From 815f0ddb346c196018d4d8f8f55c12b83da1de3f Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Wed, 22 Aug 2018 16:37:24 -0700
Subject: include/linux/compiler*.h: make compiler-*.h mutually exclusive

Commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6")
recently exposed a brittle part of the build for supporting non-gcc
compilers.

Both Clang and ICC define __GNUC__, __GNUC_MINOR__, and
__GNUC_PATCHLEVEL__ for quick compatibility with code bases that haven't
added compiler specific checks for __clang__ or __INTEL_COMPILER.

This is brittle, as they happened to get compatibility by posing as a
certain version of GCC.  This broke when upgrading the minimal version
of GCC required to build the kernel, to a version above what ICC and
Clang claim to be.

Rather than always including compiler-gcc.h then undefining or
redefining macros in compiler-intel.h or compiler-clang.h, let's
separate out the compiler specific macro definitions into mutually
exclusive headers, do more proper compiler detection, and keep shared
definitions in compiler_types.h.

Fixes: cafa0010cd51 ("Raise the minimum required gcc version to 4.6")
Reported-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Suggested-by: Eli Friedman <efriedma@codeaurora.org>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-clang.h |  20 ++--
 include/linux/compiler-gcc.h   |  88 ---------------
 include/linux/compiler-intel.h |  13 +--
 include/linux/compiler_types.h | 238 ++++++++++++++++++++---------------------
 4 files changed, 127 insertions(+), 232 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 7087446c24c8..b1ce500fe8b3 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -6,11 +6,7 @@
 /* Some compiler specific definitions are overwritten here
  * for Clang compiler
  */
-
-#ifdef uninitialized_var
-#undef uninitialized_var
 #define uninitialized_var(x) x = *(&(x))
-#endif
 
 /* same as gcc, this was present in clang-2.6 so we can assume it works
  * with any version that can compile the kernel
@@ -25,14 +21,8 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
-#undef __no_sanitize_address
 #define __no_sanitize_address __attribute__((no_sanitize("address")))
 
-/* Clang doesn't have a way to turn it off per-function, yet. */
-#ifdef __noretpoline
-#undef __noretpoline
-#endif
-
 /*
  * Not all versions of clang implement the the type-generic versions
  * of the builtin overflow checkers. Fortunately, clang implements
@@ -40,9 +30,17 @@
  * checks. Unfortunately, we don't know which version of gcc clang
  * pretends to be, so the macro may or may not be defined.
  */
-#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
 #if __has_builtin(__builtin_mul_overflow) && \
     __has_builtin(__builtin_add_overflow) && \
     __has_builtin(__builtin_sub_overflow)
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
+
+/* The following are for compatibility with GCC, from compiler-gcc.h,
+ * and may be redefined here because they should not be shared with other
+ * compilers, like ICC.
+ */
+#define barrier() __asm__ __volatile__("" : : : "memory")
+#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#define __assume_aligned(a, ...)	\
+	__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 250b9b7cfd60..763bbad1e258 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -75,48 +75,6 @@
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #endif
 
-/*
- * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
- * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
- * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
- * defined so the gnu89 semantics are the default.
- */
-#ifdef __GNUC_STDC_INLINE__
-# define __gnu_inline	__attribute__((gnu_inline))
-#else
-# define __gnu_inline
-#endif
-
-/*
- * Force always-inline if the user requests it so via the .config,
- * or if gcc is too old.
- * GCC does not warn about unused static inline functions for
- * -Wunused-function.  This turns out to avoid the need for complex #ifdef
- * directives.  Suppress the warning in clang as well by using "unused"
- * function attribute, which is redundant but not harmful for gcc.
- * Prefer gnu_inline, so that extern inline functions do not emit an
- * externally visible function. This makes extern inline behave as per gnu89
- * semantics rather than c99. This prevents multiple symbol definition errors
- * of extern inline functions at link time.
- * A lot of inline functions can cause havoc with function tracing.
- */
-#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
-    !defined(CONFIG_OPTIMIZE_INLINING)
-#define inline \
-	inline __attribute__((always_inline, unused)) notrace __gnu_inline
-#else
-#define inline inline		__attribute__((unused)) notrace __gnu_inline
-#endif
-
-#define __inline__ inline
-#define __inline inline
-#define __always_inline	inline __attribute__((always_inline))
-#define  noinline	__attribute__((noinline))
-
-#define __packed	__attribute__((packed))
-#define __weak		__attribute__((weak))
-#define __alias(symbol)	__attribute__((alias(#symbol)))
-
 #ifdef RETPOLINE
 #define __noretpoline __attribute__((indirect_branch("keep")))
 #endif
@@ -135,55 +93,9 @@
  */
 #define __naked		__attribute__((naked)) noinline __noclone notrace
 
-#define __noreturn	__attribute__((noreturn))
-
-/*
- * From the GCC manual:
- *
- * Many functions have no effects except the return value and their
- * return value depends only on the parameters and/or global
- * variables.  Such a function can be subject to common subexpression
- * elimination and loop optimization just as an arithmetic operator
- * would be.
- * [...]
- */
-#define __pure			__attribute__((pure))
-#define __aligned(x)		__attribute__((aligned(x)))
-#define __aligned_largest	__attribute__((aligned))
-#define __printf(a, b)		__attribute__((format(printf, a, b)))
-#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
-#define __attribute_const__	__attribute__((__const__))
-#define __maybe_unused		__attribute__((unused))
-#define __always_unused		__attribute__((unused))
-#define __mode(x)               __attribute__((mode(x)))
-
-#define __must_check		__attribute__((warn_unused_result))
-#define __malloc		__attribute__((__malloc__))
-
-#define __used			__attribute__((__used__))
-#define __compiler_offsetof(a, b)					\
-	__builtin_offsetof(a, b)
-
-/* Mark functions as cold. gcc will assume any path leading to a call
- * to them will be unlikely.  This means a lot of manual unlikely()s
- * are unnecessary now for any paths leading to the usual suspects
- * like BUG(), printk(), panic() etc. [but let's keep them for now for
- * older compilers]
- *
- * Early snapshots of gcc 4.3 don't support this and we can't detect this
- * in the preprocessor, but we can live with this because they're unreleased.
- * Maketime probing would be overkill here.
- *
- * gcc also has a __attribute__((__hot__)) to move hot functions into
- * a special section, but I don't see any sense in this right now in
- * the kernel context
- */
-#define __cold			__attribute__((__cold__))
-
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
 #define __optimize(level)	__attribute__((__optimize__(level)))
-#define __nostackprotector	__optimize("no-stack-protector")
 
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 547cdc920a3c..4c7f9befa9f6 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -14,10 +14,6 @@
 /* Intel ECC compiler doesn't support gcc specific asm stmts.
  * It uses intrinsics to do the equivalent things.
  */
-#undef barrier
-#undef barrier_data
-#undef RELOC_HIDE
-#undef OPTIMIZER_HIDE_VAR
 
 #define barrier() __memory_barrier()
 #define barrier_data(ptr) barrier()
@@ -38,13 +34,12 @@
 
 #endif
 
-#ifndef __HAVE_BUILTIN_BSWAP16__
 /* icc has this, but it's called _bswap16 */
 #define __HAVE_BUILTIN_BSWAP16__
 #define __builtin_bswap16 _bswap16
-#endif
 
-/*
- * icc defines __GNUC__, but does not implement the builtin overflow checkers.
+/* The following are for compatibility with GCC, from compiler-gcc.h,
+ * and may be redefined here because they should not be shared with other
+ * compilers, like clang.
  */
-#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+#define __visible	__attribute__((externally_visible))
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index fbf337933fd8..90479a0f3986 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -54,32 +54,20 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 
 #ifdef __KERNEL__
 
-#ifdef __GNUC__
-#include <linux/compiler-gcc.h>
-#endif
-
-#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
-#define notrace __attribute__((hotpatch(0,0)))
-#else
-#define notrace __attribute__((no_instrument_function))
-#endif
-
-/* Intel compiler defines __GNUC__. So we will overwrite implementations
- * coming from above header files here
- */
-#ifdef __INTEL_COMPILER
-# include <linux/compiler-intel.h>
-#endif
-
-/* Clang compiler defines __GNUC__. So we will overwrite implementations
- * coming from above header files here
- */
+/* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>
+#elif defined(__INTEL_COMPILER)
+#include <linux/compiler-intel.h>
+#elif defined(__GNUC__)
+/* The above compilers also define __GNUC__, so order is important here. */
+#include <linux/compiler-gcc.h>
+#else
+#error "Unknown compiler"
 #endif
 
 /*
- * Generic compiler-dependent macros required for kernel
+ * Generic compiler-independent macros required for kernel
  * build go below this comment. Actual compiler/compiler version
  * specific implementations come from the above header files
  */
@@ -106,93 +94,19 @@ struct ftrace_likely_data {
 	unsigned long			constant;
 };
 
-#endif /* __KERNEL__ */
-
-#endif /* __ASSEMBLY__ */
-
-#ifdef __KERNEL__
-
 /* Don't. Just don't. */
 #define __deprecated
 #define __deprecated_for_modules
 
-#ifndef __must_check
-#define __must_check
-#endif
-
-#ifndef CONFIG_ENABLE_MUST_CHECK
-#undef __must_check
-#define __must_check
-#endif
-
-#ifndef __malloc
-#define __malloc
-#endif
-
-/*
- * Allow us to avoid 'defined but not used' warnings on functions and data,
- * as well as force them to be emitted to the assembly file.
- *
- * As of gcc 3.4, static functions that are not marked with attribute((used))
- * may be elided from the assembly file.  As of gcc 3.4, static data not so
- * marked will not be elided, but this may change in a future gcc version.
- *
- * NOTE: Because distributions shipped with a backported unit-at-a-time
- * compiler in gcc 3.3, we must define __used to be __attribute__((used))
- * for gcc >=3.3 instead of 3.4.
- *
- * In prior versions of gcc, such functions and data would be emitted, but
- * would be warned about except with attribute((unused)).
- *
- * Mark functions that are referenced only in inline assembly as __used so
- * the code is emitted even though it appears to be unreferenced.
- */
-#ifndef __used
-# define __used			/* unimplemented */
-#endif
-
-#ifndef __maybe_unused
-# define __maybe_unused		/* unimplemented */
-#endif
-
-#ifndef __always_unused
-# define __always_unused	/* unimplemented */
-#endif
-
-#ifndef noinline
-#define noinline
-#endif
-
-/*
- * Rather then using noinline to prevent stack consumption, use
- * noinline_for_stack instead.  For documentation reasons.
- */
-#define noinline_for_stack noinline
-
-#ifndef __always_inline
-#define __always_inline inline
-#endif
-
 #endif /* __KERNEL__ */
 
+#endif /* __ASSEMBLY__ */
+
 /*
- * From the GCC manual:
- *
- * Many functions do not examine any values except their arguments,
- * and have no effects except the return value.  Basically this is
- * just slightly more strict class than the `pure' attribute above,
- * since function is not allowed to read global memory.
- *
- * Note that a function that has pointer arguments and examines the
- * data pointed to must _not_ be declared `const'.  Likewise, a
- * function that calls a non-`const' function usually must not be
- * `const'.  It does not make sense for a `const' function to return
- * `void'.
+ * The below symbols may be defined for one or more, but not ALL, of the above
+ * compilers. We don't consider that to be an error, so set them to nothing.
+ * For example, some of them are for compiler specific plugins.
  */
-#ifndef __attribute_const__
-# define __attribute_const__	/* unimplemented */
-#endif
-
 #ifndef __designated_init
 # define __designated_init
 #endif
@@ -214,28 +128,10 @@ struct ftrace_likely_data {
 # define randomized_struct_fields_end
 #endif
 
-/*
- * Tell gcc if a function is cold. The compiler will assume any path
- * directly leading to the call is unlikely.
- */
-
-#ifndef __cold
-#define __cold
-#endif
-
-/* Simple shorthand for a section definition */
-#ifndef __section
-# define __section(S) __attribute__ ((__section__(#S)))
-#endif
-
 #ifndef __visible
 #define __visible
 #endif
 
-#ifndef __nostackprotector
-# define __nostackprotector
-#endif
-
 /*
  * Assume alignment of return value.
  */
@@ -243,17 +139,23 @@ struct ftrace_likely_data {
 #define __assume_aligned(a, ...)
 #endif
 
-
 /* Are two types/vars the same type (ignoring qualifiers)? */
-#ifndef __same_type
-# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
-#endif
+#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
 /* Is this type a native word size -- useful for atomic operations */
-#ifndef __native_word
-# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+#define __native_word(t) \
+	(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
+	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+
+#ifndef __attribute_const__
+#define __attribute_const__	__attribute__((__const__))
 #endif
 
+#ifndef __noclone
+#define __noclone
+#endif
+
+/* Helpers for emitting diagnostics in pragmas. */
 #ifndef __diag
 #define __diag(string)
 #endif
@@ -272,4 +174,92 @@ struct ftrace_likely_data {
 #define __diag_error(compiler, version, option, comment) \
 	__diag_ ## compiler(version, error, option)
 
+/*
+ * From the GCC manual:
+ *
+ * Many functions have no effects except the return value and their
+ * return value depends only on the parameters and/or global
+ * variables.  Such a function can be subject to common subexpression
+ * elimination and loop optimization just as an arithmetic operator
+ * would be.
+ * [...]
+ */
+#define __pure			__attribute__((pure))
+#define __aligned(x)		__attribute__((aligned(x)))
+#define __aligned_largest	__attribute__((aligned))
+#define __printf(a, b)		__attribute__((format(printf, a, b)))
+#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
+#define __maybe_unused		__attribute__((unused))
+#define __always_unused		__attribute__((unused))
+#define __mode(x)		__attribute__((mode(x)))
+#define __malloc		__attribute__((__malloc__))
+#define __used			__attribute__((__used__))
+#define __noreturn		__attribute__((noreturn))
+#define __packed		__attribute__((packed))
+#define __weak			__attribute__((weak))
+#define __alias(symbol)		__attribute__((alias(#symbol)))
+#define __cold			__attribute__((cold))
+#define __section(S)		__attribute__((__section__(#S)))
+
+
+#ifdef CONFIG_ENABLE_MUST_CHECK
+#define __must_check		__attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
+
+#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
+#define notrace			__attribute__((hotpatch(0, 0)))
+#else
+#define notrace			__attribute__((no_instrument_function))
+#endif
+
+#define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
+
+/*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
+ * Force always-inline if the user requests it so via the .config.
+ * GCC does not warn about unused static inline functions for
+ * -Wunused-function.  This turns out to avoid the need for complex #ifdef
+ * directives.  Suppress the warning in clang as well by using "unused"
+ * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
+ */
+#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
+	!defined(CONFIG_OPTIMIZE_INLINING)
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
+#else
+#define inline inline	__attribute__((unused)) notrace __gnu_inline
+#endif
+
+#define __inline__ inline
+#define __inline inline
+#define noinline	__attribute__((noinline))
+
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
+/*
+ * Rather then using noinline to prevent stack consumption, use
+ * noinline_for_stack instead.  For documentation reasons.
+ */
+#define noinline_for_stack noinline
+
 #endif /* __LINUX_COMPILER_TYPES_H */
-- 
cgit v1.2.3


From 30aba6656f61ed44cba445a3c0d38b296fa9e8f5 Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Thu, 23 Aug 2018 17:00:35 -0700
Subject: namei: allow restricted O_CREAT of FIFOs and regular files

Disallows open of FIFOs or regular files not owned by the user in world
writable sticky directories, unless the owner is the same as that of the
directory or the file is opened without the O_CREAT flag.  The purpose
is to make data spoofing attacks harder.  This protection can be turned
on and off separately for FIFOs and regular files via sysctl, just like
the symlinks/hardlinks protection.  This patch is based on Openwall's
"HARDEN_FIFO" feature by Solar Designer.

This is a brief list of old vulnerabilities that could have been prevented
by this feature, some of them even allow for privilege escalation:

CVE-2000-1134
CVE-2007-3852
CVE-2008-0525
CVE-2009-0416
CVE-2011-4834
CVE-2015-1838
CVE-2015-7442
CVE-2016-7489

This list is not meant to be complete.  It's difficult to track down all
vulnerabilities of this kind because they were often reported without any
mention of this particular attack vector.  In fact, before
hardlinks/symlinks restrictions, fifos/regular files weren't the favorite
vehicle to exploit them.

[s.mesoraca16@gmail.com: fix bug reported by Dan Carpenter]
  Link: https://lkml.kernel.org/r/20180426081456.GA7060@mwanda
  Link: http://lkml.kernel.org/r/1524829819-11275-1-git-send-email-s.mesoraca16@gmail.com
[keescook@chromium.org: drop pr_warn_ratelimited() in favor of audit changes in the future]
[keescook@chromium.org: adjust commit subjet]
Link: http://lkml.kernel.org/r/20180416175918.GA13494@beast
Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Suggested-by: Solar Designer <solar@openwall.com>
Suggested-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e5710541183b..33322702c910 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -74,6 +74,8 @@ extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
 extern int sysctl_protected_symlinks;
 extern int sysctl_protected_hardlinks;
+extern int sysctl_protected_fifos;
+extern int sysctl_protected_regular;
 
 typedef __kernel_rwf_t rwf_t;
 
-- 
cgit v1.2.3


From d4ae9916ea2947341180d2b538f48875ff393a86 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Thu, 23 Aug 2018 17:00:42 -0700
Subject: mm: soft-offline: close the race against page allocation

A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to
allocate a page that was just freed on the way of soft-offline.  This is
undesirable because soft-offline (which is about corrected error) is
less aggressive than hard-offline (which is about uncorrected error),
and we can make soft-offline fail and keep using the page for good
reason like "system is busy."

Two main changes of this patch are:

- setting migrate type of the target page to MIGRATE_ISOLATE. As done
  in free_unref_page_commit(), this makes kernel bypass pcplist when
  freeing the page. So we can assume that the page is in freelist just
  after put_page() returns,

- setting PG_hwpoison on free page under zone->lock which protects
  freelists, so this allows us to avoid setting PG_hwpoison on a page
  that is decided to be allocated soon.

[akpm@linux-foundation.org: tweak set_hwpoison_free_buddy_page() comment]
Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reported-by: Xishi Qiu <xishi.qiuxishi@alibaba-inc.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <zy.zhengyi@alibaba-inc.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h |  5 +++++
 include/linux/swapops.h    | 10 ----------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 901943e4754b..74bee8cecf4c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -369,8 +369,13 @@ PAGEFLAG_FALSE(Uncached)
 PAGEFLAG(HWPoison, hwpoison, PF_ANY)
 TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
+extern bool set_hwpoison_free_buddy_page(struct page *page);
 #else
 PAGEFLAG_FALSE(HWPoison)
+static inline bool set_hwpoison_free_buddy_page(struct page *page)
+{
+	return 0;
+}
 #define __PG_HWPOISON 0
 #endif
 
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 1d3877c39a00..568a3553d918 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -340,11 +340,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
 	return swp_type(entry) == SWP_HWPOISON;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-	return TestSetPageHWPoison(page);
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 	atomic_long_inc(&num_poisoned_pages);
@@ -367,11 +362,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
 	return 0;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-	return false;
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 }
-- 
cgit v1.2.3


From 263fade51f7bdd5ad7ebbfe82113a44c5ea4c36c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Thu, 23 Aug 2018 17:01:15 -0700
Subject: docs/mm: make GFP flags descriptions usable as kernel-doc

This patch adds DOC: headings for GFP flag descriptions and adjusts the
formatting to fit sphinx expectations of paragraphs.

Link: http://lkml.kernel.org/r/1532626360-16650-7-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 291 +++++++++++++++++++++++++++-------------------------
 1 file changed, 154 insertions(+), 137 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a6afcec53795..24bcc5eec6b4 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -59,29 +59,32 @@ struct vm_area_struct;
 #define __GFP_MOVABLE	((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE allowed */
 #define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
 
-/*
+/**
+ * DOC: Page mobility and placement hints
+ *
  * Page mobility and placement hints
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
  * These flags provide hints about how mobile the page is. Pages with similar
  * mobility are placed within the same pageblocks to minimise problems due
  * to external fragmentation.
  *
- * __GFP_MOVABLE (also a zone modifier) indicates that the page can be
- *   moved by page migration during memory compaction or can be reclaimed.
+ * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be
+ * moved by page migration during memory compaction or can be reclaimed.
  *
- * __GFP_RECLAIMABLE is used for slab allocations that specify
- *   SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
+ * %__GFP_RECLAIMABLE is used for slab allocations that specify
+ * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
  *
- * __GFP_WRITE indicates the caller intends to dirty the page. Where possible,
- *   these pages will be spread between local zones to avoid all the dirty
- *   pages being in one zone (fair zone allocation policy).
+ * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible,
+ * these pages will be spread between local zones to avoid all the dirty
+ * pages being in one zone (fair zone allocation policy).
  *
- * __GFP_HARDWALL enforces the cpuset memory allocation policy.
+ * %__GFP_HARDWALL enforces the cpuset memory allocation policy.
  *
- * __GFP_THISNODE forces the allocation to be satisified from the requested
- *   node with no fallbacks or placement policy enforcements.
+ * %__GFP_THISNODE forces the allocation to be satisified from the requested
+ * node with no fallbacks or placement policy enforcements.
  *
- * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
+ * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
  */
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)
@@ -89,54 +92,60 @@ struct vm_area_struct;
 #define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)
 #define __GFP_ACCOUNT	((__force gfp_t)___GFP_ACCOUNT)
 
-/*
+/**
+ * DOC: Watermark modifiers
+ *
  * Watermark modifiers -- controls access to emergency reserves
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
- * __GFP_HIGH indicates that the caller is high-priority and that granting
- *   the request is necessary before the system can make forward progress.
- *   For example, creating an IO context to clean pages.
+ * %__GFP_HIGH indicates that the caller is high-priority and that granting
+ * the request is necessary before the system can make forward progress.
+ * For example, creating an IO context to clean pages.
  *
- * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
- *   high priority. Users are typically interrupt handlers. This may be
- *   used in conjunction with __GFP_HIGH
+ * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
+ * high priority. Users are typically interrupt handlers. This may be
+ * used in conjunction with %__GFP_HIGH
  *
- * __GFP_MEMALLOC allows access to all memory. This should only be used when
- *   the caller guarantees the allocation will allow more memory to be freed
- *   very shortly e.g. process exiting or swapping. Users either should
- *   be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
+ * %__GFP_MEMALLOC allows access to all memory. This should only be used when
+ * the caller guarantees the allocation will allow more memory to be freed
+ * very shortly e.g. process exiting or swapping. Users either should
+ * be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
  *
- * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
- *   This takes precedence over the __GFP_MEMALLOC flag if both are set.
+ * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
+ * This takes precedence over the %__GFP_MEMALLOC flag if both are set.
  */
 #define __GFP_ATOMIC	((__force gfp_t)___GFP_ATOMIC)
 #define __GFP_HIGH	((__force gfp_t)___GFP_HIGH)
 #define __GFP_MEMALLOC	((__force gfp_t)___GFP_MEMALLOC)
 #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
 
-/*
+/**
+ * DOC: Reclaim modifiers
+ *
  * Reclaim modifiers
+ * ~~~~~~~~~~~~~~~~~
  *
- * __GFP_IO can start physical IO.
+ * %__GFP_IO can start physical IO.
  *
- * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the
- *   allocator recursing into the filesystem which might already be holding
- *   locks.
+ * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the
+ * allocator recursing into the filesystem which might already be holding
+ * locks.
  *
- * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
- *   This flag can be cleared to avoid unnecessary delays when a fallback
- *   option is available.
+ * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
+ * This flag can be cleared to avoid unnecessary delays when a fallback
+ * option is available.
  *
- * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
- *   the low watermark is reached and have it reclaim pages until the high
- *   watermark is reached. A caller may wish to clear this flag when fallback
- *   options are available and the reclaim is likely to disrupt the system. The
- *   canonical example is THP allocation where a fallback is cheap but
- *   reclaim/compaction may cause indirect stalls.
+ * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
+ * the low watermark is reached and have it reclaim pages until the high
+ * watermark is reached. A caller may wish to clear this flag when fallback
+ * options are available and the reclaim is likely to disrupt the system. The
+ * canonical example is THP allocation where a fallback is cheap but
+ * reclaim/compaction may cause indirect stalls.
  *
- * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
+ * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
  *
  * The default allocator behavior depends on the request size. We have a concept
- * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER).
+ * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER).
  * !costly allocations are too essential to fail so they are implicitly
  * non-failing by default (with some exceptions like OOM victims might fail so
  * the caller still has to check for failures) while costly requests try to be
@@ -144,40 +153,40 @@ struct vm_area_struct;
  * The following three modifiers might be used to override some of these
  * implicit rules
  *
- * __GFP_NORETRY: The VM implementation will try only very lightweight
- *   memory direct reclaim to get some memory under memory pressure (thus
- *   it can sleep). It will avoid disruptive actions like OOM killer. The
- *   caller must handle the failure which is quite likely to happen under
- *   heavy memory pressure. The flag is suitable when failure can easily be
- *   handled at small cost, such as reduced throughput
- *
- * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
- *   procedures that have previously failed if there is some indication
- *   that progress has been made else where.  It can wait for other
- *   tasks to attempt high level approaches to freeing memory such as
- *   compaction (which removes fragmentation) and page-out.
- *   There is still a definite limit to the number of retries, but it is
- *   a larger limit than with __GFP_NORETRY.
- *   Allocations with this flag may fail, but only when there is
- *   genuinely little unused memory. While these allocations do not
- *   directly trigger the OOM killer, their failure indicates that
- *   the system is likely to need to use the OOM killer soon.  The
- *   caller must handle failure, but can reasonably do so by failing
- *   a higher-level request, or completing it only in a much less
- *   efficient manner.
- *   If the allocation does fail, and the caller is in a position to
- *   free some non-essential memory, doing so could benefit the system
- *   as a whole.
- *
- * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- *   cannot handle allocation failures. The allocation could block
- *   indefinitely but will never return with failure. Testing for
- *   failure is pointless.
- *   New users should be evaluated carefully (and the flag should be
- *   used only when there is no reasonable failure policy) but it is
- *   definitely preferable to use the flag rather than opencode endless
- *   loop around allocator.
- *   Using this flag for costly allocations is _highly_ discouraged.
+ * %__GFP_NORETRY: The VM implementation will try only very lightweight
+ * memory direct reclaim to get some memory under memory pressure (thus
+ * it can sleep). It will avoid disruptive actions like OOM killer. The
+ * caller must handle the failure which is quite likely to happen under
+ * heavy memory pressure. The flag is suitable when failure can easily be
+ * handled at small cost, such as reduced throughput
+ *
+ * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ * procedures that have previously failed if there is some indication
+ * that progress has been made else where.  It can wait for other
+ * tasks to attempt high level approaches to freeing memory such as
+ * compaction (which removes fragmentation) and page-out.
+ * There is still a definite limit to the number of retries, but it is
+ * a larger limit than with %__GFP_NORETRY.
+ * Allocations with this flag may fail, but only when there is
+ * genuinely little unused memory. While these allocations do not
+ * directly trigger the OOM killer, their failure indicates that
+ * the system is likely to need to use the OOM killer soon.  The
+ * caller must handle failure, but can reasonably do so by failing
+ * a higher-level request, or completing it only in a much less
+ * efficient manner.
+ * If the allocation does fail, and the caller is in a position to
+ * free some non-essential memory, doing so could benefit the system
+ * as a whole.
+ *
+ * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures. The allocation could block
+ * indefinitely but will never return with failure. Testing for
+ * failure is pointless.
+ * New users should be evaluated carefully (and the flag should be
+ * used only when there is no reasonable failure policy) but it is
+ * definitely preferable to use the flag rather than opencode endless
+ * loop around allocator.
+ * Using this flag for costly allocations is _highly_ discouraged.
  */
 #define __GFP_IO	((__force gfp_t)___GFP_IO)
 #define __GFP_FS	((__force gfp_t)___GFP_FS)
@@ -188,14 +197,17 @@ struct vm_area_struct;
 #define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)
 #define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY)
 
-/*
+/**
+ * DOC: Action modifiers
+ *
  * Action modifiers
+ * ~~~~~~~~~~~~~~~~
  *
- * __GFP_NOWARN suppresses allocation failure reports.
+ * %__GFP_NOWARN suppresses allocation failure reports.
  *
- * __GFP_COMP address compound page metadata.
+ * %__GFP_COMP address compound page metadata.
  *
- * __GFP_ZERO returns a zeroed page on success.
+ * %__GFP_ZERO returns a zeroed page on success.
  */
 #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
 #define __GFP_COMP	((__force gfp_t)___GFP_COMP)
@@ -208,66 +220,71 @@ struct vm_area_struct;
 #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
-/*
+/**
+ * DOC: Useful GFP flag combinations
+ *
+ * Useful GFP flag combinations
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
  * Useful GFP flag combinations that are commonly used. It is recommended
  * that subsystems start with one of these combinations and then set/clear
- * __GFP_FOO flags as necessary.
- *
- * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
- *   watermark is applied to allow access to "atomic reserves"
- *
- * GFP_KERNEL is typical for kernel-internal allocations. The caller requires
- *   ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
- *
- * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
- *   accounted to kmemcg.
- *
- * GFP_NOWAIT is for kernel allocations that should not stall for direct
- *   reclaim, start physical IO or use any filesystem callback.
- *
- * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
- *   that do not require the starting of any physical IO.
- *   Please try to avoid using this flag directly and instead use
- *   memalloc_noio_{save,restore} to mark the whole scope which cannot
- *   perform any IO with a short explanation why. All allocation requests
- *   will inherit GFP_NOIO implicitly.
- *
- * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
- *   Please try to avoid using this flag directly and instead use
- *   memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
- *   recurse into the FS layer with a short explanation why. All allocation
- *   requests will inherit GFP_NOFS implicitly.
- *
- * GFP_USER is for userspace allocations that also need to be directly
- *   accessibly by the kernel or hardware. It is typically used by hardware
- *   for buffers that are mapped to userspace (e.g. graphics) that hardware
- *   still must DMA to. cpuset limits are enforced for these allocations.
- *
- * GFP_DMA exists for historical reasons and should be avoided where possible.
- *   The flags indicates that the caller requires that the lowest zone be
- *   used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
- *   it would require careful auditing as some users really require it and
- *   others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
- *   lowest zone as a type of emergency reserve.
- *
- * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit
- *   address.
- *
- * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
- *   do not need to be directly accessible by the kernel but that cannot
- *   move once in use. An example may be a hardware allocation that maps
- *   data directly into userspace but has no addressing limitations.
- *
- * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
- *   need direct access to but can use kmap() when access is required. They
- *   are expected to be movable via page reclaim or page migration. Typically,
- *   pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
- *
- * GFP_TRANSHUGE and GFP_TRANSHUGE_LIGHT are used for THP allocations. They are
- *   compound allocations that will generally fail quickly if memory is not
- *   available and will not wake kswapd/kcompactd on failure. The _LIGHT
- *   version does not attempt reclaim/compaction at all and is by default used
- *   in page fault path, while the non-light is used by khugepaged.
+ * %__GFP_FOO flags as necessary.
+ *
+ * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
+ * watermark is applied to allow access to "atomic reserves"
+ *
+ * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires
+ * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
+ *
+ * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
+ * accounted to kmemcg.
+ *
+ * %GFP_NOWAIT is for kernel allocations that should not stall for direct
+ * reclaim, start physical IO or use any filesystem callback.
+ *
+ * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages
+ * that do not require the starting of any physical IO.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_noio_{save,restore} to mark the whole scope which cannot
+ * perform any IO with a short explanation why. All allocation requests
+ * will inherit GFP_NOIO implicitly.
+ *
+ * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
+ * recurse into the FS layer with a short explanation why. All allocation
+ * requests will inherit GFP_NOFS implicitly.
+ *
+ * %GFP_USER is for userspace allocations that also need to be directly
+ * accessibly by the kernel or hardware. It is typically used by hardware
+ * for buffers that are mapped to userspace (e.g. graphics) that hardware
+ * still must DMA to. cpuset limits are enforced for these allocations.
+ *
+ * %GFP_DMA exists for historical reasons and should be avoided where possible.
+ * The flags indicates that the caller requires that the lowest zone be
+ * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
+ * it would require careful auditing as some users really require it and
+ * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the
+ * lowest zone as a type of emergency reserve.
+ *
+ * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
+ * address.
+ *
+ * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
+ * do not need to be directly accessible by the kernel but that cannot
+ * move once in use. An example may be a hardware allocation that maps
+ * data directly into userspace but has no addressing limitations.
+ *
+ * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
+ * need direct access to but can use kmap() when access is required. They
+ * are expected to be movable via page reclaim or page migration. Typically,
+ * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE.
+ *
+ * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They
+ * are compound allocations that will generally fail quickly if memory is not
+ * available and will not wake kswapd/kcompactd on failure. The _LIGHT
+ * version does not attempt reclaim/compaction at all and is by default used
+ * in page fault path, while the non-light is used by khugepaged.
  */
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
-- 
cgit v1.2.3


From 2b7403035459c75e193c6b04a293e518a4212de0 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Thu, 23 Aug 2018 17:01:36 -0700
Subject: mm: Change return type int to vm_fault_t for fault handlers

Use new return type vm_fault_t for fault handler.  For now, this is just
documenting that the function returns a VM_FAULT value rather than an
errno.  Once all instances are converted, vm_fault_t will become a
distinct type.

Ref-> commit 1c8f422059ae ("mm: change return type to vm_fault_t")

The aim is to change the return type of finish_fault() and
handle_mm_fault() to vm_fault_t type.  As part of that clean up return
type of all other recursively called functions have been changed to
vm_fault_t type.

The places from where handle_mm_fault() is getting invoked will be
change to vm_fault_t type but in a separate patch.

vmf_error() is the newly introduce inline function in 4.17-rc6.

[akpm@linux-foundation.org: don't shadow outer local `ret' in __do_huge_pmd_anonymous_page()]
Link: http://lkml.kernel.org/r/20180604171727.GA20279@jordon-HP-15-Notebook-PC
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h       |  9 +++++----
 include/linux/hugetlb.h       |  2 +-
 include/linux/mm.h            | 14 +++++++-------
 include/linux/oom.h           |  2 +-
 include/linux/swapops.h       |  5 +++--
 include/linux/userfaultfd_k.h |  5 +++--
 6 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a8a126259bc4..27e3e32135a8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,7 +6,7 @@
 
 #include <linux/fs.h> /* only for vma_is_dax() */
 
-extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
+extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
@@ -23,7 +23,7 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 }
 #endif
 
-extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
+extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 					  unsigned long addr,
 					  pmd_t *pmd,
@@ -216,7 +216,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 		pud_t *pud, int flags);
 
-extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
+extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
 
@@ -321,7 +321,8 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
 	return NULL;
 }
 
-static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
+static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf,
+		pmd_t orig_pmd)
 {
 	return 0;
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c39d9170a8a0..6b68e345f0ca 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -105,7 +105,7 @@ void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(int, char *);
 void hugetlb_show_meminfo(void);
 unsigned long hugetlb_total_pages(void);
-int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, unsigned int flags);
 int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
 				struct vm_area_struct *dst_vma,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a9e733b5fb76..8fcc36660de6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -728,10 +728,10 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
+vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
-int finish_fault(struct vm_fault *vmf);
-int finish_mkwrite_fault(struct vm_fault *vmf);
+vm_fault_t finish_fault(struct vm_fault *vmf);
+vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
 #endif
 
 /*
@@ -1403,8 +1403,8 @@ int generic_error_remove_page(struct address_space *mapping, struct page *page);
 int invalidate_inode_page(struct page *page);
 
 #ifdef CONFIG_MMU
-extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags);
+extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
+			unsigned long address, unsigned int flags);
 extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long address, unsigned int fault_flags,
 			    bool *unlocked);
@@ -1413,7 +1413,7 @@ void unmap_mapping_pages(struct address_space *mapping,
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
 #else
-static inline int handle_mm_fault(struct vm_area_struct *vma,
+static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
 {
 	/* should never happen if there's no MMU */
@@ -2563,7 +2563,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_COW	0x4000	/* internal GUP flag */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
 
-static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
+static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
 {
 	if (vm_fault & VM_FAULT_OOM)
 		return -ENOMEM;
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 92f70e4c6252..69864a547663 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -88,7 +88,7 @@ static inline bool mm_is_oom_victim(struct mm_struct *mm)
  *
  * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise.
  */
-static inline int check_stable_address_space(struct mm_struct *mm)
+static inline vm_fault_t check_stable_address_space(struct mm_struct *mm)
 {
 	if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags)))
 		return VM_FAULT_SIGBUS;
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 568a3553d918..22af9d8a84ae 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -4,6 +4,7 @@
 
 #include <linux/radix-tree.h>
 #include <linux/bug.h>
+#include <linux/mm_types.h>
 
 /*
  * swapcache pages are stored in the swapper_space radix tree.  We want to
@@ -134,7 +135,7 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 	return pfn_to_page(swp_offset(entry));
 }
 
-int device_private_entry_fault(struct vm_area_struct *vma,
+vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
 		       unsigned long addr,
 		       swp_entry_t entry,
 		       unsigned int flags,
@@ -169,7 +170,7 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 	return NULL;
 }
 
-static inline int device_private_entry_fault(struct vm_area_struct *vma,
+static inline vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
 				     unsigned long addr,
 				     swp_entry_t entry,
 				     unsigned int flags,
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index e091f0a11b11..37c9eba75c98 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -28,7 +28,7 @@
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
-extern int handle_userfault(struct vm_fault *vmf, unsigned long reason);
+extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len,
@@ -77,7 +77,8 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm,
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
-static inline int handle_userfault(struct vm_fault *vmf, unsigned long reason)
+static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
+				unsigned long reason)
 {
 	return VM_FAULT_SIGBUS;
 }
-- 
cgit v1.2.3


From 0c36dd37d5b80b43f4e3dc5a1dbfe6dbd86e8f2a Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 21 Aug 2018 17:02:40 +0200
Subject: i2c: remove deprecated attach_adapter callback

There aren't any users left. Remove this callback from the 2.4 times.
Phew, finally, that took years to reach...

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 36f357ecdf67..b79387fd57da 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -231,7 +231,6 @@ enum i2c_alert_protocol {
 /**
  * struct i2c_driver - represent an I2C device driver
  * @class: What kind of i2c device we instantiate (for detect)
- * @attach_adapter: Callback for bus addition (deprecated)
  * @probe: Callback for device binding - soon to be deprecated
  * @probe_new: New callback for device binding
  * @remove: Callback for device unbinding
@@ -268,11 +267,6 @@ enum i2c_alert_protocol {
 struct i2c_driver {
 	unsigned int class;
 
-	/* Notifies the driver that a new bus has appeared. You should avoid
-	 * using this, it will be removed in a near future.
-	 */
-	int (*attach_adapter)(struct i2c_adapter *) __deprecated;
-
 	/* Standard driver model interfaces */
 	int (*probe)(struct i2c_client *, const struct i2c_device_id *);
 	int (*remove)(struct i2c_client *);
-- 
cgit v1.2.3


From 5d3a01a228c703dad971cad11f14c0512c4c2713 Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <peter@korsgaard.com>
Date: Wed, 22 Aug 2018 18:16:07 +0200
Subject: i2c: ocores: update my email address

The old @sunsite.dk address is no longer active, so update the references.

Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/platform_data/i2c-ocores.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/i2c-ocores.h b/include/linux/platform_data/i2c-ocores.h
index 01edd96fe1f7..113d6b12f650 100644
--- a/include/linux/platform_data/i2c-ocores.h
+++ b/include/linux/platform_data/i2c-ocores.h
@@ -1,7 +1,7 @@
 /*
  * i2c-ocores.h - definitions for the i2c-ocores interface
  *
- * Peter Korsgaard <jacmet@sunsite.dk>
+ * Peter Korsgaard <peter@korsgaard.com>
  *
  * This file is licensed under the terms of the GNU General Public License
  * version 2.  This program is licensed "as is" without any warranty of any
-- 
cgit v1.2.3


From 3ad867001c91657c46dcf6656d52eb6080286fd5 Mon Sep 17 00:00:00 2001
From: Lothar Felten <lothar.felten@gmail.com>
Date: Tue, 14 Aug 2018 09:09:37 +0200
Subject: hwmon: (ina2xx) fix sysfs shunt resistor read access

fix the sysfs shunt resistor read access: return the shunt resistor
value, not the calibration register contents.

update email address

Signed-off-by: Lothar Felten <lothar.felten@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/platform_data/ina2xx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/ina2xx.h b/include/linux/platform_data/ina2xx.h
index 9abc0ca7259b..9f0aa1b48c78 100644
--- a/include/linux/platform_data/ina2xx.h
+++ b/include/linux/platform_data/ina2xx.h
@@ -1,7 +1,7 @@
 /*
  * Driver for Texas Instruments INA219, INA226 power monitor chips
  *
- * Copyright (C) 2012 Lothar Felten <l-felten@ti.com>
+ * Copyright (C) 2012 Lothar Felten <lothar.felten@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
-- 
cgit v1.2.3


From 1d8f574708a3fb6f18c85486d0c5217df893c0cf Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 24 Aug 2018 15:08:29 +0100
Subject: arm/arm64: smccc-1.1: Make return values unsigned long

An unfortunate consequence of having a strong typing for the input
values to the SMC call is that it also affects the type of the
return values, limiting r0 to 32 bits and r{1,2,3} to whatever
was passed as an input.

Let's turn everything into "unsigned long", which satisfies the
requirements of both architectures, and allows for the full
range of return values.

Reported-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/arm-smccc.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index ca1d2cc2cdfa..5a91ff33720b 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -199,31 +199,31 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 
 #define __declare_arg_0(a0, res)					\
 	struct arm_smccc_res   *___res = res;				\
-	register u32           r0 asm("r0") = a0;			\
+	register unsigned long r0 asm("r0") = (u32)a0;			\
 	register unsigned long r1 asm("r1");				\
 	register unsigned long r2 asm("r2");				\
 	register unsigned long r3 asm("r3")
 
 #define __declare_arg_1(a0, a1, res)					\
 	struct arm_smccc_res   *___res = res;				\
-	register u32           r0 asm("r0") = a0;			\
-	register typeof(a1)    r1 asm("r1") = a1;			\
+	register unsigned long r0 asm("r0") = (u32)a0;			\
+	register unsigned long r1 asm("r1") = a1;			\
 	register unsigned long r2 asm("r2");				\
 	register unsigned long r3 asm("r3")
 
 #define __declare_arg_2(a0, a1, a2, res)				\
 	struct arm_smccc_res   *___res = res;				\
-	register u32           r0 asm("r0") = a0;			\
-	register typeof(a1)    r1 asm("r1") = a1;			\
-	register typeof(a2)    r2 asm("r2") = a2;			\
+	register unsigned long r0 asm("r0") = (u32)a0;			\
+	register unsigned long r1 asm("r1") = a1;			\
+	register unsigned long r2 asm("r2") = a2;			\
 	register unsigned long r3 asm("r3")
 
 #define __declare_arg_3(a0, a1, a2, a3, res)				\
 	struct arm_smccc_res   *___res = res;				\
-	register u32           r0 asm("r0") = a0;			\
-	register typeof(a1)    r1 asm("r1") = a1;			\
-	register typeof(a2)    r2 asm("r2") = a2;			\
-	register typeof(a3)    r3 asm("r3") = a3
+	register unsigned long r0 asm("r0") = (u32)a0;			\
+	register unsigned long r1 asm("r1") = a1;			\
+	register unsigned long r2 asm("r2") = a2;			\
+	register unsigned long r3 asm("r3") = a3
 
 #define __declare_arg_4(a0, a1, a2, a3, a4, res)			\
 	__declare_arg_3(a0, a1, a2, a3, res);				\
-- 
cgit v1.2.3


From 36156f9241cb0f9e37d998052873ca7501ad4b36 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 27 Aug 2018 10:21:45 +0200
Subject: of: add helper to lookup compatible child node

Add of_get_compatible_child() helper that can be used to lookup
compatible child nodes.

Several drivers currently use of_find_compatible_node() to lookup child
nodes while failing to notice that the of_find_ functions search the
entire tree depth-first (from a given start node) and therefore can
match unrelated nodes. The fact that these functions also drop a
reference to the node they start searching from (e.g. the parent node)
is typically also overlooked, something which can lead to use-after-free
bugs.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 4d25e4f952d9..b99a1a8c2952 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -290,6 +290,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
 extern struct device_node *of_get_next_available_child(
 	const struct device_node *node, struct device_node *prev);
 
+extern struct device_node *of_get_compatible_child(const struct device_node *parent,
+					const char *compatible);
 extern struct device_node *of_get_child_by_name(const struct device_node *node,
 					const char *name);
 
@@ -632,6 +634,12 @@ static inline bool of_have_populated_dt(void)
 	return false;
 }
 
+static inline struct device_node *of_get_compatible_child(const struct device_node *parent,
+					const char *compatible)
+{
+	return NULL;
+}
+
 static inline struct device_node *of_get_child_by_name(
 					const struct device_node *node,
 					const char *name)
-- 
cgit v1.2.3


From 755a8bf5579d22eb5636685c516d8dede799e27b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 24 Aug 2018 15:08:30 +0100
Subject: arm/arm64: smccc-1.1: Handle function result as parameters

If someone has the silly idea to write something along those lines:

	extern u64 foo(void);

	void bar(struct arm_smccc_res *res)
	{
		arm_smccc_1_1_smc(0xbad, foo(), res);
	}

they are in for a surprise, as this gets compiled as:

	0000000000000588 <bar>:
	 588:   a9be7bfd        stp     x29, x30, [sp, #-32]!
	 58c:   910003fd        mov     x29, sp
	 590:   f9000bf3        str     x19, [sp, #16]
	 594:   aa0003f3        mov     x19, x0
	 598:   aa1e03e0        mov     x0, x30
	 59c:   94000000        bl      0 <_mcount>
	 5a0:   94000000        bl      0 <foo>
	 5a4:   aa0003e1        mov     x1, x0
	 5a8:   d4000003        smc     #0x0
	 5ac:   b4000073        cbz     x19, 5b8 <bar+0x30>
	 5b0:   a9000660        stp     x0, x1, [x19]
	 5b4:   a9010e62        stp     x2, x3, [x19, #16]
	 5b8:   f9400bf3        ldr     x19, [sp, #16]
	 5bc:   a8c27bfd        ldp     x29, x30, [sp], #32
	 5c0:   d65f03c0        ret
	 5c4:   d503201f        nop

The call to foo "overwrites" the x0 register for the return value,
and we end up calling the wrong secure service.

A solution is to evaluate all the parameters before assigning
anything to specific registers, leading to the expected result:

	0000000000000588 <bar>:
	 588:   a9be7bfd        stp     x29, x30, [sp, #-32]!
	 58c:   910003fd        mov     x29, sp
	 590:   f9000bf3        str     x19, [sp, #16]
	 594:   aa0003f3        mov     x19, x0
	 598:   aa1e03e0        mov     x0, x30
	 59c:   94000000        bl      0 <_mcount>
	 5a0:   94000000        bl      0 <foo>
	 5a4:   aa0003e1        mov     x1, x0
	 5a8:   d28175a0        mov     x0, #0xbad
	 5ac:   d4000003        smc     #0x0
	 5b0:   b4000073        cbz     x19, 5bc <bar+0x34>
	 5b4:   a9000660        stp     x0, x1, [x19]
	 5b8:   a9010e62        stp     x2, x3, [x19, #16]
	 5bc:   f9400bf3        ldr     x19, [sp, #16]
	 5c0:   a8c27bfd        ldp     x29, x30, [sp], #32
	 5c4:   d65f03c0        ret

Reported-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/arm-smccc.h | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 5a91ff33720b..18863d56273c 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -205,41 +205,51 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 	register unsigned long r3 asm("r3")
 
 #define __declare_arg_1(a0, a1, res)					\
+	typeof(a1) __a1 = a1;						\
 	struct arm_smccc_res   *___res = res;				\
 	register unsigned long r0 asm("r0") = (u32)a0;			\
-	register unsigned long r1 asm("r1") = a1;			\
+	register unsigned long r1 asm("r1") = __a1;			\
 	register unsigned long r2 asm("r2");				\
 	register unsigned long r3 asm("r3")
 
 #define __declare_arg_2(a0, a1, a2, res)				\
+	typeof(a1) __a1 = a1;						\
+	typeof(a2) __a2 = a2;						\
 	struct arm_smccc_res   *___res = res;				\
 	register unsigned long r0 asm("r0") = (u32)a0;			\
-	register unsigned long r1 asm("r1") = a1;			\
-	register unsigned long r2 asm("r2") = a2;			\
+	register unsigned long r1 asm("r1") = __a1;			\
+	register unsigned long r2 asm("r2") = __a2;			\
 	register unsigned long r3 asm("r3")
 
 #define __declare_arg_3(a0, a1, a2, a3, res)				\
+	typeof(a1) __a1 = a1;						\
+	typeof(a2) __a2 = a2;						\
+	typeof(a3) __a3 = a3;						\
 	struct arm_smccc_res   *___res = res;				\
 	register unsigned long r0 asm("r0") = (u32)a0;			\
-	register unsigned long r1 asm("r1") = a1;			\
-	register unsigned long r2 asm("r2") = a2;			\
-	register unsigned long r3 asm("r3") = a3
+	register unsigned long r1 asm("r1") = __a1;			\
+	register unsigned long r2 asm("r2") = __a2;			\
+	register unsigned long r3 asm("r3") = __a3
 
 #define __declare_arg_4(a0, a1, a2, a3, a4, res)			\
+	typeof(a4) __a4 = a4;						\
 	__declare_arg_3(a0, a1, a2, a3, res);				\
-	register typeof(a4) r4 asm("r4") = a4
+	register unsigned long r4 asm("r4") = __a4
 
 #define __declare_arg_5(a0, a1, a2, a3, a4, a5, res)			\
+	typeof(a5) __a5 = a5;						\
 	__declare_arg_4(a0, a1, a2, a3, a4, res);			\
-	register typeof(a5) r5 asm("r5") = a5
+	register unsigned long r5 asm("r5") = __a5
 
 #define __declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res)		\
+	typeof(a6) __a6 = a6;						\
 	__declare_arg_5(a0, a1, a2, a3, a4, a5, res);			\
-	register typeof(a6) r6 asm("r6") = a6
+	register unsigned long r6 asm("r6") = __a6
 
 #define __declare_arg_7(a0, a1, a2, a3, a4, a5, a6, a7, res)		\
+	typeof(a7) __a7 = a7;						\
 	__declare_arg_6(a0, a1, a2, a3, a4, a5, a6, res);		\
-	register typeof(a7) r7 asm("r7") = a7
+	register unsigned long r7 asm("r7") = __a7
 
 #define ___declare_args(count, ...) __declare_arg_ ## count(__VA_ARGS__)
 #define __declare_args(count, ...)  ___declare_args(count, __VA_ARGS__)
-- 
cgit v1.2.3


From f42b0e18f2e5cf34f73ef1b6327b49040b307a33 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 27 Aug 2018 07:50:47 -0500
Subject: of: add node name compare helper functions

In preparation to remove device_node.name pointer, add helper functions
for node name comparisons which are a common pattern throughout the kernel.

Cc: Frank Rowand <frowand.list@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index b99a1a8c2952..688c52dd7b3e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -256,6 +256,9 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
 #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
 
+extern bool of_node_name_eq(const struct device_node *np, const char *name);
+extern bool of_node_name_prefix(const struct device_node *np, const char *prefix);
+
 static inline const char *of_node_full_name(const struct device_node *np)
 {
 	return np ? np->full_name : "<no-node>";
@@ -563,6 +566,16 @@ static inline struct device_node *to_of_node(const struct fwnode_handle *fwnode)
 	return NULL;
 }
 
+static inline bool of_node_name_eq(const struct device_node *np, const char *name)
+{
+	return false;
+}
+
+static inline bool of_node_name_prefix(const struct device_node *np, const char *prefix)
+{
+	return false;
+}
+
 static inline const char* of_node_full_name(const struct device_node *np)
 {
 	return "<no-node>";
-- 
cgit v1.2.3


From 82fe39a6bc7b866fc3ffd838e3c5a4cadb328b04 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Fri, 24 Aug 2018 16:52:44 +0200
Subject: i2c: refactor function to release a DMA safe buffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

a) rename to 'put' instead of 'release' to match 'get' when obtaining
   the buffer
b) change the argument order to have the buffer as first argument
c) add a new argument telling the function if the message was
   transferred. This allows the function to be used also in cases
   where setting up DMA failed, so the buffer needs to be freed without
   syncing to the message buffer.

Also convert the only user.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index b79387fd57da..65b4eaed1d96 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -855,7 +855,7 @@ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg)
 }
 
 u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold);
-void i2c_release_dma_safe_msg_buf(struct i2c_msg *msg, u8 *buf);
+void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred);
 
 int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr);
 /**
-- 
cgit v1.2.3


From 0413bedabc886c3a56804d1c80a58e99077b1d91 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 28 Aug 2018 15:10:48 -0500
Subject: of: Add device_type access helper functions

In preparation to remove direct access to device_node.type, add
of_node_is_type() and of_node_get_device_type() helpers to check and
retrieve the device type.

Cc: Frank Rowand <frowand.list@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 688c52dd7b3e..99b0ebf49632 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -988,6 +988,18 @@ static inline struct device_node *of_find_matching_node(
 	return of_find_matching_node_and_match(from, matches, NULL);
 }
 
+static inline const char *of_node_get_device_type(const struct device_node *np)
+{
+	return of_get_property(np, "type", NULL);
+}
+
+static inline bool of_node_is_type(const struct device_node *np, const char *type)
+{
+	const char *match = of_node_get_device_type(np);
+
+	return np && match && type && !strcmp(match, type);
+}
+
 /**
  * of_property_count_u8_elems - Count the number of u8 elements in a property
  *
-- 
cgit v1.2.3


From 6b06546206868f723f2061d703a3c3c378dcbf4c Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Fri, 31 Aug 2018 16:22:42 -0400
Subject: Revert "blk-throttle: fix race between blkcg_bio_issue_check() and
 cgroup_rmdir()"

This reverts commit 4c6994806f708559c2812b73501406e21ae5dcd0.

Destroying blkgs is tricky because of the nature of the relationship. A
blkg should go away when either a blkcg or a request_queue goes away.
However, blkg's pin the blkcg to ensure they remain valid. To break this
cycle, when a blkcg is offlined, blkgs put back their css ref. This
eventually lets css_free() get called which frees the blkcg.

The above commit (4c6994806f70) breaks this order of events by trying to
destroy blkgs in css_free(). As the blkgs still hold references to the
blkcg, css_free() is never called.

The race between blkcg_bio_issue_check() and cgroup_rmdir() will be
addressed in the following patch by delaying destruction of a blkg until
all writeback associated with the blkcg has been finished.

Fixes: 4c6994806f70 ("blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Cc: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 34aec30e06c7..1615cdd4c797 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -89,7 +89,6 @@ struct blkg_policy_data {
 	/* the blkg and policy id this per-policy data belongs to */
 	struct blkcg_gq			*blkg;
 	int				plid;
-	bool				offline;
 };
 
 /*
-- 
cgit v1.2.3


From 59b57717fff8b562825d9d25e0180ad7e8048ca9 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Fri, 31 Aug 2018 16:22:43 -0400
Subject: blkcg: delay blkg destruction until after writeback has finished

Currently, blkcg destruction relies on a sequence of events:
  1. Destruction starts. blkcg_css_offline() is called and blkgs
     release their reference to the blkcg. This immediately destroys
     the cgwbs (writeback).
  2. With blkgs giving up their reference, the blkcg ref count should
     become zero and eventually call blkcg_css_free() which finally
     frees the blkcg.

Jiufei Xue reported that there is a race between blkcg_bio_issue_check()
and cgroup_rmdir(). To remedy this, blkg destruction becomes contingent
on the completion of all writeback associated with the blkcg. A count of
the number of cgwbs is maintained and once that goes to zero, blkg
destruction can follow. This should prevent premature blkg destruction
related to writeback.

The new process for blkcg cleanup is as follows:
  1. Destruction starts. blkcg_css_offline() is called which offlines
     writeback. Blkg destruction is delayed on the cgwb_refcnt count to
     avoid punting potentially large amounts of outstanding writeback
     to root while maintaining any ongoing policies. Here, the base
     cgwb_refcnt is put back.
  2. When the cgwb_refcnt becomes zero, blkcg_destroy_blkgs() is called
     and handles destruction of blkgs. This is where the css reference
     held by each blkg is released.
  3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
     This finally frees the blkg.

It seems in the past blk-throttle didn't do the most understandable
things with taking data from a blkg while associating with current. So,
the simplification and unification of what blk-throttle is doing caused
this.

Fixes: 08e18eab0c579 ("block: add bi_blkg to the bio for cgroups")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Cc: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1615cdd4c797..6d766a19f2bb 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -56,6 +56,7 @@ struct blkcg {
 	struct list_head		all_blkcgs_node;
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct list_head		cgwb_list;
+	refcount_t			cgwb_refcnt;
 #endif
 };
 
@@ -386,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
 	return cpd ? cpd->blkcg : NULL;
 }
 
+extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+/**
+ * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
+ * @blkcg: blkcg of interest
+ *
+ * This is used to track the number of active wb's related to a blkcg.
+ */
+static inline void blkcg_cgwb_get(struct blkcg *blkcg)
+{
+	refcount_inc(&blkcg->cgwb_refcnt);
+}
+
+/**
+ * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
+ * @blkcg: blkcg of interest
+ *
+ * This is used to track the number of active wb's related to a blkcg.
+ * When this count goes to zero, all active wb has finished so the
+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
+ * This work may occur in cgwb_release_workfn() on the cgwb_release
+ * workqueue.
+ */
+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+{
+	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
+		blkcg_destroy_blkgs(blkcg);
+}
+
+#else
+
+static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
+
+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+{
+	/* wb isn't being accounted, so trigger destruction right away */
+	blkcg_destroy_blkgs(blkcg);
+}
+
+#endif
+
 /**
  * blkg_path - format cgroup path of blkg
  * @blkg: blkg of interest
-- 
cgit v1.2.3


From c43c5e9f524ec914e7e202f9c5ab91779629ccc6 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 3 Sep 2018 10:15:33 +0200
Subject: timekeeping: Fix declaration of
 read_persistent_wall_and_boot_offset()

It is read_persistent_wall_and_boot_offset() and not
read_persistent_clock_and_boot_offset()

Fixes: 3eca993740b8eb40f51 ("timekeeping: Replace read_boot_clock64() with read_persistent_wall_and_boot_offset()")
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pavel Tatashin <pasha.tatashin@oracle.com>
Link: https://lkml.kernel.org/r/20180903081533.34366-1-borntraeger@de.ibm.com
---
 include/linux/timekeeping.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 5d738804e3d6..a5a3cfc3c2fa 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -258,8 +258,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
 extern int persistent_clock_is_local;
 
 extern void read_persistent_clock64(struct timespec64 *ts);
-void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock,
-					   struct timespec64 *boot_offset);
+void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock,
+					  struct timespec64 *boot_offset);
 extern int update_persistent_clock64(struct timespec64 now);
 
 /*
-- 
cgit v1.2.3


From 9fd0e09a4e86499639653243edfcb417a05c5c46 Mon Sep 17 00:00:00 2001
From: Anthony Wong <anthony.wong@ubuntu.com>
Date: Fri, 31 Aug 2018 20:06:42 +0800
Subject: r8169: add support for NCube 8168 network card

This card identifies itself as:
  Ethernet controller [0200]: NCube Device [10ff:8168] (rev 06)
  Subsystem: TP-LINK Technologies Co., Ltd. Device [7470:3468]

Adding a new entry to rtl8169_pci_tbl makes the card work.

Link: http://launchpad.net/bugs/1788730
Signed-off-by: Anthony Wong <anthony.wong@ubuntu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 99d366cb0e9f..d157983b84cf 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -3084,4 +3084,6 @@
 
 #define PCI_VENDOR_ID_OCZ		0x1b85
 
+#define PCI_VENDOR_ID_NCUBE		0x10ff
+
 #endif /* _LINUX_PCI_IDS_H */
-- 
cgit v1.2.3


From 865e63b04e9b2a658d7f26bd13a71dcd964a9118 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 4 Sep 2018 16:26:11 -0400
Subject: tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle
 tracepoints

Borislav reported the following splat:

 =============================
 WARNING: suspicious RCU usage
 4.19.0-rc1+ #1 Not tainted
 -----------------------------
 ./include/linux/rcupdate.h:631 rcu_read_lock() used illegally while idle!
 other info that might help us debug this:

 RCU used illegally from idle CPU!
 rcu_scheduler_active = 2, debug_locks = 1
 RCU used illegally from extended quiescent state!
 1 lock held by swapper/0/0:
  #0: 000000004557ee0e (rcu_read_lock){....}, at: perf_event_output_forward+0x0/0x130

 stack backtrace:
 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0-rc1+ #1
 Hardware name: LENOVO 2320CTO/2320CTO, BIOS G2ET86WW (2.06 ) 11/13/2012
 Call Trace:
  dump_stack+0x85/0xcb
  perf_event_output_forward+0xf6/0x130
  __perf_event_overflow+0x52/0xe0
  perf_swevent_overflow+0x91/0xb0
  perf_tp_event+0x11a/0x350
  ? find_held_lock+0x2d/0x90
  ? __lock_acquire+0x2ce/0x1350
  ? __lock_acquire+0x2ce/0x1350
  ? retint_kernel+0x2d/0x2d
  ? find_held_lock+0x2d/0x90
  ? tick_nohz_get_sleep_length+0x83/0xb0
  ? perf_trace_cpu+0xbb/0xd0
  ? perf_trace_buf_alloc+0x5a/0xa0
  perf_trace_cpu+0xbb/0xd0
  cpuidle_enter_state+0x185/0x340
  do_idle+0x1eb/0x260
  cpu_startup_entry+0x5f/0x70
  start_kernel+0x49b/0x4a6
  secondary_startup_64+0xa4/0xb0

This is due to the tracepoints moving to SRCU usage which does not require
RCU to be "watching". But perf uses these tracepoints with RCU and expects
it to be. Hence, we still need to add in the rcu_irq_enter/exit_irqson()
calls for "rcuidle" tracepoints. This is a temporary fix until we have SRCU
working in NMI context, and then perf can be converted to use that instead
of normal RCU.

Link: http://lkml.kernel.org/r/20180904162611.6a120068@gandalf.local.home

Cc: x86-ml <x86@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Reported-by: Borislav Petkov <bp@alien8.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Fixes: e6753f23d961d ("tracepoint: Make rcuidle tracepoint callers use SRCU")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 7f2e16e76ac4..041f7e56a289 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -158,8 +158,10 @@ extern void syscall_unregfunc(void);
 		 * For rcuidle callers, use srcu since sched-rcu	\
 		 * doesn't work from the idle path.			\
 		 */							\
-		if (rcuidle)						\
+		if (rcuidle) {						\
 			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
+			rcu_irq_enter_irqson();				\
+		}							\
 									\
 		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
 									\
@@ -171,8 +173,10 @@ extern void syscall_unregfunc(void);
 			} while ((++it_func_ptr)->func);		\
 		}							\
 									\
-		if (rcuidle)						\
+		if (rcuidle) {						\
+			rcu_irq_exit_irqson();				\
 			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
+		}							\
 									\
 		preempt_enable_notrace();				\
 	} while (0)
-- 
cgit v1.2.3