From 9e4d59ada4d602e78eee9fb5f898ce61fdddb446 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Fri, 16 Dec 2016 18:26:54 +0900
Subject: ASoC: hdmi-codec: use unsigned type to structure members with
 bit-field

This is a fix for Linux 4.10-rc1.

In C language specification, a bit-field is interpreted as a signed or
unsigned integer type consisting of the specified number of bits.

In GCC manual, the range of a signed bit field of N bits is from
-(2^N) / 2 to ((2^N) / 2) - 1
https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Bit-Fields

Therefore, when defined as 1 bit-field with signed type, variables can
represents -1 and 0.

The snd-soc-hdmi-codec module includes a structure which has signed type
members with bit-fields. Codes of this module assign 0 and 1 to the
members. This seems to result in implementation-dependent behaviours.

As of v4.10-rc1 merge window, outside of sound subsystem, this structure
is referred by below GPU modules.
 - tda998x
 - sti-drm
 - mediatek-drm-hdmi
 - msm

As long as I review their codes relevant to the structure, the structure
members are used just for condition statements and printk formats.
My proposal of change is a bit intrusive to the printk formats but this
may be acceptable.

Totally, it's reasonable to use unsigned type for the structure members.
This bug is detected by Sparse, static code analyzer with below warnings.

./include/sound/hdmi-codec.h:39:26: error: dubious one-bit signed bitfield
./include/sound/hdmi-codec.h:40:28: error: dubious one-bit signed bitfield
./include/sound/hdmi-codec.h:41:29: error: dubious one-bit signed bitfield
./include/sound/hdmi-codec.h:42:31: error: dubious one-bit signed bitfield

Fixes: 09184118a8ab ("ASoC: hdmi-codec: Add hdmi-codec for external HDMI-encoders")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Acked-by: Arnaud Pouliquen <arnaud.pouliquen@st.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
CC: stable@vger.kernel.org
---
 include/sound/hdmi-codec.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdmi-codec.h b/include/sound/hdmi-codec.h
index 530c57bdefa0..915c4357945c 100644
--- a/include/sound/hdmi-codec.h
+++ b/include/sound/hdmi-codec.h
@@ -36,10 +36,10 @@ struct hdmi_codec_daifmt {
 		HDMI_AC97,
 		HDMI_SPDIF,
 	} fmt;
-	int bit_clk_inv:1;
-	int frame_clk_inv:1;
-	int bit_clk_master:1;
-	int frame_clk_master:1;
+	unsigned int bit_clk_inv:1;
+	unsigned int frame_clk_inv:1;
+	unsigned int bit_clk_master:1;
+	unsigned int frame_clk_master:1;
 };
 
 /*
-- 
cgit v1.2.3


From ae7871be189cb41184f1e05742b4a99e2c59774d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 16 Dec 2016 14:28:41 +0100
Subject: swiotlb: Convert swiotlb_force from int to enum

Convert the flag swiotlb_force from an int to an enum, to prepare for
the advent of more possible values.

Suggested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/swiotlb.h        |  7 ++++++-
 include/trace/events/swiotlb.h | 16 +++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 183f37c8a5e1..71d104e4c849 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -9,7 +9,12 @@ struct device;
 struct page;
 struct scatterlist;
 
-extern int swiotlb_force;
+enum swiotlb_force {
+	SWIOTLB_NORMAL,		/* Default - depending on HW DMA mask etc. */
+	SWIOTLB_FORCE,		/* swiotlb=force */
+};
+
+extern enum swiotlb_force swiotlb_force;
 
 /*
  * Maximum allowable number of contiguous slabs to map,
diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h
index 7ea4c5e7c448..5e2e30a7efce 100644
--- a/include/trace/events/swiotlb.h
+++ b/include/trace/events/swiotlb.h
@@ -11,16 +11,16 @@ TRACE_EVENT(swiotlb_bounced,
 	TP_PROTO(struct device *dev,
 		 dma_addr_t dev_addr,
 		 size_t size,
-		 int swiotlb_force),
+		 enum swiotlb_force swiotlb_force),
 
 	TP_ARGS(dev, dev_addr, size, swiotlb_force),
 
 	TP_STRUCT__entry(
-		__string(	dev_name,	dev_name(dev)	)
-		__field(	u64,	dma_mask		)
-		__field(	dma_addr_t,	dev_addr	)
-		__field(	size_t,	size			)
-		__field(	int,	swiotlb_force		)
+		__string(	dev_name,	dev_name(dev)		)
+		__field(	u64,	dma_mask			)
+		__field(	dma_addr_t,	dev_addr		)
+		__field(	size_t,	size				)
+		__field(	enum swiotlb_force,	swiotlb_force	)
 	),
 
 	TP_fast_assign(
@@ -37,7 +37,9 @@ TRACE_EVENT(swiotlb_bounced,
 		__entry->dma_mask,
 		(unsigned long long)__entry->dev_addr,
 		__entry->size,
-		__entry->swiotlb_force ? "swiotlb_force" : "" )
+		__print_symbolic(__entry->swiotlb_force,
+			{ SWIOTLB_NORMAL,	"NORMAL" },
+			{ SWIOTLB_FORCE,	"FORCE" }))
 );
 
 #endif /*  _TRACE_SWIOTLB_H */
-- 
cgit v1.2.3


From fff5d99225107f5f13fe4a9805adc2a1c4b5fb00 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 16 Dec 2016 14:28:42 +0100
Subject: swiotlb: Add swiotlb=noforce debug option

On architectures like arm64, swiotlb is tied intimately to the core
architecture DMA support. In addition, ZONE_DMA cannot be disabled.

To aid debugging and catch devices not supporting DMA to memory outside
the 32-bit address space, add a kernel command line option
"swiotlb=noforce", which disables the use of bounce buffers.
If specified, trying to map memory that cannot be used with DMA will
fail, and a rate-limited warning will be printed.

Note that io_tlb_nslabs is set to 1, which is the minimal supported
value.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/swiotlb.h        | 1 +
 include/trace/events/swiotlb.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 71d104e4c849..d9c84a9cde3d 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -12,6 +12,7 @@ struct scatterlist;
 enum swiotlb_force {
 	SWIOTLB_NORMAL,		/* Default - depending on HW DMA mask etc. */
 	SWIOTLB_FORCE,		/* swiotlb=force */
+	SWIOTLB_NO_FORCE,	/* swiotlb=noforce */
 };
 
 extern enum swiotlb_force swiotlb_force;
diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h
index 5e2e30a7efce..288c0c54a2b4 100644
--- a/include/trace/events/swiotlb.h
+++ b/include/trace/events/swiotlb.h
@@ -39,7 +39,8 @@ TRACE_EVENT(swiotlb_bounced,
 		__entry->size,
 		__print_symbolic(__entry->swiotlb_force,
 			{ SWIOTLB_NORMAL,	"NORMAL" },
-			{ SWIOTLB_FORCE,	"FORCE" }))
+			{ SWIOTLB_FORCE,	"FORCE" },
+			{ SWIOTLB_NO_FORCE,	"NO_FORCE" }))
 );
 
 #endif /*  _TRACE_SWIOTLB_H */
-- 
cgit v1.2.3


From f3854973f196baad5be6b62d8f5ea24b0346b63f Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hansverk@cisco.com>
Date: Tue, 6 Dec 2016 11:17:12 -0200
Subject: [media] cec: fix report_current_latency

In the (very) small print of the REPORT_CURRENT_LATENCY message there is a
line that says that the last byte of the message (audio out delay) is only
present if the 'audio out compensated' value is 3.

I missed this, and so if this message was sent with a total length of 6 (i.e.
without the audio out delay byte), then it was rejected by the framework
since a minimum length of 7 was expected.

Fix this minimum length check and update the wrappers in cec-funcs.h to do
the right thing based on the message length.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/uapi/linux/cec-funcs.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h
index 3cbc327801d6..c451eec42a83 100644
--- a/include/uapi/linux/cec-funcs.h
+++ b/include/uapi/linux/cec-funcs.h
@@ -1665,14 +1665,15 @@ static inline void cec_msg_report_current_latency(struct cec_msg *msg,
 						  __u8 audio_out_compensated,
 						  __u8 audio_out_delay)
 {
-	msg->len = 7;
+	msg->len = 6;
 	msg->msg[0] |= 0xf; /* broadcast */
 	msg->msg[1] = CEC_MSG_REPORT_CURRENT_LATENCY;
 	msg->msg[2] = phys_addr >> 8;
 	msg->msg[3] = phys_addr & 0xff;
 	msg->msg[4] = video_latency;
 	msg->msg[5] = (low_latency_mode << 2) | audio_out_compensated;
-	msg->msg[6] = audio_out_delay;
+	if (audio_out_compensated == 3)
+		msg->msg[msg->len++] = audio_out_delay;
 }
 
 static inline void cec_ops_report_current_latency(const struct cec_msg *msg,
@@ -1686,7 +1687,10 @@ static inline void cec_ops_report_current_latency(const struct cec_msg *msg,
 	*video_latency = msg->msg[4];
 	*low_latency_mode = (msg->msg[5] >> 2) & 1;
 	*audio_out_compensated = msg->msg[5] & 3;
-	*audio_out_delay = msg->msg[6];
+	if (*audio_out_compensated == 3 && msg->len >= 7)
+		*audio_out_delay = msg->msg[6];
+	else
+		*audio_out_delay = 0;
 }
 
 static inline void cec_msg_request_current_latency(struct cec_msg *msg,
-- 
cgit v1.2.3


From 72c5296f9d64d8f5f27c2133e5f108a45a353d71 Mon Sep 17 00:00:00 2001
From: Jon Derrick <jonathan.derrick@intel.com>
Date: Tue, 20 Dec 2016 14:38:14 -0700
Subject: genhd: remove dead and duplicated scsi code

blk_scsi_cmd_filter use was deprecated by 4beab5c6 and the SCSI macros
are duplicated in blkdev.h, both likely reintroduced by a bad merge from
540eed56.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/genhd.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e0341af6950e..76f39754e7b0 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -146,15 +146,6 @@ enum {
 	DISK_EVENT_EJECT_REQUEST		= 1 << 1, /* eject requested */
 };
 
-#define BLK_SCSI_MAX_CMDS	(256)
-#define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-
-struct blk_scsi_cmd_filter {
-	unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
-	unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
-	struct kobject kobj;
-};
-
 struct disk_part_tbl {
 	struct rcu_head rcu_head;
 	int len;
-- 
cgit v1.2.3


From e3ba730702af370563f66cb610b71aa0ca67955e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 22 Dec 2016 10:15:20 +0100
Subject: fsnotify: Remove fsnotify_duplicate_mark()

There are only two calls sites of fsnotify_duplicate_mark(). Those are
in kernel/audit_tree.c and both are bogus. Vfsmount pointer is unused
for audit tree, inode pointer and group gets set in
fsnotify_add_mark_locked() later anyway, mask and free_mark are already
set in alloc_chunk(). In fact, calling fsnotify_duplicate_mark() is
actively harmful because following fsnotify_add_mark_locked() will leak
group reference by overwriting the group pointer. So just remove the two
calls to fsnotify_duplicate_mark() and the function.

Signed-off-by: Jan Kara <jack@suse.cz>
[PM: line wrapping to fit in 80 chars]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/fsnotify_backend.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 7268ed076be8..ce77caa2bb10 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -324,8 +324,6 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(str
 extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode);
 /* find (and take a reference) to a mark associated with group and vfsmount */
 extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt);
-/* copy the values from old into new */
-extern void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old);
 /* set the ignored_mask of a mark */
 extern void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask);
 /* set the mask of a mark (might pin the object into memory */
-- 
cgit v1.2.3


From be53e38f0df21c3d45cdf4cede37ee73554cdbb8 Mon Sep 17 00:00:00 2001
From: Milo Kim <woogyom.kim@gmail.com>
Date: Fri, 9 Dec 2016 15:28:31 +0900
Subject: dt-bindings: mfd: Remove TPS65217 interrupts

Interrupt numbers are from the datasheet, so no need to keep them in
the ABI. Use the number in the DT file.

Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/dt-bindings/mfd/tps65217.h | 26 --------------------------
 1 file changed, 26 deletions(-)
 delete mode 100644 include/dt-bindings/mfd/tps65217.h

(limited to 'include')

diff --git a/include/dt-bindings/mfd/tps65217.h b/include/dt-bindings/mfd/tps65217.h
deleted file mode 100644
index cafb9e60cf12..000000000000
--- a/include/dt-bindings/mfd/tps65217.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * This header provides macros for TI TPS65217 DT bindings.
- *
- * Copyright (C) 2016 Texas Instruments
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __DT_BINDINGS_TPS65217_H__
-#define __DT_BINDINGS_TPS65217_H__
-
-#define TPS65217_IRQ_USB	0
-#define TPS65217_IRQ_AC		1
-#define TPS65217_IRQ_PB		2
-
-#endif
-- 
cgit v1.2.3


From 1efbd205b3cc5882a8c386c58a57134044e9d5ba Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:39 +0200
Subject: Revert "net/mlx5: Add MPCNT register infrastructure"

This reverts commit 7f503169cabd70c1f13b9279c50eca7dfb9a7d51.

Fixes: 7f503169cabd ("net/mlx5: Add MPCNT register infrastructure")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h   |  5 ---
 include/linux/mlx5/driver.h   |  1 -
 include/linux/mlx5/mlx5_ifc.h | 93 -------------------------------------------
 3 files changed, 99 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 9f489365b3d3..52b437431c6a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1071,11 +1071,6 @@ enum {
 	MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
 };
 
-enum {
-	MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP       = 0x0,
-	MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2,
-};
-
 static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
 {
 	if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0ae55361e674..735b36335f29 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -123,7 +123,6 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MLCR		 = 0x902b,
-	MLX5_REG_MPCNT		 = 0x9051,
 };
 
 enum mlx5_dcbx_oper_mode {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 57bec544e20a..a852e9db6f0d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1757,80 +1757,6 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
 	u8         reserved_at_4c0[0x300];
 };
 
-struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits {
-	u8         life_time_counter_high[0x20];
-
-	u8         life_time_counter_low[0x20];
-
-	u8         rx_errors[0x20];
-
-	u8         tx_errors[0x20];
-
-	u8         l0_to_recovery_eieos[0x20];
-
-	u8         l0_to_recovery_ts[0x20];
-
-	u8         l0_to_recovery_framing[0x20];
-
-	u8         l0_to_recovery_retrain[0x20];
-
-	u8         crc_error_dllp[0x20];
-
-	u8         crc_error_tlp[0x20];
-
-	u8         reserved_at_140[0x680];
-};
-
-struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits {
-	u8         life_time_counter_high[0x20];
-
-	u8         life_time_counter_low[0x20];
-
-	u8         time_to_boot_image_start[0x20];
-
-	u8         time_to_link_image[0x20];
-
-	u8         calibration_time[0x20];
-
-	u8         time_to_first_perst[0x20];
-
-	u8         time_to_detect_state[0x20];
-
-	u8         time_to_l0[0x20];
-
-	u8         time_to_crs_en[0x20];
-
-	u8         time_to_plastic_image_start[0x20];
-
-	u8         time_to_iron_image_start[0x20];
-
-	u8         perst_handler[0x20];
-
-	u8         times_in_l1[0x20];
-
-	u8         times_in_l23[0x20];
-
-	u8         dl_down[0x20];
-
-	u8         config_cycle1usec[0x20];
-
-	u8         config_cycle2to7usec[0x20];
-
-	u8         config_cycle_8to15usec[0x20];
-
-	u8         config_cycle_16_to_63usec[0x20];
-
-	u8         config_cycle_64usec[0x20];
-
-	u8         correctable_err_msg_sent[0x20];
-
-	u8         non_fatal_err_msg_sent[0x20];
-
-	u8         fatal_err_msg_sent[0x20];
-
-	u8         reserved_at_2e0[0x4e0];
-};
-
 struct mlx5_ifc_cmd_inter_comp_event_bits {
 	u8         command_completion_vector[0x20];
 
@@ -2995,12 +2921,6 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
 	u8         reserved_at_0[0x7c0];
 };
 
-union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits {
-	struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout;
-	struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout;
-	u8         reserved_at_0[0x7c0];
-};
-
 union mlx5_ifc_event_auto_bits {
 	struct mlx5_ifc_comp_event_bits comp_event;
 	struct mlx5_ifc_dct_events_bits dct_events;
@@ -7320,18 +7240,6 @@ struct mlx5_ifc_ppcnt_reg_bits {
 	union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
 
-struct mlx5_ifc_mpcnt_reg_bits {
-	u8         reserved_at_0[0x8];
-	u8         pcie_index[0x8];
-	u8         reserved_at_10[0xa];
-	u8         grp[0x6];
-
-	u8         clr[0x1];
-	u8         reserved_at_21[0x1f];
-
-	union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set;
-};
-
 struct mlx5_ifc_ppad_reg_bits {
 	u8         reserved_at_0[0x3];
 	u8         single_mac[0x1];
@@ -7937,7 +7845,6 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
 	struct mlx5_ifc_ppad_reg_bits ppad_reg;
 	struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
-	struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg;
 	struct mlx5_ifc_pplm_reg_bits pplm_reg;
 	struct mlx5_ifc_pplr_reg_bits pplr_reg;
 	struct mlx5_ifc_ppsc_reg_bits ppsc_reg;
-- 
cgit v1.2.3


From 10b1c04e92229ebeb38ccd0dcf2b6d3ec73c0575 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 Dec 2016 18:37:13 +0200
Subject: net/mlx4_core: Fix raw qp flow steering rules under SRIOV

Demoting simple flow steering rule priority (for DPDK) was achieved by
wrapping FW commands MLX4_QP_FLOW_STEERING_ATTACH/DETACH for the PF
as well, and forcing the priority to MLX4_DOMAIN_NIC in the wrapper
function for the PF and all VFs.

In function mlx4_ib_create_flow(), this change caused the main rule
creation for the PF to be wrapped, while it left the associated
tunnel steering rule creation unwrapped for the PF.

This mismatch caused rule deletion failures in mlx4_ib_destroy_flow()
for the PF when the detach wrapper function did not find the associated
tunnel-steering rule (since creation of that rule for the PF did not
go through the wrapper function).

Fix this by setting MLX4_QP_FLOW_STEERING_ATTACH/DETACH to be "native"
(so that the PF invocation does not go through the wrapper), and perform
the required priority demotion for the PF in the mlx4_ib_create_flow()
code path.

Fixes: 48564135cba8 ("net/mlx4_core: Demote simple multicast and broadcast flow steering rules")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 93bdb3485192..6533c16e27ad 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1384,6 +1384,8 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val);
 int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv);
 int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
 				      bool *vlan_offload_disabled);
+void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
+				       struct _rule_hw *eth_header);
 int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-- 
cgit v1.2.3


From a0c10687ec9506b5e14fe3dd47832a77f2f2500c Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Fri, 30 Dec 2016 03:21:38 -0800
Subject: Revert "remoteproc: Merge table_ptr and cached_table pointers"

Following any fw_rsc_vdev entries in the resource table are two variable
length arrays, the first one reference vring resources and the second
one is the virtio config space.  The virtio config space is used by
virtio to communicate status and configuration changes and must as such
be shared with the remote.

The reverted commit incorrectly made any changes to the virtio config
space only affect the local copy, in an attempt to allowing memory
protection of the shared resource table.

This reverts commit cda8529346935fc86f476999ac4fbfe4e17abf11.
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/remoteproc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index e2f3a3281d8f..8265d351c9f0 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -408,7 +408,8 @@ enum rproc_crash_type {
  * @crash_comp: completion used to sync crash handler and the rproc reload
  * @recovery_disabled: flag that state if recovery was disabled
  * @max_notifyid: largest allocated notify id.
- * @table_ptr: our copy of the resource table
+ * @table_ptr: pointer to the resource table in effect
+ * @cached_table: copy of the resource table
  * @has_iommu: flag to indicate if remote processor is behind an MMU
  */
 struct rproc {
@@ -440,6 +441,7 @@ struct rproc {
 	bool recovery_disabled;
 	int max_notifyid;
 	struct resource_table *table_ptr;
+	struct resource_table *cached_table;
 	bool has_iommu;
 	bool auto_boot;
 };
-- 
cgit v1.2.3


From 42930553a7c11f06351bc08b889808d0f6020f08 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 30 Dec 2016 08:13:38 -0700
Subject: vfio-mdev: de-polute the namespace, rename parent_device & parent_ops

Add an mdev_ prefix so we're not poluting the namespace so much.

Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Jike Song <jike.song@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed by: Kirti Wankhede <kwankhede@nvidia.com>
---
 include/linux/mdev.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index ec819e9a115a..853bb78e5866 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -14,9 +14,9 @@
 #define MDEV_H
 
 /* Parent device */
-struct parent_device {
-	struct device		*dev;
-	const struct parent_ops	*ops;
+struct mdev_parent {
+	struct device			*dev;
+	const struct mdev_parent_ops	*ops;
 
 	/* internal */
 	struct kref		ref;
@@ -29,7 +29,7 @@ struct parent_device {
 /* Mediated device */
 struct mdev_device {
 	struct device		dev;
-	struct parent_device	*parent;
+	struct mdev_parent	*parent;
 	uuid_le			uuid;
 	void			*driver_data;
 
@@ -40,7 +40,7 @@ struct mdev_device {
 };
 
 /**
- * struct parent_ops - Structure to be registered for each parent device to
+ * struct mdev_parent_ops - Structure to be registered for each parent device to
  * register the device to mdev module.
  *
  * @owner:		The module owner.
@@ -86,10 +86,10 @@ struct mdev_device {
  *			@mdev: mediated device structure
  *			@vma: vma structure
  * Parent device that support mediated device should be registered with mdev
- * module with parent_ops structure.
+ * module with mdev_parent_ops structure.
  **/
 
-struct parent_ops {
+struct mdev_parent_ops {
 	struct module   *owner;
 	const struct attribute_group **dev_attr_groups;
 	const struct attribute_group **mdev_attr_groups;
@@ -159,7 +159,7 @@ extern struct bus_type mdev_bus_type;
 #define dev_is_mdev(d) ((d)->bus == &mdev_bus_type)
 
 extern int  mdev_register_device(struct device *dev,
-				 const struct parent_ops *ops);
+				 const struct mdev_parent_ops *ops);
 extern void mdev_unregister_device(struct device *dev);
 
 extern int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
-- 
cgit v1.2.3


From 9372e6feaafb65d88f667ffb5b7b425f8568344f Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 30 Dec 2016 08:13:41 -0700
Subject: vfio-mdev: Make mdev_parent private

Rather than hoping for good behavior by marking some elements
internal, enforce it by making the entire structure private and
creating an accessor function for the one useful external field.

Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Jike Song <jike.song@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed by: Kirti Wankhede <kwankhede@nvidia.com>
---
 include/linux/mdev.h | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 853bb78e5866..f586222b6c25 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -13,19 +13,6 @@
 #ifndef MDEV_H
 #define MDEV_H
 
-/* Parent device */
-struct mdev_parent {
-	struct device			*dev;
-	const struct mdev_parent_ops	*ops;
-
-	/* internal */
-	struct kref		ref;
-	struct mutex		lock;
-	struct list_head	next;
-	struct kset		*mdev_types_kset;
-	struct list_head	type_list;
-};
-
 /* Mediated device */
 struct mdev_device {
 	struct device		dev;
@@ -165,4 +152,6 @@ extern void mdev_unregister_device(struct device *dev);
 extern int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
 extern void mdev_unregister_driver(struct mdev_driver *drv);
 
+extern struct device *mdev_parent_dev(struct mdev_device *mdev);
+
 #endif /* MDEV_H */
-- 
cgit v1.2.3


From 99e3123e3d72616a829dad6d25aa005ef1ef9b13 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 30 Dec 2016 08:13:44 -0700
Subject: vfio-mdev: Make mdev_device private and abstract interfaces

Abstract access to mdev_device so that we can define which interfaces
are public rather than relying on comments in the structure.

Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Jike Song <jike.song@intel.com>
Reviewed by: Kirti Wankhede <kwankhede@nvidia.com>
---
 include/linux/mdev.h | 31 ++++++-------------------------
 1 file changed, 6 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index f586222b6c25..3ee44b8d2bb3 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -13,18 +13,7 @@
 #ifndef MDEV_H
 #define MDEV_H
 
-/* Mediated device */
-struct mdev_device {
-	struct device		dev;
-	struct mdev_parent	*parent;
-	uuid_le			uuid;
-	void			*driver_data;
-
-	/* internal */
-	struct kref		ref;
-	struct list_head	next;
-	struct kobject		*type_kobj;
-};
+struct mdev_device;
 
 /**
  * struct mdev_parent_ops - Structure to be registered for each parent device to
@@ -75,7 +64,6 @@ struct mdev_device {
  * Parent device that support mediated device should be registered with mdev
  * module with mdev_parent_ops structure.
  **/
-
 struct mdev_parent_ops {
 	struct module   *owner;
 	const struct attribute_group **dev_attr_groups;
@@ -129,22 +117,13 @@ struct mdev_driver {
 };
 
 #define to_mdev_driver(drv)	container_of(drv, struct mdev_driver, driver)
-#define to_mdev_device(dev)	container_of(dev, struct mdev_device, dev)
 
-static inline void *mdev_get_drvdata(struct mdev_device *mdev)
-{
-	return mdev->driver_data;
-}
-
-static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data)
-{
-	mdev->driver_data = data;
-}
+extern void *mdev_get_drvdata(struct mdev_device *mdev);
+extern void mdev_set_drvdata(struct mdev_device *mdev, void *data);
+extern uuid_le mdev_uuid(struct mdev_device *mdev);
 
 extern struct bus_type mdev_bus_type;
 
-#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type)
-
 extern int  mdev_register_device(struct device *dev,
 				 const struct mdev_parent_ops *ops);
 extern void mdev_unregister_device(struct device *dev);
@@ -153,5 +132,7 @@ extern int  mdev_register_driver(struct mdev_driver *drv, struct module *owner);
 extern void mdev_unregister_driver(struct mdev_driver *drv);
 
 extern struct device *mdev_parent_dev(struct mdev_device *mdev);
+extern struct device *mdev_dev(struct mdev_device *mdev);
+extern struct mdev_device *mdev_from_dev(struct device *dev);
 
 #endif /* MDEV_H */
-- 
cgit v1.2.3


From 65e4345c8ef8811bbb4860fe5f2df10646b7f2e1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 30 Dec 2016 23:54:18 +0100
Subject: iio: accel: st_accel: fix LIS3LV02 reading and scaling

The LIS3LV02 has a special bit that need to be set to get the
read values left aligned. Before this patch we get gibberish
like this:

iio_generic_buffer -a -c10 -n lis3lv02dl_accel
(...)
0.000000 -0.010042 -0.642688 19155832931907
0.000000 -0.010042 -0.642688 19155858751073

Which is because we read a raw value for 1g as 64 which is
the nominal 1024 for 1g shifted 4 bits to the left by being
right-aligned rather than left aligned.

Since all other sensors are left aligned, add some code to
set the special DAS (data alignment setting) bit to 1 so that
the right value is now read like this:

iio_generic_buffer -a -c10 -n lis3lv02dl_accel
(...)
0.000000 -0.147095 -10.120135 24761614364956
-0.029419 -0.176514 -10.120135 24761631624540

The scaling was weird as well: we have a gain of 1000 for 1g
and 3000 for 6g. I don't even remember how I came up with the
old values but they are wrong.

Fixes: 3acddf74f807 ("iio: st-sensors: add support for lis3lv02d accelerometer")
Cc: Lorenzo Bianconi <lorenzo.bianconi@st.com>
Cc: Giuseppe Barba <giuseppe.barba@st.com>
Cc: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/common/st_sensors.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 228bd44efa4c..497f2b3a5a62 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -115,6 +115,16 @@ struct st_sensor_bdu {
 	u8 mask;
 };
 
+/**
+ * struct st_sensor_das - ST sensor device data alignment selection
+ * @addr: address of the register.
+ * @mask: mask to write the das flag for left alignment.
+ */
+struct st_sensor_das {
+	u8 addr;
+	u8 mask;
+};
+
 /**
  * struct st_sensor_data_ready_irq - ST sensor device data-ready interrupt
  * @addr: address of the register.
@@ -185,6 +195,7 @@ struct st_sensor_transfer_function {
  * @enable_axis: Enable one or more axis of the sensor.
  * @fs: Full scale register and full scale list available.
  * @bdu: Block data update register.
+ * @das: Data Alignment Selection register.
  * @drdy_irq: Data ready register of the sensor.
  * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read.
  * @bootime: samples to discard when sensor passing from power-down to power-up.
@@ -200,6 +211,7 @@ struct st_sensor_settings {
 	struct st_sensor_axis enable_axis;
 	struct st_sensor_fullscale fs;
 	struct st_sensor_bdu bdu;
+	struct st_sensor_das das;
 	struct st_sensor_data_ready_irq drdy_irq;
 	bool multi_read_bit;
 	unsigned int bootime;
-- 
cgit v1.2.3


From d2e3a1358c37cd82eef92b5e908b4f0472194481 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Thu, 29 Dec 2016 14:11:21 +0100
Subject: ASoC: Fix binding and probing of auxiliary components

Currently binding of auxiliary devices doesn't work as in
soc_bind_aux_dev() function a bound component is not being added
to any list and in soc_probe_aux_devices() we are trying to walk
the component_dev_list list to probe auxiliary components but
at that time this list doesn't contain any auxiliary components
since they are being added to the card only in soc_probe_component().

This patch adds a list to the card where are stored bound but not
probed auxiliary devices, so that all aux devices can be probed.

Fixes: 1a653aa44725 "ASoC: core: replace aux_comp_list to component_dev_list"
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 2b502f6cc6d0..b86168a21d56 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -813,6 +813,7 @@ struct snd_soc_component {
 	unsigned int suspended:1; /* is in suspend PM state */
 
 	struct list_head list;
+	struct list_head card_aux_list; /* for auxiliary bound components */
 	struct list_head card_list;
 
 	struct snd_soc_dai_driver *dai_drv;
@@ -1152,6 +1153,7 @@ struct snd_soc_card {
 	 */
 	struct snd_soc_aux_dev *aux_dev;
 	int num_aux_devs;
+	struct list_head aux_comp_list;
 
 	const struct snd_kcontrol_new *controls;
 	int num_controls;
@@ -1547,6 +1549,7 @@ static inline void snd_soc_initialize_card_lists(struct snd_soc_card *card)
 	INIT_LIST_HEAD(&card->widgets);
 	INIT_LIST_HEAD(&card->paths);
 	INIT_LIST_HEAD(&card->dapm_list);
+	INIT_LIST_HEAD(&card->aux_comp_list);
 	INIT_LIST_HEAD(&card->component_dev_list);
 }
 
-- 
cgit v1.2.3


From 96a420d2d37cc019d0fbb95c9f0e965fa1080e1f Mon Sep 17 00:00:00 2001
From: Vincent Pelletier <plr.vincent@gmail.com>
Date: Thu, 15 Dec 2016 12:47:41 +0000
Subject: usb: gadget: f_fs: Document eventfd effect on descriptor format.

When FUNCTIONFS_EVENTFD flag is set, __ffs_data_got_descs reads a 32bits,
little-endian value right after the fixed structure header, and passes it
to eventfd_ctx_fdget. Document this.

Also, rephrase a comment to be affirmative about the role of string
descriptor at index 0. Ref: USB 2.0 spec paragraph "9.6.7 String", and
also checked to still be current in USB 3.0 spec paragraph "9.6.9 String".

Signed-off-by: Vincent Pelletier <plr.vincent@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 include/uapi/linux/usb/functionfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index acc63697a0cc..b2a31a55a612 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -93,6 +93,7 @@ struct usb_ext_prop_desc {
  * |   0 | magic     | LE32         | FUNCTIONFS_DESCRIPTORS_MAGIC_V2      |
  * |   4 | length    | LE32         | length of the whole data chunk       |
  * |   8 | flags     | LE32         | combination of functionfs_flags      |
+ * |     | eventfd   | LE32         | eventfd file descriptor              |
  * |     | fs_count  | LE32         | number of full-speed descriptors     |
  * |     | hs_count  | LE32         | number of high-speed descriptors     |
  * |     | ss_count  | LE32         | number of super-speed descriptors    |
-- 
cgit v1.2.3


From c6ef7fd40eddad38a8825cbd6bb2ce8bdbba88f5 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 4 Jan 2017 15:08:15 -0500
Subject: vfio-mdev: fix non-standard ioctl return val causing i386 build fail
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

What appears to be a copy and paste error from the line above gets
the ioctl a ssize_t return value instead of the traditional "int".

The associated sample code used "long" which meant it would compile
for x86-64 but not i386, with the latter failing as follows:

  CC [M]  samples/vfio-mdev/mtty.o
samples/vfio-mdev/mtty.c:1418:20: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types]
  .ioctl          = mtty_ioctl,
                    ^
samples/vfio-mdev/mtty.c:1418:20: note: (near initialization for ‘mdev_fops.ioctl’)
cc1: some warnings being treated as errors

Since in this case, vfio is working with struct file_operations; as such:

    long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
    long (*compat_ioctl) (struct file *, unsigned int, unsigned long);

...and so here we just standardize on long vs. the normal int that user
space typically sees and documents as per "man ioctl" and similar.

Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.")
Cc: Kirti Wankhede <kwankhede@nvidia.com>
Cc: Neo Jia <cjia@nvidia.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/mdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 3ee44b8d2bb3..b6e048e1045f 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -78,7 +78,7 @@ struct mdev_parent_ops {
 			size_t count, loff_t *ppos);
 	ssize_t (*write)(struct mdev_device *mdev, const char __user *buf,
 			 size_t count, loff_t *ppos);
-	ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd,
+	long	(*ioctl)(struct mdev_device *mdev, unsigned int cmd,
 			 unsigned long arg);
 	int	(*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma);
 };
-- 
cgit v1.2.3


From c7858bf16c0b2cc62f475f31e6df28c3a68da1d6 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 3 Jan 2017 13:49:42 +0100
Subject: asm-prototypes: Clear any CPP defines before declaring the functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The asm-prototypes.h file is used to provide dummy function declarations
for genksyms, when processing asm files with EXPORT_SYMBOL. Make sure
that any architecture defines get out of our way. x86 currently has an
issue with memcpy on 64bit with CONFIG_KMEMCHECK=y and with
memset/__memset on 32bit:

	$ cat init/test.c
	#include <asm/asm-prototypes.h>
	$ make -s init/test.o
	In file included from ./arch/x86/include/asm/string.h:4:0,
			 from ./include/linux/string.h:18,
			 from ./include/linux/bitmap.h:8,
			 from ./include/linux/cpumask.h:11,
			 from ./arch/x86/include/asm/cpumask.h:4,
			 from ./arch/x86/include/asm/msr.h:10,
			 from ./arch/x86/include/asm/processor.h:20,
			 from ./arch/x86/include/asm/cpufeature.h:4,
			 from ./arch/x86/include/asm/thread_info.h:52,
			 from ./include/linux/thread_info.h:25,
			 from ./arch/x86/include/asm/preempt.h:6,
			 from ./include/linux/preempt.h:59,
			 from ./include/linux/spinlock.h:50,
			 from ./include/linux/seqlock.h:35,
			 from ./include/linux/time.h:5,
			 from ./include/uapi/linux/timex.h:56,
			 from ./include/linux/timex.h:56,
			 from ./include/linux/sched.h:19,
			 from ./include/linux/uaccess.h:4,
			 from ./arch/x86/include/asm/asm-prototypes.h:2,
			 from init/test.c:1:
	./arch/x86/include/asm/string_64.h:52:47: error: expected declaration specifiers or ‘...’ before ‘(’ token
	 #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
	 ./include/asm-generic/asm-prototypes.h:6:14: note: in expansion of macro ‘memcpy’
	  extern void *memcpy(void *, const void *, __kernel_size_t);

						       ^
	...

During real build, this manifests itself by genksyms segfaulting.

Fixes: 334bb7738764 ("x86/kbuild: enable modversions for symbols exported from asm")
Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Cc: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Michal Marek <mmarek@suse.com>
---
 include/asm-generic/asm-prototypes.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h
index df13637e4017..939869c772b1 100644
--- a/include/asm-generic/asm-prototypes.h
+++ b/include/asm-generic/asm-prototypes.h
@@ -1,7 +1,13 @@
 #include <linux/bitops.h>
+#undef __memset
 extern void *__memset(void *, int, __kernel_size_t);
+#undef __memcpy
 extern void *__memcpy(void *, const void *, __kernel_size_t);
+#undef __memmove
 extern void *__memmove(void *, const void *, __kernel_size_t);
+#undef memset
 extern void *memset(void *, int, __kernel_size_t);
+#undef memcpy
 extern void *memcpy(void *, const void *, __kernel_size_t);
+#undef memmove
 extern void *memmove(void *, const void *, __kernel_size_t);
-- 
cgit v1.2.3


From 7453c549f5f6485c0d79cad7844870dcc7d1b34d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 20 Dec 2016 10:02:02 -0500
Subject: swiotlb: Export swiotlb_max_segment to users

So they can figure out what is the optimal number of pages
that can be contingously stitched together without fear of
bounce buffer.

We also expose an mechanism for sub-users of SWIOTLB API, such
as Xen-SWIOTLB to set the max segment value. And lastly
if swiotlb=force is set (which mandates we bounce buffer everything)
we set max_segment so at least we can bounce buffer one 4K page
instead of a giant 512KB one for which we may not have space.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reported-and-Tested-by: Juergen Gross <jgross@suse.com>
---
 include/linux/swiotlb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index d9c84a9cde3d..4ee479f2f355 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -114,11 +114,14 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask);
 
 #ifdef CONFIG_SWIOTLB
 extern void __init swiotlb_free(void);
+unsigned int swiotlb_max_segment(void);
 #else
 static inline void swiotlb_free(void) { }
+static inline unsigned int swiotlb_max_segment(void) { return 0; }
 #endif
 
 extern void swiotlb_print_info(void);
 extern int is_swiotlb_buffer(phys_addr_t paddr);
+extern void swiotlb_set_max_segment(unsigned int);
 
 #endif /* __LINUX_SWIOTLB_H */
-- 
cgit v1.2.3


From 20b1e22d01a4b0b11d3a1066e9feb04be38607ec Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nicstange@gmail.com>
Date: Thu, 5 Jan 2017 13:51:29 +0100
Subject: x86/efi: Don't allocate memmap through memblock after mm_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the following commit:

  4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data")

...  efi_bgrt_init() calls into the memblock allocator through
efi_mem_reserve() => efi_arch_mem_reserve() *after* mm_init() has been called.

Indeed, KASAN reports a bad read access later on in efi_free_boot_services():

  BUG: KASAN: use-after-free in efi_free_boot_services+0xae/0x24c
            at addr ffff88022de12740
  Read of size 4 by task swapper/0/0
  page:ffffea0008b78480 count:0 mapcount:-127
  mapping:          (null) index:0x1 flags: 0x5fff8000000000()
  [...]
  Call Trace:
   dump_stack+0x68/0x9f
   kasan_report_error+0x4c8/0x500
   kasan_report+0x58/0x60
   __asan_load4+0x61/0x80
   efi_free_boot_services+0xae/0x24c
   start_kernel+0x527/0x562
   x86_64_start_reservations+0x24/0x26
   x86_64_start_kernel+0x157/0x17a
   start_cpu+0x5/0x14

The instruction at the given address is the first read from the memmap's
memory, i.e. the read of md->type in efi_free_boot_services().

Note that the writes earlier in efi_arch_mem_reserve() don't splat because
they're done through early_memremap()ed addresses.

So, after memblock is gone, allocations should be done through the "normal"
page allocator. Introduce a helper, efi_memmap_alloc() for this. Use
it from efi_arch_mem_reserve(), efi_free_boot_services() and, for the sake
of consistency, from efi_fake_memmap() as well.

Note that for the latter, the memmap allocations cease to be page aligned.
This isn't needed though.

Tested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Nicolai Stange <nicstange@gmail.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org> # v4.9
Cc: Dave Young <dyoung@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mika Penttilä <mika.penttila@nextfour.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data")
Link: http://lkml.kernel.org/r/20170105125130.2815-1-nicstange@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index a07a476178cd..0c5420208c40 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -950,6 +950,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes,
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 
+extern phys_addr_t __init efi_memmap_alloc(unsigned int num_entries);
 extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
 extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size);
 extern void __init efi_memmap_unmap(void);
-- 
cgit v1.2.3


From ea07b862ac8ef9b8c8358517d2e39f847dda6659 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 6 Jan 2017 19:21:43 -0500
Subject: mm: workingset: fix use-after-free in shadow node shrinker

Several people report seeing warnings about inconsistent radix tree
nodes followed by crashes in the workingset code, which all looked like
use-after-free access from the shadow node shrinker.

Dave Jones managed to reproduce the issue with a debug patch applied,
which confirmed that the radix tree shrinking indeed frees shadow nodes
while they are still linked to the shadow LRU:

  WARNING: CPU: 2 PID: 53 at lib/radix-tree.c:643 delete_node+0x1e4/0x200
  CPU: 2 PID: 53 Comm: kswapd0 Not tainted 4.10.0-rc2-think+ #3
  Call Trace:
     delete_node+0x1e4/0x200
     __radix_tree_delete_node+0xd/0x10
     shadow_lru_isolate+0xe6/0x220
     __list_lru_walk_one.isra.4+0x9b/0x190
     list_lru_walk_one+0x23/0x30
     scan_shadow_nodes+0x2e/0x40
     shrink_slab.part.44+0x23d/0x5d0
     shrink_node+0x22c/0x330
     kswapd+0x392/0x8f0

This is the WARN_ON_ONCE(!list_empty(&node->private_list)) placed in the
inlined radix_tree_shrink().

The problem is with 14b468791fa9 ("mm: workingset: move shadow entry
tracking to radix tree exceptional tracking"), which passes an update
callback into the radix tree to link and unlink shadow leaf nodes when
tree entries change, but forgot to pass the callback when reclaiming a
shadow node.

While the reclaimed shadow node itself is unlinked by the shrinker, its
deletion from the tree can cause the left-most leaf node in the tree to
be shrunk.  If that happens to be a shadow node as well, we don't unlink
it from the LRU as we should.

Consider this tree, where the s are shadow entries:

       root->rnode
            |
       [0       n]
        |       |
     [s    ] [sssss]

Now the shadow node shrinker reclaims the rightmost leaf node through
the shadow node LRU:

       root->rnode
            |
       [0        ]
        |
    [s     ]

Because the parent of the deleted node is the first level below the
root and has only one child in the left-most slot, the intermediate
level is shrunk and the node containing the single shadow is put in
its place:

       root->rnode
            |
       [s        ]

The shrinker again sees a single left-most slot in a first level node
and thus decides to store the shadow in root->rnode directly and free
the node - which is a leaf node on the shadow node LRU.

  root->rnode
       |
       s

Without the update callback, the freed node remains on the shadow LRU,
where it causes later shrinker runs to crash.

Pass the node updater callback into __radix_tree_delete_node() in case
the deletion causes the left-most branch in the tree to collapse too.

Also add warnings when linked nodes are freed right away, rather than
wait for the use-after-free when the list is scanned much later.

Fixes: 14b468791fa9 ("mm: workingset: move shadow entry tracking to radix tree exceptional tracking")
Reported-by: Dave Chinner <david@fromorbit.com>
Reported-by: Hugh Dickins <hughd@google.com>
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-and-tested-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chris Leech <cleech@redhat.com>
Cc: Lee Duncan <lduncan@suse.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 5dea8f6440e4..52bda854593b 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -306,7 +306,9 @@ void radix_tree_iter_replace(struct radix_tree_root *,
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void **slot, void *item);
 void __radix_tree_delete_node(struct radix_tree_root *root,
-			      struct radix_tree_node *node);
+			      struct radix_tree_node *node,
+			      radix_tree_update_node_t update_node,
+			      void *private);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
 void *radix_tree_delete(struct radix_tree_root *, unsigned long);
 void radix_tree_clear_tags(struct radix_tree_root *root,
-- 
cgit v1.2.3


From ac0c7cf8be00f269f82964cf7b144ca3edc5dbc4 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 6 Jan 2017 14:12:51 +0100
Subject: btrfs: fix crash when tracepoint arguments are freed by wq callbacks

Enabling btrfs tracepoints leads to instant crash, as reported. The wq
callbacks could free the memory and the tracepoints started to
dereference the members to get to fs_info.

The proposed fix https://marc.info/?l=linux-btrfs&m=148172436722606&w=2
removed the tracepoints but we could preserve them by passing only the
required data in a safe way.

Fixes: bc074524e123 ("btrfs: prefix fsid to all trace events")
CC: stable@vger.kernel.org # 4.8+
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index c14bed4ab097..b09225c77676 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1157,22 +1157,26 @@ DECLARE_EVENT_CLASS(btrfs__work,
 		   __entry->func, __entry->ordered_func, __entry->ordered_free)
 );
 
-/* For situiations that the work is freed */
+/*
+ * For situiations when the work is freed, we pass fs_info and a tag that that
+ * matches address of the work structure so it can be paired with the
+ * scheduling event.
+ */
 DECLARE_EVENT_CLASS(btrfs__work__done,
 
-	TP_PROTO(struct btrfs_work *work),
+	TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag),
 
-	TP_ARGS(work),
+	TP_ARGS(fs_info, wtag),
 
 	TP_STRUCT__entry_btrfs(
-		__field(	void *,	work			)
+		__field(	void *,	wtag			)
 	),
 
-	TP_fast_assign_btrfs(btrfs_work_owner(work),
-		__entry->work		= work;
+	TP_fast_assign_btrfs(fs_info,
+		__entry->wtag		= wtag;
 	),
 
-	TP_printk_btrfs("work->%p", __entry->work)
+	TP_printk_btrfs("work->%p", __entry->wtag)
 );
 
 DEFINE_EVENT(btrfs__work, btrfs_work_queued,
@@ -1191,9 +1195,9 @@ DEFINE_EVENT(btrfs__work, btrfs_work_sched,
 
 DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done,
 
-	TP_PROTO(struct btrfs_work *work),
+	TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag),
 
-	TP_ARGS(work)
+	TP_ARGS(fs_info, wtag)
 );
 
 DEFINE_EVENT(btrfs__work, btrfs_ordered_sched,
-- 
cgit v1.2.3


From 92a1bf76a89ad338f00eb9a2c7689a3907fbcaad Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Thu, 17 Nov 2016 15:00:50 -0800
Subject: Btrfs: add 'inode' for extent map tracepoint

'inode' is an important field for btrfs_get_extent, lets trace it.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index b09225c77676..3048f5205363 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -184,14 +184,16 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
 
 TRACE_EVENT_CONDITION(btrfs_get_extent,
 
-	TP_PROTO(struct btrfs_root *root, struct extent_map *map),
+	TP_PROTO(struct btrfs_root *root, struct inode *inode,
+		 struct extent_map *map),
 
-	TP_ARGS(root, map),
+	TP_ARGS(root, inode, map),
 
 	TP_CONDITION(map),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,  root_objectid	)
+		__field(	u64,  ino		)
 		__field(	u64,  start		)
 		__field(	u64,  len		)
 		__field(	u64,  orig_start	)
@@ -204,7 +206,8 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 
 	TP_fast_assign_btrfs(root->fs_info,
 		__entry->root_objectid	= root->root_key.objectid;
-		__entry->start 		= map->start;
+		__entry->ino		= btrfs_ino(inode);
+		__entry->start		= map->start;
 		__entry->len		= map->len;
 		__entry->orig_start	= map->orig_start;
 		__entry->block_start	= map->block_start;
@@ -214,11 +217,12 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->compress_type	= map->compress_type;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu, "
+	TP_printk_btrfs("root = %llu(%s), ino = %llu start = %llu, len = %llu, "
 		  "orig_start = %llu, block_start = %llu(%s), "
 		  "block_len = %llu, flags = %s, refs = %u, "
 		  "compress_type = %u",
 		  show_root_type(__entry->root_objectid),
+		  (unsigned long long)__entry->ino,
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->len,
 		  (unsigned long long)__entry->orig_start,
-- 
cgit v1.2.3


From 7856654842bdbebc0fbcbf51573da5d70a787aba Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 30 Nov 2016 16:10:10 -0800
Subject: Btrfs: add truncated_len for ordered extent tracepoints

This can help us monitor truncated ordered extents.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 3048f5205363..2026a89786b0 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -263,6 +263,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__field(	int,  compress_type	)
 		__field(	int,  refs		)
 		__field(	u64,  root_objectid	)
+		__field(	u64,  truncated_len	)
 	),
 
 	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
@@ -277,10 +278,12 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__entry->refs		= atomic_read(&ordered->refs);
 		__entry->root_objectid	=
 				BTRFS_I(inode)->root->root_key.objectid;
+		__entry->truncated_len	= ordered->truncated_len;
 	),
 
 	TP_printk_btrfs("root = %llu(%s), ino = %llu, file_offset = %llu, "
 		  "start = %llu, len = %llu, disk_len = %llu, "
+		  "truncated_len = %llu, "
 		  "bytes_left = %llu, flags = %s, compress_type = %d, "
 		  "refs = %d",
 		  show_root_type(__entry->root_objectid),
@@ -289,6 +292,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->len,
 		  (unsigned long long)__entry->disk_len,
+		  (unsigned long long)__entry->truncated_len,
 		  (unsigned long long)__entry->bytes_left,
 		  show_ordered_flags(__entry->flags),
 		  __entry->compress_type, __entry->refs)
-- 
cgit v1.2.3


From 562a7a07bf61e2949f7cbdb6ac7537ad9e2794d1 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 6 Jan 2017 15:51:36 +0100
Subject: btrfs: make tracepoint format strings more compact

We've recently added the fsid to trace events, this makes the line quite
long. To reduce the it again, remove extra spaces around = and remove
",".

Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 112 +++++++++++++++++++++----------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 2026a89786b0..88d18a8ceb59 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -130,8 +130,8 @@ DECLARE_EVENT_CLASS(btrfs__inode,
 				BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), gen = %llu, ino = %lu, blocks = %llu, "
-		  "disk_i_size = %llu, last_trans = %llu, logged_trans = %llu",
+	TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%lu blocks=%llu "
+		  "disk_i_size=%llu last_trans=%llu logged_trans=%llu",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->generation,
 		  (unsigned long)__entry->ino,
@@ -217,10 +217,10 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->compress_type	= map->compress_type;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %llu start = %llu, len = %llu, "
-		  "orig_start = %llu, block_start = %llu(%s), "
-		  "block_len = %llu, flags = %s, refs = %u, "
-		  "compress_type = %u",
+	TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu "
+		  "orig_start=%llu block_start=%llu(%s) "
+		  "block_len=%llu flags=%s refs=%u "
+		  "compress_type=%u",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->ino,
 		  (unsigned long long)__entry->start,
@@ -281,11 +281,11 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 		__entry->truncated_len	= ordered->truncated_len;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %llu, file_offset = %llu, "
-		  "start = %llu, len = %llu, disk_len = %llu, "
-		  "truncated_len = %llu, "
-		  "bytes_left = %llu, flags = %s, compress_type = %d, "
-		  "refs = %d",
+	TP_printk_btrfs("root=%llu(%s) ino=%llu file_offset=%llu "
+		  "start=%llu len=%llu disk_len=%llu "
+		  "truncated_len=%llu "
+		  "bytes_left=%llu flags=%s compress_type=%d "
+		  "refs=%d",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->ino,
 		  (unsigned long long)__entry->file_offset,
@@ -362,10 +362,10 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
 				 BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, "
-		  "nr_to_write = %ld, pages_skipped = %ld, range_start = %llu, "
-		  "range_end = %llu, for_kupdate = %d, "
-		  "for_reclaim = %d, range_cyclic = %d, writeback_index = %lu",
+	TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu "
+		  "nr_to_write=%ld pages_skipped=%ld range_start=%llu "
+		  "range_end=%llu for_kupdate=%d "
+		  "for_reclaim=%d range_cyclic=%d writeback_index=%lu",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long)__entry->ino, __entry->index,
 		  __entry->nr_to_write, __entry->pages_skipped,
@@ -408,8 +408,8 @@ TRACE_EVENT(btrfs_writepage_end_io_hook,
 			 BTRFS_I(page->mapping->host)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %lu, page_index = %lu, start = %llu, "
-		  "end = %llu, uptodate = %d",
+	TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu start=%llu "
+		  "end=%llu uptodate=%d",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long)__entry->ino, (unsigned long)__entry->index,
 		  (unsigned long long)__entry->start,
@@ -441,7 +441,7 @@ TRACE_EVENT(btrfs_sync_file,
 				 BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), ino = %ld, parent = %ld, datasync = %d",
+	TP_printk_btrfs("root=%llu(%s) ino=%ld parent=%ld datasync=%d",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long)__entry->ino, (unsigned long)__entry->parent,
 		  __entry->datasync)
@@ -492,9 +492,9 @@ TRACE_EVENT(btrfs_add_block_group,
 		__entry->create		= create;
 	),
 
-	TP_printk("%pU: block_group offset = %llu, size = %llu, "
-		  "flags = %llu(%s), bytes_used = %llu, bytes_super = %llu, "
-		  "create = %d", __entry->fsid,
+	TP_printk("%pU: block_group offset=%llu size=%llu "
+		  "flags=%llu(%s) bytes_used=%llu bytes_super=%llu "
+		  "create=%d", __entry->fsid,
 		  (unsigned long long)__entry->offset,
 		  (unsigned long long)__entry->size,
 		  (unsigned long long)__entry->flags,
@@ -543,9 +543,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref,
 		__entry->seq		= ref->seq;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, "
-		  "parent = %llu(%s), ref_root = %llu(%s), level = %d, "
-		  "type = %s, seq = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
+		  "parent=%llu(%s) ref_root=%llu(%s) level=%d "
+		  "type=%s seq=%llu",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes,
 		  show_ref_action(__entry->action),
@@ -608,9 +608,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_data_ref,
 		__entry->seq		= ref->seq;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, "
-		  "parent = %llu(%s), ref_root = %llu(%s), owner = %llu, "
-		  "offset = %llu, type = %s, seq = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
+		  "parent=%llu(%s) ref_root=%llu(%s) owner=%llu "
+		  "offset=%llu type=%s seq=%llu",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes,
 		  show_ref_action(__entry->action),
@@ -665,7 +665,7 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
 		__entry->is_data	= head_ref->is_data;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, action = %s, is_data = %d",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s is_data=%d",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes,
 		  show_ref_action(__entry->action),
@@ -729,8 +729,8 @@ DECLARE_EVENT_CLASS(btrfs__chunk,
 		__entry->root_objectid	= fs_info->chunk_root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), offset = %llu, size = %llu, "
-		  "num_stripes = %d, sub_stripes = %d, type = %s",
+	TP_printk_btrfs("root=%llu(%s) offset=%llu size=%llu "
+		  "num_stripes=%d sub_stripes=%d type=%s",
 		  show_root_type(__entry->root_objectid),
 		  (unsigned long long)__entry->offset,
 		  (unsigned long long)__entry->size,
@@ -779,8 +779,8 @@ TRACE_EVENT(btrfs_cow_block,
 		__entry->cow_level	= btrfs_header_level(cow);
 	),
 
-	TP_printk_btrfs("root = %llu(%s), refs = %d, orig_buf = %llu "
-		  "(orig_level = %d), cow_buf = %llu (cow_level = %d)",
+	TP_printk_btrfs("root=%llu(%s) refs=%d orig_buf=%llu "
+		  "(orig_level=%d) cow_buf=%llu (cow_level=%d)",
 		  show_root_type(__entry->root_objectid),
 		  __entry->refs,
 		  (unsigned long long)__entry->buf_start,
@@ -844,7 +844,7 @@ TRACE_EVENT(btrfs_trigger_flush,
 		__assign_str(reason, reason)
 	),
 
-	TP_printk("%pU: %s: flush = %d(%s), flags = %llu(%s), bytes = %llu",
+	TP_printk("%pU: %s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
 		  __entry->fsid, __get_str(reason), __entry->flush,
 		  show_flush_action(__entry->flush),
 		  (unsigned long long)__entry->flags,
@@ -887,8 +887,8 @@ TRACE_EVENT(btrfs_flush_space,
 		__entry->ret		=	ret;
 	),
 
-	TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, "
-		  "orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state,
+	TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu "
+		  "orig_bytes=%llu ret=%d", __entry->fsid, __entry->state,
 		  show_flush_state(__entry->state),
 		  (unsigned long long)__entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
@@ -913,7 +913,7 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent,
 		__entry->len		= len;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), start = %llu, len = %llu",
+	TP_printk_btrfs("root=%llu(%s) start=%llu len=%llu",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->len)
@@ -952,7 +952,7 @@ TRACE_EVENT(find_free_extent,
 		__entry->data		= data;
 	),
 
-	TP_printk_btrfs("root = %Lu(%s), len = %Lu, empty_size = %Lu, flags = %Lu(%s)",
+	TP_printk_btrfs("root=%Lu(%s) len=%Lu empty_size=%Lu flags=%Lu(%s)",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  __entry->num_bytes, __entry->empty_size, __entry->data,
 		  __print_flags((unsigned long)__entry->data, "|",
@@ -981,8 +981,8 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
 		__entry->len		= len;
 	),
 
-	TP_printk_btrfs("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
-		  "start = %Lu, len = %Lu",
+	TP_printk_btrfs("root=%Lu(%s) block_group=%Lu flags=%Lu(%s) "
+		  "start=%Lu len=%Lu",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  __entry->bg_objectid,
 		  __entry->flags, __print_flags((unsigned long)__entry->flags,
@@ -1033,8 +1033,8 @@ TRACE_EVENT(btrfs_find_cluster,
 		__entry->min_bytes	= min_bytes;
 	),
 
-	TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu,"
-		  " empty_size = %Lu, min_bytes = %Lu", __entry->bg_objectid,
+	TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) start=%Lu len=%Lu "
+		  "empty_size=%Lu min_bytes=%Lu", __entry->bg_objectid,
 		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
 				BTRFS_GROUP_FLAGS), __entry->start,
@@ -1055,7 +1055,7 @@ TRACE_EVENT(btrfs_failed_cluster_setup,
 		__entry->bg_objectid	= block_group->key.objectid;
 	),
 
-	TP_printk_btrfs("block_group = %Lu", __entry->bg_objectid)
+	TP_printk_btrfs("block_group=%Lu", __entry->bg_objectid)
 );
 
 TRACE_EVENT(btrfs_setup_cluster,
@@ -1083,8 +1083,8 @@ TRACE_EVENT(btrfs_setup_cluster,
 		__entry->bitmap		= bitmap;
 	),
 
-	TP_printk_btrfs("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, "
-		  "size = %Lu, max_size = %Lu, bitmap = %d",
+	TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) window_start=%Lu "
+		  "size=%Lu max_size=%Lu bitmap=%d",
 		  __entry->bg_objectid,
 		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
@@ -1111,7 +1111,7 @@ TRACE_EVENT(alloc_extent_state,
 		__entry->ip	= IP
 	),
 
-	TP_printk("state=%p; mask = %s; caller = %pS", __entry->state,
+	TP_printk("state=%p mask=%s caller=%pS", __entry->state,
 		  show_gfp_flags(__entry->mask), (void *)__entry->ip)
 );
 
@@ -1131,7 +1131,7 @@ TRACE_EVENT(free_extent_state,
 		__entry->ip = IP
 	),
 
-	TP_printk(" state=%p; caller = %pS", __entry->state,
+	TP_printk("state=%p caller=%pS", __entry->state,
 		  (void *)__entry->ip)
 );
 
@@ -1159,8 +1159,8 @@ DECLARE_EVENT_CLASS(btrfs__work,
 		__entry->normal_work	= &work->normal_work;
 	),
 
-	TP_printk_btrfs("work=%p (normal_work=%p), wq=%p, func=%pf, ordered_func=%p,"
-		  " ordered_free=%p",
+	TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%pf ordered_func=%p "
+		  "ordered_free=%p",
 		  __entry->work, __entry->normal_work, __entry->wq,
 		   __entry->func, __entry->ordered_func, __entry->ordered_free)
 );
@@ -1233,7 +1233,7 @@ DECLARE_EVENT_CLASS(btrfs__workqueue,
 		__entry->high		= high;
 	),
 
-	TP_printk_btrfs("name=%s%s, wq=%p", __get_str(name),
+	TP_printk_btrfs("name=%s%s wq=%p", __get_str(name),
 		  __print_flags(__entry->high, "",
 				{(WQ_HIGHPRI),	"-high"}),
 		  __entry->wq)
@@ -1288,7 +1288,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_data_map,
 		__entry->free_reserved	=	free_reserved;
 	),
 
-	TP_printk_btrfs("rootid=%llu, ino=%lu, free_reserved=%llu",
+	TP_printk_btrfs("rootid=%llu ino=%lu free_reserved=%llu",
 		  __entry->rootid, __entry->ino, __entry->free_reserved)
 );
 
@@ -1335,7 +1335,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data,
 		__entry->op		= op;
 	),
 
-	TP_printk_btrfs("root=%llu, ino=%lu, start=%llu, len=%llu, reserved=%llu, op=%s",
+	TP_printk_btrfs("root=%llu ino=%lu start=%llu len=%llu reserved=%llu op=%s",
 		  __entry->rootid, __entry->ino, __entry->start, __entry->len,
 		  __entry->reserved,
 		  __print_flags((unsigned long)__entry->op, "",
@@ -1373,7 +1373,7 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref,
 		__entry->reserved	= reserved;
 	),
 
-	TP_printk_btrfs("root=%llu, reserved=%llu, op=free",
+	TP_printk_btrfs("root=%llu reserved=%llu op=free",
 		  __entry->ref_root, __entry->reserved)
 );
 
@@ -1400,7 +1400,7 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
 		__entry->num_bytes	= rec->num_bytes;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu",
 		  (unsigned long long)__entry->bytenr,
 		  (unsigned long long)__entry->num_bytes)
 );
@@ -1442,8 +1442,8 @@ TRACE_EVENT(btrfs_qgroup_account_extent,
 		__entry->nr_new_roots	= nr_new_roots;
 	),
 
-	TP_printk_btrfs("bytenr = %llu, num_bytes = %llu, nr_old_roots = %llu, "
-		  "nr_new_roots = %llu",
+	TP_printk_btrfs("bytenr=%llu num_bytes=%llu nr_old_roots=%llu "
+		  "nr_new_roots=%llu",
 		  __entry->bytenr,
 		  __entry->num_bytes,
 		  __entry->nr_old_roots,
@@ -1469,7 +1469,7 @@ TRACE_EVENT(qgroup_update_counters,
 		__entry->cur_new_count	= cur_new_count;
 	),
 
-	TP_printk_btrfs("qgid = %llu, cur_old_count = %llu, cur_new_count = %llu",
+	TP_printk_btrfs("qgid=%llu cur_old_count=%llu cur_new_count=%llu",
 		  __entry->qgid,
 		  __entry->cur_old_count,
 		  __entry->cur_new_count)
-- 
cgit v1.2.3


From e864212078ded276bdb272b2e0ee6a979357ca8a Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 23 Dec 2016 11:37:53 +0100
Subject: target: add XCOPY target/segment desc sense codes

As defined in http://www.t10.org/lists/asc-num.htm. To be used during
validation of XCOPY target and segment descriptor lists.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 include/target/target_core_base.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 29e6858bb164..43edf82e54ff 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -174,6 +174,10 @@ enum tcm_sense_reason_table {
 	TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED	= R(0x16),
 	TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED	= R(0x17),
 	TCM_COPY_TARGET_DEVICE_NOT_REACHABLE	= R(0x18),
+	TCM_TOO_MANY_TARGET_DESCS		= R(0x19),
+	TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE	= R(0x1a),
+	TCM_TOO_MANY_SEGMENT_DESCS		= R(0x1b),
+	TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE	= R(0x1c),
 #undef R
 };
 
-- 
cgit v1.2.3


From 57ea52a865144aedbcd619ee0081155e658b6f7d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 10 Jan 2017 12:24:15 -0800
Subject: gro: Disable frag0 optimization on IPv6 ext headers

The GRO fast path caches the frag0 address.  This address becomes
invalid if frag0 is modified by pskb_may_pull or its variants.
So whenever that happens we must disable the frag0 optimization.

This is usually done through the combination of gro_header_hard
and gro_header_slow, however, the IPv6 extension header path did
the pulling directly and would continue to use the GRO fast path
incorrectly.

This patch fixes it by disabling the fast path when we enter the
IPv6 extension header path.

Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address")
Reported-by: Slava Shwartsman <slavash@mellanox.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 994f7423a74b..9bde9558b596 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2477,14 +2477,19 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
 	return NAPI_GRO_CB(skb)->frag0_len < hlen;
 }
 
+static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
+{
+	NAPI_GRO_CB(skb)->frag0 = NULL;
+	NAPI_GRO_CB(skb)->frag0_len = 0;
+}
+
 static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
 					unsigned int offset)
 {
 	if (!pskb_may_pull(skb, hlen))
 		return NULL;
 
-	NAPI_GRO_CB(skb)->frag0 = NULL;
-	NAPI_GRO_CB(skb)->frag0_len = 0;
+	skb_gro_frag0_invalidate(skb);
 	return skb->data + offset;
 }
 
-- 
cgit v1.2.3


From 097963959594c5eccaba42510f7033f703211bda Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 10 Jan 2017 16:57:21 -0800
Subject: mm: add follow_pte_pmd()

Patch series "Write protect DAX PMDs in *sync path".

Currently dax_mapping_entry_mkclean() fails to clean and write protect
the pmd_t of a DAX PMD entry during an *sync operation.  This can result
in data loss, as detailed in patch 2.

This series is based on Dan's "libnvdimm-pending" branch, which is the
current home for Jan's "dax: Page invalidation fixes" series.  You can
find a working tree here:

  https://git.kernel.org/cgit/linux/kernel/git/zwisler/linux.git/log/?h=dax_pmd_clean

This patch (of 2):

Similar to follow_pte(), follow_pte_pmd() allows either a PTE leaf or a
huge page PMD leaf to be found and returned.

Link: http://lkml.kernel.org/r/1482272586-21177-2-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fe6b4036664a..02793ac64ac6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1212,6 +1212,8 @@ void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
 int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
 	       spinlock_t **ptlp);
+int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+			     pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn);
 int follow_phys(struct vm_area_struct *vma, unsigned long address,
-- 
cgit v1.2.3


From f729c8c9b24f0540a6e6b86e68f3888ba90ef7e7 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 10 Jan 2017 16:57:24 -0800
Subject: dax: wrprotect pmd_t in dax_mapping_entry_mkclean

Currently dax_mapping_entry_mkclean() fails to clean and write protect
the pmd_t of a DAX PMD entry during an *sync operation.  This can result
in data loss in the following sequence:

1) mmap write to DAX PMD, dirtying PMD radix tree entry and making the
   pmd_t dirty and writeable
2) fsync, flushing out PMD data and cleaning the radix tree entry. We
   currently fail to mark the pmd_t as clean and write protected.
3) more mmap writes to the PMD.  These don't cause any page faults since
   the pmd_t is dirty and writeable.  The radix tree entry remains clean.
4) fsync, which fails to flush the dirty PMD data because the radix tree
   entry was clean.
5) crash - dirty data that should have been fsync'd as part of 4) could
   still have been in the processor cache, and is lost.

Fix this by marking the pmd_t clean and write protected in
dax_mapping_entry_mkclean(), which is called as part of the fsync
operation 2).  This will cause the writes in step 3) above to generate
page faults where we'll re-dirty the PMD radix tree entry, resulting in
flushes in the fsync that happens in step 4).

Fixes: 4b4bb46d00b3 ("dax: clear dirty entry tags on cache flush")
Link: http://lkml.kernel.org/r/1482272586-21177-3-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 02793ac64ac6..b84615b0f64c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1210,8 +1210,6 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
-int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
-	       spinlock_t **ptlp);
 int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 			     pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
-- 
cgit v1.2.3


From bb1107f7c6052c863692a41f78c000db792334bf Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 10 Jan 2017 16:57:27 -0800
Subject: mm, slab: make sure that KMALLOC_MAX_SIZE will fit into MAX_ORDER

Andrey Konovalov has reported the following warning triggered by the
syzkaller fuzzer.

  WARNING: CPU: 1 PID: 9935 at mm/page_alloc.c:3511 __alloc_pages_nodemask+0x159c/0x1e20
  Kernel panic - not syncing: panic_on_warn set ...
  CPU: 1 PID: 9935 Comm: syz-executor0 Not tainted 4.9.0-rc7+ #34
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  Call Trace:
    __alloc_pages_slowpath mm/page_alloc.c:3511
    __alloc_pages_nodemask+0x159c/0x1e20 mm/page_alloc.c:3781
    alloc_pages_current+0x1c7/0x6b0 mm/mempolicy.c:2072
    alloc_pages include/linux/gfp.h:469
    kmalloc_order+0x1f/0x70 mm/slab_common.c:1015
    kmalloc_order_trace+0x1f/0x160 mm/slab_common.c:1026
    kmalloc_large include/linux/slab.h:422
    __kmalloc+0x210/0x2d0 mm/slub.c:3723
    kmalloc include/linux/slab.h:495
    ep_write_iter+0x167/0xb50 drivers/usb/gadget/legacy/inode.c:664
    new_sync_write fs/read_write.c:499
    __vfs_write+0x483/0x760 fs/read_write.c:512
    vfs_write+0x170/0x4e0 fs/read_write.c:560
    SYSC_write fs/read_write.c:607
    SyS_write+0xfb/0x230 fs/read_write.c:599
    entry_SYSCALL_64_fastpath+0x1f/0xc2

The issue is caused by a lack of size check for the request size in
ep_write_iter which should be fixed.  It, however, points to another
problem, that SLUB defines KMALLOC_MAX_SIZE too large because the its
KMALLOC_SHIFT_MAX is (MAX_ORDER + PAGE_SHIFT) which means that the
resulting page allocator request might be MAX_ORDER which is too large
(see __alloc_pages_slowpath).

The same applies to the SLOB allocator which allows even larger sizes.
Make sure that they are capped properly and never request more than
MAX_ORDER order.

Link: http://lkml.kernel.org/r/20161220130659.16461-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 084b12bad198..4c5363566815 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -226,7 +226,7 @@ static inline const char *__check_heap_object(const void *ptr,
  * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
  */
 #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
-#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT)
+#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	3
 #endif
@@ -239,7 +239,7 @@ static inline const char *__check_heap_object(const void *ptr,
  * be allocated from the same page.
  */
 #define KMALLOC_SHIFT_HIGH	PAGE_SHIFT
-#define KMALLOC_SHIFT_MAX	30
+#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT - 1)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	3
 #endif
-- 
cgit v1.2.3


From 41b6167e8f746b475668f1da78599fc4284f18db Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 10 Jan 2017 16:57:42 -0800
Subject: mm: get rid of __GFP_OTHER_NODE

The flag was introduced by commit 78afd5612deb ("mm: add
__GFP_OTHER_NODE flag") to allow proper accounting of remote node
allocations done by kernel daemons on behalf of a process - e.g.
khugepaged.

After "mm: fix remote numa hits statistics" we do not need and actually
use the flag so we can safely remove it because all allocations which
are satisfied from their "home" node are accounted properly.

[mhocko@suse.com: fix build]
Link: http://lkml.kernel.org/r/20170106122225.GK5556@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/20170102153057.9451-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h            | 13 +++----------
 include/trace/events/mmflags.h |  3 +--
 2 files changed, 4 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 4175dca4ac39..7806a8f80abc 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -38,9 +38,8 @@ struct vm_area_struct;
 #define ___GFP_ACCOUNT		0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_DIRECT_RECLAIM	0x400000u
-#define ___GFP_OTHER_NODE	0x800000u
-#define ___GFP_WRITE		0x1000000u
-#define ___GFP_KSWAPD_RECLAIM	0x2000000u
+#define ___GFP_WRITE		0x800000u
+#define ___GFP_KSWAPD_RECLAIM	0x1000000u
 /* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
@@ -172,11 +171,6 @@ struct vm_area_struct;
  * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
  *   distinguishing in the source between false positives and allocations that
  *   cannot be supported (e.g. page tables).
- *
- * __GFP_OTHER_NODE is for allocations that are on a remote node but that
- *   should not be accounted for as a remote allocation in vmstat. A
- *   typical user would be khugepaged collapsing a huge page on a remote
- *   node.
  */
 #define __GFP_COLD	((__force gfp_t)___GFP_COLD)
 #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
@@ -184,10 +178,9 @@ struct vm_area_struct;
 #define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)
 #define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
-#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
 
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT 26
+#define __GFP_BITS_SHIFT 25
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /*
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 9e687ca9a307..15bf875d0e4a 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -47,8 +47,7 @@
 	{(unsigned long)__GFP_WRITE,		"__GFP_WRITE"},		\
 	{(unsigned long)__GFP_RECLAIM,		"__GFP_RECLAIM"},	\
 	{(unsigned long)__GFP_DIRECT_RECLAIM,	"__GFP_DIRECT_RECLAIM"},\
-	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"},\
-	{(unsigned long)__GFP_OTHER_NODE,	"__GFP_OTHER_NODE"}	\
+	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"}\
 
 #define show_gfp_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
-- 
cgit v1.2.3


From 2d39b3cd34e6d323720d4c61bd714f5ae202c022 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie.iles@oracle.com>
Date: Tue, 10 Jan 2017 16:57:54 -0800
Subject: signal: protect SIGNAL_UNKILLABLE from unintentional clearing.

Since commit 00cd5c37afd5 ("ptrace: permit ptracing of /sbin/init") we
can now trace init processes.  init is initially protected with
SIGNAL_UNKILLABLE which will prevent fatal signals such as SIGSTOP, but
there are a number of paths during tracing where SIGNAL_UNKILLABLE can
be implicitly cleared.

This can result in init becoming stoppable/killable after tracing.  For
example, running:

  while true; do kill -STOP 1; done &
  strace -p 1

and then stopping strace and the kill loop will result in init being
left in state TASK_STOPPED.  Sending SIGCONT to init will resume it, but
init will now respond to future SIGSTOP signals rather than ignoring
them.

Make sure that when setting SIGNAL_STOP_CONTINUED/SIGNAL_STOP_STOPPED
that we don't clear SIGNAL_UNKILLABLE.

Link: http://lkml.kernel.org/r/20170104122017.25047-1-jamie.iles@oracle.com
Signed-off-by: Jamie Iles <jamie.iles@oracle.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4d1905245c7a..ad3ec9ec61f7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -854,6 +854,16 @@ struct signal_struct {
 
 #define SIGNAL_UNKILLABLE	0x00000040 /* for init: ignore fatal signals */
 
+#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \
+			  SIGNAL_STOP_CONTINUED)
+
+static inline void signal_set_stop_flags(struct signal_struct *sig,
+					 unsigned int flags)
+{
+	WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
+	sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
+}
+
 /* If true, all threads except ->group_exit_task have pending SIGKILL */
 static inline int signal_group_exit(const struct signal_struct *sig)
 {
-- 
cgit v1.2.3


From b4536f0c829c8586544c94735c343f9b5070bd01 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 10 Jan 2017 16:58:04 -0800
Subject: mm, memcg: fix the active list aging for lowmem requests when memcg
 is enabled

Nils Holland and Klaus Ethgen have reported unexpected OOM killer
invocations with 32b kernel starting with 4.8 kernels

	kworker/u4:5 invoked oom-killer: gfp_mask=0x2400840(GFP_NOFS|__GFP_NOFAIL), nodemask=0, order=0, oom_score_adj=0
	kworker/u4:5 cpuset=/ mems_allowed=0
	CPU: 1 PID: 2603 Comm: kworker/u4:5 Not tainted 4.9.0-gentoo #2
	[...]
	Mem-Info:
	active_anon:58685 inactive_anon:90 isolated_anon:0
	 active_file:274324 inactive_file:281962 isolated_file:0
	 unevictable:0 dirty:649 writeback:0 unstable:0
	 slab_reclaimable:40662 slab_unreclaimable:17754
	 mapped:7382 shmem:202 pagetables:351 bounce:0
	 free:206736 free_pcp:332 free_cma:0
	Node 0 active_anon:234740kB inactive_anon:360kB active_file:1097296kB inactive_file:1127848kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:29528kB dirty:2596kB writeback:0kB shmem:0kB shmem_thp: 0kB shmem_pmdmapped: 184320kB anon_thp: 808kB writeback_tmp:0kB unstable:0kB pages_scanned:0 all_unreclaimable? no
	DMA free:3952kB min:788kB low:984kB high:1180kB active_anon:0kB inactive_anon:0kB active_file:7316kB inactive_file:0kB unevictable:0kB writepending:96kB present:15992kB managed:15916kB mlocked:0kB slab_reclaimable:3200kB slab_unreclaimable:1408kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
	lowmem_reserve[]: 0 813 3474 3474
	Normal free:41332kB min:41368kB low:51708kB high:62048kB active_anon:0kB inactive_anon:0kB active_file:532748kB inactive_file:44kB unevictable:0kB writepending:24kB present:897016kB managed:836248kB mlocked:0kB slab_reclaimable:159448kB slab_unreclaimable:69608kB kernel_stack:1112kB pagetables:1404kB bounce:0kB free_pcp:528kB local_pcp:340kB free_cma:0kB
	lowmem_reserve[]: 0 0 21292 21292
	HighMem free:781660kB min:512kB low:34356kB high:68200kB active_anon:234740kB inactive_anon:360kB active_file:557232kB inactive_file:1127804kB unevictable:0kB writepending:2592kB present:2725384kB managed:2725384kB mlocked:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:800kB local_pcp:608kB free_cma:0kB

the oom killer is clearly pre-mature because there there is still a lot
of page cache in the zone Normal which should satisfy this lowmem
request.  Further debugging has shown that the reclaim cannot make any
forward progress because the page cache is hidden in the active list
which doesn't get rotated because inactive_list_is_low is not memcg
aware.

The code simply subtracts per-zone highmem counters from the respective
memcg's lru sizes which doesn't make any sense.  We can simply end up
always seeing the resulting active and inactive counts 0 and return
false.  This issue is not limited to 32b kernels but in practice the
effect on systems without CONFIG_HIGHMEM would be much harder to notice
because we do not invoke the OOM killer for allocations requests
targeting < ZONE_NORMAL.

Fix the issue by tracking per zone lru page counts in mem_cgroup_per_node
and subtract per-memcg highmem counts when memcg is enabled.  Introduce
helper lruvec_zone_lru_size which redirects to either zone counters or
mem_cgroup_get_zone_lru_size when appropriate.

We are losing empty LRU but non-zero lru size detection introduced by
ca707239e8a7 ("mm: update_lru_size warn and reset bad lru_size") because
of the inherent zone vs. node discrepancy.

Fixes: f8d1a31163fc ("mm: consider whether to decivate based on eligible zones inactive ratio")
Link: http://lkml.kernel.org/r/20170104100825.3729-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Nils Holland <nholland@tisys.org>
Tested-by: Nils Holland <nholland@tisys.org>
Reported-by: Klaus Ethgen <Klaus@Ethgen.de>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 26 +++++++++++++++++++++++---
 include/linux/mm_inline.h  |  2 +-
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 61d20c17f3b7..254698856b8f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -120,7 +120,7 @@ struct mem_cgroup_reclaim_iter {
  */
 struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
-	unsigned long		lru_size[NR_LRU_LISTS];
+	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
 
@@ -432,7 +432,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
-		int nr_pages);
+		int zid, int nr_pages);
 
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 					   int nid, unsigned int lru_mask);
@@ -441,9 +441,23 @@ static inline
 unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
 	struct mem_cgroup_per_node *mz;
+	unsigned long nr_pages = 0;
+	int zid;
 
 	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	return mz->lru_size[lru];
+	for (zid = 0; zid < MAX_NR_ZONES; zid++)
+		nr_pages += mz->lru_zone_size[zid][lru];
+	return nr_pages;
+}
+
+static inline
+unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
+		enum lru_list lru, int zone_idx)
+{
+	struct mem_cgroup_per_node *mz;
+
+	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+	return mz->lru_zone_size[zone_idx][lru];
 }
 
 void mem_cgroup_handle_over_high(void);
@@ -671,6 +685,12 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
 	return 0;
 }
+static inline
+unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
+		enum lru_list lru, int zone_idx)
+{
+	return 0;
+}
 
 static inline unsigned long
 mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 71613e8a720f..41d376e7116d 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -39,7 +39,7 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
 {
 	__update_lru_size(lruvec, lru, zid, nr_pages);
 #ifdef CONFIG_MEMCG
-	mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
+	mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
 #endif
 }
 
-- 
cgit v1.2.3


From 8c2dd3e4a4bae78093c4a5cee6494877651be3c9 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Tue, 10 Jan 2017 16:58:06 -0800
Subject: mm: rename __alloc_page_frag to page_frag_alloc and __free_page_frag
 to page_frag_free

Patch series "Page fragment updates", v4.

This patch series takes care of a few cleanups for the page fragments
API.

First we do some renames so that things are much more consistent.  First
we move the page_frag_ portion of the name to the front of the functions
names.  Secondly we split out the cache specific functions from the
other page fragment functions by adding the word "cache" to the name.

Finally I added a bit of documentation that will hopefully help to
explain some of this.  I plan to revisit this later as we get things
more ironed out in the near future with the changes planned for the DMA
setup to support eXpress Data Path.

This patch (of 3):

This patch renames the page frag functions to be more consistent with
other APIs.  Specifically we place the name page_frag first in the name
and then have either an alloc or free call name that we append as the
suffix.  This makes it a bit clearer in terms of naming.

In addition we drop the leading double underscores since we are
technically no longer a backing interface and instead the front end that
is called from the networking APIs.

Link: http://lkml.kernel.org/r/20170104023854.13451.67390.stgit@localhost.localdomain
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h    | 6 +++---
 include/linux/skbuff.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 7806a8f80abc..ed77a86fbbb0 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -501,9 +501,9 @@ extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 struct page_frag_cache;
 extern void __page_frag_drain(struct page *page, unsigned int order,
 			      unsigned int count);
-extern void *__alloc_page_frag(struct page_frag_cache *nc,
-			       unsigned int fragsz, gfp_t gfp_mask);
-extern void __free_page_frag(void *addr);
+extern void *page_frag_alloc(struct page_frag_cache *nc,
+			     unsigned int fragsz, gfp_t gfp_mask);
+extern void page_frag_free(void *addr);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b53c0cfd417e..a410715bbef8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2480,7 +2480,7 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 
 static inline void skb_free_frag(void *addr)
 {
-	__free_page_frag(addr);
+	page_frag_free(addr);
 }
 
 void *napi_alloc_frag(unsigned int fragsz);
-- 
cgit v1.2.3


From 2976db8018532b624c4123ae662fbc0814877abf Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Tue, 10 Jan 2017 16:58:09 -0800
Subject: mm: rename __page_frag functions to __page_frag_cache, drop order
 from drain

This patch does two things.

First it goes through and renames the __page_frag prefixed functions to
__page_frag_cache so that we can be clear that we are draining or
refilling the cache, not the frags themselves.

Second we drop the order parameter from __page_frag_cache_drain since we
don't actually need to pass it since all fragments are either order 0 or
must be a compound page.

Link: http://lkml.kernel.org/r/20170104023954.13451.5678.stgit@localhost.localdomain
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index ed77a86fbbb0..0fe0b6295ab5 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -499,8 +499,7 @@ extern void free_hot_cold_page(struct page *page, bool cold);
 extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 
 struct page_frag_cache;
-extern void __page_frag_drain(struct page *page, unsigned int order,
-			      unsigned int count);
+extern void __page_frag_cache_drain(struct page *page, unsigned int count);
 extern void *page_frag_alloc(struct page_frag_cache *nc,
 			     unsigned int fragsz, gfp_t gfp_mask);
 extern void page_frag_free(void *addr);
-- 
cgit v1.2.3


From f05714293a591038304ddae7cb0dd747bb3786cc Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 10 Jan 2017 16:58:15 -0800
Subject: mm: support anonymous stable page

During developemnt for zram-swap asynchronous writeback, I found strange
corruption of compressed page, resulting in:

  Modules linked in: zram(E)
  CPU: 3 PID: 1520 Comm: zramd-1 Tainted: G            E   4.8.0-mm1-00320-ge0d4894c9c38-dirty #3274
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
  task: ffff88007620b840 task.stack: ffff880078090000
  RIP: set_freeobj.part.43+0x1c/0x1f
  RSP: 0018:ffff880078093ca8  EFLAGS: 00010246
  RAX: 0000000000000018 RBX: ffff880076798d88 RCX: ffffffff81c408c8
  RDX: 0000000000000018 RSI: 0000000000000000 RDI: 0000000000000246
  RBP: ffff880078093cb0 R08: 0000000000000000 R09: 0000000000000000
  R10: ffff88005bc43030 R11: 0000000000001df3 R12: ffff880076798d88
  R13: 000000000005bc43 R14: ffff88007819d1b8 R15: 0000000000000001
  FS:  0000000000000000(0000) GS:ffff88007e380000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fc934048f20 CR3: 0000000077b01000 CR4: 00000000000406e0
  Call Trace:
    obj_malloc+0x22b/0x260
    zs_malloc+0x1e4/0x580
    zram_bvec_rw+0x4cd/0x830 [zram]
    page_requests_rw+0x9c/0x130 [zram]
    zram_thread+0xe6/0x173 [zram]
    kthread+0xca/0xe0
    ret_from_fork+0x25/0x30

With investigation, it reveals currently stable page doesn't support
anonymous page.  IOW, reuse_swap_page can reuse the page without waiting
writeback completion so it can overwrite page zram is compressing.

Unfortunately, zram has used per-cpu stream feature from v4.7.
It aims for increasing cache hit ratio of scratch buffer for
compressing. Downside of that approach is that zram should ask
memory space for compressed page in per-cpu context which requires
stricted gfp flag which could be failed. If so, it retries to
allocate memory space out of per-cpu context so it could get memory
this time and compress the data again, copies it to the memory space.

In this scenario, zram assumes the data should never be changed
but it is not true unless stable page supports. So, If the data is
changed under us, zram can make buffer overrun because second
compression size could be bigger than one we got in previous trial
and blindly, copy bigger size object to smaller buffer which is
buffer overrun. The overrun breaks zsmalloc free object chaining
so system goes crash like above.

I think below is same problem.
https://bugzilla.suse.com/show_bug.cgi?id=997574

Unfortunately, reuse_swap_page should be atomic so that we cannot wait on
writeback in there so the approach in this patch is simply return false if
we found it needs stable page.  Although it increases memory footprint
temporarily, it happens rarely and it should be reclaimed easily althoug
it happened.  Also, It would be better than waiting of IO completion,
which is critial path for application latency.

Fixes: da9556a2367c ("zram: user per-cpu compression streams")
Link: http://lkml.kernel.org/r/20161120233015.GA14113@bbox
Link: http://lkml.kernel.org/r/1482366980-3782-2-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Hyeoncheol Lee <cheol.lee@lge.com>
Cc: <yjay.kim@lge.com>
Cc: Sangseok Lee <sangseok.lee@lge.com>
Cc: <stable@vger.kernel.org> [4.7+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 09f4be179ff3..7f47b7098b1b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -150,8 +150,9 @@ enum {
 	SWP_FILE	= (1 << 7),	/* set after swap_activate success */
 	SWP_AREA_DISCARD = (1 << 8),	/* single-time swap area discards */
 	SWP_PAGE_DISCARD = (1 << 9),	/* freed swap page-cluster discards */
+	SWP_STABLE_WRITES = (1 << 10),	/* no overwrite PG_writeback pages */
 					/* add others here before... */
-	SWP_SCANNING	= (1 << 10),	/* refcount in scan_swap_map */
+	SWP_SCANNING	= (1 << 11),	/* refcount in scan_swap_map */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
-- 
cgit v1.2.3


From 575b1967e10a1f3038266244d2c7a3ca6b99fed8 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 10 Jan 2017 16:58:30 -0800
Subject: timerfd: export defines to userspace

Since userspace is expected to call timerfd syscalls directly with these
flags/ioctls, make sure we export them so they don't have to duplicate
the values themselves.

Link: http://lkml.kernel.org/r/20161219064052.7196-1-vapier@gentoo.org
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/timerfd.h      | 20 +-------------------
 include/uapi/linux/Kbuild    |  1 +
 include/uapi/linux/timerfd.h | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 19 deletions(-)
 create mode 100644 include/uapi/linux/timerfd.h

(limited to 'include')

diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index bd36ce431e32..bab0b1ad0613 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -8,23 +8,7 @@
 #ifndef _LINUX_TIMERFD_H
 #define _LINUX_TIMERFD_H
 
-/* For O_CLOEXEC and O_NONBLOCK */
-#include <linux/fcntl.h>
-
-/* For _IO helpers */
-#include <linux/ioctl.h>
-
-/*
- * CAREFUL: Check include/asm-generic/fcntl.h when defining
- * new flags, since they might collide with O_* ones. We want
- * to re-use O_* flags that couldn't possibly have a meaning
- * from eventfd, in order to leave a free define-space for
- * shared O_* flags.
- */
-#define TFD_TIMER_ABSTIME (1 << 0)
-#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
-#define TFD_CLOEXEC O_CLOEXEC
-#define TFD_NONBLOCK O_NONBLOCK
+#include <uapi/linux/timerfd.h>
 
 #define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK)
 /* Flags for timerfd_create.  */
@@ -32,6 +16,4 @@
 /* Flags for timerfd_settime.  */
 #define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
 
-#define TFD_IOC_SET_TICKS	_IOW('T', 0, u64)
-
 #endif /* _LINUX_TIMERFD_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index a8b93e685239..f330ba4547cf 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -414,6 +414,7 @@ header-y += telephony.h
 header-y += termios.h
 header-y += thermal.h
 header-y += time.h
+header-y += timerfd.h
 header-y += times.h
 header-y += timex.h
 header-y += tiocl.h
diff --git a/include/uapi/linux/timerfd.h b/include/uapi/linux/timerfd.h
new file mode 100644
index 000000000000..6fcfaa8da173
--- /dev/null
+++ b/include/uapi/linux/timerfd.h
@@ -0,0 +1,36 @@
+/*
+ *  include/linux/timerfd.h
+ *
+ *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#ifndef _UAPI_LINUX_TIMERFD_H
+#define _UAPI_LINUX_TIMERFD_H
+
+#include <linux/types.h>
+
+/* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/fcntl.h>
+
+/* For _IO helpers */
+#include <linux/ioctl.h>
+
+/*
+ * CAREFUL: Check include/asm-generic/fcntl.h when defining
+ * new flags, since they might collide with O_* ones. We want
+ * to re-use O_* flags that couldn't possibly have a meaning
+ * from eventfd, in order to leave a free define-space for
+ * shared O_* flags.
+ *
+ * Also make sure to update the masks in include/linux/timerfd.h
+ * when adding new flags.
+ */
+#define TFD_TIMER_ABSTIME (1 << 0)
+#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
+#define TFD_CLOEXEC O_CLOEXEC
+#define TFD_NONBLOCK O_NONBLOCK
+
+#define TFD_IOC_SET_TICKS	_IOW('T', 0, __u64)
+
+#endif /* _UAPI_LINUX_TIMERFD_H */
-- 
cgit v1.2.3


From 06f7c88c107fb469f4f1344142e80df5175c6836 Mon Sep 17 00:00:00 2001
From: Beni Lev <beni.lev@intel.com>
Date: Tue, 19 Jul 2016 19:28:56 +0300
Subject: cfg80211: consider VHT opmode on station update

Currently, this attribute is only fetched on station addition, but
not on station change. Since this info is only present in the assoc
request, with full station state support in the driver it cannot be
present when the station is added.

Thus, add support for changing the VHT opmode on station update if
done before (or while) the station is marked as associated. After
this, ignore it, since it used to be ignored.

Signed-off-by: Beni Lev <beni.lev@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6b76e3b0c18e..bea982af9cfb 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1772,7 +1772,9 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_OPMODE_NOTIF: Operating mode field from Operating Mode
  *	Notification Element based on association request when used with
- *	%NL80211_CMD_NEW_STATION; u8 attribute.
+ *	%NL80211_CMD_NEW_STATION or %NL80211_CMD_SET_STATION (only when
+ *	%NL80211_FEATURE_FULL_AP_CLIENT_STATE is supported, or with TDLS);
+ *	u8 attribute.
  *
  * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if
  *	%NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet)
-- 
cgit v1.2.3


From b6416e61012429e0277bd15a229222fd17afc1c1 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 16 Dec 2016 14:30:35 -0800
Subject: jump_labels: API for flushing deferred jump label updates

Modules that use static_key_deferred need a way to synchronize with
any delayed work that is still pending when the module is unloaded.
Introduce static_key_deferred_flush() which flushes any pending
jump label updates.

Signed-off-by: David Matlack <dmatlack@google.com>
Cc: stable@vger.kernel.org
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/jump_label_ratelimit.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
index 089f70f83e97..23da3af459fe 100644
--- a/include/linux/jump_label_ratelimit.h
+++ b/include/linux/jump_label_ratelimit.h
@@ -14,6 +14,7 @@ struct static_key_deferred {
 
 #ifdef HAVE_JUMP_LABEL
 extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
+extern void static_key_deferred_flush(struct static_key_deferred *key);
 extern void
 jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
 
@@ -26,6 +27,10 @@ static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
 	STATIC_KEY_CHECK_USE();
 	static_key_slow_dec(&key->key);
 }
+static inline void static_key_deferred_flush(struct static_key_deferred *key)
+{
+	STATIC_KEY_CHECK_USE();
+}
 static inline void
 jump_label_rate_limit(struct static_key_deferred *key,
 		unsigned long rl)
-- 
cgit v1.2.3


From f99e86485cc32cd16e5cc97f9bb0474f28608d84 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 12 Jan 2017 07:58:32 -0700
Subject: block: Rename blk_queue_zone_size and bdev_zone_size

All block device data fields and functions returning a number of 512B
sectors are by convention named xxx_sectors while names in the form
xxx_size are generally used for a number of bytes. The blk_queue_zone_size
and bdev_zone_size functions were not following this convention so rename
them.

No functional change is introduced by this patch.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>

Collapsed the two patches, they were nonsensically split and broke
bisection.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 83695641bd5e..ff3d774f2751 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -739,7 +739,7 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
 	}
 }
 
-static inline unsigned int blk_queue_zone_size(struct request_queue *q)
+static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 {
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
@@ -1536,12 +1536,12 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
 	return false;
 }
 
-static inline unsigned int bdev_zone_size(struct block_device *bdev)
+static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 
 	if (q)
-		return blk_queue_zone_size(q);
+		return blk_queue_zone_sectors(q);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 331c34255293cd02d395b7097008b509ba89e60e Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 4 Jan 2017 20:57:22 -0800
Subject: i2c: do not enable fall back to Host Notify by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Falling back unconditionally to HostNotify as primary client's interrupt
breaks some drivers which alter their functionality depending on whether
interrupt is present or not, so let's introduce a board flag telling I2C
core explicitly if we want wired interrupt or HostNotify-based one:
I2C_CLIENT_HOST_NOTIFY.

For DT-based systems we introduce "host-notify" property that we convert
to I2C_CLIENT_HOST_NOTIFY board flag.

Tested-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index b2109c522dec..4b45ec46161f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -665,6 +665,7 @@ i2c_unlock_adapter(struct i2c_adapter *adapter)
 #define I2C_CLIENT_TEN		0x10	/* we have a ten bit chip address */
 					/* Must equal I2C_M_TEN below */
 #define I2C_CLIENT_SLAVE	0x20	/* we are the slave */
+#define I2C_CLIENT_HOST_NOTIFY	0x40	/* We want to use I2C host notify */
 #define I2C_CLIENT_WAKE		0x80	/* for board_info; true iff can wake */
 #define I2C_CLIENT_SCCB		0x9000	/* Use Omnivision SCCB protocol */
 					/* Must match I2C_M_STOP|IGNORE_NAK */
-- 
cgit v1.2.3


From 3846fd9b86001bea171943cc3bb9222cb6da6b42 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 11 Jan 2017 10:01:17 +0100
Subject: drm/probe-helpers: Drop locking from poll_enable

It was only needed to protect the connector_list walking, see

commit 8c4ccc4ab6f64e859d4ff8d7c02c2ed2e956e07f
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Thu Jul 9 23:44:26 2015 +0200

    drm/probe-helper: Grab mode_config.mutex in poll_init/enable

Unfortunately the commit message of that patch fails to mention that
the new locking check was for the connector_list.

But that requirement disappeared in

commit c36a3254f7857f1ad9badbe3578ccc92be541a8e
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Thu Dec 15 16:58:43 2016 +0100

    drm: Convert all helpers to drm_connector_list_iter

and so we can drop this again.

This fixes a locking inversion on nouveau, where the rpm code needs to
re-enable. But in other places the rpm_get() calls are nested within
the big modeset locks.

While at it, also improve the kerneldoc for these two functions a
notch.

v2: Update the kerneldoc even more to explain that these functions
can't be called concurrently, or bad things happen (Chris).

Cc: Dave Airlie <airlied@gmail.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Lyude <lyude@redhat.com>
Reviewed-by: Lyude <lyude@redhat.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170111090117.5134-1-daniel.vetter@ffwll.ch
---
 include/drm/drm_crtc_helper.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index 982c299e435a..d026f5017c33 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -73,6 +73,5 @@ extern void drm_kms_helper_hotplug_event(struct drm_device *dev);
 
 extern void drm_kms_helper_poll_disable(struct drm_device *dev);
 extern void drm_kms_helper_poll_enable(struct drm_device *dev);
-extern void drm_kms_helper_poll_enable_locked(struct drm_device *dev);
 
 #endif
-- 
cgit v1.2.3


From 546125d1614264d26080817d0c8cddb9b25081fa Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Thu, 5 Jan 2017 16:34:51 -0500
Subject: sunrpc: don't call sleeping functions from the notifier block
 callbacks

The inet6addr_chain is an atomic notifier chain, so we can't call
anything that might sleep (like lock_sock)... instead of closing the
socket from svc_age_temp_xprts_now (which is called by the notifier
function), just have the rpc service threads do it instead.

Cc: stable@vger.kernel.org
Fixes: c3d4879e01be "sunrpc: Add a function to close..."
Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index e5d193440374..7440290f64ac 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -66,6 +66,7 @@ struct svc_xprt {
 #define XPT_LISTENER	10		/* listening endpoint */
 #define XPT_CACHE_AUTH	11		/* cache auth info */
 #define XPT_LOCAL	12		/* connection from loopback interface */
+#define XPT_KILL_TEMP   13		/* call xpo_kill_temp_xprt before closing */
 
 	struct svc_serv		*xpt_server;	/* service for transport */
 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
-- 
cgit v1.2.3


From 488f94d7212b00a2ec72fb886b155f1b04c5aa98 Mon Sep 17 00:00:00 2001
From: Jintack Lim <jintack@cs.columbia.edu>
Date: Thu, 1 Dec 2016 14:32:05 -0500
Subject: KVM: arm64: Access CNTHCTL_EL2 bit fields correctly on VHE systems

Current KVM world switch code is unintentionally setting wrong bits to
CNTHCTL_EL2 when E2H == 1, which may allow guest OS to access physical
timer.  Bit positions of CNTHCTL_EL2 are changing depending on
HCR_EL2.E2H bit.  EL1PCEN and EL1PCTEN are 1st and 0th bits when E2H is
not set, but they are 11th and 10th bits respectively when E2H is set.

In fact, on VHE we only need to set those bits once, not for every world
switch. This is because the host kernel runs in EL2 with HCR_EL2.TGE ==
1, which makes those bits have no effect for the host kernel execution.
So we just set those bits once for guests, and that's it.

Signed-off-by: Jintack Lim <jintack@cs.columbia.edu>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_arch_timer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index b717ed9d2b75..5c970ce67949 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -76,4 +76,5 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
 
+void kvm_timer_init_vhe(void);
 #endif
-- 
cgit v1.2.3


From 003c941057eaa868ca6fedd29a274c863167230d Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Thu, 12 Jan 2017 14:24:58 -0800
Subject: tcp: fix tcp_fastopen unaligned access complaints on sparc

Fix up a data alignment issue on sparc by swapping the order
of the cookie byte array field with the length field in
struct tcp_fastopen_cookie, and making it a proper union
to clean up the typecasting.

This addresses log complaints like these:
    log_unaligned: 113 callbacks suppressed
    Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360
    Kernel unaligned access at TPC[9764ac] tcp_try_fastopen+0x2ec/0x360
    Kernel unaligned access at TPC[9764c8] tcp_try_fastopen+0x308/0x360
    Kernel unaligned access at TPC[9764e4] tcp_try_fastopen+0x324/0x360
    Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fc5848dad7a4..c93f4b3a59cb 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -62,8 +62,13 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 
 /* TCP Fast Open Cookie as stored in memory */
 struct tcp_fastopen_cookie {
+	union {
+		u8	val[TCP_FASTOPEN_COOKIE_MAX];
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr addr;
+#endif
+	};
 	s8	len;
-	u8	val[TCP_FASTOPEN_COOKIE_MAX];
 	bool	exp;	/* In RFC6994 experimental option format */
 };
 
-- 
cgit v1.2.3


From 2e3258ecfaebace1ceffaa14e0ea94775d54f46f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Jan 2017 12:29:10 +0100
Subject: block: add blk_rq_payload_bytes

Add a helper to calculate the actual data transfer size for special
payload requests.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ff3d774f2751..1ca8e8fd1078 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1000,6 +1000,19 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> 9;
 }
 
+/*
+ * Some commands like WRITE SAME have a payload or data transfer size which
+ * is different from the size of the request.  Any driver that supports such
+ * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
+ * calculate the data transfer size.
+ */
+static inline unsigned int blk_rq_payload_bytes(struct request *rq)
+{
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		return rq->special_vec.bv_len;
+	return blk_rq_bytes(rq);
+}
+
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 						     int op)
 {
-- 
cgit v1.2.3


From 475113d937adfd150eb82b5e2c5507125a68e7af Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 28 Dec 2016 14:31:03 +0100
Subject: perf/x86/intel: Account interrupts for PEBS errors

It's possible to set up PEBS events to get only errors and not
any data, like on SNB-X (model 45) and IVB-EP (model 62)
via 2 perf commands running simultaneously:

    taskset -c 1 ./perf record -c 4 -e branches:pp -j any -C 10

This leads to a soft lock up, because the error path of the
intel_pmu_drain_pebs_nhm() does not account event->hw.interrupt
for error PEBS interrupts, so in case you're getting ONLY
errors you don't have a way to stop the event when it's over
the max_samples_per_tick limit:

  NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [perf_fuzzer:5816]
  ...
  RIP: 0010:[<ffffffff81159232>]  [<ffffffff81159232>] smp_call_function_single+0xe2/0x140
  ...
  Call Trace:
   ? trace_hardirqs_on_caller+0xf5/0x1b0
   ? perf_cgroup_attach+0x70/0x70
   perf_install_in_context+0x199/0x1b0
   ? ctx_resched+0x90/0x90
   SYSC_perf_event_open+0x641/0xf90
   SyS_perf_event_open+0x9/0x10
   do_syscall_64+0x6c/0x1f0
   entry_SYSCALL64_slow_path+0x25/0x25

Add perf_event_account_interrupt() which does the interrupt
and frequency checks and call it from intel_pmu_drain_pebs_nhm()'s
error path.

We keep the pending_kill and pending_wakeup logic only in the
__perf_event_overflow() path, because they make sense only if
there's any data to deliver.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1482931866-6018-2-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..78ed8105e64d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
-- 
cgit v1.2.3


From 0100a3e67a9cef64d72cd3a1da86f3ddbee50363 Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Mon, 12 Dec 2016 18:42:28 -0500
Subject: efi/x86: Prune invalid memory map entries and fix boot regression

Some machines, such as the Lenovo ThinkPad W541 with firmware GNET80WW
(2.28), include memory map entries with phys_addr=0x0 and num_pages=0.

These machines fail to boot after the following commit,

  commit 8e80632fb23f ("efi/esrt: Use efi_mem_reserve() and avoid a kmalloc()")

Fix this by removing such bogus entries from the memory map.

Furthermore, currently the log output for this case (with efi=debug)
looks like:

 [    0.000000] efi: mem45: [Reserved           |   |  |  |  |  |  |  |  |  |  |  |  ] range=[0x0000000000000000-0xffffffffffffffff] (0MB)

This is clearly wrong, and also not as informative as it could be.  This
patch changes it so that if we find obviously invalid memory map
entries, we print an error and skip those entries.  It also detects the
display of the address range calculation overflow, so the new output is:

 [    0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries:
 [    0.000000] efi: mem45: [Reserved           |   |  |  |  |  |  |  |   |  |  |  |  ] range=[0x0000000000000000-0x0000000000000000] (invalid)

It also detects memory map sizes that would overflow the physical
address, for example phys_addr=0xfffffffffffff000 and
num_pages=0x0200000000000001, and prints:

 [    0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries:
 [    0.000000] efi: mem45: [Reserved           |   |  |  |  |  |  |  |   |  |  |  |  ] range=[phys_addr=0xfffffffffffff000-0x20ffffffffffffffff] (invalid)

It then removes these entries from the memory map.

Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[ardb: refactor for clarity with no functional changes, avoid PAGE_SHIFT]
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
[Matt: Include bugzilla info in commit log]
Cc: <stable@vger.kernel.org> # v4.9+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=191121
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 0c5420208c40..5b1af30ece55 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -103,6 +103,7 @@ typedef	struct {
 
 #define EFI_PAGE_SHIFT		12
 #define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
+#define EFI_PAGES_MAX		(U64_MAX >> EFI_PAGE_SHIFT)
 
 typedef struct {
 	u32 type;
-- 
cgit v1.2.3


From 4d22c75d4c7b5c5f4bd31054f09103ee490878fd Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <dave.kleikamp@oracle.com>
Date: Wed, 11 Jan 2017 13:25:00 -0600
Subject: coredump: Ensure proper size of sparse core files

If the last section of a core file ends with an unmapped or zero page,
the size of the file does not correspond with the last dump_skip() call.
gdb complains that the file is truncated and can be confusing to users.

After all of the vma sections are written, make sure that the file size
is no smaller than the current file position.

This problem can be demonstrated with gdb's bigcore testcase on the
sparc architecture.

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/coredump.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index d016a121a8c4..28ffa94aed6b 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -14,6 +14,7 @@ struct coredump_params;
 extern int dump_skip(struct coredump_params *cprm, size_t nr);
 extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 extern int dump_align(struct coredump_params *cprm, int align);
+extern void dump_truncate(struct coredump_params *cprm);
 #ifdef CONFIG_COREDUMP
 extern void do_coredump(const siginfo_t *siginfo);
 #else
-- 
cgit v1.2.3


From 52d7e48b86fc108e45a656d8e53e4237993c481d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 10 Jan 2017 02:28:26 -0800
Subject: rcu: Narrow early boot window of illegal synchronous grace periods

The current preemptible RCU implementation goes through three phases
during bootup.  In the first phase, there is only one CPU that is running
with preemption disabled, so that a no-op is a synchronous grace period.
In the second mid-boot phase, the scheduler is running, but RCU has
not yet gotten its kthreads spawned (and, for expedited grace periods,
workqueues are not yet running.  During this time, any attempt to do
a synchronous grace period will hang the system (or complain bitterly,
depending).  In the third and final phase, RCU is fully operational and
everything works normally.

This has been OK for some time, but there has recently been some
synchronous grace periods showing up during the second mid-boot phase.
This code worked "by accident" for awhile, but started failing as soon
as expedited RCU grace periods switched over to workqueues in commit
8b355e3bc140 ("rcu: Drive expedited grace periods from workqueue").
Note that the code was buggy even before this commit, as it was subject
to failure on real-time systems that forced all expedited grace periods
to run as normal grace periods (for example, using the rcu_normal ksysfs
parameter).  The callchain from the failure case is as follows:

early_amd_iommu_init()
|-> acpi_put_table(ivrs_base);
|-> acpi_tb_put_table(table_desc);
|-> acpi_tb_invalidate_table(table_desc);
|-> acpi_tb_release_table(...)
|-> acpi_os_unmap_memory
|-> acpi_os_unmap_iomem
|-> acpi_os_map_cleanup
|-> synchronize_rcu_expedited

The kernel showing this callchain was built with CONFIG_PREEMPT_RCU=y,
which caused the code to try using workqueues before they were
initialized, which did not go well.

This commit therefore reworks RCU to permit synchronous grace periods
to proceed during this mid-boot phase.  This commit is therefore a
fix to a regression introduced in v4.9, and is therefore being put
forward post-merge-window in v4.10.

This commit sets a flag from the existing rcu_scheduler_starting()
function which causes all synchronous grace periods to take the expedited
path.  The expedited path now checks this flag, using the requesting task
to drive the expedited grace period forward during the mid-boot phase.
Finally, this flag is updated by a core_initcall() function named
rcu_exp_runtime_mode(), which causes the runtime codepaths to be used.

Note that this arrangement assumes that tasks are not sent POSIX signals
(or anything similar) from the time that the first task is spawned
through core_initcall() time.

Fixes: 8b355e3bc140 ("rcu: Drive expedited grace periods from workqueue")
Reported-by: "Zheng, Lv" <lv.zheng@intel.com>
Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Stan Kain <stan.kain@gmail.com>
Tested-by: Ivan <waffolz@hotmail.com>
Tested-by: Emanuel Castelo <emanuel.castelo@gmail.com>
Tested-by: Bruno Pesavento <bpesavento@infinito.it>
Tested-by: Borislav Petkov <bp@suse.de>
Tested-by: Frederic Bezies <fredbezies@gmail.com>
Cc: <stable@vger.kernel.org> # 4.9.0-
---
 include/linux/rcupdate.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 321f9ed552a9..01f71e1d2e94 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -444,6 +444,10 @@ bool __rcu_is_watching(void);
 #error "Unknown RCU implementation specified to kernel configuration"
 #endif
 
+#define RCU_SCHEDULER_INACTIVE	0
+#define RCU_SCHEDULER_INIT	1
+#define RCU_SCHEDULER_RUNNING	2
+
 /*
  * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
  * initialization and destruction of rcu_head on the stack. rcu_head structures
-- 
cgit v1.2.3


From 4205e4786d0b9fc3b4fec7b1910cf645a0468307 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 10 Jan 2017 14:01:05 +0100
Subject: cpu/hotplug: Provide dynamic range for prepare stage

Mathieu reported that the LTTNG modules are broken as of 4.10-rc1 due to
the removal of the cpu hotplug notifiers.

Usually I don't care much about out of tree modules, but LTTNG is widely
used in distros. There are two ways to solve that:

1) Reserve a hotplug state for LTTNG

2) Add a dynamic range for the prepare states.

While #1 is the simplest solution, #2 is the proper one as we can convert
in tree users, which do not care about ordering, to the dynamic range as
well.

Add a dynamic range which allows LTTNG to request states in the prepare
stage.

Reported-and-tested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Sewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701101353010.3401@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 20bfefbe7594..d936a0021839 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -74,6 +74,8 @@ enum cpuhp_state {
 	CPUHP_ZCOMP_PREPARE,
 	CPUHP_TIMERS_DEAD,
 	CPUHP_MIPS_SOC_PREPARE,
+	CPUHP_BP_PREPARE_DYN,
+	CPUHP_BP_PREPARE_DYN_END		= CPUHP_BP_PREPARE_DYN + 20,
 	CPUHP_BRINGUP_CPU,
 	CPUHP_AP_IDLE_DEAD,
 	CPUHP_AP_OFFLINE,
-- 
cgit v1.2.3


From d7f5762c5e532dfe8247ce1bc60d97af27ff8d00 Mon Sep 17 00:00:00 2001
From: Alexander Alemayhu <alexander@alemayhu.com>
Date: Wed, 4 Jan 2017 23:16:41 +0100
Subject: netfilter: nf_tables: fix spelling mistakes

o s/numerice/numeric
o s/opertaor/operator

Signed-off-by: Alexander Alemayhu <alexander@alemayhu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 881d49e94569..e3f27e09eb2b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -235,7 +235,7 @@ enum nft_rule_compat_flags {
 /**
  * enum nft_rule_compat_attributes - nf_tables rule compat attributes
  *
- * @NFTA_RULE_COMPAT_PROTO: numerice value of handled protocol (NLA_U32)
+ * @NFTA_RULE_COMPAT_PROTO: numeric value of handled protocol (NLA_U32)
  * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32)
  */
 enum nft_rule_compat_attributes {
@@ -499,7 +499,7 @@ enum nft_bitwise_attributes {
  * enum nft_byteorder_ops - nf_tables byteorder operators
  *
  * @NFT_BYTEORDER_NTOH: network to host operator
- * @NFT_BYTEORDER_HTON: host to network opertaor
+ * @NFT_BYTEORDER_HTON: host to network operator
  */
 enum nft_byteorder_ops {
 	NFT_BYTEORDER_NTOH,
-- 
cgit v1.2.3


From 6443ebc3fdd6f3c766d9442c18be274b3d736050 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sat, 7 Jan 2017 20:49:18 +0800
Subject: netfilter: rpfilter: fix incorrect loopback packet judgment

Currently, we check the existing rtable in PREROUTING hook, if RTCF_LOCAL
is set, we assume that the packet is loopback.

But this assumption is incorrect, for example, a packet encapsulated
in ipsec transport mode was received and routed to local, after
decapsulation, it would be delivered to local again, and the rtable
was not dropped, so RTCF_LOCAL check would trigger. But actually, the
packet was not loopback.

So for these normal loopback packets, we can check whether the in device
is IFF_LOOPBACK or not. For these locally generated broadcast/multicast,
we can check whether the skb->pkt_type is PACKET_LOOPBACK or not.

Finally, there's a subtle difference between nft fib expr and xtables
rpfilter extension, user can add the following nft rule to do strict
rpfilter check:
  # nft add rule x y meta iif eth0 fib saddr . iif oif != eth0 drop

So when the packet is loopback, it's better to store the in device
instead of the LOOPBACK_IFINDEX, otherwise, after adding the above
nft rule, locally generated broad/multicast packets will be dropped
incorrectly.

Fixes: f83a7ea2075c ("netfilter: xt_rpfilter: skip locally generated broadcast/multicast, too")
Fixes: f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nft_fib.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index cbedda077db2..5ceb2205e4e3 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -9,6 +9,12 @@ struct nft_fib {
 
 extern const struct nla_policy nft_fib_policy[];
 
+static inline bool
+nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in)
+{
+	return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
+}
+
 int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr);
 int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		 const struct nlattr * const tb[]);
-- 
cgit v1.2.3


From f1f7714ea51c56b7163fb1a5acf39c6a204dd758 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 13 Jan 2017 23:38:15 +0100
Subject: bpf: rework prog_digest into prog_tag

Commit 7bd509e311f4 ("bpf: add prog_digest and expose it via
fdinfo/netlink") was recently discussed, partially due to
admittedly suboptimal name of "prog_digest" in combination
with sha1 hash usage, thus inevitably and rightfully concerns
about its security in terms of collision resistance were
raised with regards to use-cases.

The intended use cases are for debugging resp. introspection
only for providing a stable "tag" over the instruction sequence
that both kernel and user space can calculate independently.
It's not usable at all for making a security relevant decision.
So collisions where two different instruction sequences generate
the same tag can happen, but ideally at a rather low rate. The
"tag" will be dumped in hex and is short enough to introspect
in tracepoints or kallsyms output along with other data such
as stack trace, etc. Thus, this patch performs a rename into
prog_tag and truncates the tag to a short output (64 bits) to
make it obvious it's not collision-free.

Should in future a hash or facility be needed with a security
relevant focus, then we can think about requirements, constraints,
etc that would fit to that situation. For now, rework the exposed
parts for the current use cases as long as nothing has been
released yet. Tested on x86_64 and s390x.

Fixes: 7bd509e311f4 ("bpf: add prog_digest and expose it via fdinfo/netlink")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h                | 2 +-
 include/linux/filter.h             | 6 ++++--
 include/uapi/linux/pkt_cls.h       | 2 +-
 include/uapi/linux/tc_act/tc_bpf.h | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f74ae68086dc..05cf951df3fe 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -216,7 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
-int bpf_prog_calc_digest(struct bpf_prog *fp);
+int bpf_prog_calc_tag(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a0934e6c9bab..e4eb2546339a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -57,6 +57,8 @@ struct bpf_prog_aux;
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
+#define BPF_TAG_SIZE	8
+
 /* Helper macros for filter block array initializers. */
 
 /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
@@ -408,7 +410,7 @@ struct bpf_prog {
 	kmemcheck_bitfield_end(meta);
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	u32			len;		/* Number of filter blocks */
-	u32			digest[SHA_DIGEST_WORDS]; /* Program digest */
+	u8			tag[BPF_TAG_SIZE];
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 	unsigned int		(*bpf_func)(const void *ctx,
@@ -519,7 +521,7 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
 	return prog->len * sizeof(struct bpf_insn);
 }
 
-static inline u32 bpf_prog_digest_scratch_size(const struct bpf_prog *prog)
+static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog)
 {
 	return round_up(bpf_prog_insn_size(prog) +
 			sizeof(__be64) + 1, SHA_MESSAGE_BYTES);
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index cb4bcdc58543..a4dcd88ec271 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -397,7 +397,7 @@ enum {
 	TCA_BPF_NAME,
 	TCA_BPF_FLAGS,
 	TCA_BPF_FLAGS_GEN,
-	TCA_BPF_DIGEST,
+	TCA_BPF_TAG,
 	__TCA_BPF_MAX,
 };
 
diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
index a6b88a6f7f71..975b50dc8d1d 100644
--- a/include/uapi/linux/tc_act/tc_bpf.h
+++ b/include/uapi/linux/tc_act/tc_bpf.h
@@ -27,7 +27,7 @@ enum {
 	TCA_ACT_BPF_FD,
 	TCA_ACT_BPF_NAME,
 	TCA_ACT_BPF_PAD,
-	TCA_ACT_BPF_DIGEST,
+	TCA_ACT_BPF_TAG,
 	__TCA_ACT_BPF_MAX,
 };
 #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
-- 
cgit v1.2.3


From 67d35e70af9cabb663c827e03bc5c1e89b43db72 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Fri, 13 Jan 2017 11:40:01 +0800
Subject: scsi: libfc: Fix variable name in fc_set_wwpn

The parameter name should be wwpn instead of wwnn.

Signed-off-by: Fam Zheng <famz@redhat.com>
Acked-by: Johannes Thumshirn <jth@kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/libfc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 96dd0b3f70d7..da5033dd8cbc 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -809,11 +809,11 @@ static inline void fc_set_wwnn(struct fc_lport *lport, u64 wwnn)
 /**
  * fc_set_wwpn() - Set the World Wide Port Name of a local port
  * @lport: The local port whose WWPN is to be set
- * @wwnn:  The new WWPN
+ * @wwpn:  The new WWPN
  */
-static inline void fc_set_wwpn(struct fc_lport *lport, u64 wwnn)
+static inline void fc_set_wwpn(struct fc_lport *lport, u64 wwpn)
 {
-	lport->wwpn = wwnn;
+	lport->wwpn = wwpn;
 }
 
 /**
-- 
cgit v1.2.3


From 7e9081c5aac73b8a0bc22e0b3e7a12c3e9cf5256 Mon Sep 17 00:00:00 2001
From: Gustavo Padovan <gustavo.padovan@collabora.com>
Date: Fri, 13 Jan 2017 12:22:09 -0200
Subject: drm/fence: fix memory overwrite when setting out_fence fd

Currently if the userspace declares a int variable to store the out_fence
fd and pass it to OUT_FENCE_PTR the kernel will overwrite the 32 bits
above the int variable on 64 bits systems.

Fix this by making the internal storage of out_fence in the kernel a s32
pointer.

Reported-by: Chad Versace <chadversary@chromium.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Fixes: beaf5af48034 ("drm/fence: add out-fences support")
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Rafael Antognolli <rafael.antognolli@intel.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-and-Tested-by: Chad Versace <chadversary@chromium.org>
Link: http://patchwork.freedesktop.org/patch/msgid/1484317329-9293-1-git-send-email-gustavo@padovan.org
---
 include/drm/drm_atomic.h      | 2 +-
 include/drm/drm_mode_config.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index d6d241f63b9f..56814e8ae7ea 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -144,7 +144,7 @@ struct __drm_crtcs_state {
 	struct drm_crtc *ptr;
 	struct drm_crtc_state *state;
 	struct drm_crtc_commit *commit;
-	s64 __user *out_fence_ptr;
+	s32 __user *out_fence_ptr;
 };
 
 struct __drm_connnectors_state {
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index bf9991b20611..137432386310 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -488,7 +488,7 @@ struct drm_mode_config {
 	/**
 	 * @prop_out_fence_ptr: Sync File fd pointer representing the
 	 * outgoing fences for a CRTC. Userspace should provide a pointer to a
-	 * value of type s64, and then cast that pointer to u64.
+	 * value of type s32, and then cast that pointer to u64.
 	 */
 	struct drm_property *prop_out_fence_ptr;
 	/**
-- 
cgit v1.2.3


From 5eb7c0d04f04a667c049fe090a95494a8de2955c Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Sun, 1 Jan 2017 20:25:25 -0600
Subject: taint/module: Fix problems when out-of-kernel driver defines true or
 false

Commit 7fd8329ba502 ("taint/module: Clean up global and module taint
flags handling") used the key words true and false as character members
of a new struct. These names cause problems when out-of-kernel modules
such as VirtualBox include their own definitions of true and false.

Fixes: 7fd8329ba502 ("taint/module: Clean up global and module taint flags handling")
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Reported-by: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jessica Yu <jeyu@redhat.com>
---
 include/linux/kernel.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 56aec84237ad..cb09238f6d32 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -514,8 +514,8 @@ extern enum system_states {
 #define TAINT_FLAGS_COUNT		16
 
 struct taint_flag {
-	char true;	/* character printed when tainted */
-	char false;	/* character printed when not tainted */
+	char c_true;	/* character printed when tainted */
+	char c_false;	/* character printed when not tainted */
 	bool module;	/* also show as a per-module taint flag */
 };
 
-- 
cgit v1.2.3


From 501db511397fd6efff3aa5b4e8de415b55559550 Mon Sep 17 00:00:00 2001
From: Rolf Neugebauer <rolf.neugebauer@docker.com>
Date: Tue, 17 Jan 2017 18:13:51 +0000
Subject: virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit

This patch part reverts fd2a0437dc33 and e858fae2b0b8 which introduced a
subtle change in how the virtio_net flags are derived from the SKBs
ip_summed field.

With the above commits, the flags are set to VIRTIO_NET_HDR_F_DATA_VALID
when ip_summed == CHECKSUM_UNNECESSARY, thus treating it differently to
ip_summed == CHECKSUM_NONE, which should be the same.

Further, the virtio spec 1.0 / CS04 explicitly says that
VIRTIO_NET_HDR_F_DATA_VALID must not be set by the driver.

Fixes: fd2a0437dc33 ("virtio_net: introduce virtio_net_hdr_{from,to}_skb")
Fixes: e858fae2b0b8 (" virtio_net: use common code for virtio_net_hdr and skb GSO conversion")
Signed-off-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 66204007d7ac..56436472ccc7 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -91,8 +91,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 				skb_checksum_start_offset(skb));
 		hdr->csum_offset = __cpu_to_virtio16(little_endian,
 				skb->csum_offset);
-	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-		hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
 	} /* else everything is zero */
 
 	return 0;
-- 
cgit v1.2.3


From 9ed59592e3e379b2e9557dc1d9e9ec8fcbb33f16 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 17 Jan 2017 14:57:36 -0800
Subject: lwtunnel: fix autoload of lwt modules

Trying to add an mpls encap route when the MPLS modules are not loaded
hangs. For example:

    CONFIG_MPLS=y
    CONFIG_NET_MPLS_GSO=m
    CONFIG_MPLS_ROUTING=m
    CONFIG_MPLS_IPTUNNEL=m

    $ ip route add 10.10.10.10/32 encap mpls 100 via inet 10.100.1.2

The ip command hangs:
root       880   826  0 21:25 pts/0    00:00:00 ip route add 10.10.10.10/32 encap mpls 100 via inet 10.100.1.2

    $ cat /proc/880/stack
    [<ffffffff81065a9b>] call_usermodehelper_exec+0xd6/0x134
    [<ffffffff81065efc>] __request_module+0x27b/0x30a
    [<ffffffff814542f6>] lwtunnel_build_state+0xe4/0x178
    [<ffffffff814aa1e4>] fib_create_info+0x47f/0xdd4
    [<ffffffff814ae451>] fib_table_insert+0x90/0x41f
    [<ffffffff814a8010>] inet_rtm_newroute+0x4b/0x52
    ...

modprobe is trying to load rtnl-lwt-MPLS:

root       881     5  0 21:25 ?        00:00:00 /sbin/modprobe -q -- rtnl-lwt-MPLS

and it hangs after loading mpls_router:

    $ cat /proc/881/stack
    [<ffffffff81441537>] rtnl_lock+0x12/0x14
    [<ffffffff8142ca2a>] register_netdevice_notifier+0x16/0x179
    [<ffffffffa0033025>] mpls_init+0x25/0x1000 [mpls_router]
    [<ffffffff81000471>] do_one_initcall+0x8e/0x13f
    [<ffffffff81119961>] do_init_module+0x5a/0x1e5
    [<ffffffff810bd070>] load_module+0x13bd/0x17d6
    ...

The problem is that lwtunnel_build_state is called with rtnl lock
held preventing mpls_init from registering.

Given the potential references held by the time lwtunnel_build_state it
can not drop the rtnl lock to the load module. So, extract the module
loading code from lwtunnel_build_state into a new function to validate
the encap type. The new function is called while converting the user
request into a fib_config which is well before any table, device or
fib entries are examined.

Fixes: 745041e2aaf1 ("lwtunnel: autoload of lwt modules")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index d4c1c75b8862..0b585f1fd340 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -105,6 +105,8 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
 			   unsigned int num);
 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
 			   unsigned int num);
+int lwtunnel_valid_encap_type(u16 encap_type);
+int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len);
 int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
 			 struct nlattr *encap,
 			 unsigned int family, const void *cfg,
@@ -168,6 +170,15 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
 	return -EOPNOTSUPP;
 }
 
+static inline int lwtunnel_valid_encap_type(u16 encap_type)
+{
+	return -EOPNOTSUPP;
+}
+static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
 				       struct nlattr *encap,
 				       unsigned int family, const void *cfg,
-- 
cgit v1.2.3


From d407bd25a204bd66b7346dde24bd3d37ef0e0b05 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 18 Jan 2017 15:14:17 +0100
Subject: bpf: don't trigger OOM killer under pressure with map alloc

This patch adds two helpers, bpf_map_area_alloc() and bpf_map_area_free(),
that are to be used for map allocations. Using kmalloc() for very large
allocations can cause excessive work within the page allocator, so i) fall
back earlier to vmalloc() when the attempt is considered costly anyway,
and even more importantly ii) don't trigger OOM killer with any of the
allocators.

Since this is based on a user space request, for example, when creating
maps with element pre-allocation, we really want such requests to fail
instead of killing other user space processes.

Also, don't spam the kernel log with warnings should any of the allocations
fail under pressure. Given that, we can make backend selection in
bpf_map_area_alloc() generic, and convert all maps over to use this API
for spots with potentially large allocation requests.

Note, replacing the one kmalloc_array() is fine as overflow checks happen
earlier in htab_map_alloc(), since it must also protect the multiplication
for vmalloc() should kmalloc_array() fail.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05cf951df3fe..3ed1f3b1d594 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -247,6 +247,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
+void *bpf_map_area_alloc(size_t size);
+void bpf_map_area_free(void *base);
 
 extern int sysctl_unprivileged_bpf_disabled;
 
-- 
cgit v1.2.3


From 739e6f5945d88dcee01590913f6886132a10c215 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 11 Jan 2017 13:37:07 +0100
Subject: gpio: provide lockdep keys for nested/unnested irqchips

The helper function for adding a GPIO chip compiles in a lockdep
key for debugging, the same key is needed for nested chips as
well.

The macro construction is unreadable, replace this with two
static inlines instead.

The _gpiochip_irqchip_add prefixed function is not helpful,
rename it with gpiochip_irqchip_add_key() that tell us what the
function is actually doing.

Fixes: d245b3f9bd36 ("gpio: simplify adding threaded interrupts")
Cc: Roger Quadros <rogerq@ti.com>
Reported-by: Clemens Gruber <clemens.gruber@pqgruber.com>
Reported-by: Roger Quadros <rogerq@ti.com>
Reported-by: Grygorii Strashko <grygorii.strashko@ti.com>
Tested-by: Clemens Gruber <clemens.gruber@pqgruber.com>
Tested-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 70 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index c2748accea71..e973faba69dc 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -274,37 +274,67 @@ void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip,
 		struct irq_chip *irqchip,
 		int parent_irq);
 
-int _gpiochip_irqchip_add(struct gpio_chip *gpiochip,
+int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
+			     struct irq_chip *irqchip,
+			     unsigned int first_irq,
+			     irq_flow_handler_t handler,
+			     unsigned int type,
+			     bool nested,
+			     struct lock_class_key *lock_key);
+
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * Lockdep requires that each irqchip instance be created with a
+ * unique key so as to avoid unnecessary warnings. This upfront
+ * boilerplate static inlines provides such a key for each
+ * unique instance.
+ */
+static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
+				       struct irq_chip *irqchip,
+				       unsigned int first_irq,
+				       irq_flow_handler_t handler,
+				       unsigned int type)
+{
+	static struct lock_class_key key;
+
+	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
+					handler, type, false, &key);
+}
+
+static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
 			  struct irq_chip *irqchip,
 			  unsigned int first_irq,
 			  irq_flow_handler_t handler,
-			  unsigned int type,
-			  bool nested,
-			  struct lock_class_key *lock_key);
+			  unsigned int type)
+{
+
+	static struct lock_class_key key;
+
+	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
+					handler, type, true, &key);
+}
+#else
+static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
+				       struct irq_chip *irqchip,
+				       unsigned int first_irq,
+				       irq_flow_handler_t handler,
+				       unsigned int type)
+{
+	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
+					handler, type, false, NULL);
+}
 
-/* FIXME: I assume threaded IRQchips do not have the lockdep problem */
 static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
 			  struct irq_chip *irqchip,
 			  unsigned int first_irq,
 			  irq_flow_handler_t handler,
 			  unsigned int type)
 {
-	return _gpiochip_irqchip_add(gpiochip, irqchip, first_irq,
-				     handler, type, true, NULL);
+	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
+					handler, type, true, NULL);
 }
-
-#ifdef CONFIG_LOCKDEP
-#define gpiochip_irqchip_add(...)				\
-(								\
-	({							\
-		static struct lock_class_key _key;		\
-		_gpiochip_irqchip_add(__VA_ARGS__, false, &_key); \
-	})							\
-)
-#else
-#define gpiochip_irqchip_add(...)				\
-	_gpiochip_irqchip_add(__VA_ARGS__, false, NULL)
-#endif
+#endif /* CONFIG_LOCKDEP */
 
 #endif /* CONFIG_GPIOLIB_IRQCHIP */
 
-- 
cgit v1.2.3


From 579b2a65d245c093d3e63845c320b9321f112b75 Mon Sep 17 00:00:00 2001
From: Mitchel Humpherys <mitchelh@codeaurora.org>
Date: Fri, 6 Jan 2017 18:58:08 +0530
Subject: iommu: add IOMMU_PRIV attribute

Add the IOMMU_PRIV attribute, which is used to indicate privileged
mappings.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0ff5111f6959..69e2417a2965 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -31,6 +31,13 @@
 #define IOMMU_CACHE	(1 << 2) /* DMA cache coherency */
 #define IOMMU_NOEXEC	(1 << 3)
 #define IOMMU_MMIO	(1 << 4) /* e.g. things like MSI doorbells */
+/*
+ * This is to make the IOMMU API setup privileged
+ * mapppings accessible by the master only at higher
+ * privileged execution level and inaccessible at
+ * less privileged levels.
+ */
+#define IOMMU_PRIV	(1 << 5)
 
 struct iommu_ops;
 struct iommu_group;
-- 
cgit v1.2.3


From b2fb366425ceb85dca56afa538257ec5a2c4f6d1 Mon Sep 17 00:00:00 2001
From: Mitchel Humpherys <mitchelh@codeaurora.org>
Date: Fri, 6 Jan 2017 18:58:11 +0530
Subject: common: DMA-mapping: add DMA_ATTR_PRIVILEGED attribute

This patch adds the DMA_ATTR_PRIVILEGED attribute to the DMA-mapping
subsystem.

Some advanced peripherals such as remote processors and GPUs perform
accesses to DMA buffers in both privileged "supervisor" and unprivileged
"user" modes.  This attribute is used to indicate to the DMA-mapping
subsystem that the buffer is fully accessible at the elevated privilege
level (and ideally inaccessible or at least read-only at the
lesser-privileged levels).

Cc: linux-doc@vger.kernel.org
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/dma-mapping.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 10c5a17b1f51..c24721a33b4c 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -62,6 +62,13 @@
  */
 #define DMA_ATTR_NO_WARN	(1UL << 8)
 
+/*
+ * DMA_ATTR_PRIVILEGED: used to indicate that the buffer is fully
+ * accessible at an elevated privilege level (and ideally inaccessible or
+ * at least read-only at lesser-privileged levels).
+ */
+#define DMA_ATTR_PRIVILEGED		(1UL << 9)
+
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.
  * It can be given to a device to use as a DMA source or target.  A CPU cannot
-- 
cgit v1.2.3


From 737c85ca1c3af4f97acb61cd53415ec039b31111 Mon Sep 17 00:00:00 2001
From: Mitchel Humpherys <mitchelh@codeaurora.org>
Date: Fri, 6 Jan 2017 18:58:12 +0530
Subject: arm64/dma-mapping: Implement DMA_ATTR_PRIVILEGED

The newly added DMA_ATTR_PRIVILEGED is useful for creating mappings that
are only accessible to privileged DMA engines.  Implement it in
dma-iommu.c so that the ARM64 DMA IOMMU mapper can make use of it.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/dma-iommu.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 7f7e9a7e3839..c5511e1d5560 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -34,7 +34,8 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 		u64 size, struct device *dev);
 
 /* General helpers for DMA-API <-> IOMMU-API interaction */
-int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);
+int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
+		     unsigned long attrs);
 
 /*
  * These implement the bulk of the relevant DMA mapping callbacks, but require
-- 
cgit v1.2.3


From e326ce013a8e851193eb337aafb1aa396c533a61 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 20 Jan 2017 03:25:34 +0100
Subject: Revert "PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag"

Revert commit 08b98d329165 (PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0
flag) as it caused system suspend (in the default configuration) to fail
on Dell XPS13 (9360) with the Kaby Lake processor.

Fixes: 08b98d329165 (PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag)
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/suspend.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 0c729c3c8549..d9718378a8be 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -194,8 +194,6 @@ struct platform_freeze_ops {
 };
 
 #ifdef CONFIG_SUSPEND
-extern suspend_state_t mem_sleep_default;
-
 /**
  * suspend_set_ops - set platform dependent suspend operations
  * @ops: The new suspend operations to set.
-- 
cgit v1.2.3


From 6391a4481ba0796805d6581e42f9f0418c099e34 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 20 Jan 2017 14:32:42 +0800
Subject: virtio-net: restore VIRTIO_HDR_F_DATA_VALID on receiving

Commit 501db511397f ("virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on
xmit") in fact disables VIRTIO_HDR_F_DATA_VALID on receiving path too,
fixing this by adding a hint (has_data_valid) and set it only on the
receiving path.

Cc: Rolf Neugebauer <rolf.neugebauer@docker.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 56436472ccc7..5209b5ed2a64 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -56,7 +56,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 
 static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 					  struct virtio_net_hdr *hdr,
-					  bool little_endian)
+					  bool little_endian,
+					  bool has_data_valid)
 {
 	memset(hdr, 0, sizeof(*hdr));   /* no info leak */
 
@@ -91,6 +92,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 				skb_checksum_start_offset(skb));
 		hdr->csum_offset = __cpu_to_virtio16(little_endian,
 				skb->csum_offset);
+	} else if (has_data_valid &&
+		   skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
 	} /* else everything is zero */
 
 	return 0;
-- 
cgit v1.2.3


From fdbe574eb69312a7fbe09674d69c01b80e4ed9dc Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 19 Jan 2017 20:57:46 +0000
Subject: iommu/dma: Allow MSI-only cookies

IOMMU domain users such as VFIO face a similar problem to DMA API ops
with regard to mapping MSI messages in systems where the MSI write is
subject to IOMMU translation. With the relevant infrastructure now in
place for managed DMA domains, it's actually really simple for other
users to piggyback off that and reap the benefits without giving up
their own IOVA management, and without having to reinvent their own
wheel in the MSI layer.

Allow such users to opt into automatic MSI remapping by dedicating a
region of their IOVA space to a managed cookie, and extend the mapping
routine to implement a trivial linear allocator in such cases, to avoid
the needless overhead of a full-blown IOVA domain.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/dma-iommu.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 7f7e9a7e3839..28df844a23b6 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -27,6 +27,7 @@ int iommu_dma_init(void);
 
 /* Domain management interface for IOMMU drivers */
 int iommu_get_dma_cookie(struct iommu_domain *domain);
+int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
 void iommu_put_dma_cookie(struct iommu_domain *domain);
 
 /* Setup call for arch DMA mapping code */
@@ -86,6 +87,11 @@ static inline int iommu_get_dma_cookie(struct iommu_domain *domain)
 	return -ENODEV;
 }
 
+static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
+{
+	return -ENODEV;
+}
+
 static inline void iommu_put_dma_cookie(struct iommu_domain *domain)
 {
 }
-- 
cgit v1.2.3


From e5b5234a36ca283158721d3d2e0cddfa324abdf9 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:47 +0000
Subject: iommu: Rename iommu_dm_regions into iommu_resv_regions

We want to extend the callbacks used for dm regions and
use them for reserved regions. Reserved regions can be
- directly mapped regions
- regions that cannot be iommu mapped (PCI host bridge windows, ...)
- MSI regions (because they belong to another address space or because
  they are not translated by the IOMMU and need special handling)

So let's rename the struct and also the callbacks.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0ff5111f6959..bfecb8b74078 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -118,13 +118,13 @@ enum iommu_attr {
 };
 
 /**
- * struct iommu_dm_region - descriptor for a direct mapped memory region
+ * struct iommu_resv_region - descriptor for a reserved memory region
  * @list: Linked list pointers
  * @start: System physical start address of the region
  * @length: Length of the region in bytes
  * @prot: IOMMU Protection flags (READ/WRITE/...)
  */
-struct iommu_dm_region {
+struct iommu_resv_region {
 	struct list_head	list;
 	phys_addr_t		start;
 	size_t			length;
@@ -150,9 +150,9 @@ struct iommu_dm_region {
  * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
- * @get_dm_regions: Request list of direct mapping requirements for a device
- * @put_dm_regions: Free list of direct mapping requirements for a device
- * @apply_dm_region: Temporary helper call-back for iova reserved ranges
+ * @get_resv_regions: Request list of reserved regions for a device
+ * @put_resv_regions: Free list of reserved regions for a device
+ * @apply_resv_region: Temporary helper call-back for iova reserved ranges
  * @domain_window_enable: Configure and enable a particular window for a domain
  * @domain_window_disable: Disable a particular window for a domain
  * @domain_set_windows: Set the number of windows for a domain
@@ -184,11 +184,12 @@ struct iommu_ops {
 	int (*domain_set_attr)(struct iommu_domain *domain,
 			       enum iommu_attr attr, void *data);
 
-	/* Request/Free a list of direct mapping requirements for a device */
-	void (*get_dm_regions)(struct device *dev, struct list_head *list);
-	void (*put_dm_regions)(struct device *dev, struct list_head *list);
-	void (*apply_dm_region)(struct device *dev, struct iommu_domain *domain,
-				struct iommu_dm_region *region);
+	/* Request/Free a list of reserved regions for a device */
+	void (*get_resv_regions)(struct device *dev, struct list_head *list);
+	void (*put_resv_regions)(struct device *dev, struct list_head *list);
+	void (*apply_resv_region)(struct device *dev,
+				  struct iommu_domain *domain,
+				  struct iommu_resv_region *region);
 
 	/* Window handling functions */
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
@@ -233,8 +234,8 @@ extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t io
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
 
-extern void iommu_get_dm_regions(struct device *dev, struct list_head *list);
-extern void iommu_put_dm_regions(struct device *dev, struct list_head *list);
+extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
+extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
 
 extern int iommu_attach_group(struct iommu_domain *domain,
@@ -443,12 +444,12 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 {
 }
 
-static inline void iommu_get_dm_regions(struct device *dev,
+static inline void iommu_get_resv_regions(struct device *dev,
 					struct list_head *list)
 {
 }
 
-static inline void iommu_put_dm_regions(struct device *dev,
+static inline void iommu_put_resv_regions(struct device *dev,
 					struct list_head *list)
 {
 }
-- 
cgit v1.2.3


From d30ddcaa7b028049cdfee3a40248002d07b2bbf3 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:48 +0000
Subject: iommu: Add a new type field in iommu_resv_region

We introduce a new field to differentiate the reserved region
types and specialize the apply_resv_region implementation.

Legacy direct mapped regions have IOMMU_RESV_DIRECT type.
We introduce 2 new reserved memory types:
- IOMMU_RESV_MSI will characterize MSI regions that are mapped
- IOMMU_RESV_RESERVED characterize regions that cannot by mapped.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bfecb8b74078..233a6bf093bf 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -117,18 +117,25 @@ enum iommu_attr {
 	DOMAIN_ATTR_MAX,
 };
 
+/* These are the possible reserved region types */
+#define IOMMU_RESV_DIRECT	(1 << 0)
+#define IOMMU_RESV_RESERVED	(1 << 1)
+#define IOMMU_RESV_MSI		(1 << 2)
+
 /**
  * struct iommu_resv_region - descriptor for a reserved memory region
  * @list: Linked list pointers
  * @start: System physical start address of the region
  * @length: Length of the region in bytes
  * @prot: IOMMU Protection flags (READ/WRITE/...)
+ * @type: Type of the reserved region
  */
 struct iommu_resv_region {
 	struct list_head	list;
 	phys_addr_t		start;
 	size_t			length;
 	int			prot;
+	int			type;
 };
 
 #ifdef CONFIG_IOMMU_API
-- 
cgit v1.2.3


From 2b20cbba3390a55c511acba2f0f517dd27a528b2 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:49 +0000
Subject: iommu: iommu_alloc_resv_region

Introduce a new helper serving the purpose to allocate a reserved
region. This will be used in iommu driver implementing reserved
region callbacks.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 233a6bf093bf..f6bb55d3e606 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -244,6 +244,8 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain,
 extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
 extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
+extern struct iommu_resv_region *
+iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type);
 
 extern int iommu_attach_group(struct iommu_domain *domain,
 			      struct iommu_group *group);
-- 
cgit v1.2.3


From 6c65fb318e8bbf21e939e651028b955324f1d873 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:51 +0000
Subject: iommu: iommu_get_group_resv_regions

Introduce iommu_get_group_resv_regions whose role consists in
enumerating all devices from the group and collecting their
reserved regions. The list is sorted and overlaps between
regions of the same type are handled by merging the regions.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f6bb55d3e606..bec3730dc009 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -246,6 +246,8 @@ extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
 extern struct iommu_resv_region *
 iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type);
+extern int iommu_get_group_resv_regions(struct iommu_group *group,
+					struct list_head *head);
 
 extern int iommu_attach_group(struct iommu_domain *domain,
 			      struct iommu_group *group);
@@ -463,6 +465,12 @@ static inline void iommu_put_resv_regions(struct device *dev,
 {
 }
 
+static inline int iommu_get_group_resv_regions(struct iommu_group *group,
+					       struct list_head *head)
+{
+	return -ENODEV;
+}
+
 static inline int iommu_request_dm_for_dev(struct device *dev)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From 631a9639ac413da6242cb15558ebd661cf633622 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:57 +0000
Subject: irqdomain: Add irq domain MSI and MSI_REMAP flags

We introduce two new enum values for the irq domain flag:
- IRQ_DOMAIN_FLAG_MSI indicates the irq domain corresponds to
  an MSI domain
- IRQ_DOMAIN_FLAG_MSI_REMAP indicates the irq domain has MSI
  remapping capabilities.

Those values will be useful to check all MSI irq domains have
MSI remapping support when assessing the safety of IRQ assignment
to a guest.

irq_domain_hierarchical_is_msi_remap() allows to check if an
irq domain or any parent implements MSI remapping.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/irqdomain.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index ffb84604c1de..bc2f5719dace 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -183,6 +183,12 @@ enum {
 	/* Irq domain is an IPI domain with single virq */
 	IRQ_DOMAIN_FLAG_IPI_SINGLE	= (1 << 3),
 
+	/* Irq domain implements MSIs */
+	IRQ_DOMAIN_FLAG_MSI		= (1 << 4),
+
+	/* Irq domain implements MSI remapping */
+	IRQ_DOMAIN_FLAG_MSI_REMAP	= (1 << 5),
+
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
@@ -446,6 +452,19 @@ static inline bool irq_domain_is_ipi_single(struct irq_domain *domain)
 {
 	return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE;
 }
+
+static inline bool irq_domain_is_msi(struct irq_domain *domain)
+{
+	return domain->flags & IRQ_DOMAIN_FLAG_MSI;
+}
+
+static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
+{
+	return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP;
+}
+
+extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain);
+
 #else	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 static inline void irq_domain_activate_irq(struct irq_data *data) { }
 static inline void irq_domain_deactivate_irq(struct irq_data *data) { }
@@ -477,6 +496,22 @@ static inline bool irq_domain_is_ipi_single(struct irq_domain *domain)
 {
 	return false;
 }
+
+static inline bool irq_domain_is_msi(struct irq_domain *domain)
+{
+	return false;
+}
+
+static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
+{
+	return false;
+}
+
+static inline bool
+irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
+{
+	return false;
+}
 #endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 
 #else /* CONFIG_IRQ_DOMAIN */
-- 
cgit v1.2.3


From c7b41f0af38f53e46050b56a5b0e96710097b83c Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:59 +0000
Subject: irqdomain: irq_domain_check_msi_remap

This new function checks whether all MSI irq domains
implement IRQ remapping. This is useful to understand
whether VFIO passthrough is safe with respect to interrupts.

On ARM typically an MSI controller can sit downstream
to the IOMMU without preventing VFIO passthrough.
As such any assigned device can write into the MSI doorbell.
In case the MSI controller implements IRQ remapping, assigned
devices will not be able to trigger interrupts towards the
host. On the contrary, the assignment must be emphasized as
unsafe with respect to interrupts.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/irqdomain.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index bc2f5719dace..188eced6813e 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -222,6 +222,7 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
 					 void *host_data);
 extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
 						   enum irq_domain_bus_token bus_token);
+extern bool irq_domain_check_msi_remap(void);
 extern void irq_set_default_host(struct irq_domain *host);
 extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
 				  irq_hw_number_t hwirq, int node,
-- 
cgit v1.2.3


From b1a27eac7fefff33ccf6acc919fc0725bf9815fb Mon Sep 17 00:00:00 2001
From: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Date: Sun, 22 Jan 2017 14:41:22 +0100
Subject: IB/cxgb3: fix misspelling in header guard

Use CXGB3_... instead of CXBG3_...

Fixes: a85fb3383340 ("IB/cxgb3: Move user vendor structures")
Cc: stable@vger.kernel.org # 4.9
Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Acked-by: Steve Wise <swise@chelsio.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/cxgb3-abi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h
index 48a19bda071b..d24eee12128f 100644
--- a/include/uapi/rdma/cxgb3-abi.h
+++ b/include/uapi/rdma/cxgb3-abi.h
@@ -30,7 +30,7 @@
  * SOFTWARE.
  */
 #ifndef CXGB3_ABI_USER_H
-#define CXBG3_ABI_USER_H
+#define CXGB3_ABI_USER_H
 
 #include <linux/types.h>
 
-- 
cgit v1.2.3


From 059aa734824165507c65fd30a55ff000afd14983 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 22 Jan 2017 14:04:29 -0500
Subject: nfs: Don't increment lock sequence ID after NFS4ERR_MOVED

Xuan Qi reports that the Linux NFSv4 client failed to lock a file
that was migrated. The steps he observed on the wire:

1. The client sent a LOCK request to the source server
2. The source server replied NFS4ERR_MOVED
3. The client switched to the destination server
4. The client sent the same LOCK request to the destination
   server with a bumped lock sequence ID
5. The destination server rejected the LOCK request with
   NFS4ERR_BAD_SEQID

RFC 3530 section 8.1.5 provides a list of NFS errors which do not
bump a lock sequence ID.

However, RFC 3530 is now obsoleted by RFC 7530. In RFC 7530 section
9.1.7, this list has been updated by the addition of NFS4ERR_MOVED.

Reported-by: Xuan Qi <xuan.qi@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@vger.kernel.org # v3.7+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/nfs4.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index bca536341d1a..1b1ca04820a3 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -282,7 +282,7 @@ enum nfsstat4 {
 
 static inline bool seqid_mutating_err(u32 err)
 {
-	/* rfc 3530 section 8.1.5: */
+	/* See RFC 7530, section 9.1.7 */
 	switch (err) {
 	case NFS4ERR_STALE_CLIENTID:
 	case NFS4ERR_STALE_STATEID:
@@ -291,6 +291,7 @@ static inline bool seqid_mutating_err(u32 err)
 	case NFS4ERR_BADXDR:
 	case NFS4ERR_RESOURCE:
 	case NFS4ERR_NOFILEHANDLE:
+	case NFS4ERR_MOVED:
 		return false;
 	};
 	return true;
-- 
cgit v1.2.3


From 517e7610d2ce04d1b8d8b6c6d1a36dcce5cac6ab Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 19 Jan 2017 17:05:00 -0800
Subject: ARCv2: MCIP: update the BCR per current changes

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 include/soc/arc/mcip.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h
index 6902c2a8bd23..4b6b489a8d7c 100644
--- a/include/soc/arc/mcip.h
+++ b/include/soc/arc/mcip.h
@@ -55,17 +55,17 @@ struct mcip_cmd {
 
 struct mcip_bcr {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int pad3:8,
-			     idu:1, llm:1, num_cores:6,
-			     iocoh:1,  gfrc:1, dbg:1, pad2:1,
-			     msg:1, sem:1, ipi:1, pad:1,
+		unsigned int pad4:6, pw_dom:1, pad3:1,
+			     idu:1, pad2:1, num_cores:6,
+			     pad:1,  gfrc:1, dbg:1, pw:1,
+			     msg:1, sem:1, ipi:1, slv:1,
 			     ver:8;
 #else
 		unsigned int ver:8,
-			     pad:1, ipi:1, sem:1, msg:1,
-			     pad2:1, dbg:1, gfrc:1, iocoh:1,
-			     num_cores:6, llm:1, idu:1,
-			     pad3:8;
+			     slv:1, ipi:1, sem:1, msg:1,
+			     pw:1, dbg:1, gfrc:1, pad:1,
+			     num_cores:6, pad2:1, idu:1,
+			     pad3:1, pw_dom:1, pad4:6;
 #endif
 };
 
-- 
cgit v1.2.3


From c929ea0b910355e1876c64431f3d5802f95b3d75 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Fri, 20 Jan 2017 16:48:39 +0800
Subject: SUNRPC: cleanup ida information when removing sunrpc module

After removing sunrpc module, I get many kmemleak information as,
unreferenced object 0xffff88003316b1e0 (size 544):
  comm "gssproxy", pid 2148, jiffies 4294794465 (age 4200.081s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffffb0cfb58a>] kmemleak_alloc+0x4a/0xa0
    [<ffffffffb03507fe>] kmem_cache_alloc+0x15e/0x1f0
    [<ffffffffb0639baa>] ida_pre_get+0xaa/0x150
    [<ffffffffb0639cfd>] ida_simple_get+0xad/0x180
    [<ffffffffc06054fb>] nlmsvc_lookup_host+0x4ab/0x7f0 [lockd]
    [<ffffffffc0605e1d>] lockd+0x4d/0x270 [lockd]
    [<ffffffffc06061e5>] param_set_timeout+0x55/0x100 [lockd]
    [<ffffffffc06cba24>] svc_defer+0x114/0x3f0 [sunrpc]
    [<ffffffffc06cbbe7>] svc_defer+0x2d7/0x3f0 [sunrpc]
    [<ffffffffc06c71da>] rpc_show_info+0x8a/0x110 [sunrpc]
    [<ffffffffb044a33f>] proc_reg_write+0x7f/0xc0
    [<ffffffffb038e41f>] __vfs_write+0xdf/0x3c0
    [<ffffffffb0390f1f>] vfs_write+0xef/0x240
    [<ffffffffb0392fbd>] SyS_write+0xad/0x130
    [<ffffffffb0d06c37>] entry_SYSCALL_64_fastpath+0x1a/0xa9
    [<ffffffffffffffff>] 0xffffffffffffffff

I found, the ida information (dynamic memory) isn't cleanup.

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Fixes: 2f048db4680a ("SUNRPC: Add an identifier for struct rpc_clnt")
Cc: stable@vger.kernel.org # v3.12+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/clnt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 85cc819676e8..333ad11b3dd9 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -216,5 +216,6 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *);
 void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *);
 bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
 			const struct sockaddr *sap);
+void rpc_cleanup_clids(void);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
-- 
cgit v1.2.3


From d3f4aadd614c4627244452ad64eaf351179f2c31 Mon Sep 17 00:00:00 2001
From: "Amrani, Ram" <Ram.Amrani@cavium.com>
Date: Mon, 26 Dec 2016 08:40:57 +0200
Subject: RDMA/core: Add the function ib_mtu_int_to_enum

As the functionality to convert the MTU from a number to enum_ib_mtu
is ubiquitous, define a dedicated function and remove the duplicated
code.

Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/rdma/ib_verbs.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 958a24d8fae7..b567e4452a47 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -352,6 +352,20 @@ static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
 	}
 }
 
+static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
+{
+	if (mtu >= 4096)
+		return IB_MTU_4096;
+	else if (mtu >= 2048)
+		return IB_MTU_2048;
+	else if (mtu >= 1024)
+		return IB_MTU_1024;
+	else if (mtu >= 512)
+		return IB_MTU_512;
+	else
+		return IB_MTU_256;
+}
+
 enum ib_port_state {
 	IB_PORT_NOP		= 0,
 	IB_PORT_DOWN		= 1,
-- 
cgit v1.2.3


From 20f5e10ef8bcf29a915642245b66e5a132e38fc4 Mon Sep 17 00:00:00 2001
From: "Amrani, Ram" <Ram.Amrani@cavium.com>
Date: Tue, 24 Jan 2017 12:01:31 +0200
Subject: RDMA/qedr: Add uapi header qedr-abi.h

Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index 82bdf5626859..bb68cb1b04ed 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -16,3 +16,4 @@ header-y += nes-abi.h
 header-y += ocrdma-abi.h
 header-y += hns-abi.h
 header-y += vmw_pvrdma-abi.h
+header-y += qedr-abi.h
-- 
cgit v1.2.3


From 5ce6b04ce96896e8a79e6f60740ced911eaac7a4 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 22 Jan 2017 22:10:32 +0800
Subject: netfilter: nft_log: restrict the log prefix length to 127

First, log prefix will be truncated to NF_LOG_PREFIXLEN-1, i.e. 127,
at nf_log_packet(), so the extra part is useless.

Second, after adding a log rule with a very very long prefix, we will
fail to dump the nft rules after this _special_ one, but acctually,
they do exist. For example:
  # name_65000=$(printf "%0.sQ" {1..65000})
  # nft add rule filter output log prefix "$name_65000"
  # nft add rule filter output counter
  # nft add rule filter output counter
  # nft list chain filter output
  table ip filter {
      chain output {
          type filter hook output priority 0; policy accept;
      }
  }

So now, restrict the log prefix length to NF_LOG_PREFIXLEN-1.

Fixes: 96518518cc41 ("netfilter: add nftables")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_log.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h
index 8be21e02387d..d0b5fa91ff54 100644
--- a/include/uapi/linux/netfilter/nf_log.h
+++ b/include/uapi/linux/netfilter/nf_log.h
@@ -9,4 +9,6 @@
 #define NF_LOG_MACDECODE	0x20	/* Decode MAC header */
 #define NF_LOG_MASK		0x2f
 
+#define NF_LOG_PREFIXLEN	128
+
 #endif /* _NETFILTER_NF_LOG_H */
-- 
cgit v1.2.3


From de70185de0333783154863278ac87bfbbc54e384 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 24 Jan 2017 00:51:41 +0100
Subject: netfilter: nf_tables: deconstify walk callback function

The flush operation needs to modify set and element objects, so let's
deconstify this.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 924325c46aab..7dfdb517f0be 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -207,9 +207,9 @@ struct nft_set_iter {
 	unsigned int	skip;
 	int		err;
 	int		(*fn)(const struct nft_ctx *ctx,
-			      const struct nft_set *set,
+			      struct nft_set *set,
 			      const struct nft_set_iter *iter,
-			      const struct nft_set_elem *elem);
+			      struct nft_set_elem *elem);
 };
 
 /**
@@ -301,7 +301,7 @@ struct nft_set_ops {
 	void				(*remove)(const struct nft_set *set,
 						  const struct nft_set_elem *elem);
 	void				(*walk)(const struct nft_ctx *ctx,
-						const struct nft_set *set,
+						struct nft_set *set,
 						struct nft_set_iter *iter);
 
 	unsigned int			(*privsize)(const struct nlattr * const nla[]);
-- 
cgit v1.2.3


From 88ff7334f25909802140e690c0e16433e485b0a0 Mon Sep 17 00:00:00 2001
From: Robert Shearman <rshearma@brocade.com>
Date: Tue, 24 Jan 2017 16:26:47 +0000
Subject: net: Specify the owning module for lwtunnel ops

Modules implementing lwtunnel ops should not be allowed to unload
while there is state alive using those ops, so specify the owning
module for all lwtunnel ops.

Signed-off-by: Robert Shearman <rshearma@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 0b585f1fd340..73dd87647460 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -44,6 +44,8 @@ struct lwtunnel_encap_ops {
 	int (*get_encap_size)(struct lwtunnel_state *lwtstate);
 	int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
 	int (*xmit)(struct sk_buff *skb);
+
+	struct module *owner;
 };
 
 #ifdef CONFIG_LWTUNNEL
-- 
cgit v1.2.3


From 8a1f780e7f28c7c1d640118242cf68d528c456cd Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Date: Tue, 24 Jan 2017 15:17:45 -0800
Subject: memory_hotplug: make zone_can_shift() return a boolean value

online_{kernel|movable} is used to change the memory zone to
ZONE_{NORMAL|MOVABLE} and online the memory.

To check that memory zone can be changed, zone_can_shift() is used.
Currently the function returns minus integer value, plus integer
value and 0. When the function returns minus or plus integer value,
it means that the memory zone can be changed to ZONE_{NORNAL|MOVABLE}.

But when the function returns 0, there are two meanings.

One of the meanings is that the memory zone does not need to be changed.
For example, when memory is in ZONE_NORMAL and onlined by online_kernel
the memory zone does not need to be changed.

Another meaning is that the memory zone cannot be changed. When memory
is in ZONE_NORMAL and onlined by online_movable, the memory zone may
not be changed to ZONE_MOVALBE due to memory online limitation(see
Documentation/memory-hotplug.txt). In this case, memory must not be
onlined.

The patch changes the return type of zone_can_shift() so that memory
online operation fails when memory zone cannot be changed as follows:

Before applying patch:
   # grep -A 35 "Node 2" /proc/zoneinfo
   Node 2, zone   Normal
   <snip>
      node_scanned  0
           spanned  8388608
           present  7864320
           managed  7864320
   # echo online_movable > memory4097/state
   # grep -A 35 "Node 2" /proc/zoneinfo
   Node 2, zone   Normal
   <snip>
      node_scanned  0
           spanned  8388608
           present  8388608
           managed  8388608

   online_movable operation succeeded. But memory is onlined as
   ZONE_NORMAL, not ZONE_MOVABLE.

After applying patch:
   # grep -A 35 "Node 2" /proc/zoneinfo
   Node 2, zone   Normal
   <snip>
      node_scanned  0
           spanned  8388608
           present  7864320
           managed  7864320
   # echo online_movable > memory4097/state
   bash: echo: write error: Invalid argument
   # grep -A 35 "Node 2" /proc/zoneinfo
   Node 2, zone   Normal
   <snip>
      node_scanned  0
           spanned  8388608
           present  7864320
           managed  7864320

   online_movable operation failed because of failure of changing
   the memory zone from ZONE_NORMAL to ZONE_MOVABLE

Fixes: df429ac03936 ("memory-hotplug: more general validation of zone during online")
Link: http://lkml.kernel.org/r/2f9c3837-33d7-b6e5-59c0-6ca4372b2d84@gmail.com
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Reviewed-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 01033fadea47..c1784c0b4f35 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -284,7 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
 		unsigned long map_offset);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
-extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-			  enum zone_type target);
+extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+			  enum zone_type target, int *zone_shift);
 
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
-- 
cgit v1.2.3


From b94f51183b0617e7b9b4fb4137d4cf1cab7547c2 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Tue, 24 Jan 2017 15:17:53 -0800
Subject: kernel/watchdog: prevent false hardlockup on overloaded system

On an overloaded system, it is possible that a change in the watchdog
threshold can be delayed long enough to trigger a false positive.

This can easily be achieved by having a cpu spinning indefinitely on a
task, while another cpu updates watchdog threshold.

What happens is while trying to park the watchdog threads, the hrtimers
on the other cpus trigger and reprogram themselves with the new slower
watchdog threshold.  Meanwhile, the nmi watchdog is still programmed
with the old faster threshold.

Because the one cpu is blocked, it prevents the thread parking on the
other cpus from completing, which is needed to shutdown the nmi watchdog
and reprogram it correctly.  As a result, a false positive from the nmi
watchdog is reported.

Fix this by setting a park_in_progress flag to block all lockups until
the parking is complete.

Fix provided by Ulrich Obergfell.

[akpm@linux-foundation.org: s/park_in_progress/watchdog_park_in_progress/]
Link: http://lkml.kernel.org/r/1481041033-192236-1-git-send-email-dzickus@redhat.com
Signed-off-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nmi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index aacca824a6ae..0a3fadc32693 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -110,6 +110,7 @@ extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
 extern unsigned long *watchdog_cpumask_bits;
+extern atomic_t watchdog_park_in_progress;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
-- 
cgit v1.2.3


From ea57485af8f4221312a5a95d63c382b45e7840dc Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 24 Jan 2017 15:18:32 -0800
Subject: mm, page_alloc: fix check for NULL preferred_zone

Patch series "fix premature OOM regression in 4.7+ due to cpuset races".

This is v2 of my attempt to fix the recent report based on LTP cpuset
stress test [1].  The intention is to go to stable 4.9 LTSS with this,
as triggering repeated OOMs is not nice.  That's why the patches try to
be not too intrusive.

Unfortunately why investigating I found that modifying the testcase to
use per-VMA policies instead of per-task policies will bring the OOM's
back, but that seems to be much older and harder to fix problem.  I have
posted a RFC [2] but I believe that fixing the recent regressions has a
higher priority.

Longer-term we might try to think how to fix the cpuset mess in a better
and less error prone way.  I was for example very surprised to learn,
that cpuset updates change not only task->mems_allowed, but also
nodemask of mempolicies.  Until now I expected the parameter to
alloc_pages_nodemask() to be stable.  I wonder why do we then treat
cpusets specially in get_page_from_freelist() and distinguish HARDWALL
etc, when there's unconditional intersection between mempolicy and
cpuset.  I would expect the nodemask adjustment for saving overhead in
g_p_f(), but that clearly doesn't happen in the current form.  So we
have both crazy complexity and overhead, AFAICS.

[1] https://lkml.kernel.org/r/CAFpQJXUq-JuEP=QPidy4p_=FN0rkH5Z-kfB4qBvsf6jMS87Edg@mail.gmail.com
[2] https://lkml.kernel.org/r/7c459f26-13a6-a817-e508-b65b903a8378@suse.cz

This patch (of 4):

Since commit c33d6c06f60f ("mm, page_alloc: avoid looking up the first
zone in a zonelist twice") we have a wrong check for NULL preferred_zone,
which can theoretically happen due to concurrent cpuset modification.  We
check the zoneref pointer which is never NULL and we should check the zone
pointer.  Also document this in first_zones_zonelist() comment per Michal
Hocko.

Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice")
Link: http://lkml.kernel.org/r/20170120103843.24587-2-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Ganapatrao Kulkarni <gpkulkarni@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 36d9896fbc1e..f4aac87adcc3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
  * @zonelist - The zonelist to search for a suitable zone
  * @highest_zoneidx - The zone index of the highest zone to return
  * @nodes - An optional nodemask to filter the zonelist with
- * @zone - The first suitable zone found is returned via this parameter
+ * @return - Zoneref pointer for the first suitable zone found (see below)
  *
  * This function returns the first zone at or below a given zone index that is
  * within the allowed nodemask. The zoneref returned is a cursor that can be
  * used to iterate the zonelist with next_zones_zonelist by advancing it by
  * one before calling.
+ *
+ * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is
+ * never NULL). This may happen either genuinely, or due to concurrent nodemask
+ * update due to cpuset modification.
  */
 static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 					enum zone_type highest_zoneidx,
-- 
cgit v1.2.3


From d6f8cfa3dea294eabf8f302e90176dd6381fb66e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 25 Jan 2017 11:39:49 +0100
Subject: net: phy: leds: Break dependency of phy.h on phy_led_triggers.h

<linux/phy.h> includes <linux/phy_led_triggers.h>, which is not really
needed.  Drop the include from <linux/phy.h>, and add it to all users
that didn't include it explicitly.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index f7d95f644eed..7fc1105605bf 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -25,7 +25,6 @@
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/mod_devicetable.h>
-#include <linux/phy_led_triggers.h>
 
 #include <linux/atomic.h>
 
-- 
cgit v1.2.3


From 3c880eb0205222bb062970085ebedc73ec8dfd14 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 25 Jan 2017 11:39:50 +0100
Subject: net: phy: leds: Fix truncated LED trigger names

Commit 4567d686f5c6d955 ("phy: increase size of MII_BUS_ID_SIZE and
bus_id") increased the size of MII bus IDs, but forgot to update the
private definition in <linux/phy_led_triggers.h>.
This may cause:
  1. Truncation of LED trigger names,
  2. Duplicate LED trigger names,
  3. Failures registering LED triggers,
  4. Crashes due to bad error handling in the LED trigger failure path.

To fix this, and prevent the definitions going out of sync again in the
future, let the PHY LED trigger code use the existing MII_BUS_ID_SIZE
definition.

Example:
  - Before I had triggers "ee700000.etherne:01:100Mbps" and
    "ee700000.etherne:01:10Mbps",
  - After the increase of MII_BUS_ID_SIZE, both became
    "ee700000.ethernet-ffffffff:01:" => FAIL,
  - Now, the triggers are "ee700000.ethernet-ffffffff:01:100Mbps" and
    "ee700000.ethernet-ffffffff:01:10Mbps", which are unique again.

Fixes: 4567d686f5c6d955 ("phy: increase size of MII_BUS_ID_SIZE and bus_id")
Fixes: 2e0bc452f4721520 ("net: phy: leds: add support for led triggers on phy link state change")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy_led_triggers.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h
index a2daea0a37d2..b37b05bfd1a6 100644
--- a/include/linux/phy_led_triggers.h
+++ b/include/linux/phy_led_triggers.h
@@ -18,11 +18,11 @@ struct phy_device;
 #ifdef CONFIG_LED_TRIGGER_PHY
 
 #include <linux/leds.h>
+#include <linux/phy.h>
 
 #define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE	10
-#define PHY_MII_BUS_ID_SIZE	(20 - 3)
 
-#define PHY_LINK_LED_TRIGGER_NAME_SIZE (PHY_MII_BUS_ID_SIZE + \
+#define PHY_LINK_LED_TRIGGER_NAME_SIZE (MII_BUS_ID_SIZE + \
 				       FIELD_SIZEOF(struct mdio_device, addr)+\
 				       PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE)
 
-- 
cgit v1.2.3


From 54a07c7bb0da0343734c78212bbe9f3735394962 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 26 Jan 2017 06:44:03 +1000
Subject: Revert "drm/probe-helpers: Drop locking from poll_enable"

This reverts commit 3846fd9b86001bea171943cc3bb9222cb6da6b42.

There were some precursor commits missing for this around connector
locking, we should probably merge Lyude's nouveau avoid the problem patch.
---
 include/drm/drm_crtc_helper.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index d026f5017c33..982c299e435a 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -73,5 +73,6 @@ extern void drm_kms_helper_hotplug_event(struct drm_device *dev);
 
 extern void drm_kms_helper_poll_disable(struct drm_device *dev);
 extern void drm_kms_helper_poll_enable(struct drm_device *dev);
+extern void drm_kms_helper_poll_enable_locked(struct drm_device *dev);
 
 #endif
-- 
cgit v1.2.3


From 65e251a4634c5644efca6f7e15803f0962d8943d Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Tue, 3 Jan 2017 17:34:56 +0000
Subject: iommu: Drop the of_iommu_{set/get}_ops() interface

With the introduction of the new iommu_{register/get}_instance()
interface in commit e4f10ffe4c9b ("iommu: Make of_iommu_set/get_ops() DT
agnostic") (based on struct fwnode_handle as look-up token, so firmware
agnostic) to register IOMMU instances with the core IOMMU layer there is
no reason to keep the old OF based interface around any longer.

Convert all the IOMMU drivers (and OF IOMMU core code) that rely on the
of_iommu_{set/get}_ops() to the new kernel interface to register/retrieve
IOMMU instances and remove the of_iommu_{set/get}_ops() remaining glue
code in order to complete the interface rework.

Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Sricharan R <sricharan@codeaurora.org>
Tested-by: Yong Wu <yong.wu@mediatek.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/of_iommu.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 6a7fc5051099..13394ac83c66 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -31,17 +31,6 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 #endif	/* CONFIG_OF_IOMMU */
 
-static inline void of_iommu_set_ops(struct device_node *np,
-				    const struct iommu_ops *ops)
-{
-	iommu_register_instance(&np->fwnode, ops);
-}
-
-static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
-{
-	return iommu_get_instance(&np->fwnode);
-}
-
 extern struct of_device_id __iommu_of_table;
 
 typedef int (*of_iommu_init_fn)(struct device_node *);
-- 
cgit v1.2.3


From 92e55f412cffd016cc245a74278cb4d7b89bb3bc Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Thu, 26 Jan 2017 22:56:21 +0100
Subject: tcp: don't annotate mark on control socket from
 tcp_v6_send_response()

Unlike ipv4, this control socket is shared by all cpus so we cannot use
it as scratchpad area to annotate the mark that we pass to ip6_xmit().

Add a new parameter to ip6_xmit() to indicate the mark. The SCTP socket
family caches the flowi6 structure in the sctp_transport structure, so
we cannot use to carry the mark unless we later on reset it back, which
I discarded since it looks ugly to me.

Fixes: bf99b4ded5f8 ("tcp: fix mark propagation with fwmark_reflect enabled")
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 487e57391664..7afe991e900e 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -871,7 +871,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
  *	upper-layer output functions
  */
 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
-	     struct ipv6_txoptions *opt, int tclass);
+	     __u32 mark, struct ipv6_txoptions *opt, int tclass);
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr);
 
-- 
cgit v1.2.3


From 9d162ed69f51cbd9ee5a0c7e82aba7acc96362ff Mon Sep 17 00:00:00 2001
From: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Date: Fri, 27 Jan 2017 08:46:23 +0100
Subject: net: phy: micrel: add support for KSZ8795

This is adds support for the PHYs in the KSZ8795 5port managed switch.

It will allow to detect the link between the switch and the soc
and uses the same read_status functions as the KSZ8873MLL switch.

Signed-off-by: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/micrel_phy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 257173e0095e..f541da68d1e7 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -35,6 +35,8 @@
 #define PHY_ID_KSZ886X		0x00221430
 #define PHY_ID_KSZ8863		0x00221435
 
+#define PHY_ID_KSZ8795		0x00221550
+
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK	0x00000001
 #define MICREL_PHY_FXEN		0x00000002
-- 
cgit v1.2.3


From 966d2b04e070bc040319aaebfec09e0144dc3341 Mon Sep 17 00:00:00 2001
From: Douglas Miller <dougmill@linux.vnet.ibm.com>
Date: Sat, 28 Jan 2017 06:42:20 -0600
Subject: percpu-refcount: fix reference leak during percpu-atomic transition

percpu_ref_tryget() and percpu_ref_tryget_live() should return
"true" IFF they acquire a reference. But the return value from
atomic_long_inc_not_zero() is a long and may have high bits set,
e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines
is bool so the reference may actually be acquired but the routines
return "false" which results in a reference leak since the caller
assumes it does not need to do a corresponding percpu_ref_put().

This was seen when performing CPU hotplug during I/O, as hangs in
blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start)
raced with percpu_ref_tryget (blk_mq_timeout_work).
Sample stack trace:

__switch_to+0x2c0/0x450
__schedule+0x2f8/0x970
schedule+0x48/0xc0
blk_mq_freeze_queue_wait+0x94/0x120
blk_mq_queue_reinit_work+0xb8/0x180
blk_mq_queue_reinit_prepare+0x84/0xa0
cpuhp_invoke_callback+0x17c/0x600
cpuhp_up_callbacks+0x58/0x150
_cpu_up+0xf0/0x1c0
do_cpu_up+0x120/0x150
cpu_subsys_online+0x64/0xe0
device_online+0xb4/0x120
online_store+0xb4/0xc0
dev_attr_store+0x68/0xa0
sysfs_kf_write+0x80/0xb0
kernfs_fop_write+0x17c/0x250
__vfs_write+0x6c/0x1e0
vfs_write+0xd0/0x270
SyS_write+0x6c/0x110
system_call+0x38/0xe0

Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS,
and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests.
However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0
and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set.

The fix is to make the tryget routines use an actual boolean internally instead
of the atomic long result truncated to a int.

Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints
Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751
Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints")
Cc: stable@vger.kernel.org # v3.18+
---
 include/linux/percpu-refcount.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 1c7eec09e5eb..3a481a49546e 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -204,7 +204,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
 static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 {
 	unsigned long __percpu *percpu_count;
-	int ret;
+	bool ret;
 
 	rcu_read_lock_sched();
 
@@ -238,7 +238,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
 	unsigned long __percpu *percpu_count;
-	int ret = false;
+	bool ret = false;
 
 	rcu_read_lock_sched();
 
-- 
cgit v1.2.3


From f1712c73714088a7252d276a57126d56c7d37e64 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 27 Jan 2017 08:11:44 -0800
Subject: can: Fix kernel panic at security_sock_rcv_skb

Zhang Yanmin reported crashes [1] and provided a patch adding a
synchronize_rcu() call in can_rx_unregister()

The main problem seems that the sockets themselves are not RCU
protected.

If CAN uses RCU for delivery, then sockets should be freed only after
one RCU grace period.

Recent kernels could use sock_set_flag(sk, SOCK_RCU_FREE), but let's
ease stable backports with the following fix instead.

[1]
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffff81495e25>] selinux_socket_sock_rcv_skb+0x65/0x2a0

Call Trace:
 <IRQ>
 [<ffffffff81485d8c>] security_sock_rcv_skb+0x4c/0x60
 [<ffffffff81d55771>] sk_filter+0x41/0x210
 [<ffffffff81d12913>] sock_queue_rcv_skb+0x53/0x3a0
 [<ffffffff81f0a2b3>] raw_rcv+0x2a3/0x3c0
 [<ffffffff81f06eab>] can_rcv_filter+0x12b/0x370
 [<ffffffff81f07af9>] can_receive+0xd9/0x120
 [<ffffffff81f07beb>] can_rcv+0xab/0x100
 [<ffffffff81d362ac>] __netif_receive_skb_core+0xd8c/0x11f0
 [<ffffffff81d36734>] __netif_receive_skb+0x24/0xb0
 [<ffffffff81d37f67>] process_backlog+0x127/0x280
 [<ffffffff81d36f7b>] net_rx_action+0x33b/0x4f0
 [<ffffffff810c88d4>] __do_softirq+0x184/0x440
 [<ffffffff81f9e86c>] do_softirq_own_stack+0x1c/0x30
 <EOI>
 [<ffffffff810c76fb>] do_softirq.part.18+0x3b/0x40
 [<ffffffff810c8bed>] do_softirq+0x1d/0x20
 [<ffffffff81d30085>] netif_rx_ni+0xe5/0x110
 [<ffffffff8199cc87>] slcan_receive_buf+0x507/0x520
 [<ffffffff8167ef7c>] flush_to_ldisc+0x21c/0x230
 [<ffffffff810e3baf>] process_one_work+0x24f/0x670
 [<ffffffff810e44ed>] worker_thread+0x9d/0x6f0
 [<ffffffff810e4450>] ? rescuer_thread+0x480/0x480
 [<ffffffff810ebafc>] kthread+0x12c/0x150
 [<ffffffff81f9ccef>] ret_from_fork+0x3f/0x70

Reported-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/can/core.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index a0875001b13c..df08a41d5be5 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -45,10 +45,9 @@ struct can_proto {
 extern int  can_proto_register(const struct can_proto *cp);
 extern void can_proto_unregister(const struct can_proto *cp);
 
-extern int  can_rx_register(struct net_device *dev, canid_t can_id,
-			    canid_t mask,
-			    void (*func)(struct sk_buff *, void *),
-			    void *data, char *ident);
+int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
+		    void (*func)(struct sk_buff *, void *),
+		    void *data, char *ident, struct sock *sk);
 
 extern void can_rx_unregister(struct net_device *dev, canid_t can_id,
 			      canid_t mask,
-- 
cgit v1.2.3


From 4e5b54f127426c82dc2816340c26d951a5bb3429 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 18 Dec 2016 14:35:45 +0100
Subject: drm: prevent double-(un)registration for connectors

If we're unlucky then the registration from a hotplugged connector
might race with the final registration step on driver load. And since
MST topology discover is asynchronous that's even somewhat likely.

v2: Also update the kerneldoc for @registered!

v3: Review from Chris:
- Improve kerneldoc for late_register/early_unregister callbacks.
- Use mutex_destroy.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sean Paul <seanpaul@chromium.org>
Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161218133545.2106-1-daniel.vetter@ffwll.ch
(cherry picked from commit e73ab00e9a0f1731f34d0620a9c55f5c30c4ad4e)
---
 include/drm/drm_connector.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index a9b95246e26e..045a97cbeba2 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -381,6 +381,8 @@ struct drm_connector_funcs {
 	 * core drm connector interfaces. Everything added from this callback
 	 * should be unregistered in the early_unregister callback.
 	 *
+	 * This is called while holding drm_connector->mutex.
+	 *
 	 * Returns:
 	 *
 	 * 0 on success, or a negative error code on failure.
@@ -395,6 +397,8 @@ struct drm_connector_funcs {
 	 * late_register(). It is called from drm_connector_unregister(),
 	 * early in the driver unload sequence to disable userspace access
 	 * before data structures are torndown.
+	 *
+	 * This is called while holding drm_connector->mutex.
 	 */
 	void (*early_unregister)(struct drm_connector *connector);
 
@@ -559,7 +563,6 @@ struct drm_cmdline_mode {
  * @interlace_allowed: can this connector handle interlaced modes?
  * @doublescan_allowed: can this connector handle doublescan?
  * @stereo_allowed: can this connector handle stereo modes?
- * @registered: is this connector exposed (registered) with userspace?
  * @modes: modes available on this connector (from fill_modes() + user)
  * @status: one of the drm_connector_status enums (connected, not, or unknown)
  * @probed_modes: list of modes derived directly from the display
@@ -607,6 +610,13 @@ struct drm_connector {
 
 	char *name;
 
+	/**
+	 * @mutex: Lock for general connector state, but currently only protects
+	 * @registered. Most of the connector state is still protected by the
+	 * mutex in &drm_mode_config.
+	 */
+	struct mutex mutex;
+
 	/**
 	 * @index: Compacted connector index, which matches the position inside
 	 * the mode_config.list for drivers not supporting hot-add/removing. Can
@@ -620,6 +630,10 @@ struct drm_connector {
 	bool interlace_allowed;
 	bool doublescan_allowed;
 	bool stereo_allowed;
+	/**
+	 * @registered: Is this connector exposed (registered) with userspace?
+	 * Protected by @mutex.
+	 */
 	bool registered;
 	struct list_head modes; /* list of modes on this connector */
 
-- 
cgit v1.2.3


From e6e7b48b295afa5a5ab440de0a94d9ad8b3ce2d0 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 12 Jan 2017 17:15:56 +0100
Subject: drm: Don't race connector registration

I was under the misconception that the sysfs dev stuff can be fully
set up, and then registered all in one step with device_add. That's
true for properties and property groups, but not for parents and child
devices. Those must be fully registered before you can register a
child.

Add a bit of tracking to make sure that asynchronous mst connector
hotplugging gets this right. For consistency we rely upon the implicit
barriers of the connector->mutex, which is taken anyway, to ensure
that at least either the connector or device registration call will
work out.

Mildly tested since I can't reliably reproduce this on my mst box
here.

Reported-by: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1484237756-2720-1-git-send-email-daniel.vetter@ffwll.ch
---
 include/drm/drmP.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 192016e2b518..9c4ee144b5f6 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -517,6 +517,7 @@ struct drm_device {
 	struct drm_minor *control;		/**< Control node */
 	struct drm_minor *primary;		/**< Primary node */
 	struct drm_minor *render;		/**< Render node */
+	bool registered;
 
 	/* currently active master for this device. Protected by master_mutex */
 	struct drm_master *master;
-- 
cgit v1.2.3


From 08d85f3ea99f1eeafc4e8507936190e86a16ee8c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 17 Jan 2017 16:00:48 +0000
Subject: irqdomain: Avoid activating interrupts more than once

Since commit f3b0946d629c ("genirq/msi: Make sure PCI MSIs are
activated early"), we can end-up activating a PCI/MSI twice (once
at allocation time, and once at startup time).

This is normally of no consequences, except that there is some
HW out there that may misbehave if activate is used more than once
(the GICv3 ITS, for example, uses the activate callback
to issue the MAPVI command, and the architecture spec says that
"If there is an existing mapping for the EventID-DeviceID
combination, behavior is UNPREDICTABLE").

While this could be worked around in each individual driver, it may
make more sense to tackle the issue at the core level. In order to
avoid getting in that situation, let's have a per-interrupt flag
to remember if we have already activated that interrupt or not.

Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early")
Reported-and-tested-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1484668848-24361-1-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index e79875574b39..39e3254e5769 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -184,6 +184,7 @@ struct irq_data {
  *
  * IRQD_TRIGGER_MASK		- Mask for the trigger type bits
  * IRQD_SETAFFINITY_PENDING	- Affinity setting is pending
+ * IRQD_ACTIVATED		- Interrupt has already been activated
  * IRQD_NO_BALANCING		- Balancing disabled for this IRQ
  * IRQD_PER_CPU			- Interrupt is per cpu
  * IRQD_AFFINITY_SET		- Interrupt affinity was set
@@ -202,6 +203,7 @@ struct irq_data {
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
 	IRQD_SETAFFINITY_PENDING	= (1 <<  8),
+	IRQD_ACTIVATED			= (1 <<  9),
 	IRQD_NO_BALANCING		= (1 << 10),
 	IRQD_PER_CPU			= (1 << 11),
 	IRQD_AFFINITY_SET		= (1 << 12),
@@ -312,6 +314,21 @@ static inline bool irqd_affinity_is_managed(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED;
 }
 
+static inline bool irqd_is_activated(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_ACTIVATED;
+}
+
+static inline void irqd_set_activated(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_ACTIVATED;
+}
+
+static inline void irqd_clr_activated(struct irq_data *d)
+{
+	__irqd_to_state(d) &= ~IRQD_ACTIVATED;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
-- 
cgit v1.2.3


From 94842b4fc4d6b1691cfc86c6f5251f299d27f4ba Mon Sep 17 00:00:00 2001
From: Pavel Belous <pavel.s.belous@gmail.com>
Date: Sat, 28 Jan 2017 22:53:28 +0300
Subject: net: ethtool: add support for 2500BaseT and 5000BaseT link modes

This patch introduce support for 2500BaseT and 5000BaseT link modes.
These modes are included in the new IEEE 802.3bz standard.

Signed-off-by: Pavel Belous <pavel.s.belous@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index f0db7788f887..3dc91a46e8b8 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1384,6 +1384,8 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_10000baseLR_Full_BIT	= 44,
 	ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT	= 45,
 	ETHTOOL_LINK_MODE_10000baseER_Full_BIT	= 46,
+	ETHTOOL_LINK_MODE_2500baseT_Full_BIT	= 47,
+	ETHTOOL_LINK_MODE_5000baseT_Full_BIT	= 48,
 
 
 	/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
@@ -1393,7 +1395,7 @@ enum ethtool_link_mode_bit_indices {
 	 */
 
 	__ETHTOOL_LINK_MODE_LAST
-	  = ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
+	  = ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name)	\
-- 
cgit v1.2.3


From 433e19cf33d34bb6751c874a9c00980552fe508c Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Sat, 28 Jan 2017 11:46:02 -0700
Subject: Drivers: hv: vmbus: finally fix hv_need_to_signal_on_read()

Commit a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in
hv_need_to_signal_on_read()")
added the proper mb(), but removed the test "prev_write_sz < pending_sz"
when making the signal decision.

As a result, the guest can signal the host unnecessarily,
and then the host can throttle the guest because the host
thinks the guest is buggy or malicious; finally the user
running stress test can perceive intermittent freeze of
the guest.

This patch brings back the test, and properly handles the
in-place consumption APIs used by NetVSC (see get_next_pkt_raw(),
put_pkt_raw() and commit_rd_index()).

Fixes: a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in
hv_need_to_signal_on_read()")

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reported-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Tested-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 42fe43fb0c80..183efde54269 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -128,6 +128,7 @@ struct hv_ring_buffer_info {
 	u32 ring_data_startoffset;
 	u32 priv_write_index;
 	u32 priv_read_index;
+	u32 cached_read_index;
 };
 
 /*
@@ -180,6 +181,19 @@ static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi)
 	return write;
 }
 
+static inline u32 hv_get_cached_bytes_to_write(
+	const struct hv_ring_buffer_info *rbi)
+{
+	u32 read_loc, write_loc, dsize, write;
+
+	dsize = rbi->ring_datasize;
+	read_loc = rbi->cached_read_index;
+	write_loc = rbi->ring_buffer->write_index;
+
+	write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
+		read_loc - write_loc;
+	return write;
+}
 /*
  * VMBUS version is 32 bit entity broken up into
  * two 16 bit quantities: major_number. minor_number.
@@ -1488,7 +1502,7 @@ hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
 
 static inline  void hv_signal_on_read(struct vmbus_channel *channel)
 {
-	u32 cur_write_sz;
+	u32 cur_write_sz, cached_write_sz;
 	u32 pending_sz;
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
 
@@ -1512,12 +1526,24 @@ static inline  void hv_signal_on_read(struct vmbus_channel *channel)
 
 	cur_write_sz = hv_get_bytes_to_write(rbi);
 
-	if (cur_write_sz >= pending_sz)
+	if (cur_write_sz < pending_sz)
+		return;
+
+	cached_write_sz = hv_get_cached_bytes_to_write(rbi);
+	if (cached_write_sz < pending_sz)
 		vmbus_setevent(channel);
 
 	return;
 }
 
+static inline void
+init_cached_read_index(struct vmbus_channel *channel)
+{
+	struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+	rbi->cached_read_index = rbi->ring_buffer->read_index;
+}
+
 /*
  * An API to support in-place processing of incoming VMBUS packets.
  */
@@ -1569,6 +1595,8 @@ static inline void put_pkt_raw(struct vmbus_channel *channel,
  * This call commits the read index and potentially signals the host.
  * Here is the pattern for using the "in-place" consumption APIs:
  *
+ * init_cached_read_index();
+ *
  * while (get_next_pkt_raw() {
  *	process the packet "in-place";
  *	put_pkt_raw();
-- 
cgit v1.2.3


From aaa59306b0b7e0ca4ba92cc04c5db101cbb1c096 Mon Sep 17 00:00:00 2001
From: CQ Tang <cq.tang@intel.com>
Date: Mon, 30 Jan 2017 09:39:52 -0800
Subject: iommu/vt-d: Fix some macros that are incorrectly specified in
 intel-iommu

Some of the macros are incorrect with wrong bit-shifts resulting in picking
the incorrect invalidation granularity. Incorrect Source-ID in extended
devtlb invalidation caused device side errors.

To: Joerg Roedel <joro@8bytes.org>
To: David Woodhouse <dwmw2@infradead.org>
Cc: iommu@lists.linux-foundation.org
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org
Cc: CQ Tang <cq.tang@intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>

Fixes: 2f26e0a9 ("iommu/vt-d: Add basic SVM PASID support")
Signed-off-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Tested-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d49e26c6cdc7..23e129ef6726 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -153,8 +153,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
 #define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
 #define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
-#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
-#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 3)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 3)
 #define DMA_TLB_READ_DRAIN (((u64)1) << 49)
 #define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
 #define DMA_TLB_DID(id)	(((u64)((id) & 0xffff)) << 32)
@@ -164,9 +164,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 
 /* INVALID_DESC */
 #define DMA_CCMD_INVL_GRANU_OFFSET  61
-#define DMA_ID_TLB_GLOBAL_FLUSH	(((u64)1) << 3)
-#define DMA_ID_TLB_DSI_FLUSH	(((u64)2) << 3)
-#define DMA_ID_TLB_PSI_FLUSH	(((u64)3) << 3)
+#define DMA_ID_TLB_GLOBAL_FLUSH	(((u64)1) << 4)
+#define DMA_ID_TLB_DSI_FLUSH	(((u64)2) << 4)
+#define DMA_ID_TLB_PSI_FLUSH	(((u64)3) << 4)
 #define DMA_ID_TLB_READ_DRAIN	(((u64)1) << 7)
 #define DMA_ID_TLB_WRITE_DRAIN	(((u64)1) << 6)
 #define DMA_ID_TLB_DID(id)	(((u64)((id & 0xffff) << 16)))
@@ -316,8 +316,8 @@ enum {
 #define QI_DEV_EIOTLB_SIZE	(((u64)1) << 11)
 #define QI_DEV_EIOTLB_GLOB(g)	((u64)g)
 #define QI_DEV_EIOTLB_PASID(p)	(((u64)p) << 32)
-#define QI_DEV_EIOTLB_SID(sid)	((u64)((sid) & 0xffff) << 32)
-#define QI_DEV_EIOTLB_QDEP(qd)	(((qd) & 0x1f) << 16)
+#define QI_DEV_EIOTLB_SID(sid)	((u64)((sid) & 0xffff) << 16)
+#define QI_DEV_EIOTLB_QDEP(qd)	((u64)((qd) & 0x1f) << 4)
 #define QI_DEV_EIOTLB_MAX_INVS	32
 
 #define QI_PGRP_IDX(idx)	(((u64)(idx)) << 55)
-- 
cgit v1.2.3


From 90427ef5d2a4b9a24079889bf16afdcdaebc4240 Mon Sep 17 00:00:00 2001
From: Dimitris Michailidis <dmichail@google.com>
Date: Mon, 30 Jan 2017 14:09:42 -0800
Subject: ipv6: fix flow labels when the traffic class is non-0

ip6_make_flowlabel() determines the flow label for IPv6 packets. It's
supposed to be passed a flow label, which it returns as is if non-0 and
in some other cases, otherwise it calculates a new value.

The problem is callers often pass a flowi6.flowlabel, which may also
contain traffic class bits. If the traffic class is non-0
ip6_make_flowlabel() mistakes the non-0 it gets as a flow label and
returns the whole thing. Thus it can return a 'flow label' longer than
20b and the low 20b of that is typically 0 resulting in packets with 0
label. Moreover, different packets of a flow may be labeled differently.
For a TCP flow with ECN non-payload and payload packets get different
labels as exemplified by this pair of consecutive packets:

(pure ACK)
Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2::
    0110 .... = Version: 6
    .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
    .... .... .... 0001 1100 1110 0100 1001 = Flow Label: 0x1ce49
    Payload Length: 32
    Next Header: TCP (6)

(payload)
Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2::
    0110 .... = Version: 6
    .... 0000 0010 .... .... .... .... .... = Traffic Class: 0x02 (DSCP: CS0, ECN: ECT(0))
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..10 .... .... .... .... .... = Explicit Congestion Notification: ECN-Capable Transport codepoint '10' (2)
    .... .... .... 0000 0000 0000 0000 0000 = Flow Label: 0x00000
    Payload Length: 688
    Next Header: TCP (6)

This patch allows ip6_make_flowlabel() to be passed more than just a
flow label and has it extract the part it really wants. This was simpler
than modifying the callers. With this patch packets like the above become

Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2::
    0110 .... = Version: 6
    .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
    .... .... .... 1010 1111 1010 0101 1110 = Flow Label: 0xafa5e
    Payload Length: 32
    Next Header: TCP (6)

Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2::
    0110 .... = Version: 6
    .... 0000 0010 .... .... .... .... .... = Traffic Class: 0x02 (DSCP: CS0, ECN: ECT(0))
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..10 .... .... .... .... .... = Explicit Congestion Notification: ECN-Capable Transport codepoint '10' (2)
    .... .... .... 1010 1111 1010 0101 1110 = Flow Label: 0xafa5e
    Payload Length: 688
    Next Header: TCP (6)

Signed-off-by: Dimitris Michailidis <dmichail@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7afe991e900e..dbf0abba33b8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -776,6 +776,11 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 {
 	u32 hash;
 
+	/* @flowlabel may include more than a flow label, eg, the traffic class.
+	 * Here we want only the flow label value.
+	 */
+	flowlabel &= IPV6_FLOWLABEL_MASK;
+
 	if (flowlabel ||
 	    net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF ||
 	    (!autolabel &&
-- 
cgit v1.2.3


From e26bfebdfc0d212d366de9990a096665d5c0209a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 31 Jan 2017 09:45:28 +0000
Subject: fscache: Fix dead object requeue

Under some circumstances, an fscache object can become queued such that it
fscache_object_work_func() can be called once the object is in the
OBJECT_DEAD state.  This results in the kernel oopsing when it tries to
invoke the handler for the state (which is hard coded to 0x2).

The way this comes about is something like the following:

 (1) The object dispatcher is processing a work state for an object.  This
     is done in workqueue context.

 (2) An out-of-band event comes in that isn't masked, causing the object to
     be queued, say EV_KILL.

 (3) The object dispatcher finishes processing the current work state on
     that object and then sees there's another event to process, so,
     without returning to the workqueue core, it processes that event too.
     It then follows the chain of events that initiates until we reach
     OBJECT_DEAD without going through a wait state (such as
     WAIT_FOR_CLEARANCE).

     At this point, object->events may be 0, object->event_mask will be 0
     and oob_event_mask will be 0.

 (4) The object dispatcher returns to the workqueue processor, and in due
     course, this sees that the object's work item is still queued and
     invokes it again.

 (5) The current state is a work state (OBJECT_DEAD), so the dispatcher
     jumps to it - resulting in an OOPS.

When I'm seeing this, the work state in (1) appears to have been either
LOOK_UP_OBJECT or CREATE_OBJECT (object->oob_table is
fscache_osm_lookup_oob).

The window for (2) is very small:

 (A) object->event_mask is cleared whilst the event dispatch process is
     underway - though there's no memory barrier to force this to the top
     of the function.

     The window, therefore is from the time the object was selected by the
     workqueue processor and made requeueable to the time the mask was
     cleared.

 (B) fscache_raise_event() will only queue the object if it manages to set
     the event bit and the corresponding event_mask bit was set.

     The enqueuement is then deferred slightly whilst we get a ref on the
     object and get the per-CPU variable for workqueue congestion.  This
     slight deferral slightly increases the probability by allowing extra
     time for the workqueue to make the item requeueable.

Handle this by giving the dead state a processor function and checking the
for the dead state address rather than seeing if the processor function is
address 0x2.  The dead state processor function can then set a flag to
indicate that it's occurred and give a warning if it occurs more than once
per object.

If this race occurs, an oops similar to the following is seen (note the RIP
value):

BUG: unable to handle kernel NULL pointer dereference at 0000000000000002
IP: [<0000000000000002>] 0x1
PGD 0
Oops: 0010 [#1] SMP
Modules linked in: ...
CPU: 17 PID: 16077 Comm: kworker/u48:9 Not tainted 3.10.0-327.18.2.el7.x86_64 #1
Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 12/27/2015
Workqueue: fscache_object fscache_object_work_func [fscache]
task: ffff880302b63980 ti: ffff880717544000 task.ti: ffff880717544000
RIP: 0010:[<0000000000000002>]  [<0000000000000002>] 0x1
RSP: 0018:ffff880717547df8  EFLAGS: 00010202
RAX: ffffffffa0368640 RBX: ffff880edf7a4480 RCX: dead000000200200
RDX: 0000000000000002 RSI: 00000000ffffffff RDI: ffff880edf7a4480
RBP: ffff880717547e18 R08: 0000000000000000 R09: dfc40a25cb3a4510
R10: dfc40a25cb3a4510 R11: 0000000000000400 R12: 0000000000000000
R13: ffff880edf7a4510 R14: ffff8817f6153400 R15: 0000000000000600
FS:  0000000000000000(0000) GS:ffff88181f420000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000002 CR3: 000000000194a000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
 ffffffffa0363695 ffff880edf7a4510 ffff88093f16f900 ffff8817faa4ec00
 ffff880717547e60 ffffffff8109d5db 00000000faa4ec18 0000000000000000
 ffff8817faa4ec18 ffff88093f16f930 ffff880302b63980 ffff88093f16f900
Call Trace:
 [<ffffffffa0363695>] ? fscache_object_work_func+0xa5/0x200 [fscache]
 [<ffffffff8109d5db>] process_one_work+0x17b/0x470
 [<ffffffff8109e4ac>] worker_thread+0x21c/0x400
 [<ffffffff8109e290>] ? rescuer_thread+0x400/0x400
 [<ffffffff810a5acf>] kthread+0xcf/0xe0
 [<ffffffff810a5a00>] ? kthread_create_on_node+0x140/0x140
 [<ffffffff816460d8>] ret_from_fork+0x58/0x90
 [<ffffffff810a5a00>] ? kthread_create_on_node+0x140/0x140

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeremy McNicoll <jeremymc@redhat.com>
Tested-by: Frank Sorenson <sorenson@redhat.com>
Tested-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fscache-cache.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 13ba552e6c09..4c467ef50159 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -360,6 +360,7 @@ struct fscache_object {
 #define FSCACHE_OBJECT_IS_AVAILABLE	5	/* T if object has become active */
 #define FSCACHE_OBJECT_RETIRED		6	/* T if object was retired on relinquishment */
 #define FSCACHE_OBJECT_KILLED_BY_CACHE	7	/* T if object was killed by the cache */
+#define FSCACHE_OBJECT_RUN_AFTER_DEAD	8	/* T if object has been dispatched after death */
 
 	struct list_head	cache_link;	/* link in cache->object_list */
 	struct hlist_node	cookie_link;	/* link in cookie->backing_objects */
-- 
cgit v1.2.3


From dd86e373e09fb16b83e8adf5c48c421a4ca76468 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Jan 2017 23:58:38 +0100
Subject: perf/x86/intel/rapl: Make package handling more robust

The package management code in RAPL relies on package mapping being
available before a CPU is started. This changed with:

  9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")

because the ACPI/BIOS information turned out to be unreliable, but that
left RAPL in broken state. This was not noticed because on a regular boot
all CPUs are online before RAPL is initialized.

A possible fix would be to reintroduce the mess which allocates a package
data structure in CPU prepare and when it turns out to already exist in
starting throw it away later in the CPU online callback. But that's a
horrible hack and not required at all because RAPL becomes functional for
perf only in the CPU online callback. That's correct because user space is
not yet informed about the CPU being onlined, so nothing caan rely on RAPL
being available on that particular CPU.

Move the allocation to the CPU online callback and simplify the hotplug
handling. At this point the package mapping is established and correct.

This also adds a missing check for available package data in the
event_init() function.

Reported-by: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")
Link: http://lkml.kernel.org/r/20170131230141.212593966@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuhotplug.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d936a0021839..8329f3dc592c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -10,7 +10,6 @@ enum cpuhp_state {
 	CPUHP_PERF_X86_PREPARE,
 	CPUHP_PERF_X86_UNCORE_PREP,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
-	CPUHP_PERF_X86_RAPL_PREP,
 	CPUHP_PERF_BFIN,
 	CPUHP_PERF_POWER,
 	CPUHP_PERF_SUPERH,
-- 
cgit v1.2.3


From fff4b87e594ad3d2e4f51e8d3d86a6f9d3d8b654 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Jan 2017 23:58:40 +0100
Subject: perf/x86/intel/uncore: Make package handling more robust

The package management code in uncore relies on package mapping being
available before a CPU is started. This changed with:

  9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")

because the ACPI/BIOS information turned out to be unreliable, but that
left uncore in broken state. This was not noticed because on a regular boot
all CPUs are online before uncore is initialized.

Move the allocation to the CPU online callback and simplify the hotplug
handling. At this point the package mapping is established and correct.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")
Link: http://lkml.kernel.org/r/20170131230141.377156255@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuhotplug.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8329f3dc592c..921acaaa1601 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -8,7 +8,6 @@ enum cpuhp_state {
 	CPUHP_CREATE_THREADS,
 	CPUHP_PERF_PREPARE,
 	CPUHP_PERF_X86_PREPARE,
-	CPUHP_PERF_X86_UNCORE_PREP,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
 	CPUHP_PERF_BFIN,
 	CPUHP_PERF_POWER,
@@ -85,7 +84,6 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_ARMADA_XP_STARTING,
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
-	CPUHP_AP_PERF_X86_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_STARTING,
 	CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
-- 
cgit v1.2.3


From 1a2a14444d32b89b28116daea86f63ced1716668 Mon Sep 17 00:00:00 2001
From: Dimitris Michailidis <dmichail@google.com>
Date: Tue, 31 Jan 2017 16:03:13 -0800
Subject: net: fix ndo_features_check/ndo_fix_features comment ordering

Commit cdba756f5803a2 ("net: move ndo_features_check() close to
ndo_start_xmit()") inadvertently moved the doc comment for
.ndo_fix_features instead of .ndo_features_check. Fix the comment
ordering.

Fixes: cdba756f5803a2 ("net: move ndo_features_check() close to ndo_start_xmit()")
Signed-off-by: Dimitris Michailidis <dmichail@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9bde9558b596..70ad0291d517 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -866,11 +866,15 @@ struct netdev_xdp {
  *	of useless work if you return NETDEV_TX_BUSY.
  *	Required; cannot be NULL.
  *
- * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
- *		netdev_features_t features);
- *	Adjusts the requested feature flags according to device-specific
- *	constraints, and returns the resulting flags. Must not modify
- *	the device state.
+ * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
+ *					   struct net_device *dev
+ *					   netdev_features_t features);
+ *	Called by core transmit path to determine if device is capable of
+ *	performing offload operations on a given packet. This is to give
+ *	the device an opportunity to implement any restrictions that cannot
+ *	be otherwise expressed by feature flags. The check is called with
+ *	the set of features that the stack has calculated and it returns
+ *	those the driver believes to be appropriate.
  *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
@@ -1028,6 +1032,12 @@ struct netdev_xdp {
  *	Called to release previously enslaved netdev.
  *
  *      Feature/offload setting functions.
+ * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
+ *		netdev_features_t features);
+ *	Adjusts the requested feature flags according to device-specific
+ *	constraints, and returns the resulting flags. Must not modify
+ *	the device state.
+ *
  * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
  *	Called to update device configuration to new features. Passed
  *	feature set might be less than what was returned by ndo_fix_features()).
@@ -1100,15 +1110,6 @@ struct netdev_xdp {
  *	Callback to use for xmit over the accelerated station. This
  *	is used in place of ndo_start_xmit on accelerated net
  *	devices.
- * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
- *					   struct net_device *dev
- *					   netdev_features_t features);
- *	Called by core transmit path to determine if device is capable of
- *	performing offload operations on a given packet. This is to give
- *	the device an opportunity to implement any restrictions that cannot
- *	be otherwise expressed by feature flags. The check is called with
- *	the set of features that the stack has calculated and it returns
- *	those the driver believes to be appropriate.
  * int (*ndo_set_tx_maxrate)(struct net_device *dev,
  *			     int queue_index, u32 maxrate);
  *	Called when a user wants to set a max-rate limitation of specific
-- 
cgit v1.2.3


From 71810db27c1c853b335675bee335d893bc3d324b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 3 Feb 2017 09:54:06 +0000
Subject: modversions: treat symbol CRCs as 32 bit quantities

The modversion symbol CRCs are emitted as ELF symbols, which allows us
to easily populate the kcrctab sections by relying on the linker to
associate each kcrctab slot with the correct value.

This has a couple of downsides:

 - Given that the CRCs are treated as memory addresses, we waste 4 bytes
   for each CRC on 64 bit architectures,

 - On architectures that support runtime relocation, a R_<arch>_RELATIVE
   relocation entry is emitted for each CRC value, which identifies it
   as a quantity that requires fixing up based on the actual runtime
   load offset of the kernel. This results in corrupted CRCs unless we
   explicitly undo the fixup (and this is currently being handled in the
   core module code)

 - Such runtime relocation entries take up 24 bytes of __init space
   each, resulting in a x8 overhead in [uncompressed] kernel size for
   CRCs.

Switching to explicit 32 bit values on 64 bit architectures fixes most
of these issues, given that 32 bit values are not treated as quantities
that require fixing up based on the actual runtime load offset.  Note
that on some ELF64 architectures [such as PPC64], these 32-bit values
are still emitted as [absolute] runtime relocatable quantities, even if
the value resolves to a build time constant.  Since relative relocations
are always resolved at build time, this patch enables MODULE_REL_CRCS on
powerpc when CONFIG_RELOCATABLE=y, which turns the absolute CRC
references into relative references into .rodata where the actual CRC
value is stored.

So redefine all CRC fields and variables as u32, and redefine the
__CRC_SYMBOL() macro for 64 bit builds to emit the CRC reference using
inline assembler (which is necessary since 64-bit C code cannot use
32-bit types to hold memory addresses, even if they are ultimately
resolved using values that do not exceed 0xffffffff).  To avoid
potential problems with legacy 32-bit architectures using legacy
toolchains, the equivalent C definition of the kcrctab entry is retained
for 32-bit architectures.

Note that this mostly reverts commit d4703aefdbc8 ("module: handle ppc64
relocating kcrctabs when CONFIG_RELOCATABLE=y")

Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/export.h | 11 ++++++-----
 include/linux/export.h       | 14 ++++++++++++++
 include/linux/module.h       | 14 +++++++-------
 3 files changed, 27 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 63554e9f6e0c..719db1968d81 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -9,18 +9,15 @@
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 8
 #endif
-#ifndef KCRC_ALIGN
-#define KCRC_ALIGN 8
-#endif
 #else
 #define __put .long
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 4
 #endif
+#endif
 #ifndef KCRC_ALIGN
 #define KCRC_ALIGN 4
 #endif
-#endif
 
 #ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
 #define KSYM(name) _##name
@@ -52,7 +49,11 @@ KSYM(__kstrtab_\name):
 	.section ___kcrctab\sec+\name,"a"
 	.balign KCRC_ALIGN
 KSYM(__kcrctab_\name):
-	__put KSYM(__crc_\name)
+#if defined(CONFIG_MODULE_REL_CRCS)
+	.long KSYM(__crc_\name) - .
+#else
+	.long KSYM(__crc_\name)
+#endif
 	.weak KSYM(__crc_\name)
 	.previous
 #endif
diff --git a/include/linux/export.h b/include/linux/export.h
index 2a0f61fbc731..7473fba6a60c 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -43,6 +43,13 @@ extern struct module __this_module;
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
+#if defined(CONFIG_MODULE_REL_CRCS)
+#define __CRC_SYMBOL(sym, sec)						\
+	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) " - .	\n"	\
+	    "	.previous					\n");
+#elif !defined(CONFIG_64BIT)
 #define __CRC_SYMBOL(sym, sec)						\
 	extern __visible void *__crc_##sym __attribute__((weak));	\
 	static const unsigned long __kcrctab_##sym			\
@@ -50,6 +57,13 @@ extern struct module __this_module;
 	__attribute__((section("___kcrctab" sec "+" #sym), used))	\
 	= (unsigned long) &__crc_##sym;
 #else
+#define __CRC_SYMBOL(sym, sec)						\
+	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.previous					\n");
+#endif
+#else
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
diff --git a/include/linux/module.h b/include/linux/module.h
index 7c84273d60b9..cc7cba219b20 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -346,7 +346,7 @@ struct module {
 
 	/* Exported symbols */
 	const struct kernel_symbol *syms;
-	const unsigned long *crcs;
+	const s32 *crcs;
 	unsigned int num_syms;
 
 	/* Kernel parameters. */
@@ -359,18 +359,18 @@ struct module {
 	/* GPL-only exported symbols. */
 	unsigned int num_gpl_syms;
 	const struct kernel_symbol *gpl_syms;
-	const unsigned long *gpl_crcs;
+	const s32 *gpl_crcs;
 
 #ifdef CONFIG_UNUSED_SYMBOLS
 	/* unused exported symbols. */
 	const struct kernel_symbol *unused_syms;
-	const unsigned long *unused_crcs;
+	const s32 *unused_crcs;
 	unsigned int num_unused_syms;
 
 	/* GPL-only, unused exported symbols. */
 	unsigned int num_unused_gpl_syms;
 	const struct kernel_symbol *unused_gpl_syms;
-	const unsigned long *unused_gpl_crcs;
+	const s32 *unused_gpl_crcs;
 #endif
 
 #ifdef CONFIG_MODULE_SIG
@@ -382,7 +382,7 @@ struct module {
 
 	/* symbols that will be GPL-only in the near future. */
 	const struct kernel_symbol *gpl_future_syms;
-	const unsigned long *gpl_future_crcs;
+	const s32 *gpl_future_crcs;
 	unsigned int num_gpl_future_syms;
 
 	/* Exception table */
@@ -523,7 +523,7 @@ struct module *find_module(const char *name);
 
 struct symsearch {
 	const struct kernel_symbol *start, *stop;
-	const unsigned long *crcs;
+	const s32 *crcs;
 	enum {
 		NOT_GPL_ONLY,
 		GPL_ONLY,
@@ -539,7 +539,7 @@ struct symsearch {
  */
 const struct kernel_symbol *find_symbol(const char *name,
 					struct module **owner,
-					const unsigned long **crc,
+					const s32 **crc,
 					bool gplok,
 					bool warn);
 
-- 
cgit v1.2.3


From 4b9eee96fcb361a5e16a8d2619825e8a048f81f7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 3 Feb 2017 09:54:07 +0000
Subject: module: unify absolute krctab definitions for 32-bit and 64-bit

The previous patch introduced a separate inline asm version of the
krcrctab declaration template for use with 64-bit architectures, which
cannot refer to ELF symbols using 32-bit quantities.

This declaration should be equivalent to the C one for 32-bit
architectures, but just in case - unify them in a separate patch, which
can simply be dropped if it turns out to break anything.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/export.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/export.h b/include/linux/export.h
index 7473fba6a60c..1a1dfdb2a5c6 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -49,13 +49,6 @@ extern struct module __this_module;
 	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
 	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) " - .	\n"	\
 	    "	.previous					\n");
-#elif !defined(CONFIG_64BIT)
-#define __CRC_SYMBOL(sym, sec)						\
-	extern __visible void *__crc_##sym __attribute__((weak));	\
-	static const unsigned long __kcrctab_##sym			\
-	__used								\
-	__attribute__((section("___kcrctab" sec "+" #sym), used))	\
-	= (unsigned long) &__crc_##sym;
 #else
 #define __CRC_SYMBOL(sym, sec)						\
 	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
-- 
cgit v1.2.3


From 29905b52fad0854351f57bab867647e4982285bf Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 2 Feb 2017 18:05:26 +0000
Subject: log2: make order_base_2() behave correctly on const input value zero

The function order_base_2() is defined (according to the comment block)
as returning zero on input zero, but subsequently passes the input into
roundup_pow_of_two(), which is explicitly undefined for input zero.

This has gone unnoticed until now, but optimization passes in GCC 7 may
produce constant folded function instances where a constant value of
zero is passed into order_base_2(), resulting in link errors against the
deliberately undefined '____ilog2_NaN'.

So update order_base_2() to adhere to its own documented interface.

[ See

     http://marc.info/?l=linux-kernel&m=147672952517795&w=2

  and follow-up discussion for more background. The gcc "optimization
  pass" is really just broken, but now the GCC trunk problem seems to
  have escaped out of just specially built daily images, so we need to
  work around it in mainline.    - Linus ]

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/log2.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/log2.h b/include/linux/log2.h
index fd7ff3d91e6a..ef3d4f67118c 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -203,6 +203,17 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
  *  ... and so on.
  */
 
-#define order_base_2(n) ilog2(roundup_pow_of_two(n))
+static inline __attribute_const__
+int __order_base_2(unsigned long n)
+{
+	return n > 1 ? ilog2(n - 1) + 1 : 0;
+}
 
+#define order_base_2(n)				\
+(						\
+	__builtin_constant_p(n) ? (		\
+		((n) == 0 || (n) == 1) ? 0 :	\
+		ilog2((n) - 1) + 1) :		\
+	__order_base_2(n)			\
+)
 #endif /* _LINUX_LOG2_H */
-- 
cgit v1.2.3


From a96dfddbcc04336bbed50dc2b24823e45e09e80c Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Fri, 3 Feb 2017 13:13:23 -0800
Subject: base/memory, hotplug: fix a kernel oops in show_valid_zones()

Reading a sysfs "memoryN/valid_zones" file leads to the following oops
when the first page of a range is not backed by struct page.
show_valid_zones() assumes that 'start_pfn' is always valid for
page_zone().

 BUG: unable to handle kernel paging request at ffffea017a000000
 IP: show_valid_zones+0x6f/0x160

This issue may happen on x86-64 systems with 64GiB or more memory since
their memory block size is bumped up to 2GiB.  [1] An example of such
systems is desribed below.  0x3240000000 is only aligned by 1GiB and
this memory block starts from 0x3200000000, which is not backed by
struct page.

 BIOS-e820: [mem 0x0000003240000000-0x000000603fffffff] usable

Since test_pages_in_a_zone() already checks holes, fix this issue by
extending this function to return 'valid_start' and 'valid_end' for a
given range.  show_valid_zones() then proceeds with the valid range.

[1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on
    large-memory x86-64 systems")'

Link: http://lkml.kernel.org/r/20170127222149.30893-3-toshi.kani@hpe.com
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Zhang Zhen <zhenzhang.zhang@huawei.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>	[4.4+]

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index c1784c0b4f35..134a2f69c21a 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -85,7 +85,8 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
 /* VM interface that may be used by firmware interface */
 extern int online_pages(unsigned long, unsigned long, int);
-extern int test_pages_in_a_zone(unsigned long, unsigned long);
+extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
+	unsigned long *valid_start, unsigned long *valid_end);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
 
 typedef void (*online_page_callback_t)(struct page *page);
-- 
cgit v1.2.3


From a1831bb9403720db6d4c033fe2d6bd0116dd28fe Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 1 Feb 2017 17:53:04 +0000
Subject: iommu/dma: Remove bogus dma_supported() implementation

Back when this was first written, dma_supported() was somewhat of a
murky mess, with subtly different interpretations being relied upon in
various places. The "does device X support DMA to address range Y?"
uses assuming Y to be physical addresses, which motivated the current
iommu_dma_supported() implementation and are alluded to in the comment
therein, have since been cleaned up, leaving only the far less ambiguous
"can device X drive address bits Y" usage internal to DMA API mask
setting. As such, there is no reason to keep a slightly misleading
callback which does nothing but duplicate the current default behaviour;
we already constrain IOVA allocations to the iommu_domain aperture where
necessary, so let's leave DMA mask business to architecture-specific
code where it belongs.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/dma-iommu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 3a846f9ec0fd..5725c94b1f12 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -67,7 +67,6 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
 		size_t size, enum dma_data_direction dir, unsigned long attrs);
 void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir, unsigned long attrs);
-int iommu_dma_supported(struct device *dev, u64 mask);
 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
 /* The DMA API isn't _quite_ the whole story, though... */
-- 
cgit v1.2.3


From 534766dfef999f7e7349bbd91cd19c1673792af3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 31 Jan 2017 16:58:42 +0100
Subject: iommu: Rename iommu_get_instance()

Rename the function to iommu_ops_from_fwnode(), because that
is what the function actually does. The new name is much
more descriptive about what the function does.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h    | 4 ++--
 include/linux/of_iommu.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0ff5111f6959..085e1f0e6c07 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -354,7 +354,7 @@ void iommu_fwspec_free(struct device *dev);
 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
 void iommu_register_instance(struct fwnode_handle *fwnode,
 			     const struct iommu_ops *ops);
-const struct iommu_ops *iommu_get_instance(struct fwnode_handle *fwnode);
+const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
 
 #else /* CONFIG_IOMMU_API */
 
@@ -590,7 +590,7 @@ static inline void iommu_register_instance(struct fwnode_handle *fwnode,
 }
 
 static inline
-const struct iommu_ops *iommu_get_instance(struct fwnode_handle *fwnode)
+const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 {
 	return NULL;
 }
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 6a7fc5051099..66fcbc949899 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -39,7 +39,7 @@ static inline void of_iommu_set_ops(struct device_node *np,
 
 static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
 {
-	return iommu_get_instance(&np->fwnode);
+	return iommu_ops_from_fwnode(&np->fwnode);
 }
 
 extern struct of_device_id __iommu_of_table;
-- 
cgit v1.2.3


From b0119e870837dcd15a207b4701542ebac5d19b45 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 1 Feb 2017 13:23:08 +0100
Subject: iommu: Introduce new 'struct iommu_device'

This struct represents one hardware iommu in the iommu core
code. For now it only has the iommu-ops associated with it,
but that will be extended soon.

The register/unregister interface is also added, as well as
making use of it in the Intel and AMD IOMMU drivers.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h |  2 ++
 include/linux/iommu.h       | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d49e26c6cdc7..99a65a397861 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -29,6 +29,7 @@
 #include <linux/dma_remapping.h>
 #include <linux/mmu_notifier.h>
 #include <linux/list.h>
+#include <linux/iommu.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 
@@ -440,6 +441,7 @@ struct intel_iommu {
 	struct irq_domain *ir_msi_domain;
 #endif
 	struct device	*iommu_dev; /* IOMMU-sysfs device */
+	struct iommu_device iommu;  /* IOMMU core code handle */
 	int		node;
 	u32		flags;      /* Software defined flags */
 };
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 085e1f0e6c07..900ddd212364 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -204,6 +204,26 @@ struct iommu_ops {
 	unsigned long pgsize_bitmap;
 };
 
+/**
+ * struct iommu_device - IOMMU core representation of one IOMMU hardware
+ *			 instance
+ * @list: Used by the iommu-core to keep a list of registered iommus
+ * @ops: iommu-ops for talking to this iommu
+ */
+struct iommu_device {
+	struct list_head list;
+	const struct iommu_ops *ops;
+};
+
+int  iommu_device_register(struct iommu_device *iommu);
+void iommu_device_unregister(struct iommu_device *iommu);
+
+static inline void iommu_device_set_ops(struct iommu_device *iommu,
+					const struct iommu_ops *ops)
+{
+	iommu->ops = ops;
+}
+
 #define IOMMU_GROUP_NOTIFY_ADD_DEVICE		1 /* Device added */
 #define IOMMU_GROUP_NOTIFY_DEL_DEVICE		2 /* Pre Device removed */
 #define IOMMU_GROUP_NOTIFY_BIND_DRIVER		3 /* Pre Driver bind */
@@ -361,6 +381,7 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
 struct iommu_ops {};
 struct iommu_group {};
 struct iommu_fwspec {};
+struct iommu_device {};
 
 static inline bool iommu_present(struct bus_type *bus)
 {
@@ -558,6 +579,20 @@ static inline void iommu_device_destroy(struct device *dev)
 {
 }
 
+static inline int  iommu_device_register(struct iommu_device *iommu)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_device_set_ops(struct iommu_device *iommu,
+					const struct iommu_ops *ops)
+{
+}
+
+static inline void iommu_device_unregister(struct iommu_device *iommu)
+{
+}
+
 static inline int iommu_device_link(struct device *dev, struct device *link)
 {
 	return -EINVAL;
-- 
cgit v1.2.3


From 39ab9555c24110671f8dc671311a26e5c985b592 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 1 Feb 2017 16:56:46 +0100
Subject: iommu: Add sysfs bindings for struct iommu_device

There is currently support for iommu sysfs bindings, but
those need to be implemented in the IOMMU drivers. Add a
more generic version of this by adding a struct device to
struct iommu_device and use that for the sysfs bindings.

Also convert the AMD and Intel IOMMU driver to make use of
it.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h |  1 -
 include/linux/iommu.h       | 33 ++++++++++++++++++---------------
 2 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 99a65a397861..3ba9b536387b 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -440,7 +440,6 @@ struct intel_iommu {
 	struct irq_domain *ir_domain;
 	struct irq_domain *ir_msi_domain;
 #endif
-	struct device	*iommu_dev; /* IOMMU-sysfs device */
 	struct iommu_device iommu;  /* IOMMU core code handle */
 	int		node;
 	u32		flags;      /* Software defined flags */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 900ddd212364..c578ca135bed 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -209,14 +209,21 @@ struct iommu_ops {
  *			 instance
  * @list: Used by the iommu-core to keep a list of registered iommus
  * @ops: iommu-ops for talking to this iommu
+ * @dev: struct device for sysfs handling
  */
 struct iommu_device {
 	struct list_head list;
 	const struct iommu_ops *ops;
+	struct device dev;
 };
 
 int  iommu_device_register(struct iommu_device *iommu);
 void iommu_device_unregister(struct iommu_device *iommu);
+int  iommu_device_sysfs_add(struct iommu_device *iommu,
+			    struct device *parent,
+			    const struct attribute_group **groups,
+			    const char *fmt, ...) __printf(4, 5);
+void iommu_device_sysfs_remove(struct iommu_device *iommu);
 
 static inline void iommu_device_set_ops(struct iommu_device *iommu,
 					const struct iommu_ops *ops)
@@ -287,10 +294,6 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
 extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
-struct device *iommu_device_create(struct device *parent, void *drvdata,
-				   const struct attribute_group **groups,
-				   const char *fmt, ...) __printf(4, 5);
-void iommu_device_destroy(struct device *dev);
 int iommu_device_link(struct device *dev, struct device *link);
 void iommu_device_unlink(struct device *dev, struct device *link);
 
@@ -567,29 +570,29 @@ static inline int iommu_domain_set_attr(struct iommu_domain *domain,
 	return -EINVAL;
 }
 
-static inline struct device *iommu_device_create(struct device *parent,
-					void *drvdata,
-					const struct attribute_group **groups,
-					const char *fmt, ...)
+static inline int  iommu_device_register(struct iommu_device *iommu)
 {
-	return ERR_PTR(-ENODEV);
+	return -ENODEV;
 }
 
-static inline void iommu_device_destroy(struct device *dev)
+static inline void iommu_device_set_ops(struct iommu_device *iommu,
+					const struct iommu_ops *ops)
 {
 }
 
-static inline int  iommu_device_register(struct iommu_device *iommu)
+static inline void iommu_device_unregister(struct iommu_device *iommu)
 {
-	return -ENODEV;
 }
 
-static inline void iommu_device_set_ops(struct iommu_device *iommu,
-					const struct iommu_ops *ops)
+static inline int  iommu_device_sysfs_add(struct iommu_device *iommu,
+					  struct device *parent,
+					  const struct attribute_group **groups,
+					  const char *fmt, ...)
 {
+	return -ENODEV;
 }
 
-static inline void iommu_device_unregister(struct iommu_device *iommu)
+static inline void iommu_device_sysfs_remove(struct iommu_device *iommu)
 {
 }
 
-- 
cgit v1.2.3


From e3d10af1128b6bc394f21656ff13753130f3c107 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 1 Feb 2017 17:23:22 +0100
Subject: iommu: Make iommu_device_link/unlink take a struct iommu_device

This makes the interface more consistent with
iommu_device_sysfs_add/remove.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c578ca135bed..bae3cfc8b4a3 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -224,6 +224,8 @@ int  iommu_device_sysfs_add(struct iommu_device *iommu,
 			    const struct attribute_group **groups,
 			    const char *fmt, ...) __printf(4, 5);
 void iommu_device_sysfs_remove(struct iommu_device *iommu);
+int  iommu_device_link(struct iommu_device   *iommu, struct device *link);
+void iommu_device_unlink(struct iommu_device *iommu, struct device *link);
 
 static inline void iommu_device_set_ops(struct iommu_device *iommu,
 					const struct iommu_ops *ops)
@@ -294,8 +296,6 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
 extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
-int iommu_device_link(struct device *dev, struct device *link);
-void iommu_device_unlink(struct device *dev, struct device *link);
 
 /* Window handling function prototypes */
 extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
-- 
cgit v1.2.3


From c73e1ac8b2bc6ab18d9f9a96b17ee7388b49a0c0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 7 Feb 2017 18:18:46 +0100
Subject: iommu: Add iommu_device_set_fwnode() interface

Allow to store a fwnode in 'struct iommu_device';

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bae3cfc8b4a3..626c935edee1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -214,6 +214,7 @@ struct iommu_ops {
 struct iommu_device {
 	struct list_head list;
 	const struct iommu_ops *ops;
+	struct fwnode_handle *fwnode;
 	struct device dev;
 };
 
@@ -233,6 +234,12 @@ static inline void iommu_device_set_ops(struct iommu_device *iommu,
 	iommu->ops = ops;
 }
 
+static inline void iommu_device_set_fwnode(struct iommu_device *iommu,
+					   struct fwnode_handle *fwnode)
+{
+	iommu->fwnode = fwnode;
+}
+
 #define IOMMU_GROUP_NOTIFY_ADD_DEVICE		1 /* Device added */
 #define IOMMU_GROUP_NOTIFY_DEL_DEVICE		2 /* Pre Device removed */
 #define IOMMU_GROUP_NOTIFY_BIND_DRIVER		3 /* Pre Driver bind */
@@ -580,6 +587,11 @@ static inline void iommu_device_set_ops(struct iommu_device *iommu,
 {
 }
 
+static inline void iommu_device_set_fwnode(struct iommu_device *iommu,
+					   struct fwnode_handle *fwnode)
+{
+}
+
 static inline void iommu_device_unregister(struct iommu_device *iommu)
 {
 }
-- 
cgit v1.2.3


From d0f6f5832603931b0a8da044fb9abe8289e201ee Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 2 Feb 2017 12:19:12 +0100
Subject: iommu: Remove iommu_register_instance interface

And also move its remaining functionality to
iommu_device_register() and 'struct iommu_device'.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: devicetree@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h    | 7 -------
 include/linux/of_iommu.h | 6 ------
 2 files changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 626c935edee1..9e82fc83765e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -382,8 +382,6 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
 		      const struct iommu_ops *ops);
 void iommu_fwspec_free(struct device *dev);
 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
-void iommu_register_instance(struct fwnode_handle *fwnode,
-			     const struct iommu_ops *ops);
 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
 
 #else /* CONFIG_IOMMU_API */
@@ -634,11 +632,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids,
 	return -ENODEV;
 }
 
-static inline void iommu_register_instance(struct fwnode_handle *fwnode,
-					   const struct iommu_ops *ops)
-{
-}
-
 static inline
 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 {
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 66fcbc949899..fc4add39361a 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -31,12 +31,6 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 #endif	/* CONFIG_OF_IOMMU */
 
-static inline void of_iommu_set_ops(struct device_node *np,
-				    const struct iommu_ops *ops)
-{
-	iommu_register_instance(&np->fwnode, ops);
-}
-
 static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
 {
 	return iommu_ops_from_fwnode(&np->fwnode);
-- 
cgit v1.2.3