From fe052a1810ec4687ee7d606290561af504047707 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Mon, 29 Jun 2015 13:08:19 +0300
Subject: target: Use struct t10_pi_tuple

Its not a good idea to keep target specific definition of
the same t10-pi tuple.

(Fix v4.2-rc1 patch fuzz - nab)

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 17ae2d6a4891..a6816444d81b 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -6,6 +6,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
 #include <linux/percpu_ida.h>
+#include <linux/t10-pi.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 
@@ -426,12 +427,6 @@ enum target_core_dif_check {
 	TARGET_DIF_CHECK_REFTAG = 0x1 << 2,
 };
 
-struct se_dif_v1_tuple {
-	__be16			guard_tag;
-	__be16			app_tag;
-	__be32			ref_tag;
-};
-
 /* for sam_task_attr */
 #define TCM_SIMPLE_TAG	0x20
 #define TCM_HEAD_TAG	0x21
-- 
cgit v1.2.3


From e986a35aba67558381d5cec59a14c4d0b20f0d47 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Thu, 18 Jun 2015 11:43:38 +0200
Subject: tcm_loop: Send I_T_NEXUS_LOSS_OCCURRED UA

If the virtual SAS link is set to 'offline' we should be
queueing an I_T_NEXUS_LOSS_OCCURRED UA.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_fabric.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 18afef91b447..69355feabd1d 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -152,6 +152,7 @@ int	transport_generic_handle_tmr(struct se_cmd *);
 void	transport_generic_request_failure(struct se_cmd *, sense_reason_t);
 void	__target_execute_cmd(struct se_cmd *);
 int	transport_lookup_tmr_lun(struct se_cmd *, u64);
+void	core_allocate_nexus_loss_ua(struct se_node_acl *acl);
 
 struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg,
 		unsigned char *);
-- 
cgit v1.2.3


From 7708c1656552ddd60b9b9df3a9ee156acd1801ba Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 8 Jul 2015 17:58:52 +0300
Subject: scsi: Move sense handling routines to scsi_common

Sense data handling is also done in the target stack.
Hence, move sense handling routines to scsi_common so
the target will be able to use them as well.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/scsi/scsi_common.h | 5 +++++
 include/scsi/scsi_eh.h     | 7 +------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h
index 676b03b78e57..156d673db900 100644
--- a/include/scsi/scsi_common.h
+++ b/include/scsi/scsi_common.h
@@ -61,4 +61,9 @@ static inline bool scsi_sense_valid(const struct scsi_sense_hdr *sshdr)
 extern bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
 				 struct scsi_sense_hdr *sshdr);
 
+extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq);
+extern void scsi_set_sense_information(u8 *buf, u64 info);
+extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
+				       int desc_type);
+
 #endif /* _SCSI_COMMON_H_ */
diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h
index 4942710ef720..dbb8c640e26f 100644
--- a/include/scsi/scsi_eh.h
+++ b/include/scsi/scsi_eh.h
@@ -4,6 +4,7 @@
 #include <linux/scatterlist.h>
 
 #include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_common.h>
 struct scsi_device;
 struct Scsi_Host;
 
@@ -21,15 +22,9 @@ static inline bool scsi_sense_is_deferred(const struct scsi_sense_hdr *sshdr)
 	return ((sshdr->response_code >= 0x70) && (sshdr->response_code & 1));
 }
 
-extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
-				       int desc_type);
-
 extern int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len,
 				   u64 * info_out);
 
-extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq);
-extern void scsi_set_sense_information(u8 *buf, u64 info);
-
 extern int scsi_ioctl_reset(struct scsi_device *, int __user *);
 
 struct scsi_eh_save {
-- 
cgit v1.2.3


From f5a8b3a796db01b639435515b3adc003b9f27387 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 15 Jul 2015 10:55:37 +0300
Subject: scsi: Protect against buffer possible overflow in
 scsi_set_sense_information

Make sure that the input sense buffer has sufficient length
to fit the information descriptor (12 additional bytes).
Modify scsi_set_sense_information to receive the sense buffer
length and adjust its callers scsi target and libata.

(Fix patch fuzz in scsi_set_sense_information - nab)

Reported-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/scsi/scsi_common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h
index 156d673db900..11571b2a831e 100644
--- a/include/scsi/scsi_common.h
+++ b/include/scsi/scsi_common.h
@@ -62,7 +62,7 @@ extern bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
 				 struct scsi_sense_hdr *sshdr);
 
 extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq);
-extern void scsi_set_sense_information(u8 *buf, u64 info);
+int scsi_set_sense_information(u8 *buf, int buf_len, u64 info);
 extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
 				       int desc_type);
 
-- 
cgit v1.2.3


From 4e4937e8aefde8d49340e803ebbedcdf4b43e5f0 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Thu, 16 Jul 2015 10:28:05 +0300
Subject: target: Return descriptor format sense data in case the LU spans
 64bit sectors

In case a LU spans 64bit sectors, fixed size sense data information
field is only 32 bits which means the sector information will be truncated.

Thus, if the LU spans 64bit sectors, use descriptor format sense data to
correctly report sector information.

Reported-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_backend.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 1e5c8f949bae..56cf8e485ef2 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -93,4 +93,6 @@ bool	target_lun_is_rdonly(struct se_cmd *);
 sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
 	sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
+bool target_sense_desc_format(struct se_device *dev);
+
 #endif /* TARGET_CORE_BACKEND_H */
-- 
cgit v1.2.3


From 53c46995b6ed7cb32a28bce1f4a25065baf65d8f Mon Sep 17 00:00:00 2001
From: Chris Zankel <czankel@purestorage.com>
Date: Mon, 20 Jul 2015 16:29:49 -0700
Subject: target: remove unused lun_flags field from se_lun

The lun_flags field is not used, so drop it.

Signed-off-by: Chris Zankel <czankel@purestorage.com>
Signed-off-by: Spencer Baugh <spencer.baugh@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index a6816444d81b..d57a0cbf265b 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -680,7 +680,6 @@ struct se_lun {
 #define SE_LUN_LINK_MAGIC			0xffff7771
 	u32			lun_link_magic;
 	u32			lun_access;
-	u32			lun_flags;
 	u32			lun_index;
 
 	/* RELATIVE TARGET PORT IDENTIFER */
-- 
cgit v1.2.3


From b6a54b8d895648d915c7e8308f3d3e6bf2505d69 Mon Sep 17 00:00:00 2001
From: Chris Zankel <czankel@purestorage.com>
Date: Mon, 20 Jul 2015 16:29:50 -0700
Subject: target: remove initiatorname field in se_acl_lun

From: Chris Zankel <czankel@purestorage.com>

The initiatorname field in se_acl_lun is only a copy of the same field
in se_node_acl, so remove it and use the version in se_node_acl where
needed (it's actually only used for pr_debug)

Signed-off-by: Chris Zankel <czankel@purestorage.com>
Signed-off-by: Spencer Baugh <spencer.baugh@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index d57a0cbf265b..95e65bd31e05 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -593,7 +593,6 @@ struct se_ml_stat_grps {
 };
 
 struct se_lun_acl {
-	char			initiatorname[TRANSPORT_IQN_LEN];
 	u64			mapped_lun;
 	struct se_node_acl	*se_lun_nacl;
 	struct se_lun		*se_lun;
-- 
cgit v1.2.3


From 9c31820b6ab93ec298ad98abeee49759b5f5958c Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Tue, 21 Jul 2015 17:45:32 -0700
Subject: target: Remove cmd->se_ordered_id (unused except debug log lines)

For every command, we set se_ordered_id by doing atomic_inc_return on
dev->dev_ordered_id for the corresponding device.  However, the only
places this value gets used are in pr_debug() calls, which doesn't
seem worth an extra atomic op per IO.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 95e65bd31e05..3afd8dba54e8 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -454,7 +454,6 @@ struct se_cmd {
 	unsigned		unknown_data_length:1;
 	/* See se_cmd_flags_table */
 	u32			se_cmd_flags;
-	u32			se_ordered_id;
 	/* Total size in bytes associated with command */
 	u32			data_length;
 	u32			residual_count;
@@ -744,7 +743,6 @@ struct se_device {
 	atomic_long_t		write_bytes;
 	/* Active commands on this virtual SE device */
 	atomic_t		simple_cmds;
-	atomic_t		dev_ordered_id;
 	atomic_t		dev_ordered_sync;
 	atomic_t		dev_qf_count;
 	u32			export_count;
-- 
cgit v1.2.3


From 915270c3cacfc80cb3fe7fdd8130439039a85bbb Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Tue, 21 Jul 2015 17:45:33 -0700
Subject: target: Shrink struct se_cmd by rearranging fields

On x86-64, these changes reduce the struct size from 528 bytes to 496
bytes.  We save a cacheline and get under 512 bytes for better packing.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Acked-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 3afd8dba54e8..ac9bf1c0e42d 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -439,6 +439,9 @@ struct se_cmd {
 	u8			scsi_asc;
 	u8			scsi_ascq;
 	u16			scsi_sense_length;
+	unsigned		cmd_wait_set:1;
+	unsigned		unknown_data_length:1;
+	bool			state_active:1;
 	u64			tag; /* SAM command identifier aka task tag */
 	/* Delay for ALUA Active/NonOptimized state access in milliseconds */
 	int			alua_nonop_delay;
@@ -450,8 +453,6 @@ struct se_cmd {
 	unsigned int		map_tag;
 	/* Transport protocol dependent state, see transport_state_table */
 	enum transport_state_table t_state;
-	unsigned		cmd_wait_set:1;
-	unsigned		unknown_data_length:1;
 	/* See se_cmd_flags_table */
 	u32			se_cmd_flags;
 	/* Total size in bytes associated with command */
@@ -471,7 +472,6 @@ struct se_cmd {
 	struct se_tmr_req	*se_tmr_req;
 	struct list_head	se_cmd_list;
 	struct completion	cmd_wait_comp;
-	struct kref		cmd_kref;
 	const struct target_core_fabric_ops *se_tfo;
 	sense_reason_t		(*execute_cmd)(struct se_cmd *);
 	sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool);
@@ -491,6 +491,7 @@ struct se_cmd {
 #define CMD_T_REQUEST_STOP	(1 << 8)
 #define CMD_T_BUSY		(1 << 9)
 	spinlock_t		t_state_lock;
+	struct kref		cmd_kref;
 	struct completion	t_transport_stop_comp;
 
 	struct work_struct	work;
@@ -503,8 +504,10 @@ struct se_cmd {
 	struct scatterlist	*t_bidi_data_sg;
 	unsigned int		t_bidi_data_nents;
 
+	/* Used for lun->lun_ref counting */
+	int			lun_ref_active;
+
 	struct list_head	state_list;
-	bool			state_active;
 
 	/* old task stop completion, consider merging with some of the above */
 	struct completion	task_stop_comp;
@@ -512,20 +515,17 @@ struct se_cmd {
 	/* backend private data */
 	void			*priv;
 
-	/* Used for lun->lun_ref counting */
-	int			lun_ref_active;
-
 	/* DIF related members */
 	enum target_prot_op	prot_op;
 	enum target_prot_type	prot_type;
 	u8			prot_checks;
+	bool			prot_pto;
 	u32			prot_length;
 	u32			reftag_seed;
 	struct scatterlist	*t_prot_sg;
 	unsigned int		t_prot_nents;
 	sense_reason_t		pi_err;
 	sector_t		bad_sector;
-	bool			prot_pto;
 };
 
 struct se_ua {
-- 
cgit v1.2.3


From a6415cddc4e6e1675a5648e7785aef716980c90c Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Sat, 1 Aug 2015 00:10:12 -0700
Subject: iscsi-target: Add tpg_enabled_sendtargets for disabled discovery

This patch adds a new tpg_enabled_sendtargets configfs attribute
to allow in-band sendtargets discovery information to include
target-portal-groups (TPGs) in !TPG_STATE_ACTIVE state.

This functionality is useful for clustered iSCSI targets, where
TPGTs handled on remote cluster nodes should be advertised in
the SendTargets response.

By default, this new attribute retains the default behaviour of
existing code which to ignore portal-groups in !TPG_STATE_ACTIVE
state.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/iscsi/iscsi_target_core.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 34117b8b72e4..ab465858f462 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -62,6 +62,8 @@
 /* T10 protection information disabled by default */
 #define TA_DEFAULT_T10_PI		0
 #define TA_DEFAULT_FABRIC_PROT_TYPE	0
+/* TPG status needs to be enabled to return sendtargets discovery endpoint info */
+#define TA_DEFAULT_TPG_ENABLED_SENDTARGETS 1
 
 #define ISCSI_IOV_DATA_BUFFER		5
 
@@ -763,6 +765,7 @@ struct iscsi_tpg_attrib {
 	u32			default_erl;
 	u8			t10_pi;
 	u32			fabric_prot_type;
+	u32			tpg_enabled_sendtargets;
 	struct iscsi_portal_group *tpg;
 };
 
-- 
cgit v1.2.3


From 109e2381749c1cfd94a0d22b2b54142539024973 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Thu, 23 Jul 2015 14:53:32 -0700
Subject: target: Drop iSCSI use of mutex around max_cmd_sn increment

In a performance profile, taking a mutex in iscsit_increment_maxcmdsn()
shows up very high.  However taking a mutex around "sess->max_cmd_sn += 1"
seems pretty silly: we're not serializing against other contexts in
any useful way.

I did a quick audit and there don't appear to be any other places that
use max_cmd_sn within the mutex more than once, so this lock can't be
providing any useful serialization.

(Get correct values for logging - fix whitespace damage)

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Spencer Baugh <sbaugh@catern.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/iscsi/iscsi_target_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index ab465858f462..d4616ef12e04 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -637,7 +637,7 @@ struct iscsi_session {
 	/* session wide counter: expected command sequence number */
 	u32			exp_cmd_sn;
 	/* session wide counter: maximum allowed command sequence number */
-	u32			max_cmd_sn;
+	atomic_t		max_cmd_sn;
 	struct list_head	sess_ooo_cmdsn_list;
 
 	/* LIO specific session ID */
-- 
cgit v1.2.3


From 17e8351a77397e8a83727eb17e3a3e9b8ab5257a Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 24 Jul 2015 08:12:54 +0200
Subject: thermal: consistently use int for temperatures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The thermal code uses int, long and unsigned long for temperatures
in different places.

Using an unsigned type limits the thermal framework to positive
temperatures without need. Also several drivers currently will report
temperatures near UINT_MAX for temperatures below 0°C. This will probably
immediately shut the machine down due to overtemperature if started below
0°C.

'long' is 64bit on several architectures. This is not needed since INT_MAX °mC
is above the melting point of all known materials.

Consistently use a plain 'int' for temperatures throughout the thermal code and
the drivers. This only changes the places in the drivers where the temperature
is passed around as pointer, when drivers internally use another type this is
not changed.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Reviewed-by: Lukasz Majewski <l.majewski@samsung.com>
Reviewed-by: Darren Hart <dvhart@linux.intel.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Peter Feuerer <peter@piie.net>
Cc: Punit Agrawal <punit.agrawal@arm.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Jean Delvare <jdelvare@suse.de>
Cc: Peter Feuerer <peter@piie.net>
Cc: Heiko Stuebner <heiko@sntech.de>
Cc: Lukasz Majewski <l.majewski@samsung.com>
Cc: Stephen Warren <swarren@wwwdotorg.org>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: linux-acpi@vger.kernel.org
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-omap@vger.kernel.org
Cc: linux-samsung-soc@vger.kernel.org
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: lm-sensors@lm-sensors.org
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 include/linux/thermal.h                        | 26 +++++++++++---------------
 include/trace/events/thermal_power_allocator.h |  6 +++---
 2 files changed, 14 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 037e9df2f610..17292fee8686 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -92,23 +92,19 @@ struct thermal_zone_device_ops {
 		     struct thermal_cooling_device *);
 	int (*unbind) (struct thermal_zone_device *,
 		       struct thermal_cooling_device *);
-	int (*get_temp) (struct thermal_zone_device *, unsigned long *);
+	int (*get_temp) (struct thermal_zone_device *, int *);
 	int (*get_mode) (struct thermal_zone_device *,
 			 enum thermal_device_mode *);
 	int (*set_mode) (struct thermal_zone_device *,
 		enum thermal_device_mode);
 	int (*get_trip_type) (struct thermal_zone_device *, int,
 		enum thermal_trip_type *);
-	int (*get_trip_temp) (struct thermal_zone_device *, int,
-			      unsigned long *);
-	int (*set_trip_temp) (struct thermal_zone_device *, int,
-			      unsigned long);
-	int (*get_trip_hyst) (struct thermal_zone_device *, int,
-			      unsigned long *);
-	int (*set_trip_hyst) (struct thermal_zone_device *, int,
-			      unsigned long);
-	int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *);
-	int (*set_emul_temp) (struct thermal_zone_device *, unsigned long);
+	int (*get_trip_temp) (struct thermal_zone_device *, int, int *);
+	int (*set_trip_temp) (struct thermal_zone_device *, int, int);
+	int (*get_trip_hyst) (struct thermal_zone_device *, int, int *);
+	int (*set_trip_hyst) (struct thermal_zone_device *, int, int);
+	int (*get_crit_temp) (struct thermal_zone_device *, int *);
+	int (*set_emul_temp) (struct thermal_zone_device *, int);
 	int (*get_trend) (struct thermal_zone_device *, int,
 			  enum thermal_trend *);
 	int (*notify) (struct thermal_zone_device *, int,
@@ -332,9 +328,9 @@ struct thermal_genl_event {
  *		   temperature.
  */
 struct thermal_zone_of_device_ops {
-	int (*get_temp)(void *, long *);
+	int (*get_temp)(void *, int *);
 	int (*get_trend)(void *, long *);
-	int (*set_emul_temp)(void *, unsigned long);
+	int (*set_emul_temp)(void *, int);
 };
 
 /**
@@ -406,7 +402,7 @@ thermal_of_cooling_device_register(struct device_node *np, char *, void *,
 				   const struct thermal_cooling_device_ops *);
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
 struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name);
-int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp);
+int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
 
 int get_tz_trend(struct thermal_zone_device *, int);
 struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
@@ -457,7 +453,7 @@ static inline struct thermal_zone_device *thermal_zone_get_zone_by_name(
 		const char *name)
 { return ERR_PTR(-ENODEV); }
 static inline int thermal_zone_get_temp(
-		struct thermal_zone_device *tz, unsigned long *temp)
+		struct thermal_zone_device *tz, int *temp)
 { return -ENODEV; }
 static inline int get_tz_trend(struct thermal_zone_device *tz, int trip)
 { return -ENODEV; }
diff --git a/include/trace/events/thermal_power_allocator.h b/include/trace/events/thermal_power_allocator.h
index 12e1321c4e0c..5afae8fe3795 100644
--- a/include/trace/events/thermal_power_allocator.h
+++ b/include/trace/events/thermal_power_allocator.h
@@ -11,7 +11,7 @@ TRACE_EVENT(thermal_power_allocator,
 		 u32 total_req_power, u32 *granted_power,
 		 u32 total_granted_power, size_t num_actors,
 		 u32 power_range, u32 max_allocatable_power,
-		 unsigned long current_temp, s32 delta_temp),
+		 int current_temp, s32 delta_temp),
 	TP_ARGS(tz, req_power, total_req_power, granted_power,
 		total_granted_power, num_actors, power_range,
 		max_allocatable_power, current_temp, delta_temp),
@@ -24,7 +24,7 @@ TRACE_EVENT(thermal_power_allocator,
 		__field(size_t,        num_actors               )
 		__field(u32,           power_range              )
 		__field(u32,           max_allocatable_power    )
-		__field(unsigned long, current_temp             )
+		__field(int,           current_temp             )
 		__field(s32,           delta_temp               )
 	),
 	TP_fast_assign(
@@ -42,7 +42,7 @@ TRACE_EVENT(thermal_power_allocator,
 		__entry->delta_temp = delta_temp;
 	),
 
-	TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%lu delta_temperature=%d",
+	TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%d delta_temperature=%d",
 		__entry->tz_id,
 		__print_array(__get_dynamic_array(req_power),
                               __entry->num_actors, 4),
-- 
cgit v1.2.3


From 8025e5ddf9c1cac0e632dad49a63abf7848b78cb Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 13 Jul 2015 11:55:44 -0300
Subject: [media] mm: Provide new get_vaddr_frames() helper

Provide new function get_vaddr_frames().  This function maps virtual
addresses from given start and fills given array with page frame numbers of
the corresponding pages. If given start belongs to a normal vma, the function
grabs reference to each of the pages to pin them in memory. If start
belongs to VM_IO | VM_PFNMAP vma, we don't touch page structures. Caller
must make sure pfns aren't reused for anything else while he is using
them.

This function is created for various drivers to simplify handling of
their buffers.

Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/linux/mm.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2e872f92dbac..79ad29a8a60a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -20,6 +20,7 @@
 #include <linux/shrinker.h>
 #include <linux/resource.h>
 #include <linux/page_ext.h>
+#include <linux/err.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1198,6 +1199,49 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 		    int write, int force, struct page **pages);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
+
+/* Container for pinned pfns / pages */
+struct frame_vector {
+	unsigned int nr_allocated;	/* Number of frames we have space for */
+	unsigned int nr_frames;	/* Number of frames stored in ptrs array */
+	bool got_ref;		/* Did we pin pages by getting page ref? */
+	bool is_pfns;		/* Does array contain pages or pfns? */
+	void *ptrs[0];		/* Array of pinned pfns / pages. Use
+				 * pfns_vector_pages() or pfns_vector_pfns()
+				 * for access */
+};
+
+struct frame_vector *frame_vector_create(unsigned int nr_frames);
+void frame_vector_destroy(struct frame_vector *vec);
+int get_vaddr_frames(unsigned long start, unsigned int nr_pfns,
+		     bool write, bool force, struct frame_vector *vec);
+void put_vaddr_frames(struct frame_vector *vec);
+int frame_vector_to_pages(struct frame_vector *vec);
+void frame_vector_to_pfns(struct frame_vector *vec);
+
+static inline unsigned int frame_vector_count(struct frame_vector *vec)
+{
+	return vec->nr_frames;
+}
+
+static inline struct page **frame_vector_pages(struct frame_vector *vec)
+{
+	if (vec->is_pfns) {
+		int err = frame_vector_to_pages(vec);
+
+		if (err)
+			return ERR_PTR(err);
+	}
+	return (struct page **)(vec->ptrs);
+}
+
+static inline unsigned long *frame_vector_pfns(struct frame_vector *vec)
+{
+	if (!vec->is_pfns)
+		frame_vector_to_pfns(vec);
+	return (unsigned long *)(vec->ptrs);
+}
+
 struct kvec;
 int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
 			struct page **pages);
-- 
cgit v1.2.3


From 21fb0cb7ec65a40b9f5f7cda59eba0eb2ae76473 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 13 Jul 2015 11:55:46 -0300
Subject: [media] vb2: Provide helpers for mapping virtual addresses

Provide simple helper functions to map virtual address range into an
array of pfns / pages.

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/media/videobuf2-memops.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/media/videobuf2-memops.h b/include/media/videobuf2-memops.h
index f05444ca8c0c..2f0564ff5f31 100644
--- a/include/media/videobuf2-memops.h
+++ b/include/media/videobuf2-memops.h
@@ -15,6 +15,7 @@
 #define _MEDIA_VIDEOBUF2_MEMOPS_H
 
 #include <media/videobuf2-core.h>
+#include <linux/mm.h>
 
 /**
  * vb2_vmarea_handler - common vma refcount tracking handler
@@ -36,5 +37,9 @@ int vb2_get_contig_userptr(unsigned long vaddr, unsigned long size,
 struct vm_area_struct *vb2_get_vma(struct vm_area_struct *vma);
 void vb2_put_vma(struct vm_area_struct *vma);
 
+struct frame_vector *vb2_create_framevec(unsigned long start,
+					 unsigned long length,
+					 bool write);
+void vb2_destroy_framevec(struct frame_vector *vec);
 
 #endif
-- 
cgit v1.2.3


From 6690c8c78c745239bb1f22b23f3889a0a14c249b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 13 Jul 2015 11:55:50 -0300
Subject: [media] media: vb2: Remove unused functions

Conversion to the use of pinned pfns made some functions unused. Remove
them. Also there's no need to lock mmap_sem in __buf_prepare() anymore.

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/media/videobuf2-memops.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/media/videobuf2-memops.h b/include/media/videobuf2-memops.h
index 2f0564ff5f31..830b5239fd8b 100644
--- a/include/media/videobuf2-memops.h
+++ b/include/media/videobuf2-memops.h
@@ -31,12 +31,6 @@ struct vb2_vmarea_handler {
 
 extern const struct vm_operations_struct vb2_common_vm_ops;
 
-int vb2_get_contig_userptr(unsigned long vaddr, unsigned long size,
-			   struct vm_area_struct **res_vma, dma_addr_t *res_pa);
-
-struct vm_area_struct *vb2_get_vma(struct vm_area_struct *vma);
-void vb2_put_vma(struct vm_area_struct *vma);
-
 struct frame_vector *vb2_create_framevec(unsigned long start,
 					 unsigned long length,
 					 bool write);
-- 
cgit v1.2.3


From 76c28f1fcfeb42b47f798fe498351ee1d60086ae Mon Sep 17 00:00:00 2001
From: Andy Grover <agrover@redhat.com>
Date: Mon, 24 Aug 2015 10:26:03 -0700
Subject: target/iscsi: Fix np_ip bracket issue by removing np_ip

Revert commit 1997e6259, which causes double brackets on ipv6
inaddr_any addresses.

Since we have np_sockaddr, if we need a textual representation we can
use "%pISc".

Change iscsit_add_network_portal() and iscsit_add_np() signatures to remove
*ip_str parameter.

Fix and extend some comments earlier in the function.

Tested to work for :: and ::1 via iscsiadm, previously :: failed, see
https://bugzilla.redhat.com/show_bug.cgi?id=1249107 .

CC: stable@vger.kernel.org
Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/iscsi/iscsi_target_core.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index d4616ef12e04..1051d0c40ddd 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -778,7 +778,6 @@ struct iscsi_np {
 	enum iscsi_timer_flags_table np_login_timer_flags;
 	u32			np_exports;
 	enum np_flags_table	np_flags;
-	unsigned char		np_ip[IPV6_ADDRESS_SPACE];
 	u16			np_port;
 	spinlock_t		np_thread_lock;
 	struct completion	np_restart_comp;
-- 
cgit v1.2.3


From 69d755747d31c07a416064f251c2f408938fb67a Mon Sep 17 00:00:00 2001
From: Andy Grover <agrover@redhat.com>
Date: Mon, 24 Aug 2015 10:26:04 -0700
Subject: target/iscsi: Keep local_ip as the actual sockaddr

This is a more natural format that lets us format it with the appropriate
printk specifier as needed.

This also lets us handle v4-mapped ipv6 addresses a little more nicely, by
storing the addr as an actual v4 sockaddr in conn->local_sockaddr.

Finally, we no longer need to maintain variables for port, since this is
contained in sockaddr. Remove iscsi_np.np_port and iscsi_conn.local_port.

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/iscsi/iscsi_target_core.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 1051d0c40ddd..b9434117785f 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -519,7 +519,6 @@ struct iscsi_conn {
 	u16			cid;
 	/* Remote TCP Port */
 	u16			login_port;
-	u16			local_port;
 	int			net_size;
 	int			login_family;
 	u32			auth_id;
@@ -531,7 +530,7 @@ struct iscsi_conn {
 	u32			stat_sn;
 #define IPV6_ADDRESS_SPACE				48
 	unsigned char		login_ip[IPV6_ADDRESS_SPACE];
-	unsigned char		local_ip[IPV6_ADDRESS_SPACE];
+	struct __kernel_sockaddr_storage local_sockaddr;
 	int			conn_usage_count;
 	int			conn_waiting_on_uc;
 	atomic_t		check_immediate_queue;
@@ -778,7 +777,6 @@ struct iscsi_np {
 	enum iscsi_timer_flags_table np_login_timer_flags;
 	u32			np_exports;
 	enum np_flags_table	np_flags;
-	u16			np_port;
 	spinlock_t		np_thread_lock;
 	struct completion	np_restart_comp;
 	struct socket		*np_socket;
-- 
cgit v1.2.3


From dc58f760e2e1f8f2265b581d35f211415c4fee0c Mon Sep 17 00:00:00 2001
From: Andy Grover <agrover@redhat.com>
Date: Mon, 24 Aug 2015 10:26:05 -0700
Subject: target/iscsi: Replace conn->login_ip with login_sockaddr

Very similar to how it went with local_sockaddr.

It was embedded in iscsi_login_stats so some changes there, and we needed
to copy in a sockaddr_storage comparison function. Hopefully the kernel
will get a standard one soon, our implementation makes the 3rd.

isert_set_conn_info() became much smaller.

IPV6_ADDRESS_SPACE define goes away, had to modify a call to in6_pton(),
can just use -1 since we are sure string is null-terminated.

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/iscsi/iscsi_target_core.h | 3 +--
 include/target/iscsi/iscsi_target_stat.h | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index b9434117785f..f3eb99809557 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -528,8 +528,7 @@ struct iscsi_conn {
 	u32			exp_statsn;
 	/* Per connection status sequence number */
 	u32			stat_sn;
-#define IPV6_ADDRESS_SPACE				48
-	unsigned char		login_ip[IPV6_ADDRESS_SPACE];
+	struct __kernel_sockaddr_storage login_sockaddr;
 	struct __kernel_sockaddr_storage local_sockaddr;
 	int			conn_usage_count;
 	int			conn_waiting_on_uc;
diff --git a/include/target/iscsi/iscsi_target_stat.h b/include/target/iscsi/iscsi_target_stat.h
index 3ff76b4faad3..f2a583cdf08b 100644
--- a/include/target/iscsi/iscsi_target_stat.h
+++ b/include/target/iscsi/iscsi_target_stat.h
@@ -50,7 +50,7 @@ struct iscsi_login_stats {
 	u64		last_fail_time;		/* time stamp (jiffies) */
 	u32		last_fail_type;
 	int		last_intr_fail_ip_family;
-	unsigned char	last_intr_fail_ip_addr[IPV6_ADDRESS_SPACE];
+	struct __kernel_sockaddr_storage last_intr_fail_sockaddr;
 	char		last_intr_fail_name[224];
 } ____cacheline_aligned;
 
-- 
cgit v1.2.3


From 13a3cf08fa1e4b3a252f24202d47a556242aea03 Mon Sep 17 00:00:00 2001
From: Andy Grover <agrover@redhat.com>
Date: Mon, 24 Aug 2015 10:26:06 -0700
Subject: target/iscsi: Replace __kernel_sockaddr_storage with sockaddr_storage

It appears to be what the rest of the kernel does, so let's do it too.

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/iscsi/iscsi_target_core.h | 6 +++---
 include/target/iscsi/iscsi_target_stat.h | 2 +-
 include/target/iscsi/iscsi_transport.h   | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index f3eb99809557..84abe73450c5 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -528,8 +528,8 @@ struct iscsi_conn {
 	u32			exp_statsn;
 	/* Per connection status sequence number */
 	u32			stat_sn;
-	struct __kernel_sockaddr_storage login_sockaddr;
-	struct __kernel_sockaddr_storage local_sockaddr;
+	struct sockaddr_storage login_sockaddr;
+	struct sockaddr_storage local_sockaddr;
 	int			conn_usage_count;
 	int			conn_waiting_on_uc;
 	atomic_t		check_immediate_queue;
@@ -779,7 +779,7 @@ struct iscsi_np {
 	spinlock_t		np_thread_lock;
 	struct completion	np_restart_comp;
 	struct socket		*np_socket;
-	struct __kernel_sockaddr_storage np_sockaddr;
+	struct sockaddr_storage np_sockaddr;
 	struct task_struct	*np_thread;
 	struct timer_list	np_login_timer;
 	void			*np_context;
diff --git a/include/target/iscsi/iscsi_target_stat.h b/include/target/iscsi/iscsi_target_stat.h
index f2a583cdf08b..e615bb485d0b 100644
--- a/include/target/iscsi/iscsi_target_stat.h
+++ b/include/target/iscsi/iscsi_target_stat.h
@@ -50,7 +50,7 @@ struct iscsi_login_stats {
 	u64		last_fail_time;		/* time stamp (jiffies) */
 	u32		last_fail_type;
 	int		last_intr_fail_ip_family;
-	struct __kernel_sockaddr_storage last_intr_fail_sockaddr;
+	struct sockaddr_storage last_intr_fail_sockaddr;
 	char		last_intr_fail_name[224];
 } ____cacheline_aligned;
 
diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h
index e6bb166f12c2..90e37faa2ede 100644
--- a/include/target/iscsi/iscsi_transport.h
+++ b/include/target/iscsi/iscsi_transport.h
@@ -9,7 +9,7 @@ struct iscsit_transport {
 	int priv_size;
 	struct module *owner;
 	struct list_head t_node;
-	int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *);
+	int (*iscsit_setup_np)(struct iscsi_np *, struct sockaddr_storage *);
 	int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *);
 	void (*iscsit_free_np)(struct iscsi_np *);
 	void (*iscsit_wait_conn)(struct iscsi_conn *);
-- 
cgit v1.2.3


From 1bab0de0274fbe8c8ac92179e6705584c55ed169 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Aug 2015 14:16:54 +0200
Subject: dm-mpath, scsi_dh: don't let dm detach device handlers

While allowing dm-mpath to attach device handlers is a functionality we need
for backwards compatibility reason there is no reason to reference count
them and detach them if dm-mpath stops using the device for some reason.

If the device handler works for the given device it can just stay attached,
and we can take the retain_hw_handler codepath.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Hannes Reinecke <hare@Suse.de>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
---
 include/scsi/scsi_device.h | 1 -
 include/scsi/scsi_dh.h     | 5 -----
 2 files changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 50c2a363bc8f..798d67994aeb 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -221,7 +221,6 @@ struct scsi_device_handler {
 struct scsi_dh_data {
 	struct scsi_device_handler *scsi_dh;
 	struct scsi_device *sdev;
-	struct kref kref;
 };
 
 #define	to_scsi_device(d)	\
diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h
index 620c723ee8ed..99c9196fe606 100644
--- a/include/scsi/scsi_dh.h
+++ b/include/scsi/scsi_dh.h
@@ -59,7 +59,6 @@ enum {
 extern int scsi_dh_activate(struct request_queue *, activate_complete, void *);
 extern int scsi_dh_handler_exist(const char *);
 extern int scsi_dh_attach(struct request_queue *, const char *);
-extern void scsi_dh_detach(struct request_queue *);
 extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t);
 extern int scsi_dh_set_params(struct request_queue *, const char *);
 #else
@@ -77,10 +76,6 @@ static inline int scsi_dh_attach(struct request_queue *req, const char *name)
 {
 	return SCSI_DH_NOSYS;
 }
-static inline void scsi_dh_detach(struct request_queue *q)
-{
-	return;
-}
 static inline const char *scsi_dh_attached_handler_name(struct request_queue *q,
 							gfp_t gfp)
 {
-- 
cgit v1.2.3


From 566079c849cfe538e908c44ac11a9c4638db8f91 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Aug 2015 14:16:55 +0200
Subject: dm-mpath, scsi_dh: request scsi_dh modules in scsi_dh, not dm-mpath

This way we can reused the same code any attachment method, not just those
requested from dm-mpath.

[jejb: fixup checkpatch error]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
---
 include/scsi/scsi_dh.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h
index 99c9196fe606..966b921135b5 100644
--- a/include/scsi/scsi_dh.h
+++ b/include/scsi/scsi_dh.h
@@ -57,7 +57,6 @@ enum {
 };
 #if defined(CONFIG_SCSI_DH) || defined(CONFIG_SCSI_DH_MODULE)
 extern int scsi_dh_activate(struct request_queue *, activate_complete, void *);
-extern int scsi_dh_handler_exist(const char *);
 extern int scsi_dh_attach(struct request_queue *, const char *);
 extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t);
 extern int scsi_dh_set_params(struct request_queue *, const char *);
@@ -68,10 +67,6 @@ static inline int scsi_dh_activate(struct request_queue *req,
 	fn(data, 0);
 	return 0;
 }
-static inline int scsi_dh_handler_exist(const char *name)
-{
-	return 0;
-}
 static inline int scsi_dh_attach(struct request_queue *req, const char *name)
 {
 	return SCSI_DH_NOSYS;
-- 
cgit v1.2.3


From 086b91d052ebe4ead5d28021afe3bdfd70af15bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Aug 2015 14:16:57 +0200
Subject: scsi_dh: integrate into the core SCSI code

Stop building scsi_dh as a separate module and integrate it fully into the
core SCSI code with explicit callouts at bus scan time.  For now the
callouts are placed at the same point as the old bus notifiers were called,
but in the future we will be able to look at ALUA INQUIRY data earlier on.

Note that this also means that the device handler modules need to be loaded
by the time we scan the bus.  The next patches will add support for
autoloading device handlers at bus scan time to make sure they are always
loaded if they are enabled in the kernel config.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
---
 include/scsi/scsi_dh.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h
index 966b921135b5..3a37b4c45997 100644
--- a/include/scsi/scsi_dh.h
+++ b/include/scsi/scsi_dh.h
@@ -55,7 +55,7 @@ enum {
 	SCSI_DH_NOSYS,
 	SCSI_DH_DRIVER_MAX,
 };
-#if defined(CONFIG_SCSI_DH) || defined(CONFIG_SCSI_DH_MODULE)
+#ifdef CONFIG_SCSI_DH
 extern int scsi_dh_activate(struct request_queue *, activate_complete, void *);
 extern int scsi_dh_attach(struct request_queue *, const char *);
 extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t);
-- 
cgit v1.2.3


From d95dbff2a41e934cd8789734b34dc591e78ba11c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Aug 2015 14:16:58 +0200
Subject: scsi_dh: move device matching to the core code

Add a single list of devices that need non-ALUA device handlers to the core
scsi_dh code so that we can autoload the modules for them at probe time.

While this is a little ugly in terms of architecture it actually
significantly simplifies the code in addition to the new autoloading
functionality.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
---
 include/scsi/scsi_device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 798d67994aeb..4d501b7baa9b 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -215,7 +215,6 @@ struct scsi_device_handler {
 	int (*activate)(struct scsi_device *, activate_complete, void *);
 	int (*prep_fn)(struct scsi_device *, struct request *);
 	int (*set_params)(struct scsi_device *, const char *);
-	bool (*match)(struct scsi_device *);
 };
 
 struct scsi_dh_data {
-- 
cgit v1.2.3


From ee14c674e8fc57251223054fb52dc0ecfe711028 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Aug 2015 14:16:59 +0200
Subject: scsi_dh: kill struct scsi_dh_data

Add a ->handler and a ->handler_data field to struct scsi_device and kill
this indirection.  Also move struct scsi_device_handler to scsi_dh.h so that
changes to it don't require rebuilding every SCSI LLDD.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: James Bottomley <JBottomley@Odin.com>
---
 include/scsi/scsi_device.h | 25 +++----------------------
 include/scsi/scsi_dh.h     | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 4d501b7baa9b..fe89d7cd67b9 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -196,32 +196,13 @@ struct scsi_device {
 	struct execute_work	ew; /* used to get process context on put */
 	struct work_struct	requeue_work;
 
-	struct scsi_dh_data	*scsi_dh_data;
+	struct scsi_device_handler *handler;
+	void			*handler_data;
+
 	enum scsi_device_state sdev_state;
 	unsigned long		sdev_data[0];
 } __attribute__((aligned(sizeof(unsigned long))));
 
-typedef void (*activate_complete)(void *, int);
-struct scsi_device_handler {
-	/* Used by the infrastructure */
-	struct list_head list; /* list of scsi_device_handlers */
-
-	/* Filled by the hardware handler */
-	struct module *module;
-	const char *name;
-	int (*check_sense)(struct scsi_device *, struct scsi_sense_hdr *);
-	struct scsi_dh_data *(*attach)(struct scsi_device *);
-	void (*detach)(struct scsi_device *);
-	int (*activate)(struct scsi_device *, activate_complete, void *);
-	int (*prep_fn)(struct scsi_device *, struct request *);
-	int (*set_params)(struct scsi_device *, const char *);
-};
-
-struct scsi_dh_data {
-	struct scsi_device_handler *scsi_dh;
-	struct scsi_device *sdev;
-};
-
 #define	to_scsi_device(d)	\
 	container_of(d, struct scsi_device, sdev_gendev)
 #define	class_to_sdev(d)	\
diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h
index 3a37b4c45997..85d731746834 100644
--- a/include/scsi/scsi_dh.h
+++ b/include/scsi/scsi_dh.h
@@ -55,6 +55,23 @@ enum {
 	SCSI_DH_NOSYS,
 	SCSI_DH_DRIVER_MAX,
 };
+
+typedef void (*activate_complete)(void *, int);
+struct scsi_device_handler {
+	/* Used by the infrastructure */
+	struct list_head list; /* list of scsi_device_handlers */
+
+	/* Filled by the hardware handler */
+	struct module *module;
+	const char *name;
+	int (*check_sense)(struct scsi_device *, struct scsi_sense_hdr *);
+	int (*attach)(struct scsi_device *);
+	void (*detach)(struct scsi_device *);
+	int (*activate)(struct scsi_device *, activate_complete, void *);
+	int (*prep_fn)(struct scsi_device *, struct request *);
+	int (*set_params)(struct scsi_device *, const char *);
+};
+
 #ifdef CONFIG_SCSI_DH
 extern int scsi_dh_activate(struct request_queue *, activate_complete, void *);
 extern int scsi_dh_attach(struct request_queue *, const char *);
-- 
cgit v1.2.3


From 9642d18eee2cd169b60c6ac0f20bda745b5a3d1e Mon Sep 17 00:00:00 2001
From: Vatika Harlalka <vatikaharlalka@gmail.com>
Date: Tue, 1 Sep 2015 16:50:59 +0200
Subject: nohz: Affine unpinned timers to housekeepers

The problem addressed in this patch is about affining unpinned
timers. Adaptive or Full Dynticks CPUs are currently disturbed
by unnecessary jitter due to firing of such timers on them.

This patch will affine timers to online CPUs which are not full
dynticks in NOHZ_FULL configured systems. It should not
introduce overhead in nohz full off case due to static keys.

Signed-off-by: Vatika Harlalka <vatikaharlalka@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1441119060-2230-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tick.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 48d901f83f92..e312219ff823 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -147,11 +147,20 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
 		cpumask_or(mask, mask, tick_nohz_full_mask);
 }
 
+static inline int housekeeping_any_cpu(void)
+{
+	return cpumask_any_and(housekeeping_mask, cpu_online_mask);
+}
+
 extern void tick_nohz_full_kick(void);
 extern void tick_nohz_full_kick_cpu(int cpu);
 extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(void);
 #else
+static inline int housekeeping_any_cpu(void)
+{
+	return smp_processor_id();
+}
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
-- 
cgit v1.2.3


From 5e7c4274a70aa2d6f485996d0ca1dad52d0039ca Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 3 Sep 2015 19:28:20 +0300
Subject: block: Check for gaps on front and back merges

We are checking for gaps to previous bio_vec, which can
only detect back merges gaps. Moreover, at the point where
we check for a gap, we don't know if we will attempt a back
or a front merge. Thus, check for gap to prev in a back merge
attempt and check for a gap to next in a front merge attempt.

Signed-off-by: Jens Axboe <axboe@fb.com>
[sagig: Minor rename change]
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
---
 include/linux/blkdev.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a622f270f09e..2ff94def041e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1368,6 +1368,26 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
 }
 
+static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
+			 struct bio *next)
+{
+	if (!bio_has_data(prev))
+		return false;
+
+	return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1],
+				next->bi_io_vec[0].bv_offset);
+}
+
+static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
+{
+	return bio_will_gap(req->q, req->biotail, bio);
+}
+
+static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
+{
+	return bio_will_gap(req->q, bio, req->bio);
+}
+
 struct work_struct;
 int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
-- 
cgit v1.2.3


From e74bfeedad08180b968d8613dcde141ffb0720c3 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 13 Jul 2015 08:07:17 -0400
Subject: NTB: Add flow control to the ntb_netdev

Right now if we push the NTB really hard, we start dropping packets due
to not able to process the packets fast enough. We need to st:qop the
upper layer from flooding us when that happens.

A timer is necessary in order to restart the queue once the resource has
been processed on the receive side. Due to the way NTB is setup, the
resources on the tx side are tied to the processing of the rx side and
there's no async way to know when the rx side has released those
resources.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb_transport.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h
index 2862861366a5..7243eb98a722 100644
--- a/include/linux/ntb_transport.h
+++ b/include/linux/ntb_transport.h
@@ -83,3 +83,4 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len);
 void ntb_transport_link_up(struct ntb_transport_qp *qp);
 void ntb_transport_link_down(struct ntb_transport_qp *qp);
 bool ntb_transport_link_query(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp);
-- 
cgit v1.2.3


From a7c23237481782fbea3c2230e362b72863e144b0 Mon Sep 17 00:00:00 2001
From: Allen Hubbe <Allen.Hubbe@emc.com>
Date: Wed, 15 Jul 2015 04:15:28 -0400
Subject: NTB: Fix documentation for ntb_link_is_up

There was a copy and paste error in the documentation for
ntb_link_is_up.  The long description was mistakenly copied from
ntb_link_set_trans.

This adds the appropriate long description for ntb_link_is_up.

Reported-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Allen Hubbe <Allen.Hubbe@emc.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index b02f72bb8e32..e3d3299c6052 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -522,10 +522,9 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
  * @speed:	OUT - The link speed expressed as PCIe generation number.
  * @width:	OUT - The link width expressed as the number of PCIe lanes.
  *
- * Set the translation of a memory window.  The peer may access local memory
- * through the window starting at the address, up to the size.  The address
- * must be aligned to the alignment specified by ntb_mw_get_range().  The size
- * must be aligned to the size alignment specified by ntb_mw_get_range().
+ * Get the current state of the ntb link.  It is recommended to query the link
+ * state once after every link event.  It is safe to query the link state in
+ * the context of the link event callback.
  *
  * Return: One if the link is up, zero if the link is down, otherwise a
  *		negative value indicating the error number.
-- 
cgit v1.2.3


From 86663c91866ae85c219f1a80ef2c9460b7ca5cd8 Mon Sep 17 00:00:00 2001
From: Allen Hubbe <Allen.Hubbe@emc.com>
Date: Wed, 15 Jul 2015 12:43:21 -0400
Subject: NTB: Fix documentation for ntb_peer_db_clear.

The documentation should say "peer" not "local" when referring to the
peer doorbell register.

Reported-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Allen Hubbe <Allen.Hubbe@emc.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/ntb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index e3d3299c6052..f798e2afba88 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -794,7 +794,7 @@ static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
 }
 
 /**
- * ntb_peer_db_clear() - clear bits in the local doorbell register
+ * ntb_peer_db_clear() - clear bits in the peer doorbell register
  * @ntb:	NTB device context.
  * @db_bits:	Doorbell bits to clear.
  *
-- 
cgit v1.2.3


From edcd591c77a48da753456f92daf8bb50fe9bac93 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Mon, 7 Sep 2015 13:18:03 -0600
Subject: locking/static_keys: Fix a silly typo

Commit:

  412758cb2670 ("jump label, locking/static_keys: Update docs")

introduced a typo that might as well get fixed.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150907131803.54c027e1@lwn.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jump_label.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 7f653e8f6690..0684bd3a48fc 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -22,7 +22,7 @@
  * DEFINE_STATIC_KEY_TRUE(key);
  * DEFINE_STATIC_KEY_FALSE(key);
  * static_key_likely()
- * statick_key_unlikely()
+ * static_key_unlikely()
  *
  * Jump labels provide an interface to generate dynamic branches using
  * self-modifying code. Assuming toolchain and architecture support, if we
-- 
cgit v1.2.3


From 8b9558aab853e98ba6e3fee0dd8545544966958c Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 1 Sep 2015 17:19:38 +0800
Subject: libceph: use keepalive2 to verify the mon session is alive

Signed-off-by: Yan, Zheng <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h   | 2 ++
 include/linux/ceph/messenger.h | 4 ++++
 include/linux/ceph/msgr.h      | 4 +++-
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 9ebee53d3bf5..397c5cd09794 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -46,6 +46,7 @@ struct ceph_options {
 	unsigned long mount_timeout;		/* jiffies */
 	unsigned long osd_idle_ttl;		/* jiffies */
 	unsigned long osd_keepalive_timeout;	/* jiffies */
+	unsigned long monc_ping_timeout;	/* jiffies */
 
 	/*
 	 * any type that can't be simply compared or doesn't need need
@@ -66,6 +67,7 @@ struct ceph_options {
 #define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
 #define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
 #define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
+#define CEPH_MONC_PING_TIMEOUT_DEFAULT	msecs_to_jiffies(30 * 1000)
 
 #define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024)
 #define CEPH_MSG_MAX_MIDDLE_LEN	(16*1024*1024)
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 37753278987a..7e1252e97a30 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -248,6 +248,8 @@ struct ceph_connection {
 	int in_base_pos;     /* bytes read */
 	__le64 in_temp_ack;  /* for reading an ack */
 
+	struct timespec last_keepalive_ack;
+
 	struct delayed_work work;	    /* send|recv work */
 	unsigned long       delay;          /* current delay interval */
 };
@@ -285,6 +287,8 @@ extern void ceph_msg_revoke(struct ceph_msg *msg);
 extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
 
 extern void ceph_con_keepalive(struct ceph_connection *con);
+extern bool ceph_con_keepalive_expired(struct ceph_connection *con,
+				       unsigned long interval);
 
 extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
 				size_t length, size_t alignment);
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 1c1887206ffa..0fe2656ac415 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -84,10 +84,12 @@ struct ceph_entity_inst {
 #define CEPH_MSGR_TAG_MSG           7  /* message */
 #define CEPH_MSGR_TAG_ACK           8  /* message ack */
 #define CEPH_MSGR_TAG_KEEPALIVE     9  /* just a keepalive byte! */
-#define CEPH_MSGR_TAG_BADPROTOVER  10  /* bad protocol version */
+#define CEPH_MSGR_TAG_BADPROTOVER   10 /* bad protocol version */
 #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
 #define CEPH_MSGR_TAG_FEATURES      12 /* insufficient features */
 #define CEPH_MSGR_TAG_SEQ           13 /* 64-bit int follows with seen seq number */
+#define CEPH_MSGR_TAG_KEEPALIVE2    14 /* keepalive2 byte + ceph_timespec */
+#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
 
 
 /*
-- 
cgit v1.2.3


From 4eafbd15b6c84cd3f6c76022c8a6c27f7cc076e1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Tue, 8 Sep 2015 18:41:01 +0200
Subject: PM / OPP: add dev_pm_opp_get_suspend_opp() helper

Add dev_pm_opp_get_suspend_opp() helper to obtain suspend opp.

Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_opp.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index cab7ba55bedb..e817722ee3f0 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -34,6 +34,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp);
 
 int dev_pm_opp_get_opp_count(struct device *dev);
 unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev);
+struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev);
 
 struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					      unsigned long freq,
@@ -80,6 +81,11 @@ static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
 	return 0;
 }
 
+static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
+{
+	return NULL;
+}
+
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					unsigned long freq, bool available)
 {
-- 
cgit v1.2.3


From 08e75e754a6d9838e490b74551d19fc04d0fd6f9 Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Fri, 14 Aug 2015 18:56:56 +0100
Subject: PM / devfreq: cache the last call to get_dev_status()

The return value of get_dev_status() can be reused.  Cache it so that
other parts of the kernel can reuse it instead of having to call the
same function again.

Cc: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/devfreq.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index ce447f0f1bad..70a1c60ddda4 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -161,6 +161,7 @@ struct devfreq {
 	struct delayed_work work;
 
 	unsigned long previous_freq;
+	struct devfreq_dev_status last_status;
 
 	void *data; /* private data for governors */
 
@@ -204,6 +205,15 @@ extern int devm_devfreq_register_opp_notifier(struct device *dev,
 extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
 						struct devfreq *devfreq);
 
+/**
+ * devfreq_update_stats() - update the last_status pointer in struct devfreq
+ * @df:		the devfreq instance whose status needs updating
+ */
+static inline int devfreq_update_stats(struct devfreq *df)
+{
+	return df->profile->get_dev_status(df->dev.parent, &df->last_status);
+}
+
 #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
 /**
  * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq
@@ -289,6 +299,11 @@ static inline void devm_devfreq_unregister_opp_notifier(struct device *dev,
 							struct devfreq *devfreq)
 {
 }
+
+static inline int devfreq_update_stats(struct devfreq *df)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_PM_DEVFREQ */
 
 #endif /* __LINUX_DEVFREQ_H__ */
-- 
cgit v1.2.3


From d54cdf3fc91aae3780433471d15d73413a845bc0 Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Tue, 18 Aug 2015 13:45:49 +0900
Subject: PM / devfreq: comments for get_dev_status usage updated

With the introduction of devfreq_update_stats(), governors
are not recommended to use get_dev_status() directly.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/devfreq.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 70a1c60ddda4..68030e22af35 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -65,7 +65,10 @@ struct devfreq_dev_status {
  *			The "flags" parameter's possible values are
  *			explained above with "DEVFREQ_FLAG_*" macros.
  * @get_dev_status:	The device should provide the current performance
- *			status to devfreq, which is used by governors.
+ *			status to devfreq. Governors are recommended not to
+ *			use this directly. Instead, governors are recommended
+ *			to use devfreq_update_stats() along with
+ *			devfreq.last_status.
  * @get_cur_freq:	The device should provide the current frequency
  *			at which it is operating.
  * @exit:		An optional callback that is called when devfreq
@@ -208,6 +211,10 @@ extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
 /**
  * devfreq_update_stats() - update the last_status pointer in struct devfreq
  * @df:		the devfreq instance whose status needs updating
+ *
+ *  Governors are recommended to use this function along with last_status,
+ * which allows other entities to reuse the last_status without affecting
+ * the values fetched later by governors.
  */
 static inline int devfreq_update_stats(struct devfreq *df)
 {
-- 
cgit v1.2.3


From 43b3f02899f74ae9914a39547cc5492156f0027a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 4 Sep 2015 17:25:23 +0200
Subject: locking/qspinlock/x86: Fix performance regression under unaccelerated
 VMs

Dave ran into horrible performance on a VM without PARAVIRT_SPINLOCKS
set and Linus noted that the test-and-set implementation was retarded.

One should spin on the variable with a load, not a RMW.

While there, remove 'queued' from the name, as the lock isn't queued
at all, but a simple test-and-set.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Dave Chinner <david@fromorbit.com>
Tested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: stable@vger.kernel.org # v4.2+
Link: http://lkml.kernel.org/r/20150904152523.GR18673@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qspinlock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 83bfb87f5bf1..e2aadbc7151f 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -111,8 +111,8 @@ static inline void queued_spin_unlock_wait(struct qspinlock *lock)
 		cpu_relax();
 }
 
-#ifndef virt_queued_spin_lock
-static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock)
+#ifndef virt_spin_lock
+static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 {
 	return false;
 }
-- 
cgit v1.2.3


From 8f9b565482c537821588444e09ff732c7d65ed6e Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 30 Jul 2015 18:28:13 -0700
Subject: target/qla2xxx: Honor max_data_sg_nents I/O transfer limit

This patch adds an optional fabric driver provided SGL limit
that target-core will honor as it's own internal I/O maximum
transfer length limit, as exposed by EVPD=0xb0 block limits
parameters.

This is required for handling cases when host I/O transfer
length exceeds the requested EVPD block limits maximum
transfer length. The initial user of this logic is qla2xxx,
so that we can avoid having to reject I/Os from some legacy
FC hosts where EVPD=0xb0 parameters are not honored.

When se_cmd payload length exceeds the provided limit in
target_check_max_data_sg_nents() code, se_cmd->data_length +
se_cmd->prot_length are reset with se_cmd->residual_count
plus underflow bit for outgoing TFO response callbacks.
It also checks for existing CDB level underflow + overflow
and recalculates final residual_count as necessary.

Note this patch currently assumes 1:1 mapping of PAGE_SIZE
per struct scatterlist entry.

Reported-by: Craig Watson <craig.watson@vanguard-rugged.com>
Cc: Craig Watson <craig.watson@vanguard-rugged.com>
Tested-by: Himanshu Madhani <himanshu.madhani@qlogic.com>
Cc: Roland Dreier <roland@purestorage.com>
Cc: Arun Easi <arun.easi@qlogic.com>
Cc: Giridhar Malavali <giridhar.malavali@qlogic.com>
Cc: Andrew Vasquez <andrew.vasquez@qlogic.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_fabric.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 69355feabd1d..7fb2557a760e 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -5,6 +5,19 @@ struct target_core_fabric_ops {
 	struct module *module;
 	const char *name;
 	size_t node_acl_size;
+	/*
+	 * Limits number of scatterlist entries per SCF_SCSI_DATA_CDB payload.
+	 * Setting this value tells target-core to enforce this limit, and
+	 * report as INQUIRY EVPD=b0 MAXIMUM TRANSFER LENGTH.
+	 *
+	 * target-core will currently reset se_cmd->data_length to this
+	 * maximum size, and set UNDERFLOW residual count if length exceeds
+	 * this limit.
+	 *
+	 * XXX: Not all initiator hosts honor this block-limit EVPD
+	 * XXX: Currently assumes single PAGE_SIZE per scatterlist entry
+	 */
+	u32 max_data_sg_nents;
 	char *(*get_fabric_name)(void);
 	char *(*tpg_get_wwn)(struct se_portal_group *);
 	u16 (*tpg_get_tag)(struct se_portal_group *);
-- 
cgit v1.2.3


From ac64a2ce509104a746321a4f9646b6750cf281eb Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 4 Sep 2015 01:39:56 +0200
Subject: target: use stringify.h instead of own definition

Signed-off-by: David Disseldorp <ddiss@suse.de>
Acked-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/uapi/linux/target_core_user.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index b67f99d3c520..95c6521d8a95 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -42,10 +42,6 @@
 #define TCMU_MAILBOX_VERSION 2
 #define ALIGN_SIZE 64 /* Should be enough for most CPUs */
 
-/* See https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */
-#define xstr(s) str(s)
-#define str(s) #s
-
 struct tcmu_mailbox {
 	__u16 version;
 	__u16 flags;
-- 
cgit v1.2.3


From 7f39add3b08cbbdb99abe50e6d7c342e6800d684 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Fri, 11 Sep 2015 09:03:04 -0600
Subject: block: Refuse request/bio merges with gaps in the integrity payload

If a driver sets the block queue virtual boundary mask, it means that
it cannot handle gaps so we must not allow those in the integrity
payload as well.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>

Fixed up by me to have duplicate integrity merge functions, depending
on whether block integrity is enabled or not. Fixes a compilations
issue with CONFIG_BLK_DEV_INTEGRITY unset.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2ff94def041e..1aac7316a4b5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1514,6 +1514,26 @@ queue_max_integrity_segments(struct request_queue *q)
 	return q->limits.max_integrity_segments;
 }
 
+static inline bool integrity_req_gap_back_merge(struct request *req,
+						struct bio *next)
+{
+	struct bio_integrity_payload *bip = bio_integrity(req->bio);
+	struct bio_integrity_payload *bip_next = bio_integrity(next);
+
+	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
+				bip_next->bip_vec[0].bv_offset);
+}
+
+static inline bool integrity_req_gap_front_merge(struct request *req,
+						 struct bio *bio)
+{
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+	struct bio_integrity_payload *bip_next = bio_integrity(req->bio);
+
+	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
+				bip_next->bip_vec[0].bv_offset);
+}
+
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 struct bio;
@@ -1580,6 +1600,16 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g)
 {
 	return 0;
 }
+static inline bool integrity_req_gap_back_merge(struct request *req,
+						struct bio *next)
+{
+	return false;
+}
+static inline bool integrity_req_gap_front_merge(struct request *req,
+						 struct bio *bio)
+{
+	return false;
+}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-- 
cgit v1.2.3


From 5b25b13ab08f616efd566347d809b4ece54570d1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Fri, 11 Sep 2015 13:07:39 -0700
Subject: sys_membarrier(): system-wide memory barrier (generic, x86)

Here is an implementation of a new system call, sys_membarrier(), which
executes a memory barrier on all threads running on the system.  It is
implemented by calling synchronize_sched().  It can be used to
distribute the cost of user-space memory barriers asymmetrically by
transforming pairs of memory barriers into pairs consisting of
sys_membarrier() and a compiler barrier.  For synchronization primitives
that distinguish between read-side and write-side (e.g.  userspace RCU
[1], rwlocks), the read-side can be accelerated significantly by moving
the bulk of the memory barrier overhead to the write-side.

The existing applications of which I am aware that would be improved by
this system call are as follows:

* Through Userspace RCU library (http://urcu.so)
  - DNS server (Knot DNS) https://www.knot-dns.cz/
  - Network sniffer (http://netsniff-ng.org/)
  - Distributed object storage (https://sheepdog.github.io/sheepdog/)
  - User-space tracing (http://lttng.org)
  - Network storage system (https://www.gluster.org/)
  - Virtual routers (https://events.linuxfoundation.org/sites/events/files/slides/DPDK_RCU_0MQ.pdf)
  - Financial software (https://lkml.org/lkml/2015/3/23/189)

Those projects use RCU in userspace to increase read-side speed and
scalability compared to locking.  Especially in the case of RCU used by
libraries, sys_membarrier can speed up the read-side by moving the bulk of
the memory barrier cost to synchronize_rcu().

* Direct users of sys_membarrier
  - core dotnet garbage collector (https://github.com/dotnet/coreclr/issues/198)

Microsoft core dotnet GC developers are planning to use the mprotect()
side-effect of issuing memory barriers through IPIs as a way to implement
Windows FlushProcessWriteBuffers() on Linux.  They are referring to
sys_membarrier in their github thread, specifically stating that
sys_membarrier() is what they are looking for.

To explain the benefit of this scheme, let's introduce two example threads:

Thread A (non-frequent, e.g. executing liburcu synchronize_rcu())
Thread B (frequent, e.g. executing liburcu
rcu_read_lock()/rcu_read_unlock())

In a scheme where all smp_mb() in thread A are ordering memory accesses
with respect to smp_mb() present in Thread B, we can change each
smp_mb() within Thread A into calls to sys_membarrier() and each
smp_mb() within Thread B into compiler barriers "barrier()".

Before the change, we had, for each smp_mb() pairs:

Thread A                    Thread B
previous mem accesses       previous mem accesses
smp_mb()                    smp_mb()
following mem accesses      following mem accesses

After the change, these pairs become:

Thread A                    Thread B
prev mem accesses           prev mem accesses
sys_membarrier()            barrier()
follow mem accesses         follow mem accesses

As we can see, there are two possible scenarios: either Thread B memory
accesses do not happen concurrently with Thread A accesses (1), or they
do (2).

1) Non-concurrent Thread A vs Thread B accesses:

Thread A                    Thread B
prev mem accesses
sys_membarrier()
follow mem accesses
                            prev mem accesses
                            barrier()
                            follow mem accesses

In this case, thread B accesses will be weakly ordered. This is OK,
because at that point, thread A is not particularly interested in
ordering them with respect to its own accesses.

2) Concurrent Thread A vs Thread B accesses

Thread A                    Thread B
prev mem accesses           prev mem accesses
sys_membarrier()            barrier()
follow mem accesses         follow mem accesses

In this case, thread B accesses, which are ensured to be in program
order thanks to the compiler barrier, will be "upgraded" to full
smp_mb() by synchronize_sched().

* Benchmarks

On Intel Xeon E5405 (8 cores)
(one thread is calling sys_membarrier, the other 7 threads are busy
looping)

1000 non-expedited sys_membarrier calls in 33s =3D 33 milliseconds/call.

* User-space user of this system call: Userspace RCU library

Both the signal-based and the sys_membarrier userspace RCU schemes
permit us to remove the memory barrier from the userspace RCU
rcu_read_lock() and rcu_read_unlock() primitives, thus significantly
accelerating them. These memory barriers are replaced by compiler
barriers on the read-side, and all matching memory barriers on the
write-side are turned into an invocation of a memory barrier on all
active threads in the process. By letting the kernel perform this
synchronization rather than dumbly sending a signal to every process
threads (as we currently do), we diminish the number of unnecessary wake
ups and only issue the memory barriers on active threads. Non-running
threads do not need to execute such barrier anyway, because these are
implied by the scheduler context switches.

Results in liburcu:

Operations in 10s, 6 readers, 2 writers:

memory barriers in reader:    1701557485 reads, 2202847 writes
signal-based scheme:          9830061167 reads,    6700 writes
sys_membarrier:               9952759104 reads,     425 writes
sys_membarrier (dyn. check):  7970328887 reads,     425 writes

The dynamic sys_membarrier availability check adds some overhead to
the read-side compared to the signal-based scheme, but besides that,
sys_membarrier slightly outperforms the signal-based scheme. However,
this non-expedited sys_membarrier implementation has a much slower grace
period than signal and memory barrier schemes.

Besides diminishing the number of wake-ups, one major advantage of the
membarrier system call over the signal-based scheme is that it does not
need to reserve a signal. This plays much more nicely with libraries,
and with processes injected into for tracing purposes, for which we
cannot expect that signals will be unused by the application.

An expedited version of this system call can be added later on to speed
up the grace period. Its implementation will likely depend on reading
the cpu_curr()->mm without holding each CPU's rq lock.

This patch adds the system call to x86 and to asm-generic.

[1] http://urcu.so

membarrier(2) man page:

MEMBARRIER(2)              Linux Programmer's Manual             MEMBARRIER(2)

NAME
       membarrier - issue memory barriers on a set of threads

SYNOPSIS
       #include <linux/membarrier.h>

       int membarrier(int cmd, int flags);

DESCRIPTION
       The cmd argument is one of the following:

       MEMBARRIER_CMD_QUERY
              Query  the  set  of  supported commands. It returns a bitmask of
              supported commands.

       MEMBARRIER_CMD_SHARED
              Execute a memory barrier on all threads running on  the  system.
              Upon  return from system call, the caller thread is ensured that
              all running threads have passed through a state where all memory
              accesses  to  user-space  addresses  match program order between
              entry to and return from the system  call  (non-running  threads
              are de facto in such a state). This covers threads from all pro=E2=80=90
              cesses running on the system.  This command returns 0.

       The flags argument needs to be 0. For future extensions.

       All memory accesses performed  in  program  order  from  each  targeted
       thread is guaranteed to be ordered with respect to sys_membarrier(). If
       we use the semantic "barrier()" to represent a compiler barrier forcing
       memory  accesses  to  be performed in program order across the barrier,
       and smp_mb() to represent explicit memory barriers forcing full  memory
       ordering  across  the barrier, we have the following ordering table for
       each pair of barrier(), sys_membarrier() and smp_mb():

       The pair ordering is detailed as (O: ordered, X: not ordered):

                              barrier()   smp_mb() sys_membarrier()
              barrier()          X           X            O
              smp_mb()           X           O            O
              sys_membarrier()   O           O            O

RETURN VALUE
       On success, these system calls return zero.  On error, -1 is  returned,
       and errno is set appropriately. For a given command, with flags
       argument set to 0, this system call is guaranteed to always return the
       same value until reboot.

ERRORS
       ENOSYS System call is not implemented.

       EINVAL Invalid arguments.

Linux                             2015-04-15                     MEMBARRIER(2)

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Nicholas Miell <nmiell@comcast.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Pranith Kumar <bobby.prani@gmail.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h          |  2 ++
 include/uapi/asm-generic/unistd.h |  4 ++-
 include/uapi/linux/Kbuild         |  1 +
 include/uapi/linux/membarrier.h   | 53 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/membarrier.h

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 08001317aee7..a460e2ef2843 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -885,4 +885,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename,
 			const char __user *const __user *argv,
 			const char __user *const __user *envp, int flags);
 
+asmlinkage long sys_membarrier(int cmd, int flags);
+
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index e016bd9b1a04..8da542a2874d 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -709,9 +709,11 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 281
 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+#define __NR_membarrier 282
+__SYSCALL(__NR_membarrier, sys_membarrier)
 
 #undef __NR_syscalls
-#define __NR_syscalls 282
+#define __NR_syscalls 283
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 70ff1d9abf0d..f7b2db44eb4b 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -252,6 +252,7 @@ header-y += mdio.h
 header-y += media.h
 header-y += media-bus-format.h
 header-y += mei.h
+header-y += membarrier.h
 header-y += memfd.h
 header-y += mempolicy.h
 header-y += meye.h
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
new file mode 100644
index 000000000000..e0b108bd2624
--- /dev/null
+++ b/include/uapi/linux/membarrier.h
@@ -0,0 +1,53 @@
+#ifndef _UAPI_LINUX_MEMBARRIER_H
+#define _UAPI_LINUX_MEMBARRIER_H
+
+/*
+ * linux/membarrier.h
+ *
+ * membarrier system call API
+ *
+ * Copyright (c) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * enum membarrier_cmd - membarrier system call command
+ * @MEMBARRIER_CMD_QUERY:   Query the set of supported commands. It returns
+ *                          a bitmask of valid commands.
+ * @MEMBARRIER_CMD_SHARED:  Execute a memory barrier on all running threads.
+ *                          Upon return from system call, the caller thread
+ *                          is ensured that all running threads have passed
+ *                          through a state where all memory accesses to
+ *                          user-space addresses match program order between
+ *                          entry to and return from the system call
+ *                          (non-running threads are de facto in such a
+ *                          state). This covers threads from all processes
+ *                          running on the system. This command returns 0.
+ *
+ * Command to be passed to the membarrier system call. The commands need to
+ * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
+ * the value 0.
+ */
+enum membarrier_cmd {
+	MEMBARRIER_CMD_QUERY = 0,
+	MEMBARRIER_CMD_SHARED = (1 << 0),
+};
+
+#endif /* _UAPI_LINUX_MEMBARRIER_H */
-- 
cgit v1.2.3


From 6798a8caaf64fa68b9ab2044e070fe4545034e03 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 11 Sep 2015 13:07:48 -0700
Subject: fs/seq_file: convert int seq_vprint/seq_printf/etc... returns to void

The seq_<foo> function return values were frequently misused.

See: commit 1f33c41c03da ("seq_file: Rename seq_overflow() to
     seq_has_overflowed() and make public")

All uses of these return values have been removed, so convert the
return types to void.

Miscellanea:

o Move seq_put_decimal_<type> and seq_escape prototypes closer the
  other seq_vprintf prototypes
o Reorder seq_putc and seq_puts to return early on overflow
o Add argument names to seq_vprintf and seq_printf
o Update the seq_escape kernel-doc
o Convert a couple of leading spaces to tabs in seq_escape

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/seq_file.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index adeadbd6d7bf..dde00defbaa5 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -114,13 +114,18 @@ int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
 loff_t seq_lseek(struct file *, loff_t, int);
 int seq_release(struct inode *, struct file *);
-int seq_escape(struct seq_file *, const char *, const char *);
-int seq_putc(struct seq_file *m, char c);
-int seq_puts(struct seq_file *m, const char *s);
 int seq_write(struct seq_file *seq, const void *data, size_t len);
 
-__printf(2, 3) int seq_printf(struct seq_file *, const char *, ...);
-__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args);
+__printf(2, 0)
+void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
+__printf(2, 3)
+void seq_printf(struct seq_file *m, const char *fmt, ...);
+void seq_putc(struct seq_file *m, char c);
+void seq_puts(struct seq_file *m, const char *s);
+void seq_put_decimal_ull(struct seq_file *m, char delimiter,
+			 unsigned long long num);
+void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num);
+void seq_escape(struct seq_file *m, const char *s, const char *esc);
 
 void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
 		  int rowsize, int groupsize, const void *buf, size_t len,
@@ -138,10 +143,6 @@ int single_release(struct inode *, struct file *);
 void *__seq_open_private(struct file *, const struct seq_operations *, int);
 int seq_open_private(struct file *, const struct seq_operations *, int);
 int seq_release_private(struct inode *, struct file *);
-int seq_put_decimal_ull(struct seq_file *m, char delimiter,
-			unsigned long long num);
-int seq_put_decimal_ll(struct seq_file *m, char delimiter,
-			long long num);
 
 static inline struct user_namespace *seq_user_ns(struct seq_file *seq)
 {
-- 
cgit v1.2.3


From 10fbd36e362a0f367e34a7cd876a81295d8fc5ca Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 27 May 2015 15:32:15 -0700
Subject: blk: rq_data_dir() should not return a boolean
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rq_data_dir() returns either READ or WRITE (0 == READ, 1 == WRITE), not
a boolean value.

Now, admittedly the "!= 0" doesn't really change the value (0 stays as
zero, 1 stays as one), but it's not only redundant, it confuses gcc, and
causes gcc to warn about the construct

    switch (rq_data_dir(req)) {
        case READ:
            ...
        case WRITE:
            ...

that we have in a few drivers.

Now, the gcc warning is silly and stupid (it seems to warn not about the
switch value having a different type from the case statements, but about
_any_ boolean switch value), but in this case the code itself is silly
and stupid too, so let's just change it, and get rid of warnings like
this:

  drivers/block/hd.c: In function ‘hd_request’:
  drivers/block/hd.c:630:11: warning: switch condition has boolean value [-Wswitch-bool]
     switch (rq_data_dir(req)) {

The odd '!= 0' came in when "cmd_flags" got turned into a "u64" in
commit 5953316dbf90 ("block: make rq->cmd_flags be 64-bit") and is
presumably because the old code (that just did a logical 'and' with 1)
would then end up making the type of rq_data_dir() be u64 too.

But if we want to retain the old regular integer type, let's just cast
the result to 'int' rather than use that rather odd '!= 0'.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 708923b9b623..38a5ff772a37 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -584,7 +584,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
-#define rq_data_dir(rq)		(((rq)->cmd_flags & 1) != 0)
+#define rq_data_dir(rq)		((int)((rq)->cmd_flags & 1))
 
 /*
  * Driver can handle struct request, if it either has an old style
-- 
cgit v1.2.3


From cd33dc9ac2977ebe30cecbf39d2992190fbac5b4 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal@arm.com>
Date: Tue, 8 Sep 2015 14:51:12 +0100
Subject: thermal: Fix thermal_zone_of_sensor_register to match documentation

thermal_zone_of_sensor_register is documented as returning a pointer
to either a valid thermal_zone_device on success, or a corresponding
ERR_PTR() value.

In contrast, the function returns NULL when THERMAL_OF is configured
off. Fix this.

Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Cc: Eduardo Valentin <edubezval@gmail.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 17292fee8686..0c5518e13584 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -360,7 +360,7 @@ static inline struct thermal_zone_device *
 thermal_zone_of_sensor_register(struct device *dev, int id, void *data,
 				const struct thermal_zone_of_device_ops *ops)
 {
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 static inline
-- 
cgit v1.2.3


From eef7635a22f6b144206b5ca2f1398f637acffc4d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 11 Sep 2015 09:34:26 +0530
Subject: clockevents: Remove unused set_mode() callback

All users are migrated to the per-state callbacks, get rid of the
unused interface and the core support code.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linaro-kernel@lists.linaro.org
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/fd60de14cf6d125489c031207567bb255ad946f6.1441943991.git.viresh.kumar@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/clockchips.h | 29 +++++------------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 31ce435981fe..bdcf358dfce2 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -18,15 +18,6 @@
 struct clock_event_device;
 struct module;
 
-/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */
-enum clock_event_mode {
-	CLOCK_EVT_MODE_UNUSED,
-	CLOCK_EVT_MODE_SHUTDOWN,
-	CLOCK_EVT_MODE_PERIODIC,
-	CLOCK_EVT_MODE_ONESHOT,
-	CLOCK_EVT_MODE_RESUME,
-};
-
 /*
  * Possible states of a clock event device.
  *
@@ -86,16 +77,14 @@ enum clock_event_state {
  * @min_delta_ns:	minimum delta value in ns
  * @mult:		nanosecond to cycles multiplier
  * @shift:		nanoseconds to cycles divisor (power of two)
- * @mode:		operating mode, relevant only to ->set_mode(), OBSOLETE
  * @state_use_accessors:current state of the device, assigned by the core code
  * @features:		features
  * @retries:		number of forced programming retries
- * @set_mode:		legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
- * @set_state_periodic:	switch state to periodic, if !set_mode
- * @set_state_oneshot:	switch state to oneshot, if !set_mode
- * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode
- * @set_state_shutdown:	switch state to shutdown, if !set_mode
- * @tick_resume:	resume clkevt device, if !set_mode
+ * @set_state_periodic:	switch state to periodic
+ * @set_state_oneshot:	switch state to oneshot
+ * @set_state_oneshot_stopped: switch state to oneshot_stopped
+ * @set_state_shutdown:	switch state to shutdown
+ * @tick_resume:	resume clkevt device
  * @broadcast:		function to broadcast events
  * @min_delta_ticks:	minimum delta value in ticks stored for reconfiguration
  * @max_delta_ticks:	maximum delta value in ticks stored for reconfiguration
@@ -116,18 +105,10 @@ struct clock_event_device {
 	u64			min_delta_ns;
 	u32			mult;
 	u32			shift;
-	enum clock_event_mode	mode;
 	enum clock_event_state	state_use_accessors;
 	unsigned int		features;
 	unsigned long		retries;
 
-	/*
-	 * State transition callback(s): Only one of the two groups should be
-	 * defined:
-	 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
-	 * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume().
-	 */
-	void			(*set_mode)(enum clock_event_mode mode, struct clock_event_device *);
 	int			(*set_state_periodic)(struct clock_event_device *);
 	int			(*set_state_oneshot)(struct clock_event_device *);
 	int			(*set_state_oneshot_stopped)(struct clock_event_device *);
-- 
cgit v1.2.3


From c973c3bcec3752455c4d7545edd42935cd7942d9 Mon Sep 17 00:00:00 2001
From: Javi Merino <javi.merino@arm.com>
Date: Mon, 14 Sep 2015 14:23:50 +0100
Subject: thermal: Add a function to get the minimum power

The thermal core already has a function to get the maximum power of a
cooling device: power_actor_get_max_power().  Add a function to get the
minimum power of a cooling device.

Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Signed-off-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 0c5518e13584..157d366e761b 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -380,6 +380,8 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
 
 int power_actor_get_max_power(struct thermal_cooling_device *,
 			      struct thermal_zone_device *tz, u32 *max_power);
+int power_actor_get_min_power(struct thermal_cooling_device *,
+			      struct thermal_zone_device *tz, u32 *min_power);
 int power_actor_set_power(struct thermal_cooling_device *,
 			  struct thermal_instance *, u32);
 struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
@@ -415,6 +417,10 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
 static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev,
 			      struct thermal_zone_device *tz, u32 *max_power)
 { return 0; }
+static inline int power_actor_get_min_power(struct thermal_cooling_device *cdev,
+					    struct thermal_zone_device *tz,
+					    u32 *min_power)
+{ return -ENODEV; }
 static inline int power_actor_set_power(struct thermal_cooling_device *cdev,
 			  struct thermal_instance *tz, u32 power)
 { return 0; }
-- 
cgit v1.2.3


From 63cdbc06b357dcb3a7104a421ee4a4550d7fadfd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 14 Sep 2015 17:06:27 +0200
Subject: netfilter: bridge: fix routing of bridge frames with call-iptables=1

We can't re-use the physoutdev storage area.

1.  When using NFQUEUE in PREROUTING, we attempt to bump a bogus
refcnt since nf_bridge->physoutdev is garbage (ipv4/ipv6 address)

2. for same reason, we crash in physdev match in FORWARD or later if
skb is routed instead of bridged.

This increases nf_bridge_info to 40 bytes, but we have no other choice.

Fixes: 72b1e5e4cac7 ("netfilter: bridge: reduce nf_bridge_info to 32 bytes again")
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/skbuff.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2738d355cdf9..9987af080fa0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -179,6 +179,9 @@ struct nf_bridge_info {
 	u8			bridged_dnat:1;
 	__u16			frag_max_size;
 	struct net_device	*physindev;
+
+	/* always valid & non-NULL from FORWARD on, for physdev match */
+	struct net_device	*physoutdev;
 	union {
 		/* prerouting: detect dnat in orig/reply direction */
 		__be32          ipv4_daddr;
@@ -189,9 +192,6 @@ struct nf_bridge_info {
 		 * skb is out in neigh layer.
 		 */
 		char neigh_header[8];
-
-		/* always valid & non-NULL from FORWARD on, for physdev match */
-		struct net_device *physoutdev;
 	};
 };
 #endif
-- 
cgit v1.2.3


From bcb2b0b2bae2de744223c68521cd51c57feb486c Mon Sep 17 00:00:00 2001
From: Sudeep Holla <Sudeep.Holla@arm.com>
Date: Mon, 14 Sep 2015 16:01:55 +0100
Subject: ACPI: Eliminate CONFIG_.*{, _MODULE} #ifdef in favor of IS_ENABLED()

This commit removes all CONFIG_.*{,_MODULE} in ACPI code, replacing it
with IS_ENABLED().

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/button.h | 4 ++--
 include/acpi/video.h  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/acpi/button.h b/include/acpi/button.h
index 97eea0e4c016..1cad8b2d460c 100644
--- a/include/acpi/button.h
+++ b/include/acpi/button.h
@@ -3,7 +3,7 @@
 
 #include <linux/notifier.h>
 
-#if defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE)
+#if IS_ENABLED(CONFIG_ACPI_BUTTON)
 extern int acpi_lid_notifier_register(struct notifier_block *nb);
 extern int acpi_lid_notifier_unregister(struct notifier_block *nb);
 extern int acpi_lid_open(void);
@@ -20,6 +20,6 @@ static inline int acpi_lid_open(void)
 {
 	return 1;
 }
-#endif /* defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) */
+#endif /* IS_ENABLED(CONFIG_ACPI_BUTTON) */
 
 #endif /* ACPI_BUTTON_H */
diff --git a/include/acpi/video.h b/include/acpi/video.h
index e840b294c6f5..c62392d9b52a 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -24,7 +24,7 @@ enum acpi_backlight_type {
 	acpi_backlight_native,
 };
 
-#if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE)
+#if IS_ENABLED(CONFIG_ACPI_VIDEO)
 extern int acpi_video_register(void);
 extern void acpi_video_unregister(void);
 extern int acpi_video_get_edid(struct acpi_device *device, int type,
-- 
cgit v1.2.3


From 1975dbc276c6ab62230cf4f9df5ddc9ff0e0e473 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Mon, 14 Sep 2015 17:11:05 -0600
Subject: locking/static_keys: Fix up the static keys documentation

Fix a few small mistakes in the static key documentation and
delete an unneeded sentence.

Suggested-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150914171105.511e1e21@lwn.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jump_label.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0684bd3a48fc..f1094238ab2a 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -21,8 +21,8 @@
  *
  * DEFINE_STATIC_KEY_TRUE(key);
  * DEFINE_STATIC_KEY_FALSE(key);
- * static_key_likely()
- * static_key_unlikely()
+ * static_branch_likely()
+ * static_branch_unlikely()
  *
  * Jump labels provide an interface to generate dynamic branches using
  * self-modifying code. Assuming toolchain and architecture support, if we
@@ -45,12 +45,10 @@
  * statement, setting the key to true requires us to patch in a jump
  * to the out-of-line of true branch.
  *
- * In addtion to static_branch_{enable,disable}, we can also reference count
+ * In addition to static_branch_{enable,disable}, we can also reference count
  * the key or branch direction via static_branch_{inc,dec}. Thus,
  * static_branch_inc() can be thought of as a 'make more true' and
- * static_branch_dec() as a 'make more false'. The inc()/dec()
- * interface is meant to be used exclusively from the inc()/dec() for a given
- * key.
+ * static_branch_dec() as a 'make more false'.
  *
  * Since this relies on modifying code, the branch modifying functions
  * must be considered absolute slow paths (machine wide synchronization etc.).
-- 
cgit v1.2.3


From 3829c664b1eec243f2a355829efa40f0f414de8d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 15 Sep 2015 13:47:24 +0200
Subject: genirq: Remove stale comment

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 6f8b34066442..72a6b2feb7bf 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -151,10 +151,6 @@ struct irq_common_data {
  *			methods, to allow shared chip implementations
  * @msi_desc:		MSI descriptor
  * @affinity:		IRQ affinity on SMP
- *
- * The fields here need to overlay the ones in irq_desc until we
- * cleaned up the direct references and switched everything over to
- * irq_data.
  */
 struct irq_data {
 	u32			mask;
-- 
cgit v1.2.3


From 6584d84c3e504c76ad291cc2e381bbeed59798ab Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie.huang@arm.com>
Date: Tue, 1 Sep 2015 10:35:50 +0800
Subject: genirq: Update the comment for generic_handle_irq_desc

__do_IRQ() was removed by commit 1c77ff2 "genirq: Remove __do_IRQ",
but the comment referring to __do_IRQ() was left.

Update the comment for generic_handle_irq_desc().

Signed-off-by: Huang Shijie <shijie.huang@arm.com>
Cc: jiang.liu@linux.intel.com
Cc: peterz@infradead.org
Cc: rafael.j.wysocki@intel.com
Cc: jason@lakedaemon.net
Cc: marc.zyngier@arm.com
Link: http://lkml.kernel.org/r/1441074950-3893-1-git-send-email-shijie.huang@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdesc.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 5acfa26602e1..0593c691d091 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -137,9 +137,7 @@ static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
 
 /*
  * Architectures call this to let the generic IRQ layer
- * handle an interrupt. If the descriptor is attached to an
- * irqchip-style controller then we call the ->handle_irq() handler,
- * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
+ * handle an interrupt.
  */
 static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
 {
-- 
cgit v1.2.3


From f230d1e891ba1da5953460516960894154f265db Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 15 Sep 2015 14:30:06 -0700
Subject: ipv6: Rename the dst_cache helper functions in ip6_tunnel

It is a prep work to fix the dst_entry refcnt bugs in
ip6_tunnel.

This patch rename:
1. ip6_tnl_dst_check() to ip6_tnl_dst_get() to better
   reflect that it will take a dst refcnt in the next patch.
2. ip6_tnl_dst_store() to ip6_tnl_dst_set() to have a more
   conventional name matching with ip6_tnl_dst_get().

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_tunnel.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index b8529aa1dae7..979b081a47e8 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -60,9 +60,9 @@ struct ipv6_tlv_tnl_enc_lim {
 	__u8 encap_limit;	/* tunnel encapsulation limit   */
 } __packed;
 
-struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t);
+struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t);
 void ip6_tnl_dst_reset(struct ip6_tnl *t);
-void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst);
+void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst);
 int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
 		const struct in6_addr *raddr);
 int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
-- 
cgit v1.2.3


From cdf3464e6c6bd764277cbbe992cd12da735b92fb Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 15 Sep 2015 14:30:07 -0700
Subject: ipv6: Fix dst_entry refcnt bugs in ip6_tunnel

Problems in the current dst_entry cache in the ip6_tunnel:

1. ip6_tnl_dst_set is racy.  There is no lock to protect it:
   - One major problem is that the dst refcnt gets messed up. F.e.
     the same dst_cache can be released multiple times and then
     triggering the infamous dst refcnt < 0 warning message.
   - Another issue is the inconsistency between dst_cache and
     dst_cookie.

   It can be reproduced by adding and removing the ip6gre tunnel
   while running a super_netperf TCP_CRR test.

2. ip6_tnl_dst_get does not take the dst refcnt before returning
   the dst.

This patch:
1. Create a percpu dst_entry cache in ip6_tnl
2. Use a spinlock to protect the dst_cache operations
3. ip6_tnl_dst_get always takes the dst refcnt before returning

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_tunnel.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 979b081a47e8..60b4f402f78c 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -32,6 +32,12 @@ struct __ip6_tnl_parm {
 	__be32			o_key;
 };
 
+struct ip6_tnl_dst {
+	spinlock_t lock;
+	struct dst_entry *dst;
+	u32 cookie;
+};
+
 /* IPv6 tunnel */
 struct ip6_tnl {
 	struct ip6_tnl __rcu *next;	/* next tunnel in list */
@@ -39,8 +45,7 @@ struct ip6_tnl {
 	struct net *net;	/* netns for packet i/o */
 	struct __ip6_tnl_parm parms;	/* tunnel configuration parameters */
 	struct flowi fl;	/* flowi template for xmit */
-	struct dst_entry *dst_cache;    /* cached dst */
-	u32 dst_cookie;
+	struct ip6_tnl_dst __percpu *dst_cache;	/* cached dst */
 
 	int err_count;
 	unsigned long err_time;
@@ -61,6 +66,8 @@ struct ipv6_tlv_tnl_enc_lim {
 } __packed;
 
 struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t);
+int ip6_tnl_dst_init(struct ip6_tnl *t);
+void ip6_tnl_dst_destroy(struct ip6_tnl *t);
 void ip6_tnl_dst_reset(struct ip6_tnl *t);
 void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst);
 int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
-- 
cgit v1.2.3


From 70da5b5c532f0ec8aa76b4f46158da5f010f34b3 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 15 Sep 2015 14:30:09 -0700
Subject: ipv6: Replace spinlock with seqlock and rcu in ip6_tunnel

This patch uses a seqlock to ensure consistency between idst->dst and
idst->cookie.  It also makes dst freeing from fib tree to undergo a
rcu grace period.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_tunnel.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 60b4f402f78c..65c2a9397b3c 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -33,8 +33,8 @@ struct __ip6_tnl_parm {
 };
 
 struct ip6_tnl_dst {
-	spinlock_t lock;
-	struct dst_entry *dst;
+	seqlock_t lock;
+	struct dst_entry __rcu *dst;
 	u32 cookie;
 };
 
-- 
cgit v1.2.3


From 1f0bd44e937468446d080b98b5669844744c24a1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Sep 2015 02:17:49 +0200
Subject: cpufreq: acpi-cpufreq: Use cpufreq_cpu_get_raw() in ->get()

cpufreq_cpu_get() called by get_cur_freq_on_cpu() is overkill,
because the ->get() callback is always invoked in a context in
which all of the conditions checked by cpufreq_cpu_get() are
guaranteed to be satisfied.

Use cpufreq_cpu_get_raw() instead of it and drop the
corresponding cpufreq_cpu_put() from get_cur_freq_on_cpu().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/cpufreq.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 430efcbea48e..dca22de98d94 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -127,9 +127,14 @@ struct cpufreq_policy {
 #define CPUFREQ_SHARED_TYPE_ANY	 (3) /* Freq can be set from any dependent CPU*/
 
 #ifdef CONFIG_CPU_FREQ
+struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu);
 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu);
 void cpufreq_cpu_put(struct cpufreq_policy *policy);
 #else
+static inline struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
+{
+	return NULL;
+}
 static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
 {
 	return NULL;
-- 
cgit v1.2.3


From e902e14549e04c040fb6e15785efd35f810a223a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 16 Sep 2015 12:36:04 +0200
Subject: genirq: Remove __irq_set_chip_handler_name_locked()

All users converted to irq_set_chip_handler_name_locked()

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdesc.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 0593c691d091..29741382593c 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -184,19 +184,6 @@ static inline void __irq_set_handler_locked(unsigned int irq,
 	desc->handle_irq = handler;
 }
 
-/* caller has locked the irq_desc and both params are valid */
-static inline void
-__irq_set_chip_handler_name_locked(unsigned int irq, struct irq_chip *chip,
-				   irq_flow_handler_t handler, const char *name)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-	irq_desc_get_irq_data(desc)->chip = chip;
-	desc->handle_irq = handler;
-	desc->name = name;
-}
-
 /**
  * irq_set_handler_locked - Set irq handler from a locked region
  * @data:	Pointer to the irq_data structure which identifies the irq
-- 
cgit v1.2.3


From 123236ccacc933daac3b39c5eb1f0011c70d41d8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 16 Sep 2015 12:54:23 +0200
Subject: genirq: Remove __irq_set_handler_locked()

All users converted to irq_set_handler_locked()

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdesc.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 29741382593c..dce395cd67de 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -174,16 +174,6 @@ static inline int irq_has_action(unsigned int irq)
 	return irq_desc_has_action(irq_to_desc(irq));
 }
 
-/* caller has locked the irq_desc and both params are valid */
-static inline void __irq_set_handler_locked(unsigned int irq,
-					    irq_flow_handler_t handler)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-	desc->handle_irq = handler;
-}
-
 /**
  * irq_set_handler_locked - Set irq handler from a locked region
  * @data:	Pointer to the irq_data structure which identifies the irq
-- 
cgit v1.2.3


From 755d119a6204974b2005a98549a48a75a7f5010b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 16 Sep 2015 14:37:12 +0200
Subject: genirq: Simplify irq_data_to_desc()

Avoid the lookup of irq_desc and use the same mechanism for
hierarchical and flat irqdomains.

Based-on-a-patch-from: Jiang Liu <jiang.liu@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdesc.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index dce395cd67de..1fc5304641a1 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -98,11 +98,7 @@ extern struct irq_desc irq_desc[NR_IRQS];
 
 static inline struct irq_desc *irq_data_to_desc(struct irq_data *data)
 {
-#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
-	return irq_to_desc(data->irq);
-#else
-	return container_of(data, struct irq_desc, irq_data);
-#endif
+	return container_of(data->common, struct irq_desc, irq_common_data);
 }
 
 static inline unsigned int irq_desc_get_irq(struct irq_desc *desc)
-- 
cgit v1.2.3


From fc5697126aa074c289df5e8baae28e115963023f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 15 Sep 2015 12:33:42 +0200
Subject: genirq: Provide IRQD_FORWARDED_TO_VCPU status flag

Provide a irq data flag to mark an irq forwarded to a VCPU along with
the accessor functions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irq.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 72a6b2feb7bf..e54ae8295460 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -186,6 +186,7 @@ struct irq_data {
  * IRQD_IRQ_MASKED		- Masked state of the interrupt
  * IRQD_IRQ_INPROGRESS		- In progress state of the interrupt
  * IRQD_WAKEUP_ARMED		- Wakeup mode armed
+ * IRQD_FORWARDED_TO_VCPU	- The interrupt is forwarded to a VCPU
  */
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
@@ -200,6 +201,7 @@ enum {
 	IRQD_IRQ_MASKED			= (1 << 17),
 	IRQD_IRQ_INPROGRESS		= (1 << 18),
 	IRQD_WAKEUP_ARMED		= (1 << 19),
+	IRQD_FORWARDED_TO_VCPU		= (1 << 20),
 };
 
 #define __irqd_to_state(d)		((d)->common->state_use_accessors)
@@ -278,6 +280,20 @@ static inline bool irqd_is_wakeup_armed(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_WAKEUP_ARMED;
 }
 
+static inline bool irqd_is_forwarded_to_vcpu(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_FORWARDED_TO_VCPU;
+}
+
+static inline void irqd_set_forwarded_to_vcpu(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_FORWARDED_TO_VCPU;
+}
+
+static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d)
+{
+	__irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU;
+}
 
 /*
  * Functions for chained handlers which can be enabled/disabled by the
-- 
cgit v1.2.3


From 449e9cae58b06be1293858ec8e5d8cb728238baa Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:16 +0800
Subject: genirq: Move field 'node' from irq_data into irq_common_data

NUMA node information is per-irq instead of per-irqchip, so move it into
struct irq_common_data. Also use CONFIG_NUMA to guard irq_common_data.node.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Link: http://lkml.kernel.org/r/1433145945-789-8-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index e54ae8295460..ebcc5c6745eb 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -129,9 +129,13 @@ struct irq_domain;
  * struct irq_common_data - per irq data shared by all irqchips
  * @state_use_accessors: status information for irq chip functions.
  *			Use accessor functions to deal with it
+ * @node:		node index useful for balancing
  */
 struct irq_common_data {
 	unsigned int		state_use_accessors;
+#ifdef CONFIG_NUMA
+	unsigned int		node;
+#endif
 };
 
 /**
@@ -139,7 +143,6 @@ struct irq_common_data {
  * @mask:		precomputed bitmask for accessing the chip registers
  * @irq:		interrupt number
  * @hwirq:		hardware interrupt number, local to the interrupt domain
- * @node:		node index useful for balancing
  * @common:		point to data shared by all irqchips
  * @chip:		low level interrupt hardware access
  * @domain:		Interrupt translation domain; responsible for mapping
@@ -156,7 +159,6 @@ struct irq_data {
 	u32			mask;
 	unsigned int		irq;
 	unsigned long		hwirq;
-	unsigned int		node;
 	struct irq_common_data	*common;
 	struct irq_chip		*chip;
 	struct irq_domain	*domain;
@@ -664,9 +666,18 @@ static inline u32 irq_get_trigger_type(unsigned int irq)
 	return d ? irqd_get_trigger_type(d) : 0;
 }
 
-static inline int irq_data_get_node(struct irq_data *d)
+static inline int irq_common_data_get_node(struct irq_common_data *d)
 {
+#ifdef CONFIG_NUMA
 	return d->node;
+#else
+	return 0;
+#endif
+}
+
+static inline int irq_data_get_node(struct irq_data *d)
+{
+	return irq_common_data_get_node(d->common);
 }
 
 static inline struct cpumask *irq_get_affinity_mask(int irq)
-- 
cgit v1.2.3


From af7080e040d223b5e7d0a8de28f7cea24ef017c4 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:21 +0800
Subject: genirq: Move field 'handler_data' from irq_data into irq_common_data

Handler data (handler_data) is per-irq instead of per irqchip, so move
it into struct irq_common_data.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/1433145945-789-13-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h     | 8 ++++----
 include/linux/irqdesc.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index ebcc5c6745eb..516aadbfc072 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -130,12 +130,14 @@ struct irq_domain;
  * @state_use_accessors: status information for irq chip functions.
  *			Use accessor functions to deal with it
  * @node:		node index useful for balancing
+ * @handler_data:	per-IRQ data for the irq_chip methods
  */
 struct irq_common_data {
 	unsigned int		state_use_accessors;
 #ifdef CONFIG_NUMA
 	unsigned int		node;
 #endif
+	void			*handler_data;
 };
 
 /**
@@ -149,7 +151,6 @@ struct irq_common_data {
  *			between hwirq number and linux irq number.
  * @parent_data:	pointer to parent struct irq_data to support hierarchy
  *			irq_domain
- * @handler_data:	per-IRQ data for the irq_chip methods
  * @chip_data:		platform-specific per-chip private data for the chip
  *			methods, to allow shared chip implementations
  * @msi_desc:		MSI descriptor
@@ -165,7 +166,6 @@ struct irq_data {
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
 	struct irq_data		*parent_data;
 #endif
-	void			*handler_data;
 	void			*chip_data;
 	struct msi_desc		*msi_desc;
 	cpumask_var_t		affinity;
@@ -641,12 +641,12 @@ static inline void *irq_data_get_irq_chip_data(struct irq_data *d)
 static inline void *irq_get_handler_data(unsigned int irq)
 {
 	struct irq_data *d = irq_get_irq_data(irq);
-	return d ? d->handler_data : NULL;
+	return d ? d->common->handler_data : NULL;
 }
 
 static inline void *irq_data_get_irq_handler_data(struct irq_data *d)
 {
-	return d->handler_data;
+	return d->common->handler_data;
 }
 
 static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1fc5304641a1..c7b3e1cc6d59 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -123,7 +123,7 @@ static inline void *irq_desc_get_chip_data(struct irq_desc *desc)
 
 static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
 {
-	return desc->irq_data.handler_data;
+	return desc->irq_common_data.handler_data;
 }
 
 static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
-- 
cgit v1.2.3


From 9df872faa7e1619e9278bec00ceaed2236533530 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Wed, 3 Jun 2015 11:47:50 +0800
Subject: genirq: Move field 'affinity' from irq_data into irq_common_data

Irq affinity mask is per-irq instead of per irqchip, so move it into
struct irq_common_data.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Link: http://lkml.kernel.org/r/1433303281-27688-1-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 516aadbfc072..75d50544a18f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -110,8 +110,8 @@ enum {
 /*
  * Return value for chip->irq_set_affinity()
  *
- * IRQ_SET_MASK_OK	- OK, core updates irq_data.affinity
- * IRQ_SET_MASK_NOCPY	- OK, chip did update irq_data.affinity
+ * IRQ_SET_MASK_OK	- OK, core updates irq_common_data.affinity
+ * IRQ_SET_MASK_NOCPY	- OK, chip did update irq_common_data.affinity
  * IRQ_SET_MASK_OK_DONE	- Same as IRQ_SET_MASK_OK for core. Special code to
  *			  support stacked irqchips, which indicates skipping
  *			  all descendent irqchips.
@@ -131,6 +131,7 @@ struct irq_domain;
  *			Use accessor functions to deal with it
  * @node:		node index useful for balancing
  * @handler_data:	per-IRQ data for the irq_chip methods
+ * @affinity:		IRQ affinity on SMP
  */
 struct irq_common_data {
 	unsigned int		state_use_accessors;
@@ -138,6 +139,7 @@ struct irq_common_data {
 	unsigned int		node;
 #endif
 	void			*handler_data;
+	cpumask_var_t		affinity;
 };
 
 /**
@@ -154,7 +156,6 @@ struct irq_common_data {
  * @chip_data:		platform-specific per-chip private data for the chip
  *			methods, to allow shared chip implementations
  * @msi_desc:		MSI descriptor
- * @affinity:		IRQ affinity on SMP
  */
 struct irq_data {
 	u32			mask;
@@ -168,7 +169,6 @@ struct irq_data {
 #endif
 	void			*chip_data;
 	struct msi_desc		*msi_desc;
-	cpumask_var_t		affinity;
 };
 
 /*
@@ -684,12 +684,12 @@ static inline struct cpumask *irq_get_affinity_mask(int irq)
 {
 	struct irq_data *d = irq_get_irq_data(irq);
 
-	return d ? d->affinity : NULL;
+	return d ? d->common->affinity : NULL;
 }
 
 static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
 {
-	return d->affinity;
+	return d->common->affinity;
 }
 
 unsigned int arch_dynirq_lower_bound(unsigned int from);
-- 
cgit v1.2.3


From b237721c5d95082a803c0be686f56d2dd1de995b Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Mon, 1 Jun 2015 16:05:43 +0800
Subject: genirq: Move field 'msi_desc' from irq_data into irq_common_data

MSI descriptors are per-irq instead of per irqchip, so move it into
struct irq_common_data.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/1433145945-789-35-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h     | 8 ++++----
 include/linux/irqdesc.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 75d50544a18f..4913c32db942 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -132,6 +132,7 @@ struct irq_domain;
  * @node:		node index useful for balancing
  * @handler_data:	per-IRQ data for the irq_chip methods
  * @affinity:		IRQ affinity on SMP
+ * @msi_desc:		MSI descriptor
  */
 struct irq_common_data {
 	unsigned int		state_use_accessors;
@@ -139,6 +140,7 @@ struct irq_common_data {
 	unsigned int		node;
 #endif
 	void			*handler_data;
+	struct msi_desc		*msi_desc;
 	cpumask_var_t		affinity;
 };
 
@@ -155,7 +157,6 @@ struct irq_common_data {
  *			irq_domain
  * @chip_data:		platform-specific per-chip private data for the chip
  *			methods, to allow shared chip implementations
- * @msi_desc:		MSI descriptor
  */
 struct irq_data {
 	u32			mask;
@@ -168,7 +169,6 @@ struct irq_data {
 	struct irq_data		*parent_data;
 #endif
 	void			*chip_data;
-	struct msi_desc		*msi_desc;
 };
 
 /*
@@ -652,12 +652,12 @@ static inline void *irq_data_get_irq_handler_data(struct irq_data *d)
 static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
 {
 	struct irq_data *d = irq_get_irq_data(irq);
-	return d ? d->msi_desc : NULL;
+	return d ? d->common->msi_desc : NULL;
 }
 
 static inline struct msi_desc *irq_data_get_msi_desc(struct irq_data *d)
 {
-	return d->msi_desc;
+	return d->common->msi_desc;
 }
 
 static inline u32 irq_get_trigger_type(unsigned int irq)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c7b3e1cc6d59..fbb4d5afc32b 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -128,7 +128,7 @@ static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
 
 static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
 {
-	return desc->irq_data.msi_desc;
+	return desc->irq_common_data.msi_desc;
 }
 
 /*
-- 
cgit v1.2.3


From bd0b9ac405e1794d72533c3d487aa65b6b955a0c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 14 Sep 2015 10:42:37 +0200
Subject: genirq: Remove irq argument from irq flow handlers

Most interrupt flow handlers do not use the irq argument. Those few
which use it can retrieve the irq number from the irq descriptor.

Remove the argument.

Search and replace was done with coccinelle and some extra helper
scripts around it. Thanks to Julia for her help!

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Julia Lawall <Julia.Lawall@lip6.fr>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
---
 include/linux/irq.h        | 16 ++++++++--------
 include/linux/irqdesc.h    |  4 ++--
 include/linux/irqhandler.h |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 4913c32db942..11bf09288ddb 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -475,14 +475,14 @@ static inline int irq_set_parent(int irq, int parent_irq)
  * Built-in IRQ handlers for various IRQ types,
  * callable via desc->handle_irq()
  */
-extern void handle_level_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc);
-extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc);
+extern void handle_level_irq(struct irq_desc *desc);
+extern void handle_fasteoi_irq(struct irq_desc *desc);
+extern void handle_edge_irq(struct irq_desc *desc);
+extern void handle_edge_eoi_irq(struct irq_desc *desc);
+extern void handle_simple_irq(struct irq_desc *desc);
+extern void handle_percpu_irq(struct irq_desc *desc);
+extern void handle_percpu_devid_irq(struct irq_desc *desc);
+extern void handle_bad_irq(struct irq_desc *desc);
 extern void handle_nested_irq(unsigned int irq);
 
 extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index fbb4d5afc32b..a587a33363c7 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -135,9 +135,9 @@ static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
  * Architectures call this to let the generic IRQ layer
  * handle an interrupt.
  */
-static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
+static inline void generic_handle_irq_desc(struct irq_desc *desc)
 {
-	desc->handle_irq(irq, desc);
+	desc->handle_irq(desc);
 }
 
 int generic_handle_irq(unsigned int irq);
diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h
index 62d543004197..661bed0ed1f3 100644
--- a/include/linux/irqhandler.h
+++ b/include/linux/irqhandler.h
@@ -8,7 +8,7 @@
 
 struct irq_desc;
 struct irq_data;
-typedef	void (*irq_flow_handler_t)(unsigned int irq, struct irq_desc *desc);
+typedef	void (*irq_flow_handler_t)(struct irq_desc *desc);
 typedef	void (*irq_preflow_handler_t)(struct irq_data *data);
 
 #endif
-- 
cgit v1.2.3


From 0c986253b939cc14c69d4adbe2b4121bdf4aa220 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 16 Sep 2015 11:51:12 -0400
Subject: Revert "sched, cgroup: replace signal_struct->group_rwsem with a
 global percpu_rwsem"

This reverts commit d59cfc09c32a2ae31f1c3bc2983a0cd79afb3f14.

d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with
a global percpu_rwsem") and b5ba75b5fc0e ("cgroup: simplify
threadgroup locking") changed how cgroup synchronizes against task
fork and exits so that it uses global percpu_rwsem instead of
per-process rwsem; unfortunately, the write [un]lock paths of
percpu_rwsem always involve synchronize_rcu_expedited() which turned
out to be too expensive.

Improvements for percpu_rwsem are scheduled to be merged in the coming
v4.4-rc1 merge window which alleviates this issue.  For now, revert
the two commits to restore per-process rwsem.  They will be re-applied
for the v4.4-rc1 merge window.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org # v4.2+
---
 include/linux/cgroup-defs.h | 27 ++-------------------------
 include/linux/init_task.h   |  8 ++++++++
 include/linux/sched.h       | 12 ++++++++++++
 3 files changed, 22 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 4d8fcf2187dc..8492721b39be 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -473,31 +473,8 @@ struct cgroup_subsys {
 	unsigned int depends_on;
 };
 
-extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
-
-/**
- * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
- * @tsk: target task
- *
- * Called from threadgroup_change_begin() and allows cgroup operations to
- * synchronize against threadgroup changes using a percpu_rw_semaphore.
- */
-static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
-{
-	percpu_down_read(&cgroup_threadgroup_rwsem);
-}
-
-/**
- * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups
- * @tsk: target task
- *
- * Called from threadgroup_change_end().  Counterpart of
- * cgroup_threadcgroup_change_begin().
- */
-static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
-{
-	percpu_up_read(&cgroup_threadgroup_rwsem);
-}
+void cgroup_threadgroup_change_begin(struct task_struct *tsk);
+void cgroup_threadgroup_change_end(struct task_struct *tsk);
 
 #else	/* CONFIG_CGROUPS */
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d0b380ee7d67..e38681f4912d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -25,6 +25,13 @@
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
+#ifdef CONFIG_CGROUPS
+#define INIT_GROUP_RWSEM(sig)						\
+	.group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem),
+#else
+#define INIT_GROUP_RWSEM(sig)
+#endif
+
 #ifdef CONFIG_CPUSETS
 #define INIT_CPUSET_SEQ(tsk)							\
 	.mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq),
@@ -57,6 +64,7 @@ extern struct fs_struct init_fs;
 	INIT_PREV_CPUTIME(sig)						\
 	.cred_guard_mutex =						\
 		 __MUTEX_INITIALIZER(sig.cred_guard_mutex),		\
+	INIT_GROUP_RWSEM(sig)						\
 }
 
 extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a4ab9daa387c..b7b9501b41af 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -762,6 +762,18 @@ struct signal_struct {
 	unsigned audit_tty_log_passwd;
 	struct tty_audit_buf *tty_audit_buf;
 #endif
+#ifdef CONFIG_CGROUPS
+	/*
+	 * group_rwsem prevents new tasks from entering the threadgroup and
+	 * member tasks from exiting,a more specifically, setting of
+	 * PF_EXITING.  fork and exit paths are protected with this rwsem
+	 * using threadgroup_change_begin/end().  Users which require
+	 * threadgroup to remain stable should use threadgroup_[un]lock()
+	 * which also takes care of exec path.  Currently, cgroup is the
+	 * only user.
+	 */
+	struct rw_semaphore group_rwsem;
+#endif
 
 	oom_flags_t oom_flags;
 	short oom_score_adj;		/* OOM kill score adjustment */
-- 
cgit v1.2.3


From 0243ed44ad4a25dbd2e92ad97e5e12a1a6c72d6c Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@163.com>
Date: Tue, 15 Sep 2015 04:59:21 -0700
Subject: spi: fix kernel-doc warnings in spi.h

Fix the following 'make htmldocs' warnings:

  .//include/linux/spi/spi.h:71: warning: No description found for parameter 'lock'
  .//include/linux/spi/spi.h:71: warning: Excess struct/union/enum/typedef member 'clock' description in 'spi_statistics'

Signed-off-by: Geliang Tang <geliangtang@163.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 269e8afd3e2a..6b00f18f5e6b 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -34,7 +34,7 @@ extern struct bus_type spi_bus_type;
 
 /**
  * struct spi_statistics - statistics for spi transfers
- * @clock:         lock protecting this structure
+ * @lock:          lock protecting this structure
  *
  * @messages:      number of spi-messages handled
  * @transfers:     number of spi_transfers handled
-- 
cgit v1.2.3


From ef748917b529847277f07c98c55e1c0ce416449f Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Wed, 2 Sep 2015 14:31:21 +0800
Subject: arm/arm64: KVM: Remove 'config KVM_ARM_MAX_VCPUS'

This patch removes config option of KVM_ARM_MAX_VCPUS,
and like other ARCHs, just choose the maximum allowed
value from hardware, and follows the reasons:

1) from distribution view, the option has to be
defined as the max allowed value because it need to
meet all kinds of virtulization applications and
need to support most of SoCs;

2) using a bigger value doesn't introduce extra memory
consumption, and the help text in Kconfig isn't accurate
because kvm_vpu structure isn't allocated until request
of creating VCPU is sent from QEMU;

3) the main effect is that the field of vcpus[] in 'struct kvm'
becomes a bit bigger(sizeof(void *) per vcpu) and need more cache
lines to hold the structure, but 'struct kvm' is one generic struct,
and it has worked well on other ARCHs already in this way. Also,
the world switch frequecy is often low, for example, it is ~2000
when running kernel building load in VM from APM xgene KVM host,
so the effect is very small, and the difference can't be observed
in my test at all.

Cc: Dann Frazier <dann.frazier@canonical.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index d901f1a47be6..4e14dac282bb 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -35,11 +35,7 @@
 #define VGIC_V3_MAX_LRS		16
 #define VGIC_MAX_IRQS		1024
 #define VGIC_V2_MAX_CPUS	8
-
-/* Sanity checks... */
-#if (KVM_MAX_VCPUS > 255)
-#error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now
-#endif
+#define VGIC_V3_MAX_CPUS	255
 
 #if (VGIC_NR_IRQS_LEGACY & 31)
 #error "VGIC_NR_IRQS must be a multiple of 32"
-- 
cgit v1.2.3


From 7f61f545657281a3a1b0faf68993165ebdecc51b Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 14 Sep 2015 16:01:05 +0300
Subject: libceph: don't access invalid memory in keepalive2 path

This

    struct ceph_timespec ceph_ts;
    ...
    con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts);

wraps ceph_ts into a kvec and adds it to con->out_kvec array, yet
ceph_ts becomes invalid on return from prepare_write_keepalive().  As
a result, we send out bogus keepalive2 stamps.  Fix this by encoding
into a ceph_timespec member, similar to how acks are read and written.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Yan, Zheng <zyan@redhat.com>
---
 include/linux/ceph/messenger.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 7e1252e97a30..b2371d9b51fa 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -238,6 +238,8 @@ struct ceph_connection {
 	bool out_kvec_is_msg; /* kvec refers to out_msg */
 	int out_more;        /* there is more data after the kvecs */
 	__le64 out_temp_ack; /* for writing an ack */
+	struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
+						     stamp */
 
 	/* message in temps */
 	struct ceph_msg_header in_hdr;
@@ -248,7 +250,7 @@ struct ceph_connection {
 	int in_base_pos;     /* bytes read */
 	__le64 in_temp_ack;  /* for reading an ack */
 
-	struct timespec last_keepalive_ack;
+	struct timespec last_keepalive_ack; /* keepalive2 ack stamp */
 
 	struct delayed_work work;	    /* send|recv work */
 	unsigned long       delay;          /* current delay interval */
-- 
cgit v1.2.3


From 335c25858218e76ef47f92ecb9d22e919d36140d Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 14 Sep 2015 12:44:22 +0300
Subject: libceph: advertise support for keepalive2

We are the client, but advertise keepalive2 anyway - for consistency,
if nothing else.  In the future the server might want to know whether
its clients support keepalive2.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Yan, Zheng <zyan@redhat.com>
---
 include/linux/ceph/ceph_features.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 4763ad64e832..f89b31d45cc8 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -107,6 +107,7 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_OSDMAP_ENC |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |		\
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
+	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
 	 CEPH_FEATURE_CRUSH_V4)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
-- 
cgit v1.2.3


From 37a1d3611c126fd9782ce5235791f898f053e763 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Sun, 13 Sep 2015 10:18:33 -0700
Subject: ipv6: include NLM_F_REPLACE in route replace notifications

This patch adds NLM_F_REPLACE flag to ipv6 route replace notifications.
This makes nlm_flags in ipv6 replace notifications consistent
with ipv4.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 063d30474cf6..aaf9700fc9e5 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -275,7 +275,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	     struct nl_info *info, struct mx6_config *mxc);
 int fib6_del(struct rt6_info *rt, struct nl_info *info);
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info);
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+		     unsigned int flags);
 
 void fib6_run_gc(unsigned long expires, struct net *net, bool force);
 
-- 
cgit v1.2.3


From 0fdea1e8a2853f79d39b8555cc9de16a7e0ab26f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 16 Sep 2015 23:43:17 -0400
Subject: SUNRPC: Ensure that we wait for connections to complete before
 retrying

Commit 718ba5b87343, moved the responsibility for unlocking the socket to
xs_tcp_setup_socket, meaning that the socket will be unlocked before we
know that it has finished trying to connect. The following patch is based on
an initial patch by Russell King to ensure that we delay clearing the
XPRT_CONNECTING flag until we either know that we failed to initiate
a connection attempt, or the connection attempt itself failed.

Fixes: 718ba5b87343 ("SUNRPC: Add helpers to prevent socket create from racing")
Reported-by: Russell King <linux@arm.linux.org.uk>
Reported-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/xprtsock.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 7591788e9fbf..357e44c1a46b 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -42,6 +42,7 @@ struct sock_xprt {
 	/*
 	 * Connection of transports
 	 */
+	unsigned long		sock_state;
 	struct delayed_work	connect_worker;
 	struct sockaddr_storage	srcaddr;
 	unsigned short		srcport;
@@ -76,6 +77,8 @@ struct sock_xprt {
  */
 #define TCP_RPC_REPLY		(1UL << 6)
 
+#define XPRT_SOCK_CONNECTING	1U
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SUNRPC_XPRTSOCK_H */
-- 
cgit v1.2.3


From 58189ca7b27411c3dc9a5cb9eeee0906da684c59 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 15 Sep 2015 15:10:50 -0700
Subject: net: Fix vti use case with oif in dst lookups

Steffen reported that the recent change to add oif to dst lookups breaks
the VTI use case. The problem is that with the oif set in the flow struct
the comparison to the nh_oif is triggered. Fix by splitting the
FLOWI_FLAG_VRFSRC into 2 flags -- one that triggers the vrf device cache
bypass (FLOWI_FLAG_VRFSRC) and another telling the lookup to not compare
nh oif (FLOWI_FLAG_SKIP_NH_OIF).

Fixes: 42a7b32b73d6 ("xfrm: Add oif to dst lookups")

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h  | 1 +
 include/net/route.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/flow.h b/include/net/flow.h
index acd6a096250e..9b85db85f13c 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -35,6 +35,7 @@ struct flowi_common {
 #define FLOWI_FLAG_ANYSRC		0x01
 #define FLOWI_FLAG_KNOWN_NH		0x02
 #define FLOWI_FLAG_VRFSRC		0x04
+#define FLOWI_FLAG_SKIP_NH_OIF		0x08
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
 };
diff --git a/include/net/route.h b/include/net/route.h
index cc61cb95f059..f46af256880c 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -255,7 +255,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
 		flow_flags |= FLOWI_FLAG_ANYSRC;
 
 	if (netif_index_is_vrf(sock_net(sk), oif))
-		flow_flags |= FLOWI_FLAG_VRFSRC;
+		flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF;
 
 	flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
 			   protocol, flow_flags, dst, src, dport, sport);
-- 
cgit v1.2.3


From aadfc3b2042d69a6b4b8d719d4221b988d7f31a5 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Wed, 9 Sep 2015 01:28:21 -0400
Subject: IB/hfi1: fix pstateinfo from returning improperly byteswapped value

Byteswap link_width_downgrade_*_active values before sending on the wire.  In
addition properly define the Port State Info structure.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Christian Gomez <christian.gomez@intel.com>
Signed-off-by: Rimmer, Todd <todd.rimmer@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/rdma/opa_port_info.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index 391dae1931c0..a0fa975cd1c1 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -294,8 +294,8 @@ struct opa_port_states {
 
 struct opa_port_state_info {
 	struct opa_port_states port_states;
-	u16 link_width_downgrade_tx_active;
-	u16 link_width_downgrade_rx_active;
+	__be16 link_width_downgrade_tx_active;
+	__be16 link_width_downgrade_rx_active;
 };
 
 struct opa_port_info {
-- 
cgit v1.2.3


From ae4f976968896f8f41b3a7aa21be6146492211e5 Mon Sep 17 00:00:00 2001
From: Tyler Baker <tyler.baker@linaro.org>
Date: Sat, 19 Sep 2015 03:58:10 -0400
Subject: mm: fix type cast in __pfn_to_phys()

The various definitions of __pfn_to_phys() have been consolidated to
use a generic macro in include/asm-generic/memory_model.h. This hit
mainline in the form of 012dcef3f058 "mm: move __phys_to_pfn and
__pfn_to_phys to asm/generic/memory_model.h". When the generic macro
was implemented the type cast to phys_addr_t was dropped which caused
boot regressions on ARM platforms with more than 4GB of memory and
LPAE enabled.

It was suggested to use PFN_PHYS() defined in include/linux/pfn.h
as provides the correct logic and avoids further duplication.

Reported-by: kernelci.org bot <bot@kernelci.org>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Tyler Baker <tyler.baker@linaro.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/asm-generic/memory_model.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index f20f407ce45d..4b4b056a6eb0 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -73,7 +73,7 @@
  * Convert a physical address to a Page Frame Number and back
  */
 #define	__phys_to_pfn(paddr)	((unsigned long)((paddr) >> PAGE_SHIFT))
-#define	__pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
+#define	__pfn_to_phys(pfn)	PFN_PHYS(pfn)
 
 #define page_to_pfn __page_to_pfn
 #define pfn_to_page __pfn_to_page
-- 
cgit v1.2.3


From b7f76ea2ef6739ee484a165ffbac98deb855d3d3 Mon Sep 17 00:00:00 2001
From: Jann Horn <jann@thejh.net>
Date: Fri, 18 Sep 2015 23:41:23 +0200
Subject: security: fix typo in security_task_prctl

Signed-off-by: Jann Horn <jann@thejh.net>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/security.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index 79d85ddf8093..2f4c1f7aa7db 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -946,7 +946,7 @@ static inline int security_task_prctl(int option, unsigned long arg2,
 				      unsigned long arg4,
 				      unsigned long arg5)
 {
-	return cap_task_prctl(option, arg2, arg3, arg3, arg5);
+	return cap_task_prctl(option, arg2, arg3, arg4, arg5);
 }
 
 static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
-- 
cgit v1.2.3


From 0315e382704817b279e5693dca8ab9d89aa20b3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nikola=20Forr=C3=B3?= <nforro@redhat.com>
Date: Thu, 17 Sep 2015 16:01:32 +0200
Subject: net: Fix behaviour of unreachable, blackhole and prohibit routes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Man page of ip-route(8) says following about route types:

  unreachable - these destinations are unreachable.  Packets are dis‐
  carded and the ICMP message host unreachable is generated.  The local
  senders get an EHOSTUNREACH error.

  blackhole - these destinations are unreachable.  Packets are dis‐
  carded silently.  The local senders get an EINVAL error.

  prohibit - these destinations are unreachable.  Packets are discarded
  and the ICMP message communication administratively prohibited is
  generated.  The local senders get an EACCES error.

In the inet6 address family, this was correct, except the local senders
got ENETUNREACH error instead of EHOSTUNREACH in case of unreachable route.
In the inet address family, all three route types generated ICMP message
net unreachable, and the local senders got ENETUNREACH error.

In both address families all three route types now behave consistently
with documentation.

Signed-off-by: Nikola Forró <nforro@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a37d0432bebd..727d6e9a9685 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -236,8 +236,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
 	rcu_read_lock();
 
 	tb = fib_get_table(net, RT_TABLE_MAIN);
-	if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF))
-		err = 0;
+	if (tb)
+		err = fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF);
+
+	if (err == -EAGAIN)
+		err = -ENETUNREACH;
 
 	rcu_read_unlock();
 
@@ -258,7 +261,7 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp,
 			     struct fib_result *res, unsigned int flags)
 {
 	struct fib_table *tb;
-	int err;
+	int err = -ENETUNREACH;
 
 	flags |= FIB_LOOKUP_NOREF;
 	if (net->ipv4.fib_has_custom_rules)
@@ -268,15 +271,20 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp,
 
 	res->tclassid = 0;
 
-	for (err = 0; !err; err = -ENETUNREACH) {
-		tb = rcu_dereference_rtnl(net->ipv4.fib_main);
-		if (tb && !fib_table_lookup(tb, flp, res, flags))
-			break;
+	tb = rcu_dereference_rtnl(net->ipv4.fib_main);
+	if (tb)
+		err = fib_table_lookup(tb, flp, res, flags);
+
+	if (!err)
+		goto out;
+
+	tb = rcu_dereference_rtnl(net->ipv4.fib_default);
+	if (tb)
+		err = fib_table_lookup(tb, flp, res, flags);
 
-		tb = rcu_dereference_rtnl(net->ipv4.fib_default);
-		if (tb && !fib_table_lookup(tb, flp, res, flags))
-			break;
-	}
+out:
+	if (err == -EAGAIN)
+		err = -ENETUNREACH;
 
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From 83cf9a2521b0934a5f9d04082c9bb4f554fddcd4 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 18 Sep 2015 11:47:41 +0200
Subject: ip6tunnel: make rx/tx bytes counters consistent

Like the previous patch, which fixes ipv4 tunnels, here is the ipv6 part.

Before the patch, the external ipv6 header + gre header were included on
tx.

After the patch:
$ ping -c1 192.168.6.121 ; ip -s l ls dev ip6gre1
PING 192.168.6.121 (192.168.6.121) 56(84) bytes of data.
64 bytes from 192.168.6.121: icmp_req=1 ttl=64 time=1.92 ms

--- 192.168.6.121 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 1.923/1.923/1.923/0.000 ms
7: ip6gre1@NONE: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1440 qdisc noqueue state UNKNOWN mode DEFAULT group default
    link/gre6 20:01:06:60:30:08:c1:c3:00:00:00:00:00:00:01:23 peer 20:01:06:60:30:08:c1:c3:00:00:00:00:00:00:01:21
    RX: bytes  packets  errors  dropped overrun mcast
    84         1        0       0       0       0
    TX: bytes  packets  errors  dropped carrier collsns
    84         1        0       0       0       0
$ ping -c1 192.168.1.121 ; ip -s l ls dev ip6tnl1
PING 192.168.1.121 (192.168.1.121) 56(84) bytes of data.
64 bytes from 192.168.1.121: icmp_req=1 ttl=64 time=2.28 ms

--- 192.168.1.121 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 2.288/2.288/2.288/0.000 ms
8: ip6tnl1@NONE: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1452 qdisc noqueue state UNKNOWN mode DEFAULT group default
    link/tunnel6 2001:660:3008:c1c3::123 peer 2001:660:3008:c1c3::121
    RX: bytes  packets  errors  dropped overrun mcast
    84         1        0       0       0       0
    TX: bytes  packets  errors  dropped carrier collsns
    84         1        0       0       0       0

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_tunnel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 65c2a9397b3c..fa915fa0f703 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -86,7 +86,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 	struct net_device_stats *stats = &dev->stats;
 	int pkt_len, err;
 
-	pkt_len = skb->len;
+	pkt_len = skb->len - skb_inner_network_offset(skb);
 	err = ip6_local_out_sk(sk, skb);
 
 	if (net_xmit_eval(err) == 0) {
-- 
cgit v1.2.3


From ed2e923945892a8372ab70d2f61d364b0b6d9054 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 19 Sep 2015 09:08:34 -0700
Subject: tcp/dccp: fix timewait races in timer handling

When creating a timewait socket, we need to arm the timer before
allowing other cpus to find it. The signal allowing cpus to find
the socket is setting tw_refcnt to non zero value.

As we set tw_refcnt in __inet_twsk_hashdance(), we therefore need to
call inet_twsk_schedule() first.

This also means we need to remove tw_refcnt changes from
inet_twsk_schedule() and let the caller handle it.

Note that because we use mod_timer_pinned(), we have the guarantee
the timer wont expire before we set tw_refcnt as we run in BH context.

To make things more readable I introduced inet_twsk_reschedule() helper.

When rearming the timer, we can use mod_timer_pending() to make sure
we do not rearm a canceled timer.

Note: This bug can possibly trigger if packets of a flow can hit
multiple cpus. This does not normally happen, unless flow steering
is broken somehow. This explains this bug was spotted ~5 months after
its introduction.

A similar fix is needed for SYN_RECV sockets in reqsk_queue_hash_req(),
but will be provided in a separate patch for proper tracking.

Fixes: 789f558cfb36 ("tcp/dccp: get rid of central timewait timer")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Ying Cai <ycai@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_timewait_sock.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 879d6e5a973b..186f3a1e1b1f 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -110,7 +110,19 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 			   struct inet_hashinfo *hashinfo);
 
-void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo);
+void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
+			  bool rearm);
+
+static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
+{
+	__inet_twsk_schedule(tw, timeo, false);
+}
+
+static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo)
+{
+	__inet_twsk_schedule(tw, timeo, true);
+}
+
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
 void inet_twsk_purge(struct inet_hashinfo *hashinfo,
-- 
cgit v1.2.3


From ac5be6b47e8bd25b62bed2c82cda7398999f59e9 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Tue, 22 Sep 2015 14:58:49 -0700
Subject: userfaultfd: revert "userfaultfd: waitqueue: add nr wake parameter to
 __wake_up_locked_key"

This reverts commit 51360155eccb907ff8635bd10fc7de876408c2e0 and adapts
fs/userfaultfd.c to use the old version of that function.

It didn't look robust to call __wake_up_common with "nr == 1" when we
absolutely require wakeall semantics, but we've full control of what we
insert in the two waitqueue heads of the blocked userfaults.  No
exclusive waitqueue risks to be inserted into those two waitqueue heads
so we can as well stick to "nr == 1" of the old code and we can rely
purely on the fact no waitqueue inserted in one of the two waitqueue
heads we must enforce as wakeall, has wait->flags WQ_FLAG_EXCLUSIVE set.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Thierry Reding <treding@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/wait.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index d3d077228d4c..1e1bf9f963a9 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -147,8 +147,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
 
 typedef int wait_bit_action_f(struct wait_bit_key *);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr,
-			  void *key);
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
@@ -180,7 +179,7 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up_poll(x, m)						\
 	__wake_up(x, TASK_NORMAL, 1, (void *) (m))
 #define wake_up_locked_poll(x, m)					\
-	__wake_up_locked_key((x), TASK_NORMAL, 1, (void *) (m))
+	__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
 #define wake_up_interruptible_poll(x, m)				\
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
 #define wake_up_interruptible_sync_poll(x, m)				\
-- 
cgit v1.2.3


From 09f7298100ea9767324298ab0c7979f6d7463183 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Tue, 22 Sep 2015 14:59:09 -0700
Subject: userfaultfd: register uapi generic syscall (aarch64)

Add the userfaultfd syscalls to uapi asm-generic, it was tested with
postcopy live migration on aarch64 with both 4k and 64k pagesize
kernels.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Thierry Reding <treding@nvidia.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/asm-generic/unistd.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 8da542a2874d..ee124009e12a 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -709,17 +709,19 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 281
 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
-#define __NR_membarrier 282
+#define __NR_userfaultfd 282
+__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
+#define __NR_membarrier 283
 __SYSCALL(__NR_membarrier, sys_membarrier)
 
 #undef __NR_syscalls
-#define __NR_syscalls 283
+#define __NR_syscalls 284
 
 /*
  * All syscalls below here should go away really,
  * these are provided for both review and as a porting
  * help for the C library version.
-*
+ *
  * Last chance: are any of these important enough to
  * enable by default?
  */
-- 
cgit v1.2.3


From 2d8bff12699abc3a9bf886bb0b79f44d94d81496 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@redhat.com>
Date: Wed, 23 Sep 2015 14:57:58 -0400
Subject: netpoll: Close race condition between poll_one_napi and napi_disable

Drivers might call napi_disable while not holding the napi instance poll_lock.
In those instances, its possible for a race condition to exist between
poll_one_napi and napi_disable.  That is to say, poll_one_napi only tests the
NAPI_STATE_SCHED bit to see if there is work to do during a poll, and as such
the following may happen:

CPU0				CPU1
ndo_tx_timeout			napi_poll_dev
 napi_disable			 poll_one_napi
  test_and_set_bit (ret 0)
				  test_bit (ret 1)
   reset adapter		   napi_poll_routine

If the adapter gets a tx timeout without a napi instance scheduled, its possible
for the adapter to think it has exclusive access to the hardware  (as the napi
instance is now scheduled via the napi_disable call), while the netpoll code
thinks there is simply work to do.  The result is parallel hardware access
leading to corrupt data structures in the driver, and a crash.

Additionaly, there is another, more critical race between netpoll and
napi_disable.  The disabled napi state is actually identical to the scheduled
state for a given napi instance.  The implication being that, if a napi instance
is disabled, a netconsole instance would see the napi state of the device as
having been scheduled, and poll it, likely while the driver was dong something
requiring exclusive access.  In the case above, its fairly clear that not having
the rings in a state ready to be polled will cause any number of crashes.

The fix should be pretty easy.  netpoll uses its own bit to indicate that that
the napi instance is in a state of being serviced by netpoll (NAPI_STATE_NPSVC).
We can just gate disabling on that bit as well as the sched bit.  That should
prevent netpoll from conducting a napi poll if we convert its set bit to a
test_and_set_bit operation to provide mutual exclusion

Change notes:
V2)
	Remove a trailing whtiespace
	Resubmit with proper subject prefix

V3)
	Clean up spacing nits

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: "David S. Miller" <davem@davemloft.net>
CC: jmaxwell@redhat.com
Tested-by: jmaxwell@redhat.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 88a00694eda5..2d15e3831440 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -507,6 +507,7 @@ static inline void napi_enable(struct napi_struct *n)
 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 	smp_mb__before_atomic();
 	clear_bit(NAPI_STATE_SCHED, &n->state);
+	clear_bit(NAPI_STATE_NPSVC, &n->state);
 }
 
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From 9badce000e2ce68ba74838a3cd356dde58221c2f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Sep 2015 17:07:29 -0400
Subject: cgroup, writeback: don't enable cgroup writeback on traditional
 hierarchies

inode_cgwb_enabled() gates cgroup writeback support.  If it returns
true, each inode is attached to the corresponding memory domain which
gets mapped to io domain.  It currently only tests whether the
filesystem and bdi support cgroup writeback; however, cgroup writeback
support doesn't work on traditional hierarchies and thus it should
also test whether memcg and iocg are on the default hierarchy.

This caused traditional hierarchy setups to hit the cgroup writeback
path inadvertently and ended up creating separate writeback domains
for each memcg and mapping them all to the root iocg uncovering a
couple issues in the cgroup writeback path.

cgroup writeback was never meant to be enabled on traditional
hierarchies.  Make inode_cgwb_enabled() test whether both memcg and
iocg are on the default hierarchy.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Artem Bityutskiy <dedekind1@gmail.com>
Reported-by: Dexuan Cui <decui@microsoft.com>
Link: http://lkml.kernel.org/g/1443012552.19983.209.camel@gmail.com
Link: http://lkml.kernel.org/g/f30d4a6aa8a546ff88f73021d026a453@SIXPR30MB031.064d.mgd.msft.net
---
 include/linux/backing-dev.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 5a5d79ee256f..d5eb4ad1c534 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
+#include <linux/memcontrol.h>
 #include <linux/blk-cgroup.h>
 #include <linux/backing-dev-defs.h>
 #include <linux/slab.h>
@@ -252,13 +253,19 @@ int inode_congested(struct inode *inode, int cong_bits);
  * @inode: inode of interest
  *
  * cgroup writeback requires support from both the bdi and filesystem.
- * Test whether @inode has both.
+ * Also, both memcg and iocg have to be on the default hierarchy.  Test
+ * whether all conditions are met.
+ *
+ * Note that the test result may change dynamically on the same inode
+ * depending on how memcg and iocg are configured.
  */
 static inline bool inode_cgwb_enabled(struct inode *inode)
 {
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
 
-	return bdi_cap_account_dirty(bdi) &&
+	return cgroup_on_dfl(mem_cgroup_root_css->cgroup) &&
+		cgroup_on_dfl(blkcg_root_css->cgroup) &&
+		bdi_cap_account_dirty(bdi) &&
 		(bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
 		(inode->i_sb->s_iflags & SB_I_CGROUPWB);
 }
-- 
cgit v1.2.3


From 6ae459bdaaeebc632b16e54dcbabb490c6931d61 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 22 Sep 2015 12:57:53 -0700
Subject: skbuff: Fix skb checksum flag on skb pull

VXLAN device can receive skb with checksum partial. But the checksum
offset could be in outer header which is pulled on receive. This results
in negative checksum offset for the skb. Such skb can cause the assert
failure in skb_checksum_help(). Following patch fixes the bug by setting
checksum-none while pulling outer header.

Following is the kernel panic msg from old kernel hitting the bug.

------------[ cut here ]------------
kernel BUG at net/core/dev.c:1906!
RIP: 0010:[<ffffffff81518034>] skb_checksum_help+0x144/0x150
Call Trace:
<IRQ>
[<ffffffffa0164c28>] queue_userspace_packet+0x408/0x470 [openvswitch]
[<ffffffffa016614d>] ovs_dp_upcall+0x5d/0x60 [openvswitch]
[<ffffffffa0166236>] ovs_dp_process_packet_with_key+0xe6/0x100 [openvswitch]
[<ffffffffa016629b>] ovs_dp_process_received_packet+0x4b/0x80 [openvswitch]
[<ffffffffa016c51a>] ovs_vport_receive+0x2a/0x30 [openvswitch]
[<ffffffffa0171383>] vxlan_rcv+0x53/0x60 [openvswitch]
[<ffffffffa01734cb>] vxlan_udp_encap_recv+0x8b/0xf0 [openvswitch]
[<ffffffff8157addc>] udp_queue_rcv_skb+0x2dc/0x3b0
[<ffffffff8157b56f>] __udp4_lib_rcv+0x1cf/0x6c0
[<ffffffff8157ba7a>] udp_rcv+0x1a/0x20
[<ffffffff8154fdbd>] ip_local_deliver_finish+0xdd/0x280
[<ffffffff81550128>] ip_local_deliver+0x88/0x90
[<ffffffff8154fa7d>] ip_rcv_finish+0x10d/0x370
[<ffffffff81550365>] ip_rcv+0x235/0x300
[<ffffffff8151ba1d>] __netif_receive_skb+0x55d/0x620
[<ffffffff8151c360>] netif_receive_skb+0x80/0x90
[<ffffffff81459935>] virtnet_poll+0x555/0x6f0
[<ffffffff8151cd04>] net_rx_action+0x134/0x290
[<ffffffff810683d8>] __do_softirq+0xa8/0x210
[<ffffffff8162fe6c>] call_softirq+0x1c/0x30
[<ffffffff810161a5>] do_softirq+0x65/0xa0
[<ffffffff810687be>] irq_exit+0x8e/0xb0
[<ffffffff81630733>] do_IRQ+0x63/0xe0
[<ffffffff81625f2e>] common_interrupt+0x6e/0x6e

Reported-by: Anupam Chanda <achanda@vmware.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9987af080fa0..2b0a30a6e31c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2707,6 +2707,9 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb,
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
+	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
+		 skb_checksum_start_offset(skb) <= len)
+		skb->ip_summed = CHECKSUM_NONE;
 }
 
 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
-- 
cgit v1.2.3


From 63d008a4e9ee86614ca5671b7f3ba447df007190 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Tue, 22 Sep 2015 18:12:11 +0200
Subject: ipv4: send arp replies to the correct tunnel

When using ip lwtunnels, the additional data for xmit (basically, the actual
tunnel to use) are carried in ip_tunnel_info either in dst->lwtstate or in
metadata dst. When replying to ARP requests, we need to send the reply to
the same tunnel the request came from. This means we need to construct
proper metadata dst for ARP replies.

We could perform another route lookup to get a dst entry with the correct
lwtstate. However, this won't always ensure that the outgoing tunnel is the
same as the incoming one, and it won't work anyway for IPv4 duplicate
address detection.

The only thing to do is to "reverse" the ip_tunnel_info.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 9a6a3ba888e8..f6dafec9102c 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -276,6 +276,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
 int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 		  __be32 src, __be32 dst, u8 proto,
 		  u8 tos, u8 ttl, __be16 df, bool xnet);
+struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
+					     gfp_t flags);
 
 struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum,
 					 int gso_type_mask);
-- 
cgit v1.2.3


From b194f30c61efb0767a98f47a64530baa8b731670 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Tue, 22 Sep 2015 18:12:12 +0200
Subject: lwtunnel: remove source and destination UDP port config option

The UDP tunnel config is asymmetric wrt. to the ports used. The source and
destination ports from one direction of the tunnel are not related to the
ports of the other direction. We need to be able to respond to ARP requests
using the correct ports without involving routing.

As the consequence, UDP ports need to be fixed property of the tunnel
interface and cannot be set per route. Remove the ability to set ports per
route. This is still okay to do, as no kernel has been released with these
attributes yet.

Note that the ability to specify source and destination ports is preserved
for other users of the lwtunnel API which don't use routes for tunnel key
specification (like openvswitch).

If in the future we rework ARP handling to allow port specification, the
attributes can be added back.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/lwtunnel.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 34141a5dfe74..f8b01887a495 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -21,8 +21,6 @@ enum lwtunnel_ip_t {
 	LWTUNNEL_IP_SRC,
 	LWTUNNEL_IP_TTL,
 	LWTUNNEL_IP_TOS,
-	LWTUNNEL_IP_SPORT,
-	LWTUNNEL_IP_DPORT,
 	LWTUNNEL_IP_FLAGS,
 	__LWTUNNEL_IP_MAX,
 };
@@ -36,8 +34,6 @@ enum lwtunnel_ip6_t {
 	LWTUNNEL_IP6_SRC,
 	LWTUNNEL_IP6_HOPLIMIT,
 	LWTUNNEL_IP6_TC,
-	LWTUNNEL_IP6_SPORT,
-	LWTUNNEL_IP6_DPORT,
 	LWTUNNEL_IP6_FLAGS,
 	__LWTUNNEL_IP6_MAX,
 };
-- 
cgit v1.2.3


From 3e3aaf649416988ca8be4ad2c52dc24d8be7b46e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 24 Sep 2015 20:36:02 +0100
Subject: phy: fix mdiobus module safety

Re-implement the mdiobus module refcounting to ensure that we actually
ensure that the mdiobus module code does not go away while we might call
into it.

The old scheme using bus->dev.driver was buggy, because bus->dev is a
class device which never has a struct device_driver associated with it,
and hence the associated code trying to obtain a refcount did nothing
useful.

Instead, take the approach that other subsystems do: pass the module
when calling mdiobus_register(), and record that in the mii_bus struct.
When we need to increment the module use count in the phy code, use
this stored pointer.  When the phy is deteched, drop the module
refcount, remembering that the phy device might go away at that point.

This doesn't stop the mii_bus going away while there are in-use phys -
it merely stops the underlying code vanishing.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 962387a192f1..11bce44f6d65 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -19,6 +19,7 @@
 #include <linux/spinlock.h>
 #include <linux/ethtool.h>
 #include <linux/mii.h>
+#include <linux/module.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/mod_devicetable.h>
@@ -153,6 +154,7 @@ struct sk_buff;
  * PHYs should register using this structure
  */
 struct mii_bus {
+	struct module *owner;
 	const char *name;
 	char id[MII_BUS_ID_SIZE];
 	void *priv;
@@ -198,7 +200,8 @@ static inline struct mii_bus *mdiobus_alloc(void)
 	return mdiobus_alloc_size(0);
 }
 
-int mdiobus_register(struct mii_bus *bus);
+int __mdiobus_register(struct mii_bus *bus, struct module *owner);
+#define mdiobus_register(bus) __mdiobus_register(bus, THIS_MODULE)
 void mdiobus_unregister(struct mii_bus *bus);
 void mdiobus_free(struct mii_bus *bus);
 struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv);
-- 
cgit v1.2.3


From 38737e490d4ea91660d3cec83ef88c4e6d360ae4 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Thu, 24 Sep 2015 20:36:28 +0100
Subject: phy: add phy_device_remove()

Add a phy_device_remove() function to complement phy_device_register(),
which undoes the effects of phy_device_register() by removing the phy
device from visibility, but not freeing it.

This allows these details to be moved out of the mdio bus code into
the phy code where this action belongs.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 11bce44f6d65..4a4e3a092337 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -745,6 +745,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
 				     struct phy_c45_device_ids *c45_ids);
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
+void phy_device_remove(struct phy_device *phydev);
 int phy_init_hw(struct phy_device *phydev);
 int phy_suspend(struct phy_device *phydev);
 int phy_resume(struct phy_device *phydev);
-- 
cgit v1.2.3