From 3dc59262f76241b342316bbac8b5ffe138995b2d Mon Sep 17 00:00:00 2001
From: Shrirang Bagul <shrirang.bagul@canonical.com>
Date: Thu, 24 Nov 2016 13:33:43 +0800
Subject: iio: st_sensors: match sensors using ACPI handle

Add support to match st sensors using information passed from ACPI DST
tables.

Signed-off-by: Shrirang Bagul <shrirang.bagul@canonical.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/common/st_sensors_i2c.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/common/st_sensors_i2c.h b/include/linux/iio/common/st_sensors_i2c.h
index 1796af093368..254de3c7dde8 100644
--- a/include/linux/iio/common/st_sensors_i2c.h
+++ b/include/linux/iio/common/st_sensors_i2c.h
@@ -28,4 +28,13 @@ static inline void st_sensors_of_i2c_probe(struct i2c_client *client,
 }
 #endif
 
+#ifdef CONFIG_ACPI
+int st_sensors_match_acpi_device(struct device *dev);
+#else
+static inline int st_sensors_match_acpi_device(struct device *dev)
+{
+	return -ENODEV;
+}
+#endif
+
 #endif /* ST_SENSORS_I2C_H */
-- 
cgit v1.2.3


From a96cd0f901eecd9589477cc2cd46bdb4f1f3e49a Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 28 Nov 2016 14:41:16 -0800
Subject: iio: accel: hid-sensor-accel-3d: Add timestamp

Added timestamp channel. With this change, each sample has a timestamp.
This timestamp can be from the sensor hub when present or local kernel
timestamp. HID sensors can send timestamp with input data using usage id
HID_USAGE_SENSOR_TIME_TIMESTAMP. This timestamp value is converted to
nano seconds before pushing this sample to the iio core.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/hid-sensor-hub.h | 4 ++++
 include/linux/hid-sensor-ids.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index dd85f3503410..7ef111d3ecc5 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -232,6 +232,7 @@ struct hid_sensor_common {
 	atomic_t data_ready;
 	atomic_t user_requested_state;
 	struct iio_trigger *trigger;
+	int timestamp_ns_scale;
 	struct hid_sensor_hub_attribute_info poll;
 	struct hid_sensor_hub_attribute_info report_state;
 	struct hid_sensor_hub_attribute_info power_state;
@@ -271,4 +272,7 @@ int hid_sensor_format_scale(u32 usage_id,
 
 s32 hid_sensor_read_poll_value(struct hid_sensor_common *st);
 
+int64_t hid_sensor_convert_timestamp(struct hid_sensor_common *st,
+				     int64_t raw_value);
+
 #endif
diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index f2ee90aed0c2..9a3a9db1b39d 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -95,6 +95,7 @@
 #define HID_USAGE_SENSOR_TIME_HOUR				0x200525
 #define HID_USAGE_SENSOR_TIME_MINUTE				0x200526
 #define HID_USAGE_SENSOR_TIME_SECOND				0x200527
+#define HID_USAGE_SENSOR_TIME_TIMESTAMP				0x200529
 
 /* Units */
 #define HID_USAGE_SENSOR_UNITS_NOT_SPECIFIED			0x00
-- 
cgit v1.2.3


From 122020af856181c24fe45903e43e3cc987c175f7 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 15 Nov 2016 15:46:42 +0000
Subject: dma-buf: Provide wrappers for reservation's lock

Joonas complained that writing ww_mutex_lock(&resv->lock, ctx) was too
intrusive compared to reservation_object_lock(resv, ctx);

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161115154642.31850-1-chris@chris-wilson.co.uk
---
 include/linux/reservation.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index d9706a6f5ae2..2b5a4679daea 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -144,6 +144,40 @@ reservation_object_get_list(struct reservation_object *obj)
 					 reservation_object_held(obj));
 }
 
+/**
+ * reservation_object_lock - lock the reservation object
+ * @obj: the reservation object
+ * @ctx: the locking context
+ *
+ * Locks the reservation object for exclusive access and modification. Note,
+ * that the lock is only against other writers, readers will run concurrently
+ * with a writer under RCU. The seqlock is used to notify readers if they
+ * overlap with a writer.
+ *
+ * As the reservation object may be locked by multiple parties in an
+ * undefined order, a #ww_acquire_ctx is passed to unwind if a cycle
+ * is detected. See ww_mutex_lock() and ww_acquire_init(). A reservation
+ * object may be locked by itself by passing NULL as @ctx.
+ */
+static inline int
+reservation_object_lock(struct reservation_object *obj,
+			struct ww_acquire_ctx *ctx)
+{
+	return ww_mutex_lock(&obj->lock, ctx);
+}
+
+/**
+ * reservation_object_unlock - unlock the reservation object
+ * @obj: the reservation object
+ *
+ * Unlocks the reservation object following exclusive access.
+ */
+static inline void
+reservation_object_unlock(struct reservation_object *obj)
+{
+	ww_mutex_unlock(&obj->lock);
+}
+
 /**
  * reservation_object_get_excl - get the reservation object's
  * exclusive fence, with update-side lock held
-- 
cgit v1.2.3


From 2904a8c1311f02896635fd35744262413a0b2726 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 9 Dec 2016 19:53:07 +0100
Subject: dma-buf: Reorganize device dma access docs

- Put the initial overview for dma-buf into dma-buf.rst.
- Put all the comments about detailed semantics into the right
  kernel-doc comment for functions or ops structure member.
- To allow that detail, switch the reworked kerneldoc to inline style
  for dma_buf_ops.
- Tie everything together into a much more streamlined overview
  comment, relying on the hyperlinks for all the details.
- Also sprinkle some links into the kerneldoc for dma_buf and
  dma_buf_attachment to tie it all together.

Cc: linux-doc@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20161209185309.1682-4-daniel.vetter@ffwll.ch
---
 include/linux/dma-buf.h | 133 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 110 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 8daeb3ce0016..6df170fb243f 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -39,19 +39,6 @@ struct dma_buf_attachment;
 
 /**
  * struct dma_buf_ops - operations possible on struct dma_buf
- * @attach: [optional] allows different devices to 'attach' themselves to the
- *	    given buffer. It might return -EBUSY to signal that backing storage
- *	    is already allocated and incompatible with the requirements
- *	    of requesting device.
- * @detach: [optional] detach a given device from this buffer.
- * @map_dma_buf: returns list of scatter pages allocated, increases usecount
- *		 of the buffer. Requires atleast one attach to be called
- *		 before. Returned sg list should already be mapped into
- *		 _device_ address space. This call may sleep. May also return
- *		 -EINTR. Should return -EINVAL if attach hasn't been called yet.
- * @unmap_dma_buf: decreases usecount of buffer, might deallocate scatter
- *		   pages.
- * @release: release this buffer; to be called after the last dma_buf_put.
  * @begin_cpu_access: [optional] called before cpu access to invalidate cpu
  * 		      caches and allocate backing storage (if not yet done)
  * 		      respectively pin the object into memory.
@@ -72,25 +59,109 @@ struct dma_buf_attachment;
  * @vunmap: [optional] unmaps a vmap from the buffer
  */
 struct dma_buf_ops {
+	/**
+	 * @attach:
+	 *
+	 * This is called from dma_buf_attach() to make sure that a given
+	 * &device can access the provided &dma_buf. Exporters which support
+	 * buffer objects in special locations like VRAM or device-specific
+	 * carveout areas should check whether the buffer could be move to
+	 * system memory (or directly accessed by the provided device), and
+	 * otherwise need to fail the attach operation.
+	 *
+	 * The exporter should also in general check whether the current
+	 * allocation fullfills the DMA constraints of the new device. If this
+	 * is not the case, and the allocation cannot be moved, it should also
+	 * fail the attach operation.
+	 *
+	 * Any exporter-private housekeeping data can be stored in the priv
+	 * pointer of &dma_buf_attachment structure.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success, negative error code on failure. It might return -EBUSY
+	 * to signal that backing storage is already allocated and incompatible
+	 * with the requirements of requesting device.
+	 */
 	int (*attach)(struct dma_buf *, struct device *,
-			struct dma_buf_attachment *);
+		      struct dma_buf_attachment *);
 
+	/**
+	 * @detach:
+	 *
+	 * This is called by dma_buf_detach() to release a &dma_buf_attachment.
+	 * Provided so that exporters can clean up any housekeeping for an
+	 * &dma_buf_attachment.
+	 *
+	 * This callback is optional.
+	 */
 	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
 
-	/* For {map,unmap}_dma_buf below, any specific buffer attributes
-	 * required should get added to device_dma_parameters accessible
-	 * via dev->dma_params.
+	/**
+	 * @map_dma_buf:
+	 *
+	 * This is called by dma_buf_map_attachment() and is used to map a
+	 * shared &dma_buf into device address space, and it is mandatory. It
+	 * can only be called if @attach has been called successfully. This
+	 * essentially pins the DMA buffer into place, and it cannot be moved
+	 * any more
+	 *
+	 * This call may sleep, e.g. when the backing storage first needs to be
+	 * allocated, or moved to a location suitable for all currently attached
+	 * devices.
+	 *
+	 * Note that any specific buffer attributes required for this function
+	 * should get added to device_dma_parameters accessible via
+	 * device->dma_params from the &dma_buf_attachment. The @attach callback
+	 * should also check these constraints.
+	 *
+	 * If this is being called for the first time, the exporter can now
+	 * choose to scan through the list of attachments for this buffer,
+	 * collate the requirements of the attached devices, and choose an
+	 * appropriate backing storage for the buffer.
+	 *
+	 * Based on enum dma_data_direction, it might be possible to have
+	 * multiple users accessing at the same time (for reading, maybe), or
+	 * any other kind of sharing that the exporter might wish to make
+	 * available to buffer-users.
+	 *
+	 * Returns:
+	 *
+	 * A &sg_table scatter list of or the backing storage of the DMA buffer,
+	 * already mapped into the device address space of the &device attached
+	 * with the provided &dma_buf_attachment.
+	 *
+	 * On failure, returns a negative error value wrapped into a pointer.
+	 * May also return -EINTR when a signal was received while being
+	 * blocked.
 	 */
 	struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *,
-						enum dma_data_direction);
+					 enum dma_data_direction);
+	/**
+	 * @unmap_dma_buf:
+	 *
+	 * This is called by dma_buf_unmap_attachment() and should unmap and
+	 * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
+	 * It should also unpin the backing storage if this is the last mapping
+	 * of the DMA buffer, it the exporter supports backing storage
+	 * migration.
+	 */
 	void (*unmap_dma_buf)(struct dma_buf_attachment *,
-						struct sg_table *,
-						enum dma_data_direction);
+			      struct sg_table *,
+			      enum dma_data_direction);
+
 	/* TODO: Add try_map_dma_buf version, to return immed with -EBUSY
 	 * if the call would block.
 	 */
 
-	/* after final dma_buf_put() */
+	/**
+	 * @release:
+	 *
+	 * Called after the last dma_buf_put to release the &dma_buf, and
+	 * mandatory.
+	 */
 	void (*release)(struct dma_buf *);
 
 	int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction);
@@ -124,6 +195,15 @@ struct dma_buf_ops {
  * @poll: for userspace poll support
  * @cb_excl: for userspace poll support
  * @cb_shared: for userspace poll support
+ *
+ * This represents a shared buffer, created by calling dma_buf_export(). The
+ * userspace representation is a normal file descriptor, which can be created by
+ * calling dma_buf_fd().
+ *
+ * Shared dma buffers are reference counted using dma_buf_put() and
+ * get_dma_buf().
+ *
+ * Device DMA access is handled by the separate struct &dma_buf_attachment.
  */
 struct dma_buf {
 	size_t size;
@@ -160,6 +240,11 @@ struct dma_buf {
  * This structure holds the attachment information between the dma_buf buffer
  * and its user device(s). The list contains one attachment struct per device
  * attached to the buffer.
+ *
+ * An attachment is created by calling dma_buf_attach(), and released again by
+ * calling dma_buf_detach(). The DMA mapping itself needed to initiate a
+ * transfer is created by dma_buf_map_attachment() and freed again by calling
+ * dma_buf_unmap_attachment().
  */
 struct dma_buf_attachment {
 	struct dma_buf *dmabuf;
@@ -192,9 +277,11 @@ struct dma_buf_export_info {
 };
 
 /**
- * helper macro for exporters; zeros and fills in most common values
- *
+ * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
  * @name: export-info name
+ *
+ * DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct &dma_buf_export_info,
+ * zeroes it out and pre-populates exp_name in it.
  */
 #define DEFINE_DMA_BUF_EXPORT_INFO(name)	\
 	struct dma_buf_export_info name = { .exp_name = KBUILD_MODNAME, \
-- 
cgit v1.2.3


From 0959a1683d78270bab6381d498707fb8655ae11c Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 9 Dec 2016 19:53:08 +0100
Subject: dma-buf: Update cpu access documentation

- Again move the information relevant for driver writers next to the
  callbacks.
- Put the overview and userspace interface documentation into a DOC:
  section within the code.
- Remove the text that mmap needs to be coherent - since the
  DMA_BUF_IOCTL_SYNC landed that's no longer the case. But keep the text
  that for pte zapping exporters need to adjust the address space.
- Add a FIXME that kmap and the new begin/end stuff used by the SYNC
  ioctl don't really mix correctly. That's something I just realized
  while doing this doc rework.
- Augment function and structure docs like usual.

Cc: linux-doc@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
  [sumits: fix cosmetic issues]
Link: http://patchwork.freedesktop.org/patch/msgid/20161209185309.1682-5-daniel.vetter@ffwll.ch
---
 include/linux/dma-buf.h | 91 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 83 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 6df170fb243f..57828154e440 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -39,10 +39,6 @@ struct dma_buf_attachment;
 
 /**
  * struct dma_buf_ops - operations possible on struct dma_buf
- * @begin_cpu_access: [optional] called before cpu access to invalidate cpu
- * 		      caches and allocate backing storage (if not yet done)
- * 		      respectively pin the object into memory.
- * @end_cpu_access: [optional] called after cpu access to flush caches.
  * @kmap_atomic: maps a page from the buffer into kernel address
  * 		 space, users may not block until the subsequent unmap call.
  * 		 This callback must not sleep.
@@ -50,10 +46,6 @@ struct dma_buf_attachment;
  * 		   This Callback must not sleep.
  * @kmap: maps a page from the buffer into kernel address space.
  * @kunmap: [optional] unmaps a page from the buffer.
- * @mmap: used to expose the backing storage to userspace. Note that the
- * 	  mapping needs to be coherent - if the exporter doesn't directly
- * 	  support this, it needs to fake coherency by shooting down any ptes
- * 	  when transitioning away from the cpu domain.
  * @vmap: [optional] creates a virtual mapping for the buffer into kernel
  *	  address space. Same restrictions as for vmap and friends apply.
  * @vunmap: [optional] unmaps a vmap from the buffer
@@ -164,13 +156,96 @@ struct dma_buf_ops {
 	 */
 	void (*release)(struct dma_buf *);
 
+	/**
+	 * @begin_cpu_access:
+	 *
+	 * This is called from dma_buf_begin_cpu_access() and allows the
+	 * exporter to ensure that the memory is actually available for cpu
+	 * access - the exporter might need to allocate or swap-in and pin the
+	 * backing storage. The exporter also needs to ensure that cpu access is
+	 * coherent for the access direction. The direction can be used by the
+	 * exporter to optimize the cache flushing, i.e. access with a different
+	 * direction (read instead of write) might return stale or even bogus
+	 * data (e.g. when the exporter needs to copy the data to temporary
+	 * storage).
+	 *
+	 * This callback is optional.
+	 *
+	 * FIXME: This is both called through the DMA_BUF_IOCTL_SYNC command
+	 * from userspace (where storage shouldn't be pinned to avoid handing
+	 * de-factor mlock rights to userspace) and for the kernel-internal
+	 * users of the various kmap interfaces, where the backing storage must
+	 * be pinned to guarantee that the atomic kmap calls can succeed. Since
+	 * there's no in-kernel users of the kmap interfaces yet this isn't a
+	 * real problem.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success or a negative error code on failure. This can for
+	 * example fail when the backing storage can't be allocated. Can also
+	 * return -ERESTARTSYS or -EINTR when the call has been interrupted and
+	 * needs to be restarted.
+	 */
 	int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction);
+
+	/**
+	 * @end_cpu_access:
+	 *
+	 * This is called from dma_buf_end_cpu_access() when the importer is
+	 * done accessing the CPU. The exporter can use this to flush caches and
+	 * unpin any resources pinned in @begin_cpu_access.
+	 * The result of any dma_buf kmap calls after end_cpu_access is
+	 * undefined.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success or a negative error code on failure. Can return
+	 * -ERESTARTSYS or -EINTR when the call has been interrupted and needs
+	 * to be restarted.
+	 */
 	int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction);
 	void *(*kmap_atomic)(struct dma_buf *, unsigned long);
 	void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *);
 	void *(*kmap)(struct dma_buf *, unsigned long);
 	void (*kunmap)(struct dma_buf *, unsigned long, void *);
 
+	/**
+	 * @mmap:
+	 *
+	 * This callback is used by the dma_buf_mmap() function
+	 *
+	 * Note that the mapping needs to be incoherent, userspace is expected
+	 * to braket CPU access using the DMA_BUF_IOCTL_SYNC interface.
+	 *
+	 * Because dma-buf buffers have invariant size over their lifetime, the
+	 * dma-buf core checks whether a vma is too large and rejects such
+	 * mappings. The exporter hence does not need to duplicate this check.
+	 * Drivers do not need to check this themselves.
+	 *
+	 * If an exporter needs to manually flush caches and hence needs to fake
+	 * coherency for mmap support, it needs to be able to zap all the ptes
+	 * pointing at the backing storage. Now linux mm needs a struct
+	 * address_space associated with the struct file stored in vma->vm_file
+	 * to do that with the function unmap_mapping_range. But the dma_buf
+	 * framework only backs every dma_buf fd with the anon_file struct file,
+	 * i.e. all dma_bufs share the same file.
+	 *
+	 * Hence exporters need to setup their own file (and address_space)
+	 * association by setting vma->vm_file and adjusting vma->vm_pgoff in
+	 * the dma_buf mmap callback. In the specific case of a gem driver the
+	 * exporter could use the shmem file already provided by gem (and set
+	 * vm_pgoff = 0). Exporters can then zap ptes by unmapping the
+	 * corresponding range of the struct address_space associated with their
+	 * own file.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
 
 	void *(*vmap)(struct dma_buf *);
-- 
cgit v1.2.3


From 409c69be433b819c924a8d1c457a835bc6d51700 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sat, 10 Dec 2016 11:51:11 +0200
Subject: ASoC: samsung: Remove tests of member address

The driver was checking for non-NULL address of struct's members:
 - s3c_audio_pdata->type (union),
 - s3c_audio_pdata->type.i2s (embedded struct).

This is pointless as these will be always non-NULL.  The 's3c_audio_pdata'
is always initialized in static memory so it will be zeroed.
Additionally the 'type' member was an union with only one member.

It is safe to reorganize the structures to get rid of useless union and
checks for addresses to fix the coccinelle warning:
	>> sound/soc/samsung/i2s.c:1270:2-4: ERROR: test of a variable/field address

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/platform_data/asoc-s3c.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h
index 15bf56ee8af7..90641a5daaf0 100644
--- a/include/linux/platform_data/asoc-s3c.h
+++ b/include/linux/platform_data/asoc-s3c.h
@@ -18,7 +18,7 @@
 
 extern void s3c64xx_ac97_setup_gpio(int);
 
-struct samsung_i2s {
+struct samsung_i2s_type {
 /* If the Primary DAI has 5.1 Channels */
 #define QUIRK_PRI_6CHAN		(1 << 0)
 /* If the I2S block has a Stereo Overlay Channel */
@@ -47,7 +47,5 @@ struct s3c_audio_pdata {
 	void *dma_capture;
 	void *dma_play_sec;
 	void *dma_capture_mic;
-	union {
-		struct samsung_i2s i2s;
-	} type;
+	struct samsung_i2s_type type;
 };
-- 
cgit v1.2.3


From e8314d7d53c8b050aac2828a5de5f28a997b468b Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Tue, 6 Dec 2016 20:22:36 +0100
Subject: misc: atmel-ssc: register as sound DAI if #sound-dai-cells is present

The SSC is currently not usable with the ASoC simple-audio-card, as
every SSC audio user has to build a platform driver that may do as
little as calling atmel_ssc_set_audio/atmel_ssc_put_audio (which
allocates the SSC and registers a DAI with the ASoC subsystem).

So, have that happen automatically, if the #sound-dai-cells property
is present in devicetree, which it has to be anyway for simple audio
card to work.

Signed-off-by: Peter Rosin <peda@axentia.se>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/atmel-ssc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/atmel-ssc.h b/include/linux/atmel-ssc.h
index 7c0f6549898b..fdb545101ede 100644
--- a/include/linux/atmel-ssc.h
+++ b/include/linux/atmel-ssc.h
@@ -20,6 +20,7 @@ struct ssc_device {
 	int			user;
 	int			irq;
 	bool			clk_from_rk_pin;
+	bool			sound_dai;
 };
 
 struct ssc_device * __must_check ssc_request(unsigned int ssc_num);
-- 
cgit v1.2.3


From 56e3d1cd05cc7b24cfcae8714b0661bf607aaba3 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 15 Dec 2016 06:01:10 +0100
Subject: kref: prefer atomic_inc_not_zero to atomic_add_unless

On most platforms, there exists this ifdef:

 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)

This makes this patch functionally useless. However, on PPC, there is
actually an explicit definition of atomic_inc_not_zero with its own
assembly that is slightly more optimized than atomic_add_unless. So,
this patch changes kref to use atomic_inc_not_zero instead, for PPC and
any future platforms that might provide an explicit implementation.

This also puts this usage of kref more in line with a verbatim reading
of the examples in Paul McKenney's paper [1] in the section titled "2.4
Atomic Counting With Check and Release Memory Barrier", which uses
atomic_inc_not_zero.

[1] http://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2167.pdf

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161215050110.3241-1-Jason@zx2c4.com
---
 include/linux/kref.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index e15828fd71f1..62f0a84ae94e 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -133,6 +133,6 @@ static inline int kref_put_mutex(struct kref *kref,
  */
 static inline int __must_check kref_get_unless_zero(struct kref *kref)
 {
-	return atomic_add_unless(&kref->refcount, 1, 0);
+	return atomic_inc_not_zero(&kref->refcount);
 }
 #endif /* _KREF_H_ */
-- 
cgit v1.2.3


From fd50d71f94fb1c8614098949db068cd4c8dbb91d Mon Sep 17 00:00:00 2001
From: Corentin LABBE <clabbe.montjoie@gmail.com>
Date: Tue, 13 Dec 2016 15:51:13 +0100
Subject: hwrng: core - Move hwrng miscdev minor number to
 include/linux/miscdevice.h

This patch move the define for hwrng's miscdev minor number to
include/linux/miscdevice.h.
It's better that all minor number are in the same place.
Rename it to HWRNG_MINOR (from RNG_MISCDEV_MINOR) in he process since
no other miscdev define have MISCDEV in their name.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/miscdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index ed30d5d713e3..5d81f739aa0a 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -31,6 +31,7 @@
 #define SGI_MMTIMER		153
 #define STORE_QUEUE_MINOR	155	/* unused */
 #define I2O_MINOR		166
+#define HWRNG_MINOR		183
 #define MICROCODE_MINOR		184
 #define IRNET_MINOR		187
 #define VFIO_MINOR		196
-- 
cgit v1.2.3


From cf4a7207b1cb4a3c3fe3aa11a83c9d673722a7f5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 22 Dec 2016 14:45:14 +0000
Subject: lib: Add a simple prime number generator

Prime numbers are interesting for testing components that use multiplies
and divides, such as testing DRM's struct drm_mm alignment computations.

v2: Move to lib/, add selftest
v3: Fix initial constants (exclude 0/1 from being primes)
v4: More RCU markup to keep 0day/sparse happy
v5: Fix RCU unwind on module exit, add to kselftests
v6: Tidy computation of bitmap size
v7: for_each_prime_number_from()
v8: Compose small-primes using BIT() for easier verification
v9: Move rcu dance entirely into callers.
v10: Improve quote for Betrand's Postulate (aka Chebyshev's theorem)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161222144514.3911-1-chris@chris-wilson.co.uk
---
 include/linux/prime_numbers.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 include/linux/prime_numbers.h

(limited to 'include/linux')

diff --git a/include/linux/prime_numbers.h b/include/linux/prime_numbers.h
new file mode 100644
index 000000000000..14ec4f567342
--- /dev/null
+++ b/include/linux/prime_numbers.h
@@ -0,0 +1,37 @@
+#ifndef __LINUX_PRIME_NUMBERS_H
+#define __LINUX_PRIME_NUMBERS_H
+
+#include <linux/types.h>
+
+bool is_prime_number(unsigned long x);
+unsigned long next_prime_number(unsigned long x);
+
+/**
+ * for_each_prime_number - iterate over each prime upto a value
+ * @prime: the current prime number in this iteration
+ * @max: the upper limit
+ *
+ * Starting from the first prime number 2 iterate over each prime number up to
+ * the @max value. On each iteration, @prime is set to the current prime number.
+ * @max should be less than ULONG_MAX to ensure termination. To begin with
+ * @prime set to 1 on the first iteration use for_each_prime_number_from()
+ * instead.
+ */
+#define for_each_prime_number(prime, max) \
+	for_each_prime_number_from((prime), 2, (max))
+
+/**
+ * for_each_prime_number_from - iterate over each prime upto a value
+ * @prime: the current prime number in this iteration
+ * @from: the initial value
+ * @max: the upper limit
+ *
+ * Starting from @from iterate over each successive prime number up to the
+ * @max value. On each iteration, @prime is set to the current prime number.
+ * @max should be less than ULONG_MAX, and @from less than @max, to ensure
+ * termination.
+ */
+#define for_each_prime_number_from(prime, from, max) \
+	for (prime = (from); prime <= (max); prime = next_prime_number(prime))
+
+#endif /* !__LINUX_PRIME_NUMBERS_H */
-- 
cgit v1.2.3


From a1d82aff5df760d933b6ea3a03805dbc2bd73eb8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 27 Dec 2016 14:49:02 -0500
Subject: kernfs: make kernfs_open_file->mmapped a bitfield

More kernfs_open_file->mutex synchronized flags are planned to be
added.  Convert ->mmapped to a bitfield in preparation.

While at it, make kernfs_fop_mmap() use "true" instead of "1" on
->mmapped.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Acked-by: Zefan Li <lizefan@huawei.com>
---
 include/linux/kernfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 7056238fd9f5..afd4e5abc4fb 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -185,7 +185,7 @@ struct kernfs_open_file {
 	char			*prealloc_buf;
 
 	size_t			atomic_write_len;
-	bool			mmapped;
+	bool			mmapped:1;
 	const struct vm_operations_struct *vm_ops;
 };
 
-- 
cgit v1.2.3


From 0e67db2f9fe91937e798e3d7d22c50a8438187e1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 27 Dec 2016 14:49:03 -0500
Subject: kernfs: add kernfs_ops->open/release() callbacks

Add ->open/release() methods to kernfs_ops.  ->open() is called when
the file is opened and ->release() when the file is either released or
severed.  These callbacks can be used, for example, to manage
persistent caching objects over multiple seq_file iterations.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Acked-by: Zefan Li <lizefan@huawei.com>
---
 include/linux/kernfs.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index afd4e5abc4fb..a9b11b8d06f2 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -46,6 +46,7 @@ enum kernfs_node_flag {
 	KERNFS_SUICIDAL		= 0x0400,
 	KERNFS_SUICIDED		= 0x0800,
 	KERNFS_EMPTY_DIR	= 0x1000,
+	KERNFS_HAS_RELEASE	= 0x2000,
 };
 
 /* @flags for kernfs_create_root() */
@@ -175,6 +176,7 @@ struct kernfs_open_file {
 	/* published fields */
 	struct kernfs_node	*kn;
 	struct file		*file;
+	struct seq_file		*seq_file;
 	void			*priv;
 
 	/* private fields, do not use outside kernfs proper */
@@ -186,10 +188,18 @@ struct kernfs_open_file {
 
 	size_t			atomic_write_len;
 	bool			mmapped:1;
+	bool			released:1;
 	const struct vm_operations_struct *vm_ops;
 };
 
 struct kernfs_ops {
+	/*
+	 * Optional open/release methods.  Both are called with
+	 * @of->seq_file populated.
+	 */
+	int (*open)(struct kernfs_open_file *of);
+	void (*release)(struct kernfs_open_file *of);
+
 	/*
 	 * Read is handled by either seq_file or raw_read().
 	 *
-- 
cgit v1.2.3


From e90cbebc3fa5caea4c8bfeb0d0157a0cee53efc7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 27 Dec 2016 14:49:03 -0500
Subject: cgroup add cftype->open/release() callbacks

Pipe the newly added kernfs->open/release() callbacks through cftype.
While at it, as cleanup operations now can be performed from
->release() instead of ->seq_stop(), make the latter optional.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Acked-by: Zefan Li <lizefan@huawei.com>
---
 include/linux/cgroup-defs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 861b4677fc5b..8a916dc96e84 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -388,6 +388,9 @@ struct cftype {
 	struct list_head node;		/* anchored at ss->cfts */
 	struct kernfs_ops *kf_ops;
 
+	int (*open)(struct kernfs_open_file *of);
+	void (*release)(struct kernfs_open_file *of);
+
 	/*
 	 * read_u64() is a shortcut for the common case of returning a
 	 * single integer. Use it in place of read()
-- 
cgit v1.2.3


From 5f617ebbdf10abd49312a89e3b894b927c7367f5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 27 Dec 2016 14:49:05 -0500
Subject: cgroup: reorder css_set fields

Reorder css_set fields so that they're roughly in the order of how hot
they are.  The rough order is

1. the actual csses
2. reference counter and the default cgroup pointer.
3. task lists and iterations
4. fields used during merge including css_set lookup
5. the rest

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Acked-by: Zefan Li <lizefan@huawei.com>
---
 include/linux/cgroup-defs.h | 54 ++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8a916dc96e84..3c02404cfce9 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -148,14 +148,18 @@ struct cgroup_subsys_state {
  * set for a task.
  */
 struct css_set {
-	/* Reference count */
-	atomic_t refcount;
-
 	/*
-	 * List running through all cgroup groups in the same hash
-	 * slot. Protected by css_set_lock
+	 * Set of subsystem states, one for each subsystem. This array is
+	 * immutable after creation apart from the init_css_set during
+	 * subsystem registration (at boot time).
 	 */
-	struct hlist_node hlist;
+	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+	/* reference count */
+	atomic_t refcount;
+
+	/* the default cgroup associated with this css_set */
+	struct cgroup *dfl_cgrp;
 
 	/*
 	 * Lists running through all tasks using this cgroup group.
@@ -167,21 +171,29 @@ struct css_set {
 	struct list_head tasks;
 	struct list_head mg_tasks;
 
+	/* all css_task_iters currently walking this cset */
+	struct list_head task_iters;
+
 	/*
-	 * List of cgrp_cset_links pointing at cgroups referenced from this
-	 * css_set.  Protected by css_set_lock.
+	 * On the default hierarhcy, ->subsys[ssid] may point to a css
+	 * attached to an ancestor instead of the cgroup this css_set is
+	 * associated with.  The following node is anchored at
+	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
+	 * iterate through all css's attached to a given cgroup.
 	 */
-	struct list_head cgrp_links;
+	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
 
-	/* the default cgroup associated with this css_set */
-	struct cgroup *dfl_cgrp;
+	/*
+	 * List running through all cgroup groups in the same hash
+	 * slot. Protected by css_set_lock
+	 */
+	struct hlist_node hlist;
 
 	/*
-	 * Set of subsystem states, one for each subsystem. This array is
-	 * immutable after creation apart from the init_css_set during
-	 * subsystem registration (at boot time).
+	 * List of cgrp_cset_links pointing at cgroups referenced from this
+	 * css_set.  Protected by css_set_lock.
 	 */
-	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+	struct list_head cgrp_links;
 
 	/*
 	 * List of csets participating in the on-going migration either as
@@ -201,18 +213,6 @@ struct css_set {
 	struct cgroup *mg_dst_cgrp;
 	struct css_set *mg_dst_cset;
 
-	/*
-	 * On the default hierarhcy, ->subsys[ssid] may point to a css
-	 * attached to an ancestor instead of the cgroup this css_set is
-	 * associated with.  The following node is anchored at
-	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
-	 * iterate through all css's attached to a given cgroup.
-	 */
-	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
-
-	/* all css_task_iters currently walking this cset */
-	struct list_head task_iters;
-
 	/* dead and being drained, ignore for migration */
 	bool dead;
 
-- 
cgit v1.2.3


From 7b4632f048415263669676dda20fd5d811c3d3e4 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Sat, 24 Dec 2016 23:28:35 +0800
Subject: cgroup: fix a comment typo

Fix a comment typo in cgroup.h.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c83c23f0577b..f6b43fbb141c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -266,7 +266,7 @@ void css_task_iter_end(struct css_task_iter *it);
  * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
  * @leader: the loop cursor
  * @dst_css: the destination css
- * @tset: takset to iterate
+ * @tset: taskset to iterate
  *
  * Iterate threadgroup leaders of @tset.  For single-task migrations, @tset
  * may not contain any.
-- 
cgit v1.2.3


From 3d48b53fb2ae37158e700ffef3f45461ff15c965 Mon Sep 17 00:00:00 2001
From: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
Date: Thu, 29 Dec 2016 21:37:21 +0100
Subject: net: dev_weight: TX/RX orthogonality

Oftenly, introducing side effects on packet processing on the other half
of the stack by adjusting one of TX/RX via sysctl is not desirable.
There are cases of demand for asymmetric, orthogonal configurability.

This holds true especially for nodes where RPS for RFS usage on top is
configured and therefore use the 'old dev_weight'. This is quite a
common base configuration setup nowadays, even with NICs of superior processing
support (e.g. aRFS).

A good example use case are nodes acting as noSQL data bases with a
large number of tiny requests and rather fewer but large packets as responses.
It's affordable to have large budget and rx dev_weights for the
requests. But as a side effect having this large a number on TX
processed in one run can overwhelm drivers.

This patch therefore introduces an independent configurability via sysctl to
userland.

Signed-off-by: Matthias Tafelmeier <matthias.tafelmeier@gmx.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 994f7423a74b..ecd78b3c9aba 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3795,6 +3795,10 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
+extern int		dev_weight_rx_bias;
+extern int		dev_weight_tx_bias;
+extern int		dev_rx_weight;
+extern int		dev_tx_weight;
 
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
-- 
cgit v1.2.3


From f641d3b536e559dd2bae9245ec2e7d86cdf623fd Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 29 Dec 2016 21:48:24 +0100
Subject: dma-buf: use preferred struct reference in kernel-doc

sed -e 's/\( \* .*\)struct &\([_a-z]*\)/\1\&struct \2/' -i

Originally I wasnt a friend of this style because I thought a
line-break between the "&struct" and "foo" part would break it. But a
quick test shows that " * &struct \n * foo\n" works pefectly well with
current kernel-doc. So time to mass-apply these changes!

Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1483044517-5770-4-git-send-email-daniel.vetter@ffwll.ch
---
 include/linux/dma-buf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 57828154e440..4d61fc55278b 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -278,7 +278,7 @@ struct dma_buf_ops {
  * Shared dma buffers are reference counted using dma_buf_put() and
  * get_dma_buf().
  *
- * Device DMA access is handled by the separate struct &dma_buf_attachment.
+ * Device DMA access is handled by the separate &struct dma_buf_attachment.
  */
 struct dma_buf {
 	size_t size;
@@ -355,7 +355,7 @@ struct dma_buf_export_info {
  * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
  * @name: export-info name
  *
- * DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct &dma_buf_export_info,
+ * DEFINE_DMA_BUF_EXPORT_INFO macro defines the &struct dma_buf_export_info,
  * zeroes it out and pre-populates exp_name in it.
  */
 #define DEFINE_DMA_BUF_EXPORT_INFO(name)	\
-- 
cgit v1.2.3


From e9b4d7b56f293ed4de9ff7d16759d33492f83180 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 29 Dec 2016 21:48:25 +0100
Subject: dma-buf: Use recommended structure member reference

I just learned that &struct_name.member_name works and looks pretty
even. It doesn't (yet) link to the member directly though, which would
be really good for big structures or vfunc tables (where the
per-member kerneldoc tends to be long).

Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1483044517-5770-5-git-send-email-daniel.vetter@ffwll.ch
---
 include/linux/dma-buf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 4d61fc55278b..bfb3704fc6fc 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -66,8 +66,8 @@ struct dma_buf_ops {
 	 * is not the case, and the allocation cannot be moved, it should also
 	 * fail the attach operation.
 	 *
-	 * Any exporter-private housekeeping data can be stored in the priv
-	 * pointer of &dma_buf_attachment structure.
+	 * Any exporter-private housekeeping data can be stored in the
+	 * &dma_buf_attachment.priv pointer.
 	 *
 	 * This callback is optional.
 	 *
@@ -106,7 +106,7 @@ struct dma_buf_ops {
 	 *
 	 * Note that any specific buffer attributes required for this function
 	 * should get added to device_dma_parameters accessible via
-	 * device->dma_params from the &dma_buf_attachment. The @attach callback
+	 * &device.dma_params from the &dma_buf_attachment. The @attach callback
 	 * should also check these constraints.
 	 *
 	 * If this is being called for the first time, the exporter can now
-- 
cgit v1.2.3


From 58ae74683ae2c07cd717a91799edb50231061938 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 19 Dec 2016 12:25:32 +0100
Subject: fscrypt: factor out bio specific functions

That way we can get rid of the direct dependency on CONFIG_BLOCK.

Fixes: d475a507457b ("ubifs: Add skeleton for fscrypto")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Gstir <david@sigma-star.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/fscrypto.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index c074b670aa99..2a2815702095 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -174,11 +174,8 @@ extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
 						u64, gfp_t);
 extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
 				unsigned int, u64);
-extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
-extern void fscrypt_pullback_bio_page(struct page **, bool);
 extern void fscrypt_restore_control_page(struct page *);
-extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
-						unsigned int);
+
 /* policy.c */
 extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
 extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
@@ -201,6 +198,12 @@ extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
 			const struct fscrypt_str *, struct fscrypt_str *);
 extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
 			struct fscrypt_str *);
+
+/* bio.c */
+extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
+extern void fscrypt_pullback_bio_page(struct page **, bool);
+extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
+				 unsigned int);
 #endif
 
 /* crypto.c */
-- 
cgit v1.2.3


From e8f1cb507d01205e03f69809af4347ed8ec9db5b Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Sun, 1 Jan 2017 13:57:00 +0200
Subject: qed*: Update to dual-license

Since the submission of the qedr driver, there's inconsistency
in the licensing of the various qed/qede files - some are GPLv2
and some are dual-license.
Since qedr requires dual-license and it's dependent on both,
we're updating the licensing of all qed/qede source files.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/common_hsi.h     | 33 +++++++++++++++++++++++++++++----
 include/linux/qed/eth_common.h     | 32 ++++++++++++++++++++++++++++----
 include/linux/qed/iscsi_common.h   | 32 ++++++++++++++++++++++++++++----
 include/linux/qed/qed_chain.h      | 34 +++++++++++++++++++++++++++++-----
 include/linux/qed/qed_eth_if.h     | 32 ++++++++++++++++++++++++++++----
 include/linux/qed/qed_if.h         | 35 +++++++++++++++++++++++++++++------
 include/linux/qed/qed_iov_if.h     | 32 ++++++++++++++++++++++++++++----
 include/linux/qed/qed_iscsi_if.h   | 32 ++++++++++++++++++++++++++++----
 include/linux/qed/qed_ll2_if.h     | 31 +++++++++++++++++++++++++++----
 include/linux/qed/qed_roce_if.h    |  2 +-
 include/linux/qed/qede_roce.h      |  2 +-
 include/linux/qed/rdma_common.h    | 32 ++++++++++++++++++++++++++++----
 include/linux/qed/roce_common.h    | 32 ++++++++++++++++++++++++++++----
 include/linux/qed/storage_common.h | 32 ++++++++++++++++++++++++++++----
 include/linux/qed/tcp_common.h     | 32 ++++++++++++++++++++++++++++----
 15 files changed, 368 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 734deb094618..c33080baf38c 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -1,10 +1,35 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2016  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
+
 #ifndef _COMMON_HSI_H
 #define _COMMON_HSI_H
 #include <linux/types.h>
diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h
index 1aa0727c4136..4b402fb0eaad 100644
--- a/include/linux/qed/eth_common.h
+++ b/include/linux/qed/eth_common.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef __ETH_COMMON__
diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h
index 8f64b1223c2f..4c5747babcf6 100644
--- a/include/linux/qed/iscsi_common.h
+++ b/include/linux/qed/iscsi_common.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef __ISCSI_COMMON__
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index 37dfba101c6c..5cd7a4608c9b 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
- *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef _QED_CHAIN_H
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 7a52f7c58c37..d91651ecfd26 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef _QED_ETH_IF_H
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 4b454f4f5b25..d1576a2bcfc9 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -1,10 +1,33 @@
 /* QLogic qed NIC Driver
- *
- * Copyright (c) 2015 QLogic Corporation
- *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef _QED_IF_H
diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h
index 5a4f8d0899e9..b1f979135f97 100644
--- a/include/linux/qed/qed_iov_if.h
+++ b/include/linux/qed/qed_iov_if.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef _QED_IOV_IF_H
diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h
index d27912480cb3..f70bb81b8b6a 100644
--- a/include/linux/qed/qed_iscsi_if.h
+++ b/include/linux/qed/qed_iscsi_if.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef _QED_ISCSI_IF_H
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index fd75c265dba3..4fb4666ea879 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -1,10 +1,33 @@
 /* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * Copyright (c) 2015 QLogic Corporation
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef _QED_LL2_IF_H
diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h
index 53047d3fa678..f742d4312c9d 100644
--- a/include/linux/qed/qed_roce_if.h
+++ b/include/linux/qed/qed_roce_if.h
@@ -1,5 +1,5 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015-2016  QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h
index f48d64b0e2fb..3b8dd551a98c 100644
--- a/include/linux/qed/qede_roce.h
+++ b/include/linux/qed/qede_roce.h
@@ -1,5 +1,5 @@
 /* QLogic qedr NIC Driver
- * Copyright (c) 2015-2016  QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h
index 7663725faa94..f773aa5e746f 100644
--- a/include/linux/qed/rdma_common.h
+++ b/include/linux/qed/rdma_common.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef __RDMA_COMMON__
diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h
index 2eeaf3dc6646..bad02df213df 100644
--- a/include/linux/qed/roce_common.h
+++ b/include/linux/qed/roce_common.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef __ROCE_COMMON__
diff --git a/include/linux/qed/storage_common.h b/include/linux/qed/storage_common.h
index 3b8e1efd9bc2..03f3e37ab059 100644
--- a/include/linux/qed/storage_common.h
+++ b/include/linux/qed/storage_common.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef __STORAGE_COMMON__
diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h
index dc3889d1bbe6..46fe7856f1b2 100644
--- a/include/linux/qed/tcp_common.h
+++ b/include/linux/qed/tcp_common.h
@@ -1,9 +1,33 @@
 /* QLogic qed NIC Driver
- * Copyright (c) 2015 QLogic Corporation
+ * Copyright (c) 2015-2017  QLogic Corporation
  *
- * This software is available under the terms of the GNU General Public License
- * (GPL) Version 2, available from the file COPYING in the main directory of
- * this source tree.
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
  */
 
 #ifndef __TCP_COMMON__
-- 
cgit v1.2.3


From f29ffdb65ff0eaf95d2a2b80f0dee3fbd5a64772 Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Sun, 1 Jan 2017 13:57:07 +0200
Subject: qed*: RSS indirection based on queue-handles

A step toward having qede agnostic to the queue configurations
in firmware/hardware - let the RSS indirections use queue handles
instead of actual queue indices.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_eth_if.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index d91651ecfd26..3613d63cd5d0 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -77,7 +77,7 @@ struct qed_dev_eth_info {
 };
 
 struct qed_update_vport_rss_params {
-	u16	rss_ind_table[128];
+	void	*rss_ind_table[128];
 	u32	rss_key[10];
 	u8	rss_caps;
 };
-- 
cgit v1.2.3


From f990c82c385b1d9ce6acadb668df313c693cf48f Mon Sep 17 00:00:00 2001
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Sun, 1 Jan 2017 13:57:08 +0200
Subject: qed*: Add support for ndo_set_vf_trust

Trusted VFs would be allowed to receive promiscuous and
multicast promiscuous data.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_iov_if.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h
index b1f979135f97..ac2e6a3199a3 100644
--- a/include/linux/qed/qed_iov_if.h
+++ b/include/linux/qed/qed_iov_if.h
@@ -53,6 +53,8 @@ struct qed_iov_hv_ops {
 
 	int (*set_rate) (struct qed_dev *cdev, int vfid,
 			 u32 min_rate, u32 max_rate);
+
+	int (*set_trust) (struct qed_dev *cdev, int vfid, bool trust);
 };
 
 #endif
-- 
cgit v1.2.3


From 31b95c9bdc20663a20b3261303c2a5fc34aae133 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Fri, 30 Dec 2016 13:56:46 +0100
Subject: net: stmmac: remove unused duplicate property snps,axi_all

For core revision 3.x Address-Aligned Beats is available in two registers.
The DT property snps,aal was created for AAL in the DMA bus register,
which is a read/write bit.
The DT property snps,axi_all was created for AXI_AAL in the AXI bus mode
register, which is a read only bit that reflects the value of AAL in the
DMA bus register.

Since the value of snps,axi_all is never used in the driver,
and since the property was created for a bit that is read only,
it should be safe to remove the property.

Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 266dab9ad782..889e0e9a3f1c 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -103,7 +103,6 @@ struct stmmac_axi {
 	u32 axi_wr_osr_lmt;
 	u32 axi_rd_osr_lmt;
 	bool axi_kbbe;
-	bool axi_axi_all;
 	u32 axi_blen[AXI_BLEN];
 	bool axi_fb;
 	bool axi_mb;
-- 
cgit v1.2.3


From 7b13558f242747db90e52fdc510441a2ed0bafba Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 2 Jan 2017 11:37:38 +0200
Subject: net/mlx5: Fix offset naming for reserved fields in hca_cap_bits

Fix offset for reserved fields.

Fixes: 7486216b3a0b ("{net,IB}/mlx5: mlx5_ifc updates")
Fixes: b4ff3a36d3e4 ("net/mlx5: Use offset based reserved field names in the IFC header file")
Fixes: 7d5e14237a55 ("net/mlx5: Update mlx5_ifc hardware features")
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/mlx5_ifc.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 57bec544e20a..4792c856531e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -826,9 +826,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_1a9[0x2];
 	u8         local_ca_ack_delay[0x5];
 	u8         port_module_event[0x1];
-	u8         reserved_at_1b0[0x1];
+	u8         reserved_at_1b1[0x1];
 	u8         ports_check[0x1];
-	u8         reserved_at_1b2[0x1];
+	u8         reserved_at_1b3[0x1];
 	u8         disable_link_up[0x1];
 	u8         beacon_led[0x1];
 	u8         port_type[0x2];
@@ -858,7 +858,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         compact_address_vector[0x1];
 	u8         striding_rq[0x1];
-	u8         reserved_at_201[0x2];
+	u8         reserved_at_202[0x2];
 	u8         ipoib_basic_offloads[0x1];
 	u8         reserved_at_205[0xa];
 	u8         drain_sigerr[0x1];
@@ -1009,10 +1009,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         rndv_offload_rc[0x1];
 	u8         rndv_offload_dc[0x1];
 	u8         log_tag_matching_list_sz[0x5];
-	u8         reserved_at_5e8[0x3];
+	u8         reserved_at_5f8[0x3];
 	u8         log_max_xrq[0x5];
 
-	u8         reserved_at_5f0[0x200];
+	u8         reserved_at_600[0x200];
 };
 
 enum mlx5_flow_destination_type {
-- 
cgit v1.2.3


From bcda1aca7712539439d53dd5351c6223e03bf6e1 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Mon, 2 Jan 2017 11:37:41 +0200
Subject: net/mlx5: Support new MR features

This patch adds the following items to IFC file.

1. MLX5_MKC_ACCESS_MODE_KSM enum value for creating KSM memory keys.
KSM access mode used when indirect MKey associated with fixed memory
size entries.

2. null_mkey field that is used to indicate non-present KLM/KSM
entries, where it causes the device to generate page fault event
when trying to access it.

3. struct mlx5_ifc_cmd_hca_cap_bits capability bits indicating
related value/field is supported:
* fixed_buffer_size - MLX5_MKC_ACCESS_MODE_KSM
* umr_extended_translation_offset - translation_offset_42_16
    in UMR ctrl segment
* null_mkey - null_mkey in QUERY_SPECIAL_CONTEXTS

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/mlx5_ifc.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 4792c856531e..7c760e580268 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -782,11 +782,12 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_eq[0x4];
 
 	u8         max_indirection[0x8];
-	u8         reserved_at_108[0x1];
+	u8         fixed_buffer_size[0x1];
 	u8         log_max_mrw_sz[0x7];
 	u8         reserved_at_110[0x2];
 	u8         log_max_bsf_list_size[0x6];
-	u8         reserved_at_118[0x2];
+	u8         umr_extended_translation_offset[0x1];
+	u8         null_mkey[0x1];
 	u8         log_max_klm_list_size[0x6];
 
 	u8         reserved_at_120[0xa];
@@ -2569,6 +2570,7 @@ enum {
 	MLX5_MKC_ACCESS_MODE_PA    = 0x0,
 	MLX5_MKC_ACCESS_MODE_MTT   = 0x1,
 	MLX5_MKC_ACCESS_MODE_KLMS  = 0x2,
+	MLX5_MKC_ACCESS_MODE_KSM   = 0x3,
 };
 
 struct mlx5_ifc_mkc_bits {
@@ -3677,6 +3679,10 @@ struct mlx5_ifc_query_special_contexts_out_bits {
 	u8         dump_fill_mkey[0x20];
 
 	u8         resd_lkey[0x20];
+
+	u8         null_mkey[0x20];
+
+	u8         reserved_at_a0[0x60];
 };
 
 struct mlx5_ifc_query_special_contexts_in_bits {
-- 
cgit v1.2.3


From 3161625589c1d7c54e949d462f4d0c327664881a Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Mon, 2 Jan 2017 11:37:42 +0200
Subject: IB/mlx5: Refactor UMR post send format

* Update struct mlx5_wqe_umr_ctrl_seg.
* Currenlty UMR send_flags aim only certain use cases: enabled/disable
  cached MR, modifying XLT for ODP. By making flags independent make UMR
  more flexible allowing arbitrary manipulations.
* Since different UMR formats have different entry sizes UMR request
  should receive exact size of translation table update instead of
  number of entries. Rename field npages to xlt_size in struct mlx5_umr_wr
  and update relevant code accordingly.
* Add support of length64 bit.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/qp.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 0aacb2a7480d..693811e0cb24 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -292,10 +292,14 @@ struct mlx5_wqe_data_seg {
 struct mlx5_wqe_umr_ctrl_seg {
 	u8		flags;
 	u8		rsvd0[3];
-	__be16		klm_octowords;
-	__be16		bsf_octowords;
+	__be16		xlt_octowords;
+	union {
+		__be16	xlt_offset;
+		__be16	bsf_octowords;
+	};
 	__be64		mkey_mask;
-	u8		rsvd1[32];
+	__be32		xlt_offset_47_16;
+	u8		rsvd1[28];
 };
 
 struct mlx5_seg_set_psv {
@@ -389,6 +393,10 @@ struct mlx5_bsf {
 	struct mlx5_bsf_inl	m_inl;
 };
 
+struct mlx5_mtt {
+	__be64		ptag;
+};
+
 struct mlx5_klm {
 	__be32		bcount;
 	__be32		key;
-- 
cgit v1.2.3


From 7d0cc6edcc7011133c45f62a7796a98b8cb5da0f Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Mon, 2 Jan 2017 11:37:44 +0200
Subject: IB/mlx5: Add MR cache for large UMR regions

In this change we turn mlx5_ib_update_mtt() into generic
mlx5_ib_update_xlt() to perfrom HCA translation table modifiactions
supporting both atomic and process contexts and not limited by number
of modified entries.
Using this function we increase preallocated MRs up to 16GB.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0ae55361e674..ec52f3b50bf5 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -959,7 +959,7 @@ enum {
 };
 
 enum {
-	MAX_MR_CACHE_ENTRIES    = 16,
+	MAX_MR_CACHE_ENTRIES    = 21,
 };
 
 enum {
-- 
cgit v1.2.3


From 223cdc72429079aaf72511d2677b5d6584866313 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Mon, 2 Jan 2017 11:37:45 +0200
Subject: net/mlx5: Update PAGE_FAULT_RESUME layout

Update PAGE_FAULT_RESUME command layout.

Three bit fields describing page fault: rdma, rdma_write, req_res gave 8
possible combinations, while only a few were legal. Now they
are interpreted as three-bit type field, where former legal
combinations turns into corresponding types and unused were added as new
types.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/mlx5_ifc.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 7c760e580268..608dc988b3d6 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4775,12 +4775,11 @@ struct mlx5_ifc_page_fault_resume_in_bits {
 
 	u8         error[0x1];
 	u8         reserved_at_41[0x4];
-	u8         rdma[0x1];
-	u8         read_write[0x1];
-	u8         req_res[0x1];
-	u8         qpn[0x18];
+	u8         page_fault_type[0x3];
+	u8         wq_number[0x18];
 
-	u8         reserved_at_60[0x20];
+	u8         reserved_at_60[0x8];
+	u8         token[0x18];
 };
 
 struct mlx5_ifc_nop_out_bits {
-- 
cgit v1.2.3


From d9aaed838765e28234cb700c7d1ac975cadf28c9 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Mon, 2 Jan 2017 11:37:46 +0200
Subject: {net,IB}/mlx5: Refactor page fault handling

* Update page fault event according to last specification.
* Separate code path for page fault EQ, completion EQ and async EQ.
* Move page fault handling work queue from mlx5_ib static variable
  into mlx5_core page fault EQ.
* Allocate memory to store ODP event dynamically as the
  events arrive, since in atomic context - use mempool.
* Make mlx5_ib page fault handler run in process context.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h |  6 ++-
 include/linux/mlx5/driver.h | 97 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/mlx5/qp.h     | 44 --------------------
 3 files changed, 96 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 9f489365b3d3..3ccaeff15a80 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -534,7 +534,9 @@ struct mlx5_eqe_page_fault {
 			__be16  wqe_index;
 			u16	reserved2;
 			__be16  packet_length;
-			u8	reserved3[12];
+			__be32  token;
+			u8	reserved4[8];
+			__be32  pftype_wq;
 		} __packed wqe;
 		struct {
 			__be32  r_key;
@@ -542,9 +544,9 @@ struct mlx5_eqe_page_fault {
 			__be16  packet_length;
 			__be32  rdma_op_len;
 			__be64  rdma_va;
+			__be32  pftype_token;
 		} __packed rdma;
 	} __packed;
-	__be32 flags_qpn;
 } __packed;
 
 struct mlx5_eqe_vport_change {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ec52f3b50bf5..b52d07491fe7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -42,6 +42,7 @@
 #include <linux/vmalloc.h>
 #include <linux/radix-tree.h>
 #include <linux/workqueue.h>
+#include <linux/mempool.h>
 #include <linux/interrupt.h>
 
 #include <linux/mlx5/device.h>
@@ -83,6 +84,7 @@ enum {
 	MLX5_EQ_VEC_PAGES	 = 0,
 	MLX5_EQ_VEC_CMD		 = 1,
 	MLX5_EQ_VEC_ASYNC	 = 2,
+	MLX5_EQ_VEC_PFAULT	 = 3,
 	MLX5_EQ_VEC_COMP_BASE,
 };
 
@@ -178,6 +180,14 @@ enum mlx5_port_status {
 	MLX5_PORT_DOWN      = 2,
 };
 
+enum mlx5_eq_type {
+	MLX5_EQ_TYPE_COMP,
+	MLX5_EQ_TYPE_ASYNC,
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	MLX5_EQ_TYPE_PF,
+#endif
+};
+
 struct mlx5_uuar_info {
 	struct mlx5_uar	       *uars;
 	int			num_uars;
@@ -333,6 +343,14 @@ struct mlx5_eq_tasklet {
 	spinlock_t lock;
 };
 
+struct mlx5_eq_pagefault {
+	struct work_struct       work;
+	/* Pagefaults lock */
+	spinlock_t		 lock;
+	struct workqueue_struct *wq;
+	mempool_t		*pool;
+};
+
 struct mlx5_eq {
 	struct mlx5_core_dev   *dev;
 	__be32 __iomem	       *doorbell;
@@ -346,7 +364,13 @@ struct mlx5_eq {
 	struct list_head	list;
 	int			index;
 	struct mlx5_rsc_debug	*dbg;
-	struct mlx5_eq_tasklet	tasklet_ctx;
+	enum mlx5_eq_type	type;
+	union {
+		struct mlx5_eq_tasklet   tasklet_ctx;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+		struct mlx5_eq_pagefault pf_ctx;
+#endif
+	};
 };
 
 struct mlx5_core_psv {
@@ -377,6 +401,8 @@ struct mlx5_core_mkey {
 	u32			pd;
 };
 
+#define MLX5_24BIT_MASK		((1 << 24) - 1)
+
 enum mlx5_res_type {
 	MLX5_RES_QP	= MLX5_EVENT_QUEUE_TYPE_QP,
 	MLX5_RES_RQ	= MLX5_EVENT_QUEUE_TYPE_RQ,
@@ -411,6 +437,9 @@ struct mlx5_eq_table {
 	struct mlx5_eq		pages_eq;
 	struct mlx5_eq		async_eq;
 	struct mlx5_eq		cmd_eq;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct mlx5_eq		pfault_eq;
+#endif
 	int			num_comp_vectors;
 	/* protect EQs list
 	 */
@@ -497,6 +526,7 @@ struct mlx5_fc_stats {
 
 struct mlx5_eswitch;
 struct mlx5_lag;
+struct mlx5_pagefault;
 
 struct mlx5_rl_entry {
 	u32                     rate;
@@ -601,6 +631,14 @@ struct mlx5_priv {
 	struct mlx5_rl_table            rl_table;
 
 	struct mlx5_port_module_event_stats  pme_stats;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	void		      (*pfault)(struct mlx5_core_dev *dev,
+					void *context,
+					struct mlx5_pagefault *pfault);
+	void		       *pfault_ctx;
+	struct srcu_struct      pfault_srcu;
+#endif
 };
 
 enum mlx5_device_state {
@@ -619,6 +657,50 @@ enum mlx5_pci_status {
 	MLX5_PCI_STATUS_ENABLED,
 };
 
+enum mlx5_pagefault_type_flags {
+	MLX5_PFAULT_REQUESTOR = 1 << 0,
+	MLX5_PFAULT_WRITE     = 1 << 1,
+	MLX5_PFAULT_RDMA      = 1 << 2,
+};
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+	u32			bytes_committed;
+	u32			token;
+	u8			event_subtype;
+	u8			type;
+	union {
+		/* Initiator or send message responder pagefault details. */
+		struct {
+			/* Received packet size, only valid for responders. */
+			u32	packet_size;
+			/*
+			 * Number of resource holding WQE, depends on type.
+			 */
+			u32	wq_num;
+			/*
+			 * WQE index. Refers to either the send queue or
+			 * receive queue, according to event_subtype.
+			 */
+			u16	wqe_index;
+		} wqe;
+		/* RDMA responder pagefault details */
+		struct {
+			u32	r_key;
+			/*
+			 * Received packet size, minimal size page fault
+			 * resolution required for forward progress.
+			 */
+			u32	packet_size;
+			u32	rdma_op_len;
+			u64	rdma_va;
+		} rdma;
+	};
+
+	struct mlx5_eq	       *eq;
+	struct work_struct	work;
+};
+
 struct mlx5_td {
 	struct list_head tirs_list;
 	u32              tdn;
@@ -879,15 +961,13 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
-#endif
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
 void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-		       int nent, u64 mask, const char *name, struct mlx5_uar *uar);
+		       int nent, u64 mask, const char *name,
+		       struct mlx5_uar *uar, enum mlx5_eq_type type);
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
 int mlx5_stop_eqs(struct mlx5_core_dev *dev);
@@ -926,6 +1006,10 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
 			struct mlx5_odp_caps *odp_caps);
 int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
 			     u8 port_num, void *out, size_t sz);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
+				u32 wq_num, u8 type, int error);
+#endif
 
 int mlx5_init_rl_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
@@ -974,6 +1058,9 @@ struct mlx5_interface {
 	void			(*detach)(struct mlx5_core_dev *dev, void *context);
 	void			(*event)(struct mlx5_core_dev *dev, void *context,
 					 enum mlx5_dev_event event, unsigned long param);
+	void			(*pfault)(struct mlx5_core_dev *dev,
+					  void *context,
+					  struct mlx5_pagefault *pfault);
 	void *                  (*get_dev)(void *context);
 	int			protocol;
 	struct list_head	list;
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 693811e0cb24..9ed775f5cb66 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -50,9 +50,6 @@
 #define MLX5_BSF_APPTAG_ESCAPE	0x1
 #define MLX5_BSF_APPREF_ESCAPE	0x2
 
-#define MLX5_QPN_BITS		24
-#define MLX5_QPN_MASK		((1 << MLX5_QPN_BITS) - 1)
-
 enum mlx5_qp_optpar {
 	MLX5_QP_OPTPAR_ALT_ADDR_PATH		= 1 << 0,
 	MLX5_QP_OPTPAR_RRE			= 1 << 1,
@@ -418,46 +415,9 @@ struct mlx5_stride_block_ctrl_seg {
 	__be16		num_entries;
 };
 
-enum mlx5_pagefault_flags {
-	MLX5_PFAULT_REQUESTOR = 1 << 0,
-	MLX5_PFAULT_WRITE     = 1 << 1,
-	MLX5_PFAULT_RDMA      = 1 << 2,
-};
-
-/* Contains the details of a pagefault. */
-struct mlx5_pagefault {
-	u32			bytes_committed;
-	u8			event_subtype;
-	enum mlx5_pagefault_flags flags;
-	union {
-		/* Initiator or send message responder pagefault details. */
-		struct {
-			/* Received packet size, only valid for responders. */
-			u32	packet_size;
-			/*
-			 * WQE index. Refers to either the send queue or
-			 * receive queue, according to event_subtype.
-			 */
-			u16	wqe_index;
-		} wqe;
-		/* RDMA responder pagefault details */
-		struct {
-			u32	r_key;
-			/*
-			 * Received packet size, minimal size page fault
-			 * resolution required for forward progress.
-			 */
-			u32	packet_size;
-			u32	rdma_op_len;
-			u64	rdma_va;
-		} rdma;
-	};
-};
-
 struct mlx5_core_qp {
 	struct mlx5_core_rsc_common	common; /* must be first */
 	void (*event)		(struct mlx5_core_qp *, int);
-	void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
 	int			qpn;
 	struct mlx5_rsc_debug	*dbg;
 	int			pid;
@@ -557,10 +517,6 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
 int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
-				u8 context, int error);
-#endif
 int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 				struct mlx5_core_qp *rq);
 void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
-- 
cgit v1.2.3


From 17d2f88f92ce39b348f125f6b2e6eeb6b0906ac7 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Mon, 2 Jan 2017 11:37:47 +0200
Subject: IB/mlx5: Add ODP atomics support

Handle ODP atomic operations. When initiator of RDMA atomic
operation use ODP MR to provide source data handle pagefault properly.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/mlx5_ifc.h |  2 +-
 include/linux/mlx5/qp.h       | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 608dc988b3d6..15f896781966 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -328,7 +328,7 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
 	u8         receive[0x1];
 	u8         write[0x1];
 	u8         read[0x1];
-	u8         reserved_at_4[0x1];
+	u8         atomic[0x1];
 	u8         srq_receive[0x1];
 	u8         reserved_at_6[0x1a];
 };
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 9ed775f5cb66..219c699c17b7 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -212,6 +212,7 @@ struct mlx5_wqe_ctrl_seg {
 #define MLX5_WQE_CTRL_OPCODE_MASK 0xff
 #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
+#define MLX5_WQE_AV_EXT 0x80000000
 
 enum {
 	MLX5_ETH_WQE_L3_INNER_CSUM      = 1 << 4,
@@ -242,6 +243,23 @@ struct mlx5_wqe_masked_atomic_seg {
 	__be64			compare_mask;
 };
 
+struct mlx5_base_av {
+	union {
+		struct {
+			__be32	qkey;
+			__be32	reserved;
+		} qkey;
+		__be64	dc_key;
+	} key;
+	__be32	dqp_dct;
+	u8	stat_rate_sl;
+	u8	fl_mlid;
+	union {
+		__be16	rlid;
+		__be16  udp_sport;
+	};
+};
+
 struct mlx5_av {
 	union {
 		struct {
-- 
cgit v1.2.3


From aa8e08d2f523501c40b0e70f1c4ecacb97195931 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Mon, 2 Jan 2017 11:37:48 +0200
Subject: IB/mlx5: Improve MR check

Add "type" field to mlx5_core MKEY struct.
Check whether page fault happens on MKEY corresponding to MR.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/driver.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b52d07491fe7..cfa49bca009c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -394,11 +394,17 @@ struct mlx5_core_sig_ctx {
 	u32			sigerr_count;
 };
 
+enum {
+	MLX5_MKEY_MR = 1,
+	MLX5_MKEY_MW,
+};
+
 struct mlx5_core_mkey {
 	u64			iova;
 	u64			size;
 	u32			key;
 	u32			pd;
+	u32			type;
 };
 
 #define MLX5_24BIT_MASK		((1 << 24) - 1)
-- 
cgit v1.2.3


From 8e4881aa1d5d2f9c7ebfd0fe5e138f0cc345832c Mon Sep 17 00:00:00 2001
From: Philippe Reynes <tremyfr@gmail.com>
Date: Sun, 1 Jan 2017 19:02:45 +0100
Subject: net: mdio: add mdio45_ethtool_ksettings_get

There is a function in mdio for the old ethtool api gset.
We add a new function mdio45_ethtool_ksettings_get for the
new ethtool api glinksettings.

Signed-off-by: Philippe Reynes <tremyfr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index bf9d1d750693..b6587a4b32e7 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -130,6 +130,10 @@ extern int mdio45_nway_restart(const struct mdio_if_info *mdio);
 extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
 				      struct ethtool_cmd *ecmd,
 				      u32 npage_adv, u32 npage_lpa);
+extern void
+mdio45_ethtool_ksettings_get_npage(const struct mdio_if_info *mdio,
+				   struct ethtool_link_ksettings *cmd,
+				   u32 npage_adv, u32 npage_lpa);
 
 /**
  * mdio45_ethtool_gset - get settings for ETHTOOL_GSET
@@ -147,6 +151,23 @@ static inline void mdio45_ethtool_gset(const struct mdio_if_info *mdio,
 	mdio45_ethtool_gset_npage(mdio, ecmd, 0, 0);
 }
 
+/**
+ * mdio45_ethtool_ksettings_get - get settings for ETHTOOL_GLINKSETTINGS
+ * @mdio: MDIO interface
+ * @cmd: Ethtool request structure
+ *
+ * Since the CSRs for auto-negotiation using next pages are not fully
+ * standardised, this function does not attempt to decode them.  Use
+ * mdio45_ethtool_ksettings_get_npage() to specify advertisement bits
+ * from next pages.
+ */
+static inline void
+mdio45_ethtool_ksettings_get(const struct mdio_if_info *mdio,
+			     struct ethtool_link_ksettings *cmd)
+{
+	mdio45_ethtool_ksettings_get_npage(mdio, cmd, 0, 0);
+}
+
 extern int mdio_mii_ioctl(const struct mdio_if_info *mdio,
 			  struct mii_ioctl_data *mii_data, int cmd);
 
-- 
cgit v1.2.3


From 7b73305160f11b633e9801b2c6b83d5b0cb867cc Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Fri, 16 Dec 2016 18:45:21 +0100
Subject: module: Drop redundant declaration of struct module

Struct module is already declared at the beginning of the file, no
need to declare it again.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Fixes: 93c2e105f6bc ("module: Optimize __module_address() using a latched RB-tree")
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jessica Yu <jeyu@redhat.com>
---
 include/linux/module.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 7c84273d60b9..ef599379f9a4 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -282,8 +282,6 @@ enum module_state {
 	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
-struct module;
-
 struct mod_tree_node {
 	struct module *mod;
 	struct latch_tree_node node;
-- 
cgit v1.2.3


From 8afda8b26d01ee26a60ef2f0284a7f01a5ed96f8 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 28 Nov 2016 15:06:24 +0300
Subject: spi-nor: Add support for Intel SPI serial flash controller

Add support for the SPI serial flash host controller found on many Intel
CPUs including Baytrail and Braswell. The SPI serial flash controller is
used to access BIOS and other platform specific information. By default the
driver exposes a single read-only MTD device but with a module parameter
"writeable=1" the MTD device can be made read-write which makes it possible
to upgrade BIOS directly from Linux.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/platform_data/intel-spi.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 include/linux/platform_data/intel-spi.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/intel-spi.h b/include/linux/platform_data/intel-spi.h
new file mode 100644
index 000000000000..942b0c3f8f08
--- /dev/null
+++ b/include/linux/platform_data/intel-spi.h
@@ -0,0 +1,31 @@
+/*
+ * Intel PCH/PCU SPI flash driver.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef INTEL_SPI_PDATA_H
+#define INTEL_SPI_PDATA_H
+
+enum intel_spi_type {
+	INTEL_SPI_BYT = 1,
+	INTEL_SPI_LPT,
+	INTEL_SPI_BXT,
+};
+
+/**
+ * struct intel_spi_boardinfo - Board specific data for Intel SPI driver
+ * @type: Type which this controller is compatible with
+ * @writeable: The chip is writeable
+ */
+struct intel_spi_boardinfo {
+	enum intel_spi_type type;
+	bool writeable;
+};
+
+#endif /* INTEL_SPI_PDATA_H */
-- 
cgit v1.2.3


From ff00d7a32a1b88b772981a13fc198e0d29300666 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 28 Nov 2016 15:06:25 +0300
Subject: mfd: lpc_ich: Add support for SPI serial flash host controller

Many Intel CPUs including Haswell, Broadwell and Baytrail have SPI serial
flash host controller as part of the LPC device. This will populate an MFD
cell suitable for the SPI host controller driver if we know that the LPC
device has one.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/lpc_ich.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h
index 2b300b44f994..fba8fcb54f8c 100644
--- a/include/linux/mfd/lpc_ich.h
+++ b/include/linux/mfd/lpc_ich.h
@@ -20,6 +20,8 @@
 #ifndef LPC_ICH_H
 #define LPC_ICH_H
 
+#include <linux/platform_data/intel-spi.h>
+
 /* GPIO resources */
 #define ICH_RES_GPIO	0
 #define ICH_RES_GPE0	1
@@ -40,6 +42,7 @@ struct lpc_ich_info {
 	char name[32];
 	unsigned int iTCO_version;
 	unsigned int gpio_version;
+	enum intel_spi_type spi_type;
 	u8 use_gpio;
 };
 
-- 
cgit v1.2.3


From b31ef8285b19ec5563274c574fcfe7a5993125ce Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Wed, 21 Dec 2016 09:47:03 -0800
Subject: thermal core: convert ID allocation to IDA

The thermal core does not use the ability to look up pointers by ID, so
convert it from using an IDR to the more space-efficient IDA.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 include/linux/thermal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index e275e98bdceb..dab11f97e1c6 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -194,7 +194,7 @@ struct thermal_attr {
  * @governor:	pointer to the governor for this thermal zone
  * @governor_data:	private pointer for governor data
  * @thermal_instances:	list of &struct thermal_instance of this thermal zone
- * @idr:	&struct idr to generate unique id for this zone's cooling
+ * @ida:	&struct ida to generate unique id for this zone's cooling
  *		devices
  * @lock:	lock to protect thermal_instances list
  * @node:	node in thermal_tz_list (in thermal_core.c)
@@ -227,7 +227,7 @@ struct thermal_zone_device {
 	struct thermal_governor *governor;
 	void *governor_data;
 	struct list_head thermal_instances;
-	struct idr idr;
+	struct ida ida;
 	struct mutex lock;
 	struct list_head node;
 	struct delayed_work poll_queue;
-- 
cgit v1.2.3


From fe2858c8c6d167df33a839591ebe63ea05a69d06 Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@gmail.com>
Date: Wed, 4 Jan 2017 09:39:52 +0100
Subject: pwm: Remove pwm_can_sleep()

The last user of this function has been removed, so it is no longer
needed.

Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 include/linux/pwm.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 2c6c5114c089..e15fd3ce6502 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -451,8 +451,6 @@ struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id);
 struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np,
 				   const char *con_id);
 void devm_pwm_put(struct device *dev, struct pwm_device *pwm);
-
-bool pwm_can_sleep(struct pwm_device *pwm);
 #else
 static inline struct pwm_device *pwm_request(int pwm_id, const char *label)
 {
@@ -566,11 +564,6 @@ static inline struct pwm_device *devm_of_pwm_get(struct device *dev,
 static inline void devm_pwm_put(struct device *dev, struct pwm_device *pwm)
 {
 }
-
-static inline bool pwm_can_sleep(struct pwm_device *pwm)
-{
-	return false;
-}
 #endif
 
 static inline void pwm_apply_args(struct pwm_device *pwm)
-- 
cgit v1.2.3


From 8c0216f377406c7613b67bd18755889026284192 Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@gmail.com>
Date: Wed, 4 Jan 2017 09:40:54 +0100
Subject: pwm: Remove .can_sleep from struct pwm_chip

All PWM devices have been marked as "might sleep" since v4.5, there is
no longer a need to differentiate on a per-chip basis.

Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 include/linux/pwm.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index e15fd3ce6502..eae215ef1b2c 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -287,8 +287,6 @@ struct pwm_ops {
  * @pwms: array of PWM devices allocated by the framework
  * @of_xlate: request a PWM device given a device tree PWM specifier
  * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier
- * @can_sleep: must be true if the .config(), .enable() or .disable()
- *             operations may sleep
  */
 struct pwm_chip {
 	struct device *dev;
@@ -302,7 +300,6 @@ struct pwm_chip {
 	struct pwm_device * (*of_xlate)(struct pwm_chip *pc,
 					const struct of_phandle_args *args);
 	unsigned int of_pwm_n_cells;
-	bool can_sleep;
 };
 
 /**
-- 
cgit v1.2.3


From 1ff8cebf49ed9e9ca2ae44b5c4176aef9c21af9c Mon Sep 17 00:00:00 2001
From: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
Date: Tue, 3 Jan 2017 20:42:17 +0800
Subject: scm: remove use CMSG{_COMPAT}_ALIGN(sizeof(struct {compat_}cmsghdr))

sizeof(struct cmsghdr) and sizeof(struct compat_cmsghdr) already aligned.
remove use CMSG_ALIGN(sizeof(struct cmsghdr)) and
CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr)) keep code consistent.

Signed-off-by: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index b5cc5a6d7011..c06438023d79 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -92,9 +92,9 @@ struct cmsghdr {
 
 #define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) )
 
-#define CMSG_DATA(cmsg)	((void *)((char *)(cmsg) + CMSG_ALIGN(sizeof(struct cmsghdr))))
-#define CMSG_SPACE(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + CMSG_ALIGN(len))
-#define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len))
+#define CMSG_DATA(cmsg)	((void *)((char *)(cmsg) + sizeof(struct cmsghdr)))
+#define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len))
+#define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len))
 
 #define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? \
 				  (struct cmsghdr *)(ctl) : \
-- 
cgit v1.2.3


From 63971c5682bf89e15083733dcd7b4610e789e843 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 2 Jan 2017 13:44:29 +0200
Subject: spi: pxa2xx: fix indentation of the comments in header

Just for sake of readability fix the indentation of the comments in
pxa2xx_ssp.h header file.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 2d6f0c39ed68..a0522328d7aa 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -90,9 +90,9 @@
 #define SSSR_RFL_MASK	(0xf << 12)	/* Receive FIFO Level mask */
 
 #define SSCR1_TFT	(0x000003c0)	/* Transmit FIFO Threshold (mask) */
-#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..16] */
+#define SSCR1_TxTresh(x) (((x) - 1) << 6)	/* level [1..16] */
 #define SSCR1_RFT	(0x00003c00)	/* Receive FIFO Threshold (mask) */
-#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */
+#define SSCR1_RxTresh(x) (((x) - 1) << 10)	/* level [1..16] */
 
 #define RX_THRESH_CE4100_DFLT	2
 #define TX_THRESH_CE4100_DFLT	2
@@ -106,9 +106,9 @@
 #define CE4100_SSCR1_RxTresh(x) (((x) - 1) << 10)	/* level [1..4] */
 
 /* QUARK_X1000 SSCR0 bit definition */
-#define QUARK_X1000_SSCR0_DSS	(0x1F)		/* Data Size Select (mask) */
-#define QUARK_X1000_SSCR0_DataSize(x)  ((x) - 1)	/* Data Size Select [4..32] */
-#define QUARK_X1000_SSCR0_FRF	(0x3 << 5)	/* FRame Format (mask) */
+#define QUARK_X1000_SSCR0_DSS		(0x1F << 0)	/* Data Size Select (mask) */
+#define QUARK_X1000_SSCR0_DataSize(x)	((x) - 1)	/* Data Size Select [4..32] */
+#define QUARK_X1000_SSCR0_FRF		(0x3 << 5)	/* FRame Format (mask) */
 #define QUARK_X1000_SSCR0_Motorola	(0x0 << 5)	/* Motorola's Serial Peripheral Interface (SPI) */
 
 #define RX_THRESH_QUARK_X1000_DFLT	1
@@ -121,8 +121,8 @@
 #define QUARK_X1000_SSCR1_TxTresh(x) (((x) - 1) << 6)	/* level [1..32] */
 #define QUARK_X1000_SSCR1_RFT	(0x1F << 11)	/* Receive FIFO Threshold (mask) */
 #define QUARK_X1000_SSCR1_RxTresh(x) (((x) - 1) << 11)	/* level [1..32] */
-#define QUARK_X1000_SSCR1_STRF       (1 << 17)		/* Select FIFO or EFWR */
-#define QUARK_X1000_SSCR1_EFWR	(1 << 16)		/* Enable FIFO Write/Read */
+#define QUARK_X1000_SSCR1_STRF	(1 << 17)	/* Select FIFO or EFWR */
+#define QUARK_X1000_SSCR1_EFWR	(1 << 16)	/* Enable FIFO Write/Read */
 
 /* extra bits in PXA255, PXA26x and PXA27x SSP ports */
 #define SSCR0_TISSP		(1 << 4)	/* TI Sync Serial Protocol */
-- 
cgit v1.2.3


From eac53b3664f592713655f5de59dc44bdd0cfc0bd Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 21 Dec 2016 15:36:47 +0100
Subject: power: supply: axp288_charger: Drop platform_data dependency

When the axp288_charger driver was originally merged, it was merged with
a dependency on some other driver providing platform data for it.

However the battery-data-framework which should provide that data never
got merged, so the axp288_charger as merged upstream has never worked,
its probe method simply always returns -ENODEV.

This commit removes the dependency on the platform_data instead reading
back the charging current and charging voltage that the firmware has set
and using those values as the maximum values the user may set.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/mfd/axp20x.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index a4860bc9b73d..e460fc42d63e 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -554,13 +554,6 @@ struct axp20x_fg_pdata {
 	int thermistor_curve[MAX_THERM_CURVE_SIZE][2];
 };
 
-struct axp20x_chrg_pdata {
-	int max_cc;
-	int max_cv;
-	int def_cc;
-	int def_cv;
-};
-
 struct axp288_extcon_pdata {
 	/* GPIO pin control to switch D+/D- lines b/w PMIC and SOC */
 	struct gpio_desc *gpio_mux_cntl;
-- 
cgit v1.2.3


From 888f97435a856c2c5c6ca0b3337b68d595b5639e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 14 Dec 2016 17:38:53 +0100
Subject: power: supply: axp288_fuel_gauge: Drop platform_data dependency

When the axp288_faul_gauge driver was originally merged, it was
merged with a dependency on some other driver providing platform
data for it.

However the battery-data-framework which should provide that data
never got merged, resulting in x86 tablets / laptops with an axp288
having no working battery monitor, as before this commit the driver
would simply return -ENODEV if there is no platform data.

This commit removes the dependency on the platform_data instead
checking that the firmware has initialized the fuel-gauge and
reading the info back from the pmic.

What is missing from the read-back info is the table to map raw adc
values to temperature, so this commit drops the temperature and
temperature limits properties. The min voltage, charge design and
model name info is also missing. Note that none of these are really
important for userspace to have.

All other functionality is preserved and actually made available
by this commit.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=88471
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/mfd/axp20x.h | 22 ----------------------
 1 file changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index e460fc42d63e..812806d6319b 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -532,28 +532,6 @@ struct axp20x_dev {
 	const struct regmap_irq_chip	*regmap_irq_chip;
 };
 
-#define BATTID_LEN				64
-#define OCV_CURVE_SIZE			32
-#define MAX_THERM_CURVE_SIZE	25
-#define PD_DEF_MIN_TEMP			0
-#define PD_DEF_MAX_TEMP			55
-
-struct axp20x_fg_pdata {
-	char battid[BATTID_LEN + 1];
-	int design_cap;
-	int min_volt;
-	int max_volt;
-	int max_temp;
-	int min_temp;
-	int cap1;
-	int cap0;
-	int rdc1;
-	int rdc0;
-	int ocv_curve[OCV_CURVE_SIZE];
-	int tcsz;
-	int thermistor_curve[MAX_THERM_CURVE_SIZE][2];
-};
-
 struct axp288_extcon_pdata {
 	/* GPIO pin control to switch D+/D- lines b/w PMIC and SOC */
 	struct gpio_desc *gpio_mux_cntl;
-- 
cgit v1.2.3


From 5952758101fb55844957a8d4fe88402d9827cfb4 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 4 Jan 2017 19:56:24 +0100
Subject: dsa: mv88e6xxx: Optimise atu_get

Lookup in the ATU can be performed starting from a given MAC
address. This is faster than starting with the first possible MAC
address and iterating all entries.

Entries are returned in numeric order. So if the MAC address returned
is bigger than what we are searching for, we know it is not in the
ATU.

Using the benchmark provided by Volodymyr Bendiuga
<volodymyr.bendiuga@gmail.com>,

https://www.spinics.net/lists/netdev/msg411550.html

on an Marvell Armada 370 RD, the test to add a number of static fdb
entries went from 1.616531 seconds to 0.312052 seconds.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 60 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 6fec9e81bd70..42add77ae47d 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -396,6 +396,66 @@ static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2,
 	return true;
 }
 
+/**
+ * ether_addr_to_u64 - Convert an Ethernet address into a u64 value.
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Return a u64 value of the address
+ */
+static inline u64 ether_addr_to_u64(const u8 *addr)
+{
+	u64 u = 0;
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		u = u << 8 | addr[i];
+
+	return u;
+}
+
+/**
+ * u64_to_ether_addr - Convert a u64 to an Ethernet address.
+ * @u: u64 to convert to an Ethernet MAC address
+ * @addr: Pointer to a six-byte array to contain the Ethernet address
+ */
+static inline void u64_to_ether_addr(u64 u, u8 *addr)
+{
+	int i;
+
+	for (i = ETH_ALEN - 1; i >= 0; i--) {
+		addr[i] = u & 0xff;
+		u = u >> 8;
+	}
+}
+
+/**
+ * eth_addr_dec - Decrement the given MAC address
+ *
+ * @addr: Pointer to a six-byte array containing Ethernet address to decrement
+ */
+static inline void eth_addr_dec(u8 *addr)
+{
+	u64 u = ether_addr_to_u64(addr);
+
+	u--;
+	u64_to_ether_addr(u, addr);
+}
+
+/**
+ * ether_addr_greater - Compare two Ethernet addresses
+ * @addr1: Pointer to a six-byte array containing the Ethernet address
+ * @addr2: Pointer other six-byte array containing the Ethernet address
+ *
+ * Compare two Ethernet addresses, returns true addr1 is greater than addr2
+ */
+static inline bool ether_addr_greater(const u8 *addr1, const u8 *addr2)
+{
+	u64 u1 = ether_addr_to_u64(addr1);
+	u64 u2 = ether_addr_to_u64(addr2);
+
+	return u1 > u2;
+}
+
 /**
  * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
  * @dev: Pointer to a device structure
-- 
cgit v1.2.3


From 3db5e3e707ebb9ab0ce3a2dcd924ed2ea525d770 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 5 Jan 2017 13:37:28 +0100
Subject: wireless: move IEEE80211_NUM_ACS to ieee80211.h

This constant isn't really specific to mac80211, so move it
"up" a level to ieee80211.h

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index fe849329511a..87d1937e4671 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -185,6 +185,8 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2)
 
 /* number of user priorities 802.11 uses */
 #define IEEE80211_NUM_UPS		8
+/* number of ACs */
+#define IEEE80211_NUM_ACS		4
 
 #define IEEE80211_QOS_CTL_LEN		2
 /* 1d tag mask */
-- 
cgit v1.2.3


From 0e377f3b9ae936aefe5aaca4c2e2546d57b63df7 Mon Sep 17 00:00:00 2001
From: Song Hongyan <hongyan.song@intel.com>
Date: Thu, 5 Jan 2017 18:24:04 +0800
Subject: iio: Add gravity sensor support

Gravity sensor is a soft sensor, which derives value from
standard accelerometer device by filtering out the acceleration
which is not caused by gravity.

Gravity sensor provides a three dimensional vector indicating
the direction and magnitude of gravity. Typically, this sensor
is used to determine the device's relative orientation in space.
The units and the coordinate system is the same as the one used by
the acceleration sensor.
When a device is at rest, the output of the gravity sensor should
be identical to that of the accelerometer.

More information can be found in:
http://www.usb.org/developers/hidpage/HUTRR59_-_Usages_for_Wearables.pdf

Gravity sensor and accelerometer have similar channels and
share channel usage ids. So the most of the code for accel_3d
can be reused.

Signed-off-by: Song Hongyan <hongyan.song@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/hid-sensor-ids.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index 9a3a9db1b39d..30c7dc45e45f 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -52,6 +52,9 @@
 #define HID_USAGE_SENSOR_ANGL_VELOCITY_Y_AXIS			0x200458
 #define HID_USAGE_SENSOR_ANGL_VELOCITY_Z_AXIS			0x200459
 
+/* Gravity vector */
+#define HID_USAGE_SENSOR_GRAVITY_VECTOR				0x20007B
+
 /* ORIENTATION: Compass 3D: (200083) */
 #define HID_USAGE_SENSOR_COMPASS_3D				0x200083
 #define HID_USAGE_SENSOR_DATA_ORIENTATION			0x200470
-- 
cgit v1.2.3


From 210af2a5f12403a8968d6014742886cc7e9823b4 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Thu, 5 Jan 2017 10:52:10 -0600
Subject: ipmi: make ipmi_usr_hndl const

It's only function pointers.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 include/linux/ipmi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 78c5d5ae3857..f1045b2c6a00 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -100,7 +100,7 @@ struct ipmi_user_hndl {
 
 /* Create a new user of the IPMI layer on the given interface number. */
 int ipmi_create_user(unsigned int          if_num,
-		     struct ipmi_user_hndl *handler,
+		     const struct ipmi_user_hndl *handler,
 		     void                  *handler_data,
 		     ipmi_user_t           *user);
 
-- 
cgit v1.2.3


From 4e552c8cb5bc9137e67e035bab8df6dddbca7384 Mon Sep 17 00:00:00 2001
From: Andi Shyti <andi.shyti@samsung.com>
Date: Thu, 5 Jan 2017 11:34:12 +0900
Subject: leds: add LED_ON brightness as boolean value

Some devices do not handle the led brightness or simply don't
care about it. Conceptually said devices want to just switch on
or off the led. It is useless in this case to have a 255 range
of brightness, while just having an LED_ON and LED_OFF improves
the boolean meaning of the led status.

Signed-off-by: Andi Shyti <andi.shyti@samsung.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 include/linux/leds.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 569cb531094c..bb50d0151e75 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -27,6 +27,7 @@ struct device;
 
 enum led_brightness {
 	LED_OFF		= 0,
+	LED_ON		= 1,
 	LED_HALF	= 127,
 	LED_FULL	= 255,
 };
-- 
cgit v1.2.3


From eca90a3b3226fcecb4b3bbf4b0b6ff72422674bc Mon Sep 17 00:00:00 2001
From: Alexander Alemayhu <alexander@alemayhu.com>
Date: Thu, 5 Jan 2017 22:11:50 +0100
Subject: EDAC: Fix typos in enum mem_type comments

s/labed/labeled/
s/differenciate/differentiate/

Signed-off-by: Alexander Alemayhu <alexander@alemayhu.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/20170105211150.24003-1-alexander@alemayhu.com
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 include/linux/edac.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 07c52c0af62d..5b6adf964248 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -190,8 +190,8 @@ static inline char *mc_event_error_type(const unsigned int err_type)
  *			part of the memory details to the memory controller.
  * @MEM_RMBS:		Rambus DRAM, used on a few Pentium III/IV controllers.
  * @MEM_DDR2:		DDR2 RAM, as described at JEDEC JESD79-2F.
- *			Those memories are labed as "PC2-" instead of "PC" to
- *			differenciate from DDR.
+ *			Those memories are labeled as "PC2-" instead of "PC" to
+ *			differentiate from DDR.
  * @MEM_FB_DDR2:	Fully-Buffered DDR2, as described at JEDEC Std No. 205
  *			and JESD206.
  *			Those memories are accessed per DIMM slot, and not by
-- 
cgit v1.2.3


From 69d707d034b6078f0b5998f80e5883c8243b205c Mon Sep 17 00:00:00 2001
From: "Karicheri, Muralidharan" <m-karicheri2@ti.com>
Date: Fri, 6 Jan 2017 15:37:39 -0500
Subject: net: netcp: extract eflag from desc for rx_hook handling

Extract the eflag bits from the received desc and pass it down
the rx_hook chain to be available for netcp modules. Also the
psdata and epib data has to be inspected by the netcp modules.
So the desc can be freed only after returning from the rx_hook.
So move knav_pool_desc_put() after the rx_hook processing.

Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/soc/ti/knav_dma.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
index 35cb9264e0d5..2b7882666ef6 100644
--- a/include/linux/soc/ti/knav_dma.h
+++ b/include/linux/soc/ti/knav_dma.h
@@ -41,6 +41,8 @@
 #define KNAV_DMA_DESC_RETQ_SHIFT		0
 #define KNAV_DMA_DESC_RETQ_MASK			MASK(14)
 #define KNAV_DMA_DESC_BUF_LEN_MASK		MASK(22)
+#define KNAV_DMA_DESC_EFLAGS_MASK		MASK(4)
+#define KNAV_DMA_DESC_EFLAGS_SHIFT		20
 
 #define KNAV_DMA_NUM_EPIB_WORDS			4
 #define KNAV_DMA_NUM_PS_WORDS			16
-- 
cgit v1.2.3


From f099d616dd689d27b08dec95d0b6f1f302cf8ec4 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 5 Jan 2017 12:01:30 -0800
Subject: fscrypt: remove unused 'mode' member of fscrypt_ctx

Nothing reads or writes fscrypt_ctx.mode, and it doesn't belong there
because a fscrypt_ctx is not tied to a specific encryption mode.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/fscrypto.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 2a2815702095..8635ea46ef6e 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -35,7 +35,6 @@ struct fscrypt_ctx {
 		struct list_head free_list;	/* Free list */
 	};
 	u8 flags;				/* Flags */
-	u8 mode;				/* Encryption mode for tfm */
 };
 
 /**
-- 
cgit v1.2.3


From a5d431eff2e0bb22156897435aa277ddc96074f7 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 5 Jan 2017 13:51:18 -0800
Subject: fscrypt: make fscrypt_operations.key_prefix a string

There was an unnecessary amount of complexity around requesting the
filesystem-specific key prefix.  It was unclear why; perhaps it was
envisioned that different instances of the same filesystem type could
use different key prefixes, or that key prefixes could be binary.
However, neither of those things were implemented or really make sense
at all.  So simplify the code by making key_prefix a const char *.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/fscrypto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index 8635ea46ef6e..715f17b3c6d7 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -85,8 +85,8 @@ struct fscrypt_name {
  */
 struct fscrypt_operations {
 	unsigned int flags;
+	const char *key_prefix;
 	int (*get_context)(struct inode *, void *, size_t);
-	int (*key_prefix)(struct inode *, u8 **);
 	int (*prepare_context)(struct inode *);
 	int (*set_context)(struct inode *, const void *, size_t, void *);
 	int (*dummy_context)(struct inode *);
-- 
cgit v1.2.3


From 2f5ff26478adaff5ed9b7ad4079d6a710b5f27e7 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Tue, 3 Jan 2017 23:55:21 +0200
Subject: mlx5: Fix naming convention with respect to UARs

This establishes a solid naming conventions for UARs. A UAR (User Access
Region) can have size identical to a system page or can be fixed 4KB
depending on a value queried by firmware. Each UAR always has 4 blue
flame register which are used to post doorbell to send queue. In
addition, a UAR has section used for posting doorbells to CQs or EQs. In
this patch we change names to reflect this conventions.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h |  9 +++++----
 include/linux/mlx5/driver.h | 14 +++++++-------
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 3ccaeff15a80..aa851c51ab59 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -212,10 +212,11 @@ enum {
 };
 
 enum {
-	MLX5_BF_REGS_PER_PAGE		= 4,
-	MLX5_MAX_UAR_PAGES		= 1 << 8,
-	MLX5_NON_FP_BF_REGS_PER_PAGE	= 2,
-	MLX5_MAX_UUARS	= MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE,
+	MLX5_BFREGS_PER_UAR		= 4,
+	MLX5_MAX_UARS			= 1 << 8,
+	MLX5_NON_FP_BFREGS_PER_UAR	= 2,
+	MLX5_MAX_BFREGS			= MLX5_MAX_UARS *
+					  MLX5_NON_FP_BFREGS_PER_UAR,
 };
 
 enum {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index cfa49bca009c..3d07e25b3bf1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -188,16 +188,16 @@ enum mlx5_eq_type {
 #endif
 };
 
-struct mlx5_uuar_info {
+struct mlx5_bfreg_info {
 	struct mlx5_uar	       *uars;
 	int			num_uars;
-	int			num_low_latency_uuars;
+	int			num_low_latency_bfregs;
 	unsigned long	       *bitmap;
 	unsigned int	       *count;
 	struct mlx5_bf	       *bfs;
 
 	/*
-	 * protect uuar allocation data structs
+	 * protect bfreg allocation data structs
 	 */
 	struct mutex		lock;
 	u32			ver;
@@ -217,7 +217,7 @@ struct mlx5_bf {
 	/* serialize 64 bit writes when done as two 32 bit accesses
 	 */
 	spinlock_t		lock32;
-	int			uuarn;
+	int			bfregn;
 };
 
 struct mlx5_cmd_first {
@@ -579,7 +579,7 @@ struct mlx5_priv {
 	struct mlx5_eq_table	eq_table;
 	struct msix_entry	*msix_arr;
 	struct mlx5_irq_info	*irq_info;
-	struct mlx5_uuar_info	uuari;
+	struct mlx5_bfreg_info	bfregi;
 	MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock);
 
 	/* pages stuff */
@@ -903,8 +903,8 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
-int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
-int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
+int mlx5_alloc_bfregs(struct mlx5_core_dev *dev, struct mlx5_bfreg_info *bfregi);
+int mlx5_free_bfregs(struct mlx5_core_dev *dev, struct mlx5_bfreg_info *bfregi);
 int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar,
 		       bool map_wc);
 void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
-- 
cgit v1.2.3


From a6d51b68611e98f05042ada662aed5dbe3279c1e Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Tue, 3 Jan 2017 23:55:23 +0200
Subject: net/mlx5: Introduce blue flame register allocator

Here is an implementation of an allocator that allocates blue flame
registers. A blue flame register is used for generating send doorbells.
A blue flame register can be used to generate either a regular doorbell
or a blue flame doorbell where the data to be sent is written to the
device's I/O memory hence saving the need to read the data from memory.
For blue flame kind of doorbells to succeed, the blue flame register
need to be mapped as write combining. The user can specify what kind of
send doorbells she wishes to use. If she requested write combining
mapping but that failed, the allocator will fall back to non write
combining mapping and will indicate that to the user.
Subsequent patches in this series will make use of this allocator.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   |  2 ++
 include/linux/mlx5/driver.h   | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/mlx5/mlx5_ifc.h |  7 +++++--
 3 files changed, 44 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index aa851c51ab59..db1b9287012f 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -215,6 +215,8 @@ enum {
 	MLX5_BFREGS_PER_UAR		= 4,
 	MLX5_MAX_UARS			= 1 << 8,
 	MLX5_NON_FP_BFREGS_PER_UAR	= 2,
+	MLX5_FP_BFREGS_PER_UAR		= MLX5_BFREGS_PER_UAR -
+					  MLX5_NON_FP_BFREGS_PER_UAR,
 	MLX5_MAX_BFREGS			= MLX5_MAX_UARS *
 					  MLX5_NON_FP_BFREGS_PER_UAR,
 };
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3d07e25b3bf1..969aa1fe17e2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -452,6 +452,39 @@ struct mlx5_eq_table {
 	spinlock_t		lock;
 };
 
+struct mlx5_uars_page {
+	void __iomem	       *map;
+	bool			wc;
+	u32			index;
+	struct list_head	list;
+	unsigned int		bfregs;
+	unsigned long	       *reg_bitmap; /* for non fast path bf regs */
+	unsigned long	       *fp_bitmap;
+	unsigned int		reg_avail;
+	unsigned int		fp_avail;
+	struct kref		ref_count;
+	struct mlx5_core_dev   *mdev;
+};
+
+struct mlx5_bfreg_head {
+	/* protect blue flame registers allocations */
+	struct mutex		lock;
+	struct list_head	list;
+};
+
+struct mlx5_bfreg_data {
+	struct mlx5_bfreg_head	reg_head;
+	struct mlx5_bfreg_head	wc_head;
+};
+
+struct mlx5_sq_bfreg {
+	void __iomem	       *map;
+	struct mlx5_uars_page  *up;
+	bool			wc;
+	u32			index;
+	unsigned int		offset;
+};
+
 struct mlx5_uar {
 	u32			index;
 	struct list_head	bf_list;
@@ -645,6 +678,7 @@ struct mlx5_priv {
 	void		       *pfault_ctx;
 	struct srcu_struct      pfault_srcu;
 #endif
+	struct mlx5_bfreg_data		bfregs;
 };
 
 enum mlx5_device_state {
@@ -1022,6 +1056,9 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
 int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index);
 void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate);
 bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate);
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+		     bool map_wc, bool fast_path);
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
 
 static inline int fw_initializing(struct mlx5_core_dev *dev)
 {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 15f896781966..1223feff0ea4 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -905,7 +905,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         uc[0x1];
 	u8         rc[0x1];
 
-	u8         reserved_at_240[0xa];
+	u8         uar_4k[0x1];
+	u8         reserved_at_241[0x9];
 	u8         uar_sz[0x6];
 	u8         reserved_at_250[0x8];
 	u8         log_pg_sz[0x8];
@@ -997,7 +998,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         device_frequency_mhz[0x20];
 	u8         device_frequency_khz[0x20];
 
-	u8         reserved_at_500[0x80];
+	u8         reserved_at_500[0x20];
+	u8	   num_of_uars_per_page[0x20];
+	u8         reserved_at_540[0x40];
 
 	u8         reserved_at_580[0x3f];
 	u8         cqe_compression[0x1];
-- 
cgit v1.2.3


From 9f09eaeae28a0c67b8fc2b5acb411a5d4fd8cc80 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Fri, 6 Jan 2017 17:39:06 -0800
Subject: net: ipmr: Remove nowait arg to ipmr_get_route

ipmr_get_route has 1 caller and the nowait arg is 0. Remove the arg and
simplify ipmr_get_route accordingly.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index e5fb81376e92..f019b62f27b5 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -120,5 +120,5 @@ struct mfc_cache {
 struct rtmsg;
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
 		   __be32 saddr, __be32 daddr,
-		   struct rtmsg *rtm, int nowait, u32 portid);
+		   struct rtmsg *rtm, u32 portid);
 #endif
-- 
cgit v1.2.3


From bc1f44709cf27fb2a5766cadafe7e2ad5e9cb221 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Fri, 6 Jan 2017 19:12:52 -0800
Subject: net: make ndo_get_stats64 a void function

The network device operation for reading statistics is only called
in one place, and it ignores the return value. Having a structure
return value is potentially confusing because some future driver could
incorrectly assume that the return value was used.

Fix all drivers with ndo_get_stats64 to have a void function.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ecd78b3c9aba..b14ad9c139d7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -913,8 +913,8 @@ struct netdev_xdp {
  *	Callback used when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
- * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
- *                      struct rtnl_link_stats64 *storage);
+ * void (*ndo_get_stats64)(struct net_device *dev,
+ *                         struct rtnl_link_stats64 *storage);
  * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
  *	Called when a user wants to get the network device usage
  *	statistics. Drivers must do one of the following:
@@ -1165,8 +1165,8 @@ struct net_device_ops {
 						   struct neigh_parms *);
 	void			(*ndo_tx_timeout) (struct net_device *dev);
 
-	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
-						     struct rtnl_link_stats64 *storage);
+	void			(*ndo_get_stats64)(struct net_device *dev,
+						   struct rtnl_link_stats64 *storage);
 	bool			(*ndo_has_offload_stats)(const struct net_device *dev, int attr_id);
 	int			(*ndo_get_offload_stats)(int attr_id,
 							 const struct net_device *dev,
-- 
cgit v1.2.3


From 8a522bf2d4f788306443d36b26b54f0aedcdfdbe Mon Sep 17 00:00:00 2001
From: Peter Foley <pefoley2@pefoley.com>
Date: Sun, 27 Nov 2016 21:37:20 -0500
Subject: extcon: adc-jack: Fix incompatible pointer type warning

This patch fixes the incompatible warning of extcon-adc-jack.c driver
when calling devm_extcon_dev_allocate().

Signed-off-by: Peter Foley <pefoley2@pefoley.com>
[cw00.choi: Modify the patch title and descritpion]
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/extcon/extcon-adc-jack.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon/extcon-adc-jack.h b/include/linux/extcon/extcon-adc-jack.h
index a0e03b13b449..2aa32075bca1 100644
--- a/include/linux/extcon/extcon-adc-jack.h
+++ b/include/linux/extcon/extcon-adc-jack.h
@@ -59,7 +59,7 @@ struct adc_jack_pdata {
 	const char *name;
 	const char *consumer_channel;
 
-	const enum extcon *cable_names;
+	const unsigned int *cable_names;
 
 	/* The last entry's state should be 0 */
 	struct adc_jack_cond *adc_conditions;
-- 
cgit v1.2.3


From db6228612ce297949621a62e9d2331ee55016778 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Wed, 21 Dec 2016 14:10:47 +0800
Subject: extcon: Add documentation for EXTCON_CHG_USB_* and EXTCON_USB_*

Current there is both "EXTCON_USB" and "EXTCON_CHG_USB_SDP" which
both seem to suggest a standard downstream port. But there is no
documentation describing how these relate.

Thus add documentation to describe EXTCON_CHG_USB_SDP should always
appear together with EXTCON_USB, and EXTCON_CHG_USB_ACA would normally
appear with EXTCON_USB_HOST.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/extcon.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index b871c0cb1f02..00201230bea5 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -46,7 +46,14 @@
 #define EXTCON_USB		1
 #define EXTCON_USB_HOST		2
 
-/* Charging external connector */
+/*
+ * Charging external connector
+ *
+ * When one SDP charger connector was reported, we should also report
+ * the USB connector, which means EXTCON_CHG_USB_SDP should always
+ * appear together with EXTCON_USB. The same as ACA charger connector,
+ * EXTCON_CHG_USB_ACA would normally appear with EXTCON_USB_HOST.
+ */
 #define EXTCON_CHG_USB_SDP	5	/* Standard Downstream Port */
 #define EXTCON_CHG_USB_DCP	6	/* Dedicated Charging Port */
 #define EXTCON_CHG_USB_CDP	7	/* Charging Downstream Port */
-- 
cgit v1.2.3


From e6cf046543763878614d51e8283abd40d5e5327e Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Mon, 26 Dec 2016 20:37:38 +0900
Subject: extcon: Move defintion of struct extcon_dev to driver/extcon
 directory

This patch moves the 'struct extcon_dev' of extcon subsystem
to driver/extcon/extcon.h header file because the struct extcon_dev have to
be handled by extcon API to guarantee the consistency of strcut extcon_dev.
If external drivers are able to touch the struct extcon_dev directly, it might
cause the critical and unknown problem.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/extcon.h | 57 +-------------------------------------------------
 1 file changed, 1 insertion(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 00201230bea5..d57e52443841 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -167,62 +167,7 @@ union extcon_property_value {
 };
 
 struct extcon_cable;
-
-/**
- * struct extcon_dev - An extcon device represents one external connector.
- * @name:		The name of this extcon device. Parent device name is
- *			used if NULL.
- * @supported_cable:	Array of supported cable names ending with EXTCON_NONE.
- *			If supported_cable is NULL, cable name related APIs
- *			are disabled.
- * @mutually_exclusive:	Array of mutually exclusive set of cables that cannot
- *			be attached simultaneously. The array should be
- *			ending with NULL or be NULL (no mutually exclusive
- *			cables). For example, if it is { 0x7, 0x30, 0}, then,
- *			{0, 1}, {0, 1, 2}, {0, 2}, {1, 2}, or {4, 5} cannot
- *			be attached simulataneously. {0x7, 0} is equivalent to
- *			{0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there
- *			can be no simultaneous connections.
- * @dev:		Device of this extcon.
- * @state:		Attach/detach state of this extcon. Do not provide at
- *			register-time.
- * @nh:			Notifier for the state change events from this extcon
- * @entry:		To support list of extcon devices so that users can
- *			search for extcon devices based on the extcon name.
- * @lock:
- * @max_supported:	Internal value to store the number of cables.
- * @extcon_dev_type:	Device_type struct to provide attribute_groups
- *			customized for each extcon device.
- * @cables:		Sysfs subdirectories. Each represents one cable.
- *
- * In most cases, users only need to provide "User initializing data" of
- * this struct when registering an extcon. In some exceptional cases,
- * optional callbacks may be needed. However, the values in "internal data"
- * are overwritten by register function.
- */
-struct extcon_dev {
-	/* Optional user initializing data */
-	const char *name;
-	const unsigned int *supported_cable;
-	const u32 *mutually_exclusive;
-
-	/* Internal data. Please do not set. */
-	struct device dev;
-	struct raw_notifier_head *nh;
-	struct list_head entry;
-	int max_supported;
-	spinlock_t lock;	/* could be called by irq handler */
-	u32 state;
-
-	/* /sys/class/extcon/.../cable.n/... */
-	struct device_type extcon_dev_type;
-	struct extcon_cable *cables;
-
-	/* /sys/class/extcon/.../mutually_exclusive/... */
-	struct attribute_group attr_g_muex;
-	struct attribute **attrs_muex;
-	struct device_attribute *d_attrs_muex;
-};
+struct extcon_dev;
 
 #if IS_ENABLED(CONFIG_EXTCON)
 
-- 
cgit v1.2.3


From 62a37443e93bbae74410cb72aa9d7e15a1da0b98 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Tue, 3 Jan 2017 13:50:54 +0800
Subject: extcon: Add documentation for EXTCON_CHG_USB_SLOW/FAST

Currently there are no documentation for EXTCON_CHG_USB_SLOW/FAST
charger connector. These names don't mean much and no guide to tell
users how to use it, thus try to add documentation to make them clear.

Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
[cw00.choi: Use the 'connector' expression instead of 'cable']
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/extcon.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index d57e52443841..242157cad25d 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -53,6 +53,10 @@
  * the USB connector, which means EXTCON_CHG_USB_SDP should always
  * appear together with EXTCON_USB. The same as ACA charger connector,
  * EXTCON_CHG_USB_ACA would normally appear with EXTCON_USB_HOST.
+ *
+ * The EXTCON_CHG_USB_SLOW connector can provide at least 500mA of
+ * current at 5V. The EXTCON_CHG_USB_FAST connector can provide at
+ * least 1A of current at 5V.
  */
 #define EXTCON_CHG_USB_SDP	5	/* Standard Downstream Port */
 #define EXTCON_CHG_USB_DCP	6	/* Dedicated Charging Port */
-- 
cgit v1.2.3


From 3c5f0e076833c407cca372c663d47499ae4dab45 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Mon, 2 Jan 2017 13:03:03 +0900
Subject: extcon: Add new EXTCON_CHG_USB_PD type for USB Power Delivery

This patch adds the new EXTCON_CHG_USB_PD for USB PD (Power Delivery)[1].
The USB Power Delivery specification specifies that USB cable provides
the increased power more than 7.5W to device with larger power demand.
The EXTCON_CHG_USB_PD has the EXTCON_TYPE_CHG and EXTCON_TYPE_USB type.

[1] https://en.wikipedia.org/wiki/USB#PD

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 include/linux/extcon.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 242157cad25d..7010fb01a81a 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -65,6 +65,7 @@
 #define EXTCON_CHG_USB_FAST	9
 #define EXTCON_CHG_USB_SLOW	10
 #define EXTCON_CHG_WPT		11	/* Wireless Power Transfer */
+#define EXTCON_CHG_USB_PD	12	/* USB Power Delivery */
 
 /* Jack external connector */
 #define EXTCON_JACK_MICROPHONE	20
-- 
cgit v1.2.3


From e7246e122aaa99ebbb8ad7da80f35a20577bd8af Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sat, 7 Jan 2017 17:06:35 -0500
Subject: net-tc: extract skip classify bit from tc_verd

Packets sent by the IFB device skip subsequent tc classification.
A single bit governs this state. Move it out of tc_verd in
anticipation of removing that __u16 completely.

The new bitfield tc_skip_classify temporarily uses one bit of a
hole, until tc_verd is removed completely in a follow-up patch.

Remove the bit hole comment. It could be 2, 3, 4 or 5 bits long.
With that many options, little value in documenting it.

Introduce a helper function to deduplicate the logic in the two
sites that check this bit.

The field tc_skip_classify is set only in IFB on skbs cloned in
act_mirred, so original packet sources do not have to clear the
bit when reusing packets (notably, pktgen and octeon).

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b53c0cfd417e..570f60ec6cb4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -589,6 +589,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@pkt_type: Packet class
  *	@fclone: skbuff clone status
  *	@ipvs_property: skbuff is owned by ipvs
+ *	@tc_skip_classify: do not classify packet. set by IFB device
  *	@peeked: this packet has been seen already, so stats have been
  *		done for it, don't do them again
  *	@nf_trace: netfilter packet trace flag
@@ -749,7 +750,9 @@ struct sk_buff {
 #ifdef CONFIG_NET_SWITCHDEV
 	__u8			offload_fwd_mark:1;
 #endif
-	/* 2, 4 or 5 bit hole */
+#ifdef CONFIG_NET_CLS_ACT
+	__u8			tc_skip_classify:1;
+#endif
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
-- 
cgit v1.2.3


From a5135bcfba7345031df45e02cd150a45add47cf8 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sat, 7 Jan 2017 17:06:36 -0500
Subject: net-tc: convert tc_verd to integer bitfields

Extract the remaining two fields from tc_verd and remove the __u16
completely. TC_AT and TC_FROM are converted to equivalent two-bit
integer fields tc_at and tc_from. Where possible, use existing
helper skb_at_tc_ingress when reading tc_at. Introduce helper
skb_reset_tc to clear fields.

Not documenting tc_from and tc_at, because they will be replaced
with single bit fields in follow-on patches.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 570f60ec6cb4..f738d09947b2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -599,7 +599,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@skb_iif: ifindex of device we arrived on
  *	@tc_index: Traffic control index
- *	@tc_verd: traffic control verdict
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@xmit_more: More SKBs are pending for this queue
@@ -752,13 +751,12 @@ struct sk_buff {
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	__u8			tc_skip_classify:1;
+	__u8			tc_at:2;
+	__u8			tc_from:2;
 #endif
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
-#ifdef CONFIG_NET_CLS_ACT
-	__u16			tc_verd;	/* traffic control verdict */
-#endif
 #endif
 
 	union {
-- 
cgit v1.2.3


From 8dc07fdbf2054f157e8333f940a1ad728916c786 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sat, 7 Jan 2017 17:06:37 -0500
Subject: net-tc: convert tc_at to tc_at_ingress

Field tc_at is used only within tc actions to distinguish ingress from
egress processing. A single bit is sufficient for this purpose.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f738d09947b2..fab3f87e9bd1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -590,6 +590,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@fclone: skbuff clone status
  *	@ipvs_property: skbuff is owned by ipvs
  *	@tc_skip_classify: do not classify packet. set by IFB device
+ *	@tc_at_ingress: used within tc_classify to distinguish in/egress
  *	@peeked: this packet has been seen already, so stats have been
  *		done for it, don't do them again
  *	@nf_trace: netfilter packet trace flag
@@ -751,7 +752,7 @@ struct sk_buff {
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	__u8			tc_skip_classify:1;
-	__u8			tc_at:2;
+	__u8			tc_at_ingress:1;
 	__u8			tc_from:2;
 #endif
 
-- 
cgit v1.2.3


From bc31c905e946b5c55df5d2938335e78ffb3157ca Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sat, 7 Jan 2017 17:06:38 -0500
Subject: net-tc: convert tc_from to tc_from_ingress and tc_redirected

The tc_from field fulfills two roles. It encodes whether a packet was
redirected by an act_mirred device and, if so, whether act_mirred was
called on ingress or egress. Split it into separate fields.

The information is needed by the special IFB loop, where packets are
taken out of the normal path by act_mirred, forwarded to IFB, then
reinjected at their original location (ingress or egress) by IFB.

The IFB device cannot use skb->tc_at_ingress, because that may have
been overwritten as the packet travels from act_mirred to ifb_xmit,
when it passes through tc_classify on the IFB egress path. Cache this
value in skb->tc_from_ingress.

That field is valid only if a packet arriving at ifb_xmit came from
act_mirred. Other packets can be crafted to reach ifb_xmit. These
must be dropped. Set tc_redirected on redirection and drop all packets
that do not have this bit set.

Both fields are set only on cloned skbs in tc actions, so original
packet sources do not have to clear the bit when reusing packets
(notably, pktgen and octeon).

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fab3f87e9bd1..3149a88de548 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -591,6 +591,8 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@ipvs_property: skbuff is owned by ipvs
  *	@tc_skip_classify: do not classify packet. set by IFB device
  *	@tc_at_ingress: used within tc_classify to distinguish in/egress
+ *	@tc_redirected: packet was redirected by a tc action
+ *	@tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect
  *	@peeked: this packet has been seen already, so stats have been
  *		done for it, don't do them again
  *	@nf_trace: netfilter packet trace flag
@@ -753,7 +755,8 @@ struct sk_buff {
 #ifdef CONFIG_NET_CLS_ACT
 	__u8			tc_skip_classify:1;
 	__u8			tc_at_ingress:1;
-	__u8			tc_from:2;
+	__u8			tc_redirected:1;
+	__u8			tc_from_ingress:1;
 #endif
 
 #ifdef CONFIG_NET_SCHED
-- 
cgit v1.2.3


From d6c99f4bf093a58d3ab47caaec74b81f18bc4e3f Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 4 Jan 2017 14:12:21 +0000
Subject: dma-fence: Wrap querying the fence->status

The fence->status is an optional field that is only valid once the fence
has been signaled. (Driver may fill the fence->status with an error code
prior to calling dma_fence_signal().) Given the restriction upon its
validity, wrap querying of the fence->status into a helper
dma_fence_get_status().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20170104141222.6992-2-chris@chris-wilson.co.uk
---
 include/linux/dma-fence.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index d51a7d23c358..19f7af905182 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -378,6 +378,30 @@ static inline struct dma_fence *dma_fence_later(struct dma_fence *f1,
 		return dma_fence_is_signaled(f2) ? NULL : f2;
 }
 
+/**
+ * dma_fence_get_status_locked - returns the status upon completion
+ * @fence: [in]	the dma_fence to query
+ *
+ * Drivers can supply an optional error status condition before they signal
+ * the fence (to indicate whether the fence was completed due to an error
+ * rather than success). The value of the status condition is only valid
+ * if the fence has been signaled, dma_fence_get_status_locked() first checks
+ * the signal state before reporting the error status.
+ *
+ * Returns 0 if the fence has not yet been signaled, 1 if the fence has
+ * been signaled without an error condition, or a negative error code
+ * if the fence has been completed in err.
+ */
+static inline int dma_fence_get_status_locked(struct dma_fence *fence)
+{
+	if (dma_fence_is_signaled_locked(fence))
+		return fence->status < 0 ? fence->status : 1;
+	else
+		return 0;
+}
+
+int dma_fence_get_status(struct dma_fence *fence);
+
 signed long dma_fence_wait_timeout(struct dma_fence *,
 				   bool intr, signed long timeout);
 signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
-- 
cgit v1.2.3


From b21507e272627c434e8dd74e8d51fd8245281b59 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 9 Jan 2017 10:07:31 -0500
Subject: proc,security: move restriction on writing /proc/pid/attr nodes to
 proc

Processes can only alter their own security attributes via
/proc/pid/attr nodes.  This is presently enforced by each individual
security module and is also imposed by the Linux credentials
implementation, which only allows a task to alter its own credentials.
Move the check enforcing this restriction from the individual
security modules to proc_pid_attr_write() before calling the security hook,
and drop the unnecessary task argument to the security hook since it can
only ever be the current task.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hooks.h | 3 +--
 include/linux/security.h  | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 558adfa5c8a8..0dde95900196 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1547,8 +1547,7 @@ union security_list_options {
 	void (*d_instantiate)(struct dentry *dentry, struct inode *inode);
 
 	int (*getprocattr)(struct task_struct *p, char *name, char **value);
-	int (*setprocattr)(struct task_struct *p, char *name, void *value,
-				size_t size);
+	int (*setprocattr)(const char *name, void *value, size_t size);
 	int (*ismaclabel)(const char *name);
 	int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen);
 	int (*secctx_to_secid)(const char *secdata, u32 seclen, u32 *secid);
diff --git a/include/linux/security.h b/include/linux/security.h
index c2125e9093e8..f4ebac117fa6 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -361,7 +361,7 @@ int security_sem_semop(struct sem_array *sma, struct sembuf *sops,
 			unsigned nsops, int alter);
 void security_d_instantiate(struct dentry *dentry, struct inode *inode);
 int security_getprocattr(struct task_struct *p, char *name, char **value);
-int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size);
+int security_setprocattr(const char *name, void *value, size_t size);
 int security_netlink_send(struct sock *sk, struct sk_buff *skb);
 int security_ismaclabel(const char *name);
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
@@ -1106,7 +1106,7 @@ static inline int security_getprocattr(struct task_struct *p, char *name, char *
 	return -EINVAL;
 }
 
-static inline int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
+static inline int security_setprocattr(char *name, void *value, size_t size)
 {
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From a009e975da5c7d42a7f5eaadc54946eb5f76c9af Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 4 Jan 2017 14:12:22 +0000
Subject: dma-fence: Introduce drm_fence_set_error() helper

The dma_fence.error field (formerly known as dma_fence.status) is an
optional field that may be set by drivers before calling
dma_fence_signal(). The field can be used to indicate that the fence was
completed in err rather than with success, and is visible to other
consumers of the fence and to userspace via sync_file.

This patch renames the field from status to error so that its meaning is
hopefully more clear (and distinct from dma_fence_get_status() which is
a composite between the error state and signal state) and adds a helper
that validates the preconditions of when it is suitable to adjust the
error field.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20170104141222.6992-3-chris@chris-wilson.co.uk
---
 include/linux/dma-fence.h | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 19f7af905182..6048fa404e57 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -47,7 +47,7 @@ struct dma_fence_cb;
  * can be compared to decide which fence would be signaled later.
  * @flags: A mask of DMA_FENCE_FLAG_* defined below
  * @timestamp: Timestamp when the fence was signaled.
- * @status: Optional, only valid if < 0, must be set before calling
+ * @error: Optional, only valid if < 0, must be set before calling
  * dma_fence_signal, indicates that the fence has completed with an error.
  *
  * the flags member must be manipulated and read using the appropriate
@@ -79,7 +79,7 @@ struct dma_fence {
 	unsigned seqno;
 	unsigned long flags;
 	ktime_t timestamp;
-	int status;
+	int error;
 };
 
 enum dma_fence_flag_bits {
@@ -133,7 +133,7 @@ struct dma_fence_cb {
  * or some failure occurred that made it impossible to enable
  * signaling. True indicates successful enabling.
  *
- * fence->status may be set in enable_signaling, but only when false is
+ * fence->error may be set in enable_signaling, but only when false is
  * returned.
  *
  * Calling dma_fence_signal before enable_signaling is called allows
@@ -145,7 +145,7 @@ struct dma_fence_cb {
  * the second time will be a noop since it was already signaled.
  *
  * Notes on signaled:
- * May set fence->status if returning true.
+ * May set fence->error if returning true.
  *
  * Notes on wait:
  * Must not be NULL, set to dma_fence_default_wait for default implementation.
@@ -395,13 +395,33 @@ static inline struct dma_fence *dma_fence_later(struct dma_fence *f1,
 static inline int dma_fence_get_status_locked(struct dma_fence *fence)
 {
 	if (dma_fence_is_signaled_locked(fence))
-		return fence->status < 0 ? fence->status : 1;
+		return fence->error ?: 1;
 	else
 		return 0;
 }
 
 int dma_fence_get_status(struct dma_fence *fence);
 
+/**
+ * dma_fence_set_error - flag an error condition on the fence
+ * @fence: [in]	the dma_fence
+ * @error: [in]	the error to store
+ *
+ * Drivers can supply an optional error status condition before they signal
+ * the fence, to indicate that the fence was completed due to an error
+ * rather than success. This must be set before signaling (so that the value
+ * is visible before any waiters on the signal callback are woken). This
+ * helper exists to help catching erroneous setting of #dma_fence.error.
+ */
+static inline void dma_fence_set_error(struct dma_fence *fence,
+				       int error)
+{
+	BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
+	BUG_ON(error >= 0 || error < -MAX_ERRNO);
+
+	fence->error = error;
+}
+
 signed long dma_fence_wait_timeout(struct dma_fence *,
 				   bool intr, signed long timeout);
 signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
-- 
cgit v1.2.3


From f32815d21d4d8287336fb9cef4d2d9e0866214c2 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 2 Jan 2017 17:19:40 -0500
Subject: xtables: add xt_match, xt_target and data copy_to_user functions

xt_entry_target, xt_entry_match and their private data may contain
kernel data.

Introduce helper functions xt_match_to_user, xt_target_to_user and
xt_data_to_user that copy only the expected fields. These replace
existing logic that calls copy_to_user on entire structs, then
overwrites select fields.

Private data is defined in xt_match and xt_target. All matches and
targets that maintain kernel data store this at the tail of their
private structure. Extend xt_match and xt_target with .usersize to
limit how many bytes of data are copied. The remainder is cleared.

If compatsize is specified, usersize can only safely be used if all
fields up to usersize use platform-independent types. Otherwise, the
compat_to_user callback must be defined.

This patch does not yet enable the support logic.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 5117e4d2ddfa..be378cf47fcc 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -167,6 +167,7 @@ struct xt_match {
 
 	const char *table;
 	unsigned int matchsize;
+	unsigned int usersize;
 #ifdef CONFIG_COMPAT
 	unsigned int compatsize;
 #endif
@@ -207,6 +208,7 @@ struct xt_target {
 
 	const char *table;
 	unsigned int targetsize;
+	unsigned int usersize;
 #ifdef CONFIG_COMPAT
 	unsigned int compatsize;
 #endif
@@ -287,6 +289,13 @@ int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
 int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
 		    bool inv_proto);
 
+int xt_match_to_user(const struct xt_entry_match *m,
+		     struct xt_entry_match __user *u);
+int xt_target_to_user(const struct xt_entry_target *t,
+		      struct xt_entry_target __user *u);
+int xt_data_to_user(void __user *dst, const void *src,
+		    int usersize, int size);
+
 void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 				 struct xt_counters_info *info, bool compat);
 
-- 
cgit v1.2.3


From 0118717583cda6f4f36092853ad0345e8150b286 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Tue, 3 Jan 2017 23:55:24 +0200
Subject: net/mlx5: Add interface to get reference to a UAR

A reference to a UAR is required to generate CQ or EQ doorbells. Since
CQ or EQ doorbells can all be generated using the same UAR area without
any effect on performance, we are just getting a reference to any
available UAR, If one is not available we allocate it but we don't waste
the blue flame registers it can provide and we will use them for
subsequent allocations.
We get a reference to such UAR and put in mlx5_priv so any kernel
consumer can make use of it.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 969aa1fe17e2..9a3a0954855b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -679,6 +679,7 @@ struct mlx5_priv {
 	struct srcu_struct      pfault_srcu;
 #endif
 	struct mlx5_bfreg_data		bfregs;
+	struct mlx5_uars_page	       *uar;
 };
 
 enum mlx5_device_state {
@@ -1007,7 +1008,7 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
 void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name,
-		       struct mlx5_uar *uar, enum mlx5_eq_type type);
+		       enum mlx5_eq_type type);
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
 int mlx5_stop_eqs(struct mlx5_core_dev *dev);
@@ -1118,6 +1119,8 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
+void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 
 struct mlx5_profile {
 	u64	mask;
-- 
cgit v1.2.3


From 5fe9dec0d045437e48f112b8fa705197bd7bc3c0 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Tue, 3 Jan 2017 23:55:25 +0200
Subject: IB/mlx5: Use blue flame register allocator in mlx5_ib

Make use of the blue flame registers allocator at mlx5_ib. Since blue
flame was not really supported we remove all the code that is related to
blue flame and we let all consumers to use the same blue flame register.
Once blue flame is supported we will add the code. As part of this patch
we also move the definition of struct mlx5_bf to mlx5_ib.h as it is only
used by mlx5_ib.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/cq.h       |  3 +--
 include/linux/mlx5/doorbell.h |  6 ++++--
 include/linux/mlx5/driver.h   | 19 -------------------
 3 files changed, 5 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 7c3c0d3aca37..996863381bc8 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -144,7 +144,6 @@ enum {
 
 static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
 			       void __iomem *uar_page,
-			       spinlock_t *doorbell_lock,
 			       u32 cons_index)
 {
 	__be32 doorbell[2];
@@ -164,7 +163,7 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
 	doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
 	doorbell[1] = cpu_to_be32(cq->cqn);
 
-	mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock);
+	mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
 }
 
 int mlx5_init_cq_table(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h
index afc78a3f4462..0787de28f2fc 100644
--- a/include/linux/mlx5/doorbell.h
+++ b/include/linux/mlx5/doorbell.h
@@ -68,10 +68,12 @@ static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(doorbell_lock, flags);
+	if (doorbell_lock)
+		spin_lock_irqsave(doorbell_lock, flags);
 	__raw_writel((__force u32) val[0], dest);
 	__raw_writel((__force u32) val[1], dest + 4);
-	spin_unlock_irqrestore(doorbell_lock, flags);
+	if (doorbell_lock)
+		spin_unlock_irqrestore(doorbell_lock, flags);
 }
 
 #endif
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9a3a0954855b..bb362f506a2e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -203,23 +203,6 @@ struct mlx5_bfreg_info {
 	u32			ver;
 };
 
-struct mlx5_bf {
-	void __iomem	       *reg;
-	void __iomem	       *regreg;
-	int			buf_size;
-	struct mlx5_uar	       *uar;
-	unsigned long		offset;
-	int			need_lock;
-	/* protect blue flame buffer selection when needed
-	 */
-	spinlock_t		lock;
-
-	/* serialize 64 bit writes when done as two 32 bit accesses
-	 */
-	spinlock_t		lock32;
-	int			bfregn;
-};
-
 struct mlx5_cmd_first {
 	__be32		data[4];
 };
@@ -612,8 +595,6 @@ struct mlx5_priv {
 	struct mlx5_eq_table	eq_table;
 	struct msix_entry	*msix_arr;
 	struct mlx5_irq_info	*irq_info;
-	struct mlx5_bfreg_info	bfregi;
-	MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock);
 
 	/* pages stuff */
 	struct workqueue_struct *pg_wq;
-- 
cgit v1.2.3


From b037c29a8056b8e896c4e084ba7cc30d6a1f165f Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Tue, 3 Jan 2017 23:55:26 +0200
Subject: IB/mlx5: Allow future extension of libmlx5 input data

Current check requests that new fields in struct
mlx5_ib_alloc_ucontext_req_v2 that are not known to the driver be zero.
This was introduced so new libraries passing additional information to
the kernel through struct mlx5_ib_alloc_ucontext_req_v2 will be notified
by old kernels that do not support their request by failing the
operation. This schecme is problematic since it requires libmlx5 to issue
the requests with descending input size for struct
mlx5_ib_alloc_ucontext_req_v2.

To avoid this, we require that new features that will obey the following
rules:
If the feature requires one or more fields in the response and the at
least one of the fields can be encoded such that a zero value means the
kernel ignored the request then this field will provide the indication
to the library. If no response is required or if zero is a valid
response, a new field should be added that indicates to the library
whether its request was processed.

Fixes: b368d7cb8ceb ('IB/mlx5: Add hca_core_clock_offset to udata in init_ucontext')
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h | 12 +++++++-----
 include/linux/mlx5/driver.h | 12 ++++--------
 2 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index db1b9287012f..dd345e8cf6f0 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -211,6 +211,11 @@ enum {
 	MLX5_EN_WR	= (u64)2
 };
 
+enum {
+	MLX5_ADAPTER_PAGE_SHIFT		= 12,
+	MLX5_ADAPTER_PAGE_SIZE		= 1 << MLX5_ADAPTER_PAGE_SHIFT,
+};
+
 enum {
 	MLX5_BFREGS_PER_UAR		= 4,
 	MLX5_MAX_UARS			= 1 << 8,
@@ -219,6 +224,8 @@ enum {
 					  MLX5_NON_FP_BFREGS_PER_UAR,
 	MLX5_MAX_BFREGS			= MLX5_MAX_UARS *
 					  MLX5_NON_FP_BFREGS_PER_UAR,
+	MLX5_UARS_IN_PAGE		= PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
+	MLX5_NON_FP_BFREGS_IN_PAGE	= MLX5_NON_FP_BFREGS_PER_UAR * MLX5_UARS_IN_PAGE,
 };
 
 enum {
@@ -391,11 +398,6 @@ enum {
 	MLX5_MAX_PAGE_SHIFT		= 31
 };
 
-enum {
-	MLX5_ADAPTER_PAGE_SHIFT		= 12,
-	MLX5_ADAPTER_PAGE_SIZE		= 1 << MLX5_ADAPTER_PAGE_SHIFT,
-};
-
 enum {
 	MLX5_CAP_OFF_CMDIF_CSUM		= 46,
 };
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bb362f506a2e..7e7394fef835 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -189,18 +189,17 @@ enum mlx5_eq_type {
 };
 
 struct mlx5_bfreg_info {
-	struct mlx5_uar	       *uars;
-	int			num_uars;
+	u32		       *sys_pages;
 	int			num_low_latency_bfregs;
-	unsigned long	       *bitmap;
 	unsigned int	       *count;
-	struct mlx5_bf	       *bfs;
 
 	/*
 	 * protect bfreg allocation data structs
 	 */
 	struct mutex		lock;
 	u32			ver;
+	bool			lib_uar_4k;
+	u32			num_sys_pages;
 };
 
 struct mlx5_cmd_first {
@@ -470,13 +469,10 @@ struct mlx5_sq_bfreg {
 
 struct mlx5_uar {
 	u32			index;
-	struct list_head	bf_list;
-	unsigned		free_bf_bmap;
-	void __iomem	       *bf_map;
 	void __iomem	       *map;
+	void __iomem	       *bf_map;
 };
 
-
 struct mlx5_core_health {
 	struct health_buffer __iomem   *health;
 	__be32 __iomem		       *health_counter;
-- 
cgit v1.2.3


From 30aa60b3bd12bd79b5324b7b595bd3446ab24b52 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Tue, 3 Jan 2017 23:55:27 +0200
Subject: IB/mlx5: Support 4k UAR for libmlx5

Add fields to structs to convey to kernel an indication whether the
library supports multi UARs per page and return to the library the size
of a UAR based on the queried value.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/cq.h     |  2 +-
 include/linux/mlx5/driver.h | 12 ------------
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 996863381bc8..95898847c7d4 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -42,13 +42,13 @@ struct mlx5_core_cq {
 	int			cqe_sz;
 	__be32		       *set_ci_db;
 	__be32		       *arm_db;
+	struct mlx5_uars_page  *uar;
 	atomic_t		refcount;
 	struct completion	free;
 	unsigned		vector;
 	unsigned int		irqn;
 	void (*comp)		(struct mlx5_core_cq *);
 	void (*event)		(struct mlx5_core_cq *, enum mlx5_event);
-	struct mlx5_uar	       *uar;
 	u32			cons_index;
 	unsigned		arm_sn;
 	struct mlx5_rsc_debug	*dbg;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7e7394fef835..10e632588cd5 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -467,12 +467,6 @@ struct mlx5_sq_bfreg {
 	unsigned int		offset;
 };
 
-struct mlx5_uar {
-	u32			index;
-	void __iomem	       *map;
-	void __iomem	       *bf_map;
-};
-
 struct mlx5_core_health {
 	struct health_buffer __iomem   *health;
 	__be32 __iomem		       *health_counter;
@@ -725,7 +719,6 @@ struct mlx5_td {
 };
 
 struct mlx5e_resources {
-	struct mlx5_uar            cq_uar;
 	u32                        pdn;
 	struct mlx5_td             td;
 	struct mlx5_core_mkey      mkey;
@@ -915,11 +908,6 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
-int mlx5_alloc_bfregs(struct mlx5_core_dev *dev, struct mlx5_bfreg_info *bfregi);
-int mlx5_free_bfregs(struct mlx5_core_dev *dev, struct mlx5_bfreg_info *bfregi);
-int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar,
-		       bool map_wc);
-void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
 void mlx5_health_cleanup(struct mlx5_core_dev *dev);
 int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 2c956a60778cbb6a27e0c7a8a52a91378c90e1d1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 8 Jan 2017 13:54:00 +0100
Subject: siphash: add cryptographically secure PRF

SipHash is a 64-bit keyed hash function that is actually a
cryptographically secure PRF, like HMAC. Except SipHash is super fast,
and is meant to be used as a hashtable keyed lookup function, or as a
general PRF for short input use cases, such as sequence numbers or RNG
chaining.

For the first usage:

There are a variety of attacks known as "hashtable poisoning" in which an
attacker forms some data such that the hash of that data will be the
same, and then preceeds to fill up all entries of a hashbucket. This is
a realistic and well-known denial-of-service vector. Currently
hashtables use jhash, which is fast but not secure, and some kind of
rotating key scheme (or none at all, which isn't good). SipHash is meant
as a replacement for jhash in these cases.

There are a modicum of places in the kernel that are vulnerable to
hashtable poisoning attacks, either via userspace vectors or network
vectors, and there's not a reliable mechanism inside the kernel at the
moment to fix it. The first step toward fixing these issues is actually
getting a secure primitive into the kernel for developers to use. Then
we can, bit by bit, port things over to it as deemed appropriate.

While SipHash is extremely fast for a cryptographically secure function,
it is likely a bit slower than the insecure jhash, and so replacements
will be evaluated on a case-by-case basis based on whether or not the
difference in speed is negligible and whether or not the current jhash usage
poses a real security risk.

For the second usage:

A few places in the kernel are using MD5 or SHA1 for creating secure
sequence numbers, syn cookies, port numbers, or fast random numbers.
SipHash is a faster and more fitting, and more secure replacement for MD5
in those situations. Replacing MD5 and SHA1 with SipHash for these uses is
obvious and straight-forward, and so is submitted along with this patch
series. There shouldn't be much of a debate over its efficacy.

Dozens of languages are already using this internally for their hash
tables and PRFs. Some of the BSDs already use this in their kernels.
SipHash is a widely known high-speed solution to a widely known set of
problems, and it's time we catch-up.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/siphash.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 include/linux/siphash.h

(limited to 'include/linux')

diff --git a/include/linux/siphash.h b/include/linux/siphash.h
new file mode 100644
index 000000000000..feeb29cd113e
--- /dev/null
+++ b/include/linux/siphash.h
@@ -0,0 +1,85 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4.
+ */
+
+#ifndef _LINUX_SIPHASH_H
+#define _LINUX_SIPHASH_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+#define SIPHASH_ALIGNMENT __alignof__(u64)
+typedef struct {
+	u64 key[2];
+} siphash_key_t;
+
+u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
+#endif
+
+u64 siphash_1u64(const u64 a, const siphash_key_t *key);
+u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
+u64 siphash_3u64(const u64 a, const u64 b, const u64 c,
+		 const siphash_key_t *key);
+u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d,
+		 const siphash_key_t *key);
+u64 siphash_1u32(const u32 a, const siphash_key_t *key);
+u64 siphash_3u32(const u32 a, const u32 b, const u32 c,
+		 const siphash_key_t *key);
+
+static inline u64 siphash_2u32(const u32 a, const u32 b,
+			       const siphash_key_t *key)
+{
+	return siphash_1u64((u64)b << 32 | a, key);
+}
+static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c,
+			       const u32 d, const siphash_key_t *key)
+{
+	return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key);
+}
+
+
+static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
+				     const siphash_key_t *key)
+{
+	if (__builtin_constant_p(len) && len == 4)
+		return siphash_1u32(le32_to_cpup((const __le32 *)data), key);
+	if (__builtin_constant_p(len) && len == 8)
+		return siphash_1u64(le64_to_cpu(data[0]), key);
+	if (__builtin_constant_p(len) && len == 16)
+		return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
+				    key);
+	if (__builtin_constant_p(len) && len == 24)
+		return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
+				    le64_to_cpu(data[2]), key);
+	if (__builtin_constant_p(len) && len == 32)
+		return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
+				    le64_to_cpu(data[2]), le64_to_cpu(data[3]),
+				    key);
+	return __siphash_aligned(data, len, key);
+}
+
+/**
+ * siphash - compute 64-bit siphash PRF value
+ * @data: buffer to hash
+ * @size: size of @data
+ * @key: the siphash key
+ */
+static inline u64 siphash(const void *data, size_t len,
+			  const siphash_key_t *key)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+		return __siphash_unaligned(data, len, key);
+#endif
+	return ___siphash_aligned(data, len, key);
+}
+
+#endif /* _LINUX_SIPHASH_H */
-- 
cgit v1.2.3


From 1ae2324f732c9c4e2fa4ebd885fa1001b70d52e1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 8 Jan 2017 13:54:01 +0100
Subject: siphash: implement HalfSipHash1-3 for hash tables

HalfSipHash, or hsiphash, is a shortened version of SipHash, which
generates 32-bit outputs using a weaker 64-bit key. It has *much* lower
security margins, and shouldn't be used for anything too sensitive, but
it could be used as a hashtable key function replacement, if the output
is never exposed, and if the security requirement is not too high.

The goal is to make this something that performance-critical jhash users
would be willing to use.

On 64-bit machines, HalfSipHash1-3 is slower than SipHash1-3, so we alias
SipHash1-3 to HalfSipHash1-3 on those systems.

64-bit x86_64:
[    0.509409] test_siphash:     SipHash2-4 cycles: 4049181
[    0.510650] test_siphash:     SipHash1-3 cycles: 2512884
[    0.512205] test_siphash: HalfSipHash1-3 cycles: 3429920
[    0.512904] test_siphash:    JenkinsHash cycles:  978267
So, we map hsiphash() -> SipHash1-3

32-bit x86:
[    0.509868] test_siphash:     SipHash2-4 cycles: 14812892
[    0.513601] test_siphash:     SipHash1-3 cycles:  9510710
[    0.515263] test_siphash: HalfSipHash1-3 cycles:  3856157
[    0.515952] test_siphash:    JenkinsHash cycles:  1148567
So, we map hsiphash() -> HalfSipHash1-3

hsiphash() is roughly 3 times slower than jhash(), but comes with a
considerable security improvement.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/siphash.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/siphash.h b/include/linux/siphash.h
index feeb29cd113e..fa7a6b9cedbf 100644
--- a/include/linux/siphash.h
+++ b/include/linux/siphash.h
@@ -5,7 +5,9 @@
  * SipHash: a fast short-input PRF
  * https://131002.net/siphash/
  *
- * This implementation is specifically for SipHash2-4.
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
  */
 
 #ifndef _LINUX_SIPHASH_H
@@ -82,4 +84,57 @@ static inline u64 siphash(const void *data, size_t len,
 	return ___siphash_aligned(data, len, key);
 }
 
+#define HSIPHASH_ALIGNMENT __alignof__(unsigned long)
+typedef struct {
+	unsigned long key[2];
+} hsiphash_key_t;
+
+u32 __hsiphash_aligned(const void *data, size_t len,
+		       const hsiphash_key_t *key);
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+			 const hsiphash_key_t *key);
+#endif
+
+u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
+u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
+u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c,
+		  const hsiphash_key_t *key);
+u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
+		  const hsiphash_key_t *key);
+
+static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
+				      const hsiphash_key_t *key)
+{
+	if (__builtin_constant_p(len) && len == 4)
+		return hsiphash_1u32(le32_to_cpu(data[0]), key);
+	if (__builtin_constant_p(len) && len == 8)
+		return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+				     key);
+	if (__builtin_constant_p(len) && len == 12)
+		return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+				     le32_to_cpu(data[2]), key);
+	if (__builtin_constant_p(len) && len == 16)
+		return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+				     le32_to_cpu(data[2]), le32_to_cpu(data[3]),
+				     key);
+	return __hsiphash_aligned(data, len, key);
+}
+
+/**
+ * hsiphash - compute 32-bit hsiphash PRF value
+ * @data: buffer to hash
+ * @size: size of @data
+ * @key: the hsiphash key
+ */
+static inline u32 hsiphash(const void *data, size_t len,
+			   const hsiphash_key_t *key)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+		return __hsiphash_unaligned(data, len, key);
+#endif
+	return ___hsiphash_aligned(data, len, key);
+}
+
 #endif /* _LINUX_SIPHASH_H */
-- 
cgit v1.2.3


From b4b7b772e8b018286482d8d1fba7804ceac56a64 Mon Sep 17 00:00:00 2001
From: jpinto <Joao.Pinto@synopsys.com>
Date: Mon, 9 Jan 2017 12:35:08 +0000
Subject: stmmac: adding DT parameter for LPI tx clock gating

This patch adds a new parameter to the stmmac DT: snps,en-tx-lpi-clockgating.
It was ported from synopsys/dwc_eth_qos.c and it is useful if lpi tx clock
gating is needed by stmmac users also.

Signed-off-by: Joao Pinto <jpinto@synopsys.com>
Tested-by: Niklas Cassel <niklas.cassel@axis.com>
Reviewed-by: Lars Persson <larper@axis.com>
Acked-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 889e0e9a3f1c..e3cd7588623d 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -142,5 +142,6 @@ struct plat_stmmacenet_data {
 	int has_gmac4;
 	bool tso_en;
 	int mac_port_sel_speed;
+	bool en_tx_lpi_clockgating;
 };
 #endif
-- 
cgit v1.2.3


From f573c0b9c4e02691cf87736bd0824fd37ec02e65 Mon Sep 17 00:00:00 2001
From: jpinto <Joao.Pinto@synopsys.com>
Date: Mon, 9 Jan 2017 12:35:09 +0000
Subject: stmmac: move stmmac_clk, pclk, clk_ptp_ref and stmmac_rst to platform
 structure

This patch moves stmmac_clk, pclk, clk_ptp_ref and stmmac_rst to the
plat_stmmacenet_data structure. It also moves these platform variables
initialization to stmmac_platform. This was done for two reasons:

a) If PCI is used, platform related code is being executed in stmmac_main
resulting in warnings that have no sense and conceptually was not right

b) stmmac as a synopsys reference ethernet driver stack will be hosting
more and more drivers to its structure like synopsys/dwc_eth_qos.c.
These drivers have their own DT bindings that are not compatible with
stmmac's. One of the most important are the clock names, and so they need
to be parsed in the glue logic and initialized there, and that is the main
reason why the clocks were passed to the platform structure.

Signed-off-by: Joao Pinto <jpinto@synopsys.com>
Tested-by: Niklas Cassel <niklas.cassel@axis.com>
Reviewed-by: Lars Persson <larper@axis.com>
Acked-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index e3cd7588623d..d76033d6726d 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -138,6 +138,11 @@ struct plat_stmmacenet_data {
 	int (*init)(struct platform_device *pdev, void *priv);
 	void (*exit)(struct platform_device *pdev, void *priv);
 	void *bsp_priv;
+	struct clk *stmmac_clk;
+	struct clk *pclk;
+	struct clk *clk_ptp_ref;
+	unsigned int clk_ptp_rate;
+	struct reset_control *stmmac_rst;
 	struct stmmac_axi *axi;
 	int has_gmac4;
 	bool tso_en;
-- 
cgit v1.2.3


From ac7138746e14137a451f8539614cdd349153e0c0 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Mon, 9 Jan 2017 16:55:13 +0100
Subject: smc: establish new socket family

* enable smc module loading and unloading
 * register new socket family
 * basic smc socket creation and deletion
 * use backing TCP socket to run CLC (Connection Layer Control)
   handshake of SMC protocol
 * Setup for infiniband traffic is implemented in follow-on patches.
   For now fallback to TCP socket is always used.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Reviewed-by: Utz Bacher <utz.bacher@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index c06438023d79..082027457825 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -202,8 +202,12 @@ struct ucred {
 #define AF_VSOCK	40	/* vSockets			*/
 #define AF_KCM		41	/* Kernel Connection Multiplexor*/
 #define AF_QIPCRTR	42	/* Qualcomm IPC Router          */
+#define AF_SMC		43	/* smc sockets: reserve number for
+				 * PF_SMC protocol family that
+				 * reuses AF_INET address family
+				 */
 
-#define AF_MAX		43	/* For now.. */
+#define AF_MAX		44	/* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -251,6 +255,7 @@ struct ucred {
 #define PF_VSOCK	AF_VSOCK
 #define PF_KCM		AF_KCM
 #define PF_QIPCRTR	AF_QIPCRTR
+#define PF_SMC		AF_SMC
 #define PF_MAX		AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
-- 
cgit v1.2.3


From 39f19ebbf57b403695f7b5f9cf322fe1ddb5d7fb Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Mon, 9 Jan 2017 10:19:50 -0800
Subject: bpf: rename ARG_PTR_TO_STACK

since ARG_PTR_TO_STACK is no longer just pointer to stack
rename it to ARG_PTR_TO_MEM and adjust comment.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f74ae68086dc..94ea8d2383e6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -69,14 +69,14 @@ enum bpf_arg_type {
 	/* the following constraints used to prototype bpf_memcmp() and other
 	 * functions that access data on eBPF program stack
 	 */
-	ARG_PTR_TO_STACK,	/* any pointer to eBPF program stack */
-	ARG_PTR_TO_RAW_STACK,	/* any pointer to eBPF program stack, area does not
-				 * need to be initialized, helper function must fill
-				 * all bytes or clear them in error case.
+	ARG_PTR_TO_MEM,		/* pointer to valid memory (stack, packet, map value) */
+	ARG_PTR_TO_UNINIT_MEM,	/* pointer to memory does not need to be initialized,
+				 * helper function must fill all bytes or clear
+				 * them in error case.
 				 */
 
-	ARG_CONST_STACK_SIZE,	/* number of bytes accessed from stack */
-	ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */
+	ARG_CONST_SIZE,		/* number of bytes accessed from memory */
+	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
-- 
cgit v1.2.3


From aecb57da7ae9df1f9c2eb11788888b135ddc0203 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vesa=20J=C3=A4=C3=A4skel=C3=A4inen?=
 <vesa.jaaskelainen@vaisala.com>
Date: Fri, 23 Dec 2016 13:15:58 +0200
Subject: rtc: tps65910: Add RTC calibration support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Texas Instrument's TPS65910 has support for compensating RTC crystal
inaccuracies. When enabled every hour RTC counter value will be compensated
with two's complement value.

Signed-off-by: Vesa Jääskeläinen <vesa.jaaskelainen@vaisala.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/mfd/tps65910.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 6483a6fdce59..ffb21e79204d 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -134,6 +134,7 @@
 
 /* RTC_CTRL_REG bitfields */
 #define TPS65910_RTC_CTRL_STOP_RTC			0x01 /*0=stop, 1=run */
+#define TPS65910_RTC_CTRL_AUTO_COMP			0x04
 #define TPS65910_RTC_CTRL_GET_TIME			0x40
 
 /* RTC_STATUS_REG bitfields */
-- 
cgit v1.2.3


From b8aa8453918ebfd93d78de56c2afd4b735e02e27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= <mic@digikod.net>
Date: Thu, 22 Dec 2016 00:32:25 +0100
Subject: security: Fix inode_getattr documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace arguments @mnt and @dentry with @path.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/lsm_hooks.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 558adfa5c8a8..9cf50ad2fe20 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -352,8 +352,7 @@
  *	Return 0 if permission is granted.
  * @inode_getattr:
  *	Check permission before obtaining file attributes.
- *	@mnt is the vfsmount where the dentry was looked up
- *	@dentry contains the dentry structure for the file.
+ *	@path contains the path structure for the file.
  *	Return 0 if permission is granted.
  * @inode_setxattr:
  *	Check permission before setting the extended attributes
-- 
cgit v1.2.3


From 989e0aac1a801e9e9580632c9fd448a7aaca596a Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Fri, 30 Dec 2016 15:01:17 +0100
Subject: ata: pass queued command to ->sff_data_xfer method

For Atari Falcon PATA support we need to check the current command
in its ->sff_data_xfer method.  Update core code and all users
accordingly.

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index c170be548b7f..0e8a8000b45f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -968,7 +968,7 @@ struct ata_port_operations {
 	void (*sff_tf_read)(struct ata_port *ap, struct ata_taskfile *tf);
 	void (*sff_exec_command)(struct ata_port *ap,
 				 const struct ata_taskfile *tf);
-	unsigned int (*sff_data_xfer)(struct ata_device *dev,
+	unsigned int (*sff_data_xfer)(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
 	void (*sff_irq_on)(struct ata_port *);
 	bool (*sff_irq_check)(struct ata_port *);
@@ -1823,11 +1823,11 @@ extern void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf);
 extern void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf);
 extern void ata_sff_exec_command(struct ata_port *ap,
 				 const struct ata_taskfile *tf);
-extern unsigned int ata_sff_data_xfer(struct ata_device *dev,
+extern unsigned int ata_sff_data_xfer(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
-extern unsigned int ata_sff_data_xfer32(struct ata_device *dev,
+extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
-extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev,
+extern unsigned int ata_sff_data_xfer_noirq(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
 extern void ata_sff_irq_on(struct ata_port *ap);
 extern void ata_sff_irq_clear(struct ata_port *ap);
-- 
cgit v1.2.3


From 39d3e7584a686541a3295ff1624d341e669e1afc Mon Sep 17 00:00:00 2001
From: Parav Pandit <pandit.parav@gmail.com>
Date: Tue, 10 Jan 2017 00:02:13 +0000
Subject: rdmacg: Added rdma cgroup controller

Added rdma cgroup controller that does accounting, limit enforcement
on rdma/IB resources.

Added rdma cgroup header file which defines its APIs to perform
charging/uncharging functionality. It also defined APIs for RDMA/IB
stack for device registration. Devices which are registered will
participate in controller functions of accounting and limit
enforcements. It define rdmacg_device structure to bind IB stack
and RDMA cgroup controller.

RDMA resources are tracked using resource pool. Resource pool is per
device, per cgroup entity which allows setting up accounting limits
on per device basis.

Currently resources are defined by the RDMA cgroup.

Resource pool is created/destroyed dynamically whenever
charging/uncharging occurs respectively and whenever user
configuration is done. Its a tradeoff of memory vs little more code
space that creates resource pool object whenever necessary, instead of
creating them during cgroup creation and device registration time.

Signed-off-by: Parav Pandit <pandit.parav@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup_rdma.h   | 53 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/cgroup_subsys.h |  4 ++++
 2 files changed, 57 insertions(+)
 create mode 100644 include/linux/cgroup_rdma.h

(limited to 'include/linux')

diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h
new file mode 100644
index 000000000000..e94290b29e99
--- /dev/null
+++ b/include/linux/cgroup_rdma.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License. See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#ifndef _CGROUP_RDMA_H
+#define _CGROUP_RDMA_H
+
+#include <linux/cgroup.h>
+
+enum rdmacg_resource_type {
+	RDMACG_RESOURCE_HCA_HANDLE,
+	RDMACG_RESOURCE_HCA_OBJECT,
+	RDMACG_RESOURCE_MAX,
+};
+
+#ifdef CONFIG_CGROUP_RDMA
+
+struct rdma_cgroup {
+	struct cgroup_subsys_state	css;
+
+	/*
+	 * head to keep track of all resource pools
+	 * that belongs to this cgroup.
+	 */
+	struct list_head		rpools;
+};
+
+struct rdmacg_device {
+	struct list_head	dev_node;
+	struct list_head	rpools;
+	char			*name;
+};
+
+/*
+ * APIs for RDMA/IB stack to publish when a device wants to
+ * participate in resource accounting
+ */
+int rdmacg_register_device(struct rdmacg_device *device);
+void rdmacg_unregister_device(struct rdmacg_device *device);
+
+/* APIs for RDMA/IB stack to charge/uncharge pool specific resources */
+int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
+		      struct rdmacg_device *device,
+		      enum rdmacg_resource_type index);
+void rdmacg_uncharge(struct rdma_cgroup *cg,
+		     struct rdmacg_device *device,
+		     enum rdmacg_resource_type index);
+#endif	/* CONFIG_CGROUP_RDMA */
+#endif	/* _CGROUP_RDMA_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0df0336acee9..d0e597c44585 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -56,6 +56,10 @@ SUBSYS(hugetlb)
 SUBSYS(pids)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_RDMA)
+SUBSYS(rdma)
+#endif
+
 /*
  * The following subsystems are not supported on the default hierarchy.
  */
-- 
cgit v1.2.3


From 78c9981f6132cb600d545007c91e300021b7caf3 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Mon, 2 Jan 2017 19:28:24 +0000
Subject: iio:buffer: Stop exporting iio_update_demux

Nothing outside of indiustrialio-buffer.c should be using this.
Requires a large amount of juggling of functions to avoid a
forward definition.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/buffer.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 70a5164f4728..889d0f2f5d7b 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -168,8 +168,6 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev,
 	return iio_push_to_buffers(indio_dev, data);
 }
 
-int iio_update_demux(struct iio_dev *indio_dev);
-
 bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev,
 	const unsigned long *mask);
 
-- 
cgit v1.2.3


From 263cf5e6577a779cc3311c5290325ef3917de2ea Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Mon, 2 Jan 2017 19:28:25 +0000
Subject: iio:buffer.h Reformat structure comments to be inline.

This should make it easier to see how the structure is split into
public and private parts - reflected in the generated documentation.

Deliberately use /* instead of /** for the private elements to avoid
warnings when kernel-doc script runs.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/buffer.h | 100 ++++++++++++++++++++++++++++-----------------
 1 file changed, 63 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 889d0f2f5d7b..4a65a7bb40a4 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -74,45 +74,71 @@ struct iio_buffer_access_funcs {
 
 /**
  * struct iio_buffer - general buffer structure
- * @length:		[DEVICE] number of datums in buffer
- * @bytes_per_datum:	[DEVICE] size of individual datum including timestamp
- * @scan_el_attrs:	[DRIVER] control of scan elements if that scan mode
- *			control method is used
- * @scan_mask:		[INTERN] bitmask used in masking scan mode elements
- * @scan_timestamp:	[INTERN] does the scan mode include a timestamp
- * @access:		[DRIVER] buffer access functions associated with the
- *			implementation.
- * @scan_el_dev_attr_list:[INTERN] list of scan element related attributes.
- * @buffer_group:	[INTERN] attributes of the buffer group
- * @scan_el_group:	[DRIVER] attribute group for those attributes not
- *			created from the iio_chan_info array.
- * @pollq:		[INTERN] wait queue to allow for polling on the buffer.
- * @stufftoread:	[INTERN] flag to indicate new data.
- * @attrs:		[INTERN] standard attributes of the buffer
- * @demux_list:		[INTERN] list of operations required to demux the scan.
- * @demux_bounce:	[INTERN] buffer for doing gather from incoming scan.
- * @buffer_list:	[INTERN] entry in the devices list of current buffers.
- * @ref:		[INTERN] reference count of the buffer.
- * @watermark:		[INTERN] number of datums to wait for poll/read.
+ *
+ * Note that the internals of this structure should only be of interest to
+ * those writing new buffer implementations.
  */
 struct iio_buffer {
-	int					length;
-	int					bytes_per_datum;
-	struct attribute_group			*scan_el_attrs;
-	long					*scan_mask;
-	bool					scan_timestamp;
-	const struct iio_buffer_access_funcs	*access;
-	struct list_head			scan_el_dev_attr_list;
-	struct attribute_group			buffer_group;
-	struct attribute_group			scan_el_group;
-	wait_queue_head_t			pollq;
-	bool					stufftoread;
-	const struct attribute			**attrs;
-	struct list_head			demux_list;
-	void					*demux_bounce;
-	struct list_head			buffer_list;
-	struct kref				ref;
-	unsigned int				watermark;
+	/** @length: Number of datums in buffer. */
+	int length;
+
+	/**  @bytes_per_datum: Size of individual datum including timestamp. */
+	int bytes_per_datum;
+
+	/**
+	 * @access: Buffer access functions associated with the
+	 * implementation.
+	 */
+	const struct iio_buffer_access_funcs *access;
+
+	/** @scan_mask: Bitmask used in masking scan mode elements. */
+	long *scan_mask;
+
+	/** @demux_list: List of operations required to demux the scan. */
+	struct list_head demux_list;
+
+	/** @pollq: Wait queue to allow for polling on the buffer. */
+	wait_queue_head_t pollq;
+
+	/** @watermark: Number of datums to wait for poll/read. */
+	unsigned int watermark;
+
+	/* private: */
+	/*
+	 * @scan_el_attrs: Control of scan elements if that scan mode
+	 * control method is used.
+	 */
+	struct attribute_group *scan_el_attrs;
+
+	/* @scan_timestamp: Does the scan mode include a timestamp. */
+	bool scan_timestamp;
+
+	/* @scan_el_dev_attr_list: List of scan element related attributes. */
+	struct list_head scan_el_dev_attr_list;
+
+	/* @buffer_group: Attributes of the buffer group. */
+	struct attribute_group buffer_group;
+
+	/*
+	 * @scan_el_group: Attribute group for those attributes not
+	 * created from the iio_chan_info array.
+	 */
+	struct attribute_group scan_el_group;
+
+	/* @stufftoread: Flag to indicate new data. */
+	bool stufftoread;
+
+	/* @attrs: Standard attributes of the buffer. */
+	const struct attribute **attrs;
+
+	/* @demux_bounce: Buffer for doing gather from incoming scan. */
+	void *demux_bounce;
+
+	/* @buffer_list: Entry in the devices list of current buffers. */
+	struct list_head buffer_list;
+
+	/* @ref: Reference count of the buffer. */
+	struct kref ref;
 };
 
 /**
-- 
cgit v1.2.3


From 9f4667776c138df33c4107fcd8811aa9cb6cdcbe Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Mon, 2 Jan 2017 19:28:26 +0000
Subject: iio:buffer: Introduced a function to assign the buffer specific
 attrs.

This is a necessary step in taking the buffer implementation
opaque.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/buffer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 4a65a7bb40a4..30ea9806db67 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -17,6 +17,8 @@
 
 struct iio_buffer;
 
+void iio_buffer_set_attrs(struct iio_buffer *buffer,
+			 const struct attribute **attrs);
 /**
  * INDIO_BUFFER_FLAG_FIXED_WATERMARK - Watermark level of the buffer can not be
  *   configured. It has a fixed value which will be buffer specific.
-- 
cgit v1.2.3


From c2bf8d5f3262b3942bf923ef3b86d6ebe590821d Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Mon, 2 Jan 2017 19:28:27 +0000
Subject: iio:buffer: Stop exporting iio_scan_mask_query

Nothing uses it outside of core code.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/buffer.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 30ea9806db67..635aa87eb5e9 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -161,9 +161,6 @@ int iio_update_buffers(struct iio_dev *indio_dev,
  **/
 void iio_buffer_init(struct iio_buffer *buffer);
 
-int iio_scan_mask_query(struct iio_dev *indio_dev,
-			struct iio_buffer *buffer, int bit);
-
 /**
  * iio_push_to_buffers() - push to a registered buffer.
  * @indio_dev:		iio_dev structure for device.
-- 
cgit v1.2.3


From 315a19eca0e7cbae1bef7f43b36fdcfc33f248f6 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Mon, 2 Jan 2017 19:28:28 +0000
Subject: iio:buffers: Push some docs down into the .c file.

Ancient legacy of me doing it wrong which it is nice to clear
up whilst we are here.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/buffer.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 635aa87eb5e9..cd77ed14eb7f 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -161,11 +161,6 @@ int iio_update_buffers(struct iio_dev *indio_dev,
  **/
 void iio_buffer_init(struct iio_buffer *buffer);
 
-/**
- * iio_push_to_buffers() - push to a registered buffer.
- * @indio_dev:		iio_dev structure for device.
- * @data:		Full scan.
- */
 int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data);
 
 /*
-- 
cgit v1.2.3


From d4ad4f4b721ad76e28b73e32b8602c011e281893 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Mon, 2 Jan 2017 19:28:29 +0000
Subject: iio:buffer:iio_push_to_buffers_with_timestamp fix kernel-doc.

Wrong start of kernel doc comment /* -> /**

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/buffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index cd77ed14eb7f..8c915c2c18f1 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -163,7 +163,7 @@ void iio_buffer_init(struct iio_buffer *buffer);
 
 int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data);
 
-/*
+/**
  * iio_push_to_buffers_with_timestamp() - push data and timestamp to buffers
  * @indio_dev:		iio_dev structure for device.
  * @data:		sample data
-- 
cgit v1.2.3


From 8abd5ba53962854c3a1c21d04fa6fdba54cc0ee1 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Mon, 2 Jan 2017 19:28:30 +0000
Subject: iio:kfifo_buf header include push down.

As a precursor to splitting buffer.h, lets make sure all drivers
include the relevant headers rather than relying on picking them
up from kfifo_buf.h.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/kfifo_buf.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/kfifo_buf.h b/include/linux/iio/kfifo_buf.h
index 1683bc710d14..027cfa9c3703 100644
--- a/include/linux/iio/kfifo_buf.h
+++ b/include/linux/iio/kfifo_buf.h
@@ -1,9 +1,8 @@
 #ifndef __LINUX_IIO_KFIFO_BUF_H__
 #define __LINUX_IIO_KFIFO_BUF_H__
 
-#include <linux/kfifo.h>
-#include <linux/iio/iio.h>
-#include <linux/iio/buffer.h>
+struct iio_buffer;
+struct device;
 
 struct iio_buffer *iio_kfifo_allocate(void);
 void iio_kfifo_free(struct iio_buffer *r);
-- 
cgit v1.2.3


From 2b827ad54111439b919605da90e122ecda88bad6 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Mon, 2 Jan 2017 19:28:32 +0000
Subject: iio:buffer: Push implementation of iio_device_attach_buffer into .c
 file

This is a precursor to the splitting of buffer.h into parts relevant
to buffer implementation vs those for devices using buffers.
struct buffer is about to become opaque as far as the header is
concerned.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/buffer.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 8c915c2c18f1..5cdfe2b7d674 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -194,20 +194,8 @@ bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev,
 struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer);
 void iio_buffer_put(struct iio_buffer *buffer);
 
-/**
- * iio_device_attach_buffer - Attach a buffer to a IIO device
- * @indio_dev: The device the buffer should be attached to
- * @buffer: The buffer to attach to the device
- *
- * This function attaches a buffer to a IIO device. The buffer stays attached to
- * the device until the device is freed. The function should only be called at
- * most once per device.
- */
-static inline void iio_device_attach_buffer(struct iio_dev *indio_dev,
-	struct iio_buffer *buffer)
-{
-	indio_dev->buffer = iio_buffer_get(buffer);
-}
+void iio_device_attach_buffer(struct iio_dev *indio_dev,
+			      struct iio_buffer *buffer);
 
 #else /* CONFIG_IIO_BUFFER */
 
-- 
cgit v1.2.3


From 33dd94cb972175249258329c4aaffddcc82c2005 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Mon, 2 Jan 2017 19:28:34 +0000
Subject: iio:buffer.h - split into buffer.h and buffer_impl.h

buffer.h supplies everything needed for devices using buffers.
buffer_impl.h supplies access to the internals as needed to write
a buffer implementation.

This was really motivated by the mess that turned up in the
kernel-doc documentation pulled in by the new sphinx docs.
It made it clear that our logical separations in headers were
generally terrible.  The buffer case was easy to sort out without
greatly effecting drivers so here it is.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
---
 include/linux/iio/buffer.h      | 156 +-------------------------------------
 include/linux/iio/buffer_impl.h | 162 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+), 155 deletions(-)
 create mode 100644 include/linux/iio/buffer_impl.h

(limited to 'include/linux')

diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 5cdfe2b7d674..48767c776119 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -11,155 +11,11 @@
 #define _IIO_BUFFER_GENERIC_H_
 #include <linux/sysfs.h>
 #include <linux/iio/iio.h>
-#include <linux/kref.h>
-
-#ifdef CONFIG_IIO_BUFFER
 
 struct iio_buffer;
 
 void iio_buffer_set_attrs(struct iio_buffer *buffer,
 			 const struct attribute **attrs);
-/**
- * INDIO_BUFFER_FLAG_FIXED_WATERMARK - Watermark level of the buffer can not be
- *   configured. It has a fixed value which will be buffer specific.
- */
-#define INDIO_BUFFER_FLAG_FIXED_WATERMARK BIT(0)
-
-/**
- * struct iio_buffer_access_funcs - access functions for buffers.
- * @store_to:		actually store stuff to the buffer
- * @read_first_n:	try to get a specified number of bytes (must exist)
- * @data_available:	indicates how much data is available for reading from
- *			the buffer.
- * @request_update:	if a parameter change has been marked, update underlying
- *			storage.
- * @set_bytes_per_datum:set number of bytes per datum
- * @set_length:		set number of datums in buffer
- * @enable:             called if the buffer is attached to a device and the
- *                      device starts sampling. Calls are balanced with
- *                      @disable.
- * @disable:            called if the buffer is attached to a device and the
- *                      device stops sampling. Calles are balanced with @enable.
- * @release:		called when the last reference to the buffer is dropped,
- *			should free all resources allocated by the buffer.
- * @modes:		Supported operating modes by this buffer type
- * @flags:		A bitmask combination of INDIO_BUFFER_FLAG_*
- *
- * The purpose of this structure is to make the buffer element
- * modular as event for a given driver, different usecases may require
- * different buffer designs (space efficiency vs speed for example).
- *
- * It is worth noting that a given buffer implementation may only support a
- * small proportion of these functions.  The core code 'should' cope fine with
- * any of them not existing.
- **/
-struct iio_buffer_access_funcs {
-	int (*store_to)(struct iio_buffer *buffer, const void *data);
-	int (*read_first_n)(struct iio_buffer *buffer,
-			    size_t n,
-			    char __user *buf);
-	size_t (*data_available)(struct iio_buffer *buffer);
-
-	int (*request_update)(struct iio_buffer *buffer);
-
-	int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd);
-	int (*set_length)(struct iio_buffer *buffer, int length);
-
-	int (*enable)(struct iio_buffer *buffer, struct iio_dev *indio_dev);
-	int (*disable)(struct iio_buffer *buffer, struct iio_dev *indio_dev);
-
-	void (*release)(struct iio_buffer *buffer);
-
-	unsigned int modes;
-	unsigned int flags;
-};
-
-/**
- * struct iio_buffer - general buffer structure
- *
- * Note that the internals of this structure should only be of interest to
- * those writing new buffer implementations.
- */
-struct iio_buffer {
-	/** @length: Number of datums in buffer. */
-	int length;
-
-	/**  @bytes_per_datum: Size of individual datum including timestamp. */
-	int bytes_per_datum;
-
-	/**
-	 * @access: Buffer access functions associated with the
-	 * implementation.
-	 */
-	const struct iio_buffer_access_funcs *access;
-
-	/** @scan_mask: Bitmask used in masking scan mode elements. */
-	long *scan_mask;
-
-	/** @demux_list: List of operations required to demux the scan. */
-	struct list_head demux_list;
-
-	/** @pollq: Wait queue to allow for polling on the buffer. */
-	wait_queue_head_t pollq;
-
-	/** @watermark: Number of datums to wait for poll/read. */
-	unsigned int watermark;
-
-	/* private: */
-	/*
-	 * @scan_el_attrs: Control of scan elements if that scan mode
-	 * control method is used.
-	 */
-	struct attribute_group *scan_el_attrs;
-
-	/* @scan_timestamp: Does the scan mode include a timestamp. */
-	bool scan_timestamp;
-
-	/* @scan_el_dev_attr_list: List of scan element related attributes. */
-	struct list_head scan_el_dev_attr_list;
-
-	/* @buffer_group: Attributes of the buffer group. */
-	struct attribute_group buffer_group;
-
-	/*
-	 * @scan_el_group: Attribute group for those attributes not
-	 * created from the iio_chan_info array.
-	 */
-	struct attribute_group scan_el_group;
-
-	/* @stufftoread: Flag to indicate new data. */
-	bool stufftoread;
-
-	/* @attrs: Standard attributes of the buffer. */
-	const struct attribute **attrs;
-
-	/* @demux_bounce: Buffer for doing gather from incoming scan. */
-	void *demux_bounce;
-
-	/* @buffer_list: Entry in the devices list of current buffers. */
-	struct list_head buffer_list;
-
-	/* @ref: Reference count of the buffer. */
-	struct kref ref;
-};
-
-/**
- * iio_update_buffers() - add or remove buffer from active list
- * @indio_dev:		device to add buffer to
- * @insert_buffer:	buffer to insert
- * @remove_buffer:	buffer_to_remove
- *
- * Note this will tear down the all buffering and build it up again
- */
-int iio_update_buffers(struct iio_dev *indio_dev,
-		       struct iio_buffer *insert_buffer,
-		       struct iio_buffer *remove_buffer);
-
-/**
- * iio_buffer_init() - Initialize the buffer structure
- * @buffer:		buffer to be initialized
- **/
-void iio_buffer_init(struct iio_buffer *buffer);
 
 int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data);
 
@@ -189,19 +45,9 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev,
 }
 
 bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev,
-	const unsigned long *mask);
-
-struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer);
-void iio_buffer_put(struct iio_buffer *buffer);
+				   const unsigned long *mask);
 
 void iio_device_attach_buffer(struct iio_dev *indio_dev,
 			      struct iio_buffer *buffer);
 
-#else /* CONFIG_IIO_BUFFER */
-
-static inline void iio_buffer_get(struct iio_buffer *buffer) {}
-static inline void iio_buffer_put(struct iio_buffer *buffer) {}
-
-#endif /* CONFIG_IIO_BUFFER */
-
 #endif /* _IIO_BUFFER_GENERIC_H_ */
diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h
new file mode 100644
index 000000000000..8daba198fafa
--- /dev/null
+++ b/include/linux/iio/buffer_impl.h
@@ -0,0 +1,162 @@
+#ifndef _IIO_BUFFER_GENERIC_IMPL_H_
+#define _IIO_BUFFER_GENERIC_IMPL_H_
+#include <linux/sysfs.h>
+#include <linux/kref.h>
+
+#ifdef CONFIG_IIO_BUFFER
+
+struct iio_dev;
+struct iio_buffer;
+
+/**
+ * INDIO_BUFFER_FLAG_FIXED_WATERMARK - Watermark level of the buffer can not be
+ *   configured. It has a fixed value which will be buffer specific.
+ */
+#define INDIO_BUFFER_FLAG_FIXED_WATERMARK BIT(0)
+
+/**
+ * struct iio_buffer_access_funcs - access functions for buffers.
+ * @store_to:		actually store stuff to the buffer
+ * @read_first_n:	try to get a specified number of bytes (must exist)
+ * @data_available:	indicates how much data is available for reading from
+ *			the buffer.
+ * @request_update:	if a parameter change has been marked, update underlying
+ *			storage.
+ * @set_bytes_per_datum:set number of bytes per datum
+ * @set_length:		set number of datums in buffer
+ * @enable:             called if the buffer is attached to a device and the
+ *                      device starts sampling. Calls are balanced with
+ *                      @disable.
+ * @disable:            called if the buffer is attached to a device and the
+ *                      device stops sampling. Calles are balanced with @enable.
+ * @release:		called when the last reference to the buffer is dropped,
+ *			should free all resources allocated by the buffer.
+ * @modes:		Supported operating modes by this buffer type
+ * @flags:		A bitmask combination of INDIO_BUFFER_FLAG_*
+ *
+ * The purpose of this structure is to make the buffer element
+ * modular as event for a given driver, different usecases may require
+ * different buffer designs (space efficiency vs speed for example).
+ *
+ * It is worth noting that a given buffer implementation may only support a
+ * small proportion of these functions.  The core code 'should' cope fine with
+ * any of them not existing.
+ **/
+struct iio_buffer_access_funcs {
+	int (*store_to)(struct iio_buffer *buffer, const void *data);
+	int (*read_first_n)(struct iio_buffer *buffer,
+			    size_t n,
+			    char __user *buf);
+	size_t (*data_available)(struct iio_buffer *buffer);
+
+	int (*request_update)(struct iio_buffer *buffer);
+
+	int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd);
+	int (*set_length)(struct iio_buffer *buffer, int length);
+
+	int (*enable)(struct iio_buffer *buffer, struct iio_dev *indio_dev);
+	int (*disable)(struct iio_buffer *buffer, struct iio_dev *indio_dev);
+
+	void (*release)(struct iio_buffer *buffer);
+
+	unsigned int modes;
+	unsigned int flags;
+};
+
+/**
+ * struct iio_buffer - general buffer structure
+ *
+ * Note that the internals of this structure should only be of interest to
+ * those writing new buffer implementations.
+ */
+struct iio_buffer {
+	/** @length: Number of datums in buffer. */
+	int length;
+
+	/**  @bytes_per_datum: Size of individual datum including timestamp. */
+	int bytes_per_datum;
+
+	/**
+	 * @access: Buffer access functions associated with the
+	 * implementation.
+	 */
+	const struct iio_buffer_access_funcs *access;
+
+	/** @scan_mask: Bitmask used in masking scan mode elements. */
+	long *scan_mask;
+
+	/** @demux_list: List of operations required to demux the scan. */
+	struct list_head demux_list;
+
+	/** @pollq: Wait queue to allow for polling on the buffer. */
+	wait_queue_head_t pollq;
+
+	/** @watermark: Number of datums to wait for poll/read. */
+	unsigned int watermark;
+
+	/* private: */
+	/*
+	 * @scan_el_attrs: Control of scan elements if that scan mode
+	 * control method is used.
+	 */
+	struct attribute_group *scan_el_attrs;
+
+	/* @scan_timestamp: Does the scan mode include a timestamp. */
+	bool scan_timestamp;
+
+	/* @scan_el_dev_attr_list: List of scan element related attributes. */
+	struct list_head scan_el_dev_attr_list;
+
+	/* @buffer_group: Attributes of the buffer group. */
+	struct attribute_group buffer_group;
+
+	/*
+	 * @scan_el_group: Attribute group for those attributes not
+	 * created from the iio_chan_info array.
+	 */
+	struct attribute_group scan_el_group;
+
+	/* @stufftoread: Flag to indicate new data. */
+	bool stufftoread;
+
+	/* @attrs: Standard attributes of the buffer. */
+	const struct attribute **attrs;
+
+	/* @demux_bounce: Buffer for doing gather from incoming scan. */
+	void *demux_bounce;
+
+	/* @buffer_list: Entry in the devices list of current buffers. */
+	struct list_head buffer_list;
+
+	/* @ref: Reference count of the buffer. */
+	struct kref ref;
+};
+
+/**
+ * iio_update_buffers() - add or remove buffer from active list
+ * @indio_dev:		device to add buffer to
+ * @insert_buffer:	buffer to insert
+ * @remove_buffer:	buffer_to_remove
+ *
+ * Note this will tear down the all buffering and build it up again
+ */
+int iio_update_buffers(struct iio_dev *indio_dev,
+		       struct iio_buffer *insert_buffer,
+		       struct iio_buffer *remove_buffer);
+
+/**
+ * iio_buffer_init() - Initialize the buffer structure
+ * @buffer:		buffer to be initialized
+ **/
+void iio_buffer_init(struct iio_buffer *buffer);
+
+struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer);
+void iio_buffer_put(struct iio_buffer *buffer);
+
+#else /* CONFIG_IIO_BUFFER */
+
+static inline void iio_buffer_get(struct iio_buffer *buffer) {}
+static inline void iio_buffer_put(struct iio_buffer *buffer) {}
+
+#endif /* CONFIG_IIO_BUFFER */
+#endif /* _IIO_BUFFER_GENERIC_IMPL_H_ */
-- 
cgit v1.2.3


From ccb61f8a99e6c29df4fb96a65dad4fad740d5be9 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Thu, 22 Dec 2016 16:54:00 -0800
Subject: Drivers: hv: vmbus: Fix a rescind handling bug

The host can rescind a channel that has been offered to the
guest and once the channel is rescinded, the host does not
respond to any requests on that channel. Deal with the case where
the guest may be blocked waiting for a response from the host.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 42fe43fb0c80..7ea20bd7cdd1 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -627,6 +627,7 @@ struct vmbus_channel_msginfo {
 
 	/* Synchronize the request/response if needed */
 	struct completion  waitevent;
+	struct vmbus_channel *waiting_channel;
 	union {
 		struct vmbus_channel_version_supported version_supported;
 		struct vmbus_channel_open_result open_result;
-- 
cgit v1.2.3


From 874bcd00f520cac297aefade201c4efc07fc8d17 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe.montjoie@gmail.com>
Date: Thu, 15 Dec 2016 20:36:33 +0100
Subject: apm-emulation: move APM_MINOR_DEV to include/linux/miscdevice.h

This patch move the define for APM_MINOR_DEV to include/linux/miscdevice.h
It is better that all minor number definitions are in the same place.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/miscdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index ed30d5d713e3..0590263c462c 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -22,6 +22,7 @@
 /*#define ADB_MOUSE_MINOR	10	FIXME OBSOLETE */
 #define WATCHDOG_MINOR		130	/* Watchdog timer     */
 #define TEMP_MINOR		131	/* Temperature Sensor */
+#define APM_MINOR_DEV		134
 #define RTC_MINOR		135
 #define EFI_RTC_MINOR		136	/* EFI Time services */
 #define VHCI_MINOR		137
-- 
cgit v1.2.3


From b0f2d7d546d37697d3f50753904f6f0c549b62bc Mon Sep 17 00:00:00 2001
From: Martyn Welch <martyn@welchs.me.uk>
Date: Sat, 10 Dec 2016 23:10:41 +0000
Subject: VME: Remove node entry from vme_driver

The vme_driver structure currently has a "node" entry. This entry is
never used and it's intended purpose has been lost to the mists of time.

Remove the entry from vme_driver to avoid confusion.

Signed-off-by: Martyn Welch <martyn.welch@collabora.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/vme.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vme.h b/include/linux/vme.h
index 8c589176c2f8..ec5e8bf6118e 100644
--- a/include/linux/vme.h
+++ b/include/linux/vme.h
@@ -108,7 +108,6 @@ struct vme_dev {
 };
 
 struct vme_driver {
-	struct list_head node;
 	const char *name;
 	int (*match)(struct vme_dev *);
 	int (*probe)(struct vme_dev *);
-- 
cgit v1.2.3


From e41746b0dd51f353630112558835aa62c1eb883e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 14 Dec 2016 15:33:39 +0000
Subject: debugfs: improve formatting of debugfs_real_fops()

Type of debugfs_real_fops() is longer than parameters and
the name, so there is no way to break the declaration nicely.
We have to go over 80 characters.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/debugfs.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 014cc564d1c4..7574e86ff1eb 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -52,8 +52,7 @@ extern struct srcu_struct debugfs_srcu;
  * Must only be called under the protection established by
  * debugfs_use_file_start().
  */
-static inline const struct file_operations *
-debugfs_real_fops(const struct file *filp)
+static inline const struct file_operations *debugfs_real_fops(const struct file *filp)
 	__must_hold(&debugfs_srcu)
 {
 	/*
-- 
cgit v1.2.3


From a0244a8df15d8caee8831872b6fc97b676cb15bd Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 15 Dec 2016 19:55:54 +0100
Subject: kref: prefer atomic_inc_not_zero to atomic_add_unless

On most platforms, there exists this ifdef:

 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)

This makes this patch functionally useless. However, on PPC, there is
actually an explicit definition of atomic_inc_not_zero with its own
assembly that is slightly more optimized than atomic_add_unless. So,
this patch changes kref to use atomic_inc_not_zero instead, for PPC and
any future platforms that might provide an explicit implementation.

This also puts this usage of kref more in line with a verbatim reading
of the examples in Paul McKenney's paper [1] in the section titled "2.4
Atomic Counting With Check and Release Memory Barrier", which uses
atomic_inc_not_zero.

[1] http://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2167.pdf

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kref.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index e15828fd71f1..62f0a84ae94e 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -133,6 +133,6 @@ static inline int kref_put_mutex(struct kref *kref,
  */
 static inline int __must_check kref_get_unless_zero(struct kref *kref)
 {
-	return atomic_add_unless(&kref->refcount, 1, 0);
+	return atomic_inc_not_zero(&kref->refcount);
 }
 #endif /* _KREF_H_ */
-- 
cgit v1.2.3


From bb5b06750f1d9b2d632d942d8a72b35a4dd930b9 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Wed, 4 Jan 2017 13:56:28 +0530
Subject: gpio: davinci: Remove redundant members davinci_gpio_controller stuct

davinci_gpio_controller struct has set_data, in_data, clr_data
members that are assigned and never used.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/platform_data/gpio-davinci.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index 6ace3fd32b6a..44ca5303849c 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -33,9 +33,6 @@ struct davinci_gpio_controller {
 	/* Serialize access to GPIO registers */
 	spinlock_t		lock;
 	void __iomem		*regs;
-	void __iomem		*set_data;
-	void __iomem		*clr_data;
-	void __iomem		*in_data;
 	int			gpio_unbanked;
 	unsigned		gpio_irq;
 };
-- 
cgit v1.2.3


From 568c5fe5a54f2654f5a4c599c45b8a62ed9a2013 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Tue, 10 Jan 2017 13:35:42 -0800
Subject: mm: Introduce lm_alias

Certain architectures may have the kernel image mapped separately to
alias the linear map. Introduce a macro lm_alias to translate a kernel
image symbol into its linear alias. This is used in part with work to
add CONFIG_DEBUG_VIRTUAL support for arm64.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/mm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fe6b4036664a..5dc9c4650522 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -76,6 +76,10 @@ extern int mmap_rnd_compat_bits __read_mostly;
 #define page_to_virt(x)	__va(PFN_PHYS(page_to_pfn(x)))
 #endif
 
+#ifndef lm_alias
+#define lm_alias(x)	__va(__pa_symbol(x))
+#endif
+
 /*
  * To prevent common memory management code establishing
  * a zero page mapping on a read fault.
-- 
cgit v1.2.3


From cbbd86075a7e32b6e67f77d374b9bb53841ab1c7 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Wed, 11 Jan 2017 17:09:50 +0100
Subject: video: fbdev: imxfb: remove the macros for initializing the DMACR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current definitions of DMACR_HM() and DMACR_TM() are correct only
for imx1, they're wrong for imx21.

The macros are meant for legacy board files only, they're not applicable
for boards using device tree.

At the moment, there are no boards using these macros. So it should be
safe to drop them rather than making them work for both imx1 and imx21,
which would require a change in the platform data struct.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/linux/platform_data/video-imxfb.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/video-imxfb.h b/include/linux/platform_data/video-imxfb.h
index 18e908324549..a5c0a71ec914 100644
--- a/include/linux/platform_data/video-imxfb.h
+++ b/include/linux/platform_data/video-imxfb.h
@@ -47,10 +47,6 @@
 #define LSCR1_GRAY2(x)            (((x) & 0xf) << 4)
 #define LSCR1_GRAY1(x)            (((x) & 0xf))
 
-#define DMACR_BURST	(1 << 31)
-#define DMACR_HM(x)	(((x) & 0xf) << 16)
-#define DMACR_TM(x)	((x) & 0xf)
-
 struct imx_fb_videomode {
 	struct fb_videomode mode;
 	u32 pcr;
-- 
cgit v1.2.3


From 15df6d98ec3b40775918fc6ef73d7f1c2d0cf870 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Tue, 10 Jan 2017 18:48:12 +0100
Subject: power: supply: axp20x_usb_power: fix warning on 64bit

Casting of_device_get_match_data return value to int causes warning on 64bit
architectures.

../drivers/power/supply/axp20x_usb_power.c: In function
'axp20x_usb_power_probe':
../drivers/power/supply/axp20x_usb_power.c:297:21: warning: cast from
pointer to integer of different size [-Wpointer-to-int-cast]

Fixes: 0dcc70ca8644 ("power: supply: axp20x_usb_power: use of_device_id
    data field instead of device_is_compatible")
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/mfd/axp20x.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index 812806d6319b..f848ee86a339 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -13,7 +13,7 @@
 
 #include <linux/regmap.h>
 
-enum {
+enum axp20x_variants {
 	AXP152_ID = 0,
 	AXP202_ID,
 	AXP209_ID,
-- 
cgit v1.2.3


From 818e3012c2eac4885bf7278c5144a14186d742d2 Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:38 +1100
Subject: power: supply: bq27xxx: rename BQ27500 allow for deprecation in
 future.

The BQ2750X definition exists only to satisfy backwards compatibility.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index bed9557b69e7..4c904d00564c 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -4,7 +4,7 @@
 enum bq27xxx_chip {
 	BQ27000 = 1, /* bq27000, bq27200 */
 	BQ27010, /* bq27010, bq27210 */
-	BQ27500, /* bq27500 */
+	BQ2750X, /* bq27500 deprecated alias */
 	BQ27510, /* bq27510, bq27520 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
-- 
cgit v1.2.3


From 6da6e4bdd383d8efdf50da6d0933a16ad3a590f6 Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:39 +1100
Subject: power: supply: bq27xxx: rename BQ27510 allow for deprecation in
 future.

The BQ2751X definition exists only to satisfy backwards compatibility.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 4c904d00564c..651f265837a4 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -5,7 +5,7 @@ enum bq27xxx_chip {
 	BQ27000 = 1, /* bq27000, bq27200 */
 	BQ27010, /* bq27010, bq27210 */
 	BQ2750X, /* bq27500 deprecated alias */
-	BQ27510, /* bq27510, bq27520 */
+	BQ2751X, /* bq27510, bq27520 deprecated alias */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From 32833635b0fe9bf71bbf867d1c3abfb5b006bf29 Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:40 +1100
Subject: power: supply: bq27xxx: adds specific support for bq27500/1 revision.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds the BQ27500 chip definition to specifically match the
bq27500/1 functionality as described in the datasheet.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 651f265837a4..76b623d7a9a4 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -6,6 +6,7 @@ enum bq27xxx_chip {
 	BQ27010, /* bq27010, bq27210 */
 	BQ2750X, /* bq27500 deprecated alias */
 	BQ2751X, /* bq27510, bq27520 deprecated alias */
+	BQ27500, /* bq27500/1 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From bd28177f3ec8367fbb3c56cfcf1c1a46e3fe240a Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:41 +1100
Subject: power: supply: bq27xxx: adds specific support for bq27510-g1
 revision.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds the BQ27510G1 chip definition to specifically match the
bq27510-G1 functionality as described in the datasheet.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 76b623d7a9a4..3af0815426c9 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -7,6 +7,7 @@ enum bq27xxx_chip {
 	BQ2750X, /* bq27500 deprecated alias */
 	BQ2751X, /* bq27510, bq27520 deprecated alias */
 	BQ27500, /* bq27500/1 */
+	BQ27510G1, /* bq27510G1 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From 698a2bf5fc31d85d428a2ae495775b61381a495e Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:42 +1100
Subject: power: supply: bq27xxx: adds specific support for bq27510-g2
 revision.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds the BQ27510G2 chip definition to specifically match the
bq27510-G2 functionality as described in the datasheet.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 3af0815426c9..79772ca231b1 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -8,6 +8,7 @@ enum bq27xxx_chip {
 	BQ2751X, /* bq27510, bq27520 deprecated alias */
 	BQ27500, /* bq27500/1 */
 	BQ27510G1, /* bq27510G1 */
+	BQ27510G2, /* bq27510G2 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From 71375aa7d6a7392d4968f6a562b437cb4958f956 Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:43 +1100
Subject: power: supply: bq27xxx: adds specific support for bq27510-g3
 revision.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds the BQ27510G3 chip definition to specifically match the
bq27510-G3 functionality as described in the datasheet.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Andrew F. Davis <afd@ti.com>
Tested-by: Chris Lapa <chris@lapa.com.au>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 79772ca231b1..c45ce71a6158 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -9,6 +9,7 @@ enum bq27xxx_chip {
 	BQ27500, /* bq27500/1 */
 	BQ27510G1, /* bq27510G1 */
 	BQ27510G2, /* bq27510G2 */
+	BQ27510G3, /* bq27510G3 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From 68f2a813eb25bd0ef72453aeef9b1ce156157d14 Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:44 +1100
Subject: power: supply: bq27xxx: adds specific support for bq27520-g1
 revision.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds the BQ27520G1 chip definition to specifically match the
bq27520-G1 functionality as described in the datasheet.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index c45ce71a6158..eddd96936875 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -10,6 +10,7 @@ enum bq27xxx_chip {
 	BQ27510G1, /* bq27510G1 */
 	BQ27510G2, /* bq27510G2 */
 	BQ27510G3, /* bq27510G3 */
+	BQ27520G1, /* bq27520G1 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From a5deb9a93040a4a221ef8a67c88ecd72cd1f3625 Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:45 +1100
Subject: power: supply: bq27xxx: adds specific support for bq27520-g2
 revision.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds the BQ27520G2 chip definition to specifically match the
bq27520-G2 functionality as described in the datasheet.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index eddd96936875..a3fc34a4ef72 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -11,6 +11,7 @@ enum bq27xxx_chip {
 	BQ27510G2, /* bq27510G2 */
 	BQ27510G3, /* bq27510G3 */
 	BQ27520G1, /* bq27520G1 */
+	BQ27520G2, /* bq27520G2 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From 825e915ba2e811b91a034ac6290cd172387c5447 Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:46 +1100
Subject: power: supply: bq27xxx: adds specific support for bq27520-g3
 revision.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds the BQ27520G3 chip definition to specifically match the
bq27520-G3 functionality as described in the datasheet.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index a3fc34a4ef72..5c12717d0f75 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -12,6 +12,7 @@ enum bq27xxx_chip {
 	BQ27510G3, /* bq27510G3 */
 	BQ27520G1, /* bq27520G1 */
 	BQ27520G2, /* bq27520G2 */
+	BQ27520G3, /* bq27520G3 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From 8835cae5f2abd7f7a3143afe357f416aff5517a4 Mon Sep 17 00:00:00 2001
From: Chris Lapa <chris@lapa.com.au>
Date: Wed, 11 Jan 2017 12:44:47 +1100
Subject: power: supply: bq27xxx: adds specific support for bq27520-g4
 revision.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds the BQ27520G4 chip definition to specifically match the
bq27520-G4 functionality as described in the datasheet.

Signed-off-by: Chris Lapa <chris@lapa.com.au>
Acked-by: Pali Rohár <pali.rohar@gmail.com>
Reviewed-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/linux/power/bq27xxx_battery.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 5c12717d0f75..b312bcef53da 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -13,6 +13,7 @@ enum bq27xxx_chip {
 	BQ27520G1, /* bq27520G1 */
 	BQ27520G2, /* bq27520G2 */
 	BQ27520G3, /* bq27520G3 */
+	BQ27520G4, /* bq27520G4 */
 	BQ27530, /* bq27530, bq27531 */
 	BQ27541, /* bq27541, bq27542, bq27546, bq27742 */
 	BQ27545, /* bq27545 */
-- 
cgit v1.2.3


From 729204ef49ec00b788ce23deb9eb922a5769f55d Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Sat, 17 Dec 2016 18:49:09 +0800
Subject: block: relax check on sg gap

If the last bvec of the 1st bio and the 1st bvec of the next
bio are physically contigious, and the latter can be merged
to last segment of the 1st bio, we should think they don't
violate sg gap(or virt boundary) limit.

Both Vitaly and Dexuan reported lots of unmergeable small bios
are observed when running mkfs on Hyper-V virtual storage, and
performance becomes quite low. This patch fixes that performance
issue.

The same issue should exist on NVMe, since it sets virt boundary too.

Reported-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reported-by: Dexuan Cui <decui@microsoft.com>
Tested-by: Dexuan Cui <decui@microsoft.com>
Cc: Keith Busch <keith.busch@intel.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 83695641bd5e..b20da8dfa7ec 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1607,6 +1607,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
+/*
+ * Check if the two bvecs from two bios can be merged to one segment.
+ * If yes, no need to check gap between the two bios since the 1st bio
+ * and the 1st bvec in the 2nd bio can be handled in one segment.
+ */
+static inline bool bios_segs_mergeable(struct request_queue *q,
+		struct bio *prev, struct bio_vec *prev_last_bv,
+		struct bio_vec *next_first_bv)
+{
+	if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv))
+		return false;
+	if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
+		return false;
+	if (prev->bi_seg_back_size + next_first_bv->bv_len >
+			queue_max_segment_size(q))
+		return false;
+	return true;
+}
+
 static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 			 struct bio *next)
 {
@@ -1616,7 +1635,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 		bio_get_last_bvec(prev, &pb);
 		bio_get_first_bvec(next, &nb);
 
-		return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+		if (!bios_segs_mergeable(q, prev, &pb, &nb))
+			return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
 	}
 
 	return false;
-- 
cgit v1.2.3


From f8a5b12247fe18f7fed801ad262a7ab190e1f848 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 13 Dec 2016 09:24:51 -0700
Subject: blk-mq: make mq_ops a const pointer

We never change it, make that clear.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
---
 include/linux/blk-mq.h | 2 +-
 include/linux/blkdev.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4a2ab5d99ff7..afc81d77e471 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -60,7 +60,7 @@ struct blk_mq_hw_ctx {
 
 struct blk_mq_tag_set {
 	unsigned int		*mq_map;
-	struct blk_mq_ops	*ops;
+	const struct blk_mq_ops	*ops;
 	unsigned int		nr_hw_queues;
 	unsigned int		queue_depth;	/* max hw supported */
 	unsigned int		reserved_tags;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b20da8dfa7ec..2e99d659b0f1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -407,7 +407,7 @@ struct request_queue {
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
 
-	struct blk_mq_ops	*mq_ops;
+	const struct blk_mq_ops	*mq_ops;
 
 	unsigned int		*mq_map;
 
-- 
cgit v1.2.3


From 607904c357c61adf20b8fd18af765e501d61a385 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 9 Jan 2017 10:26:52 -0500
Subject: locking/spinlocks: Remove the unused spin_lock_bh_nested() API

The spin_lock_bh_nested() API is defined but is not used anywhere
in the kernel. So all spin_lock_bh_nested() and related APIs are
now removed.

Signed-off-by: Waiman Long <longman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1483975612-16447-1-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/spinlock.h         | 8 --------
 include/linux/spinlock_api_smp.h | 2 --
 include/linux/spinlock_api_up.h  | 1 -
 3 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 47dd0cebd204..59248dcc6ef3 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -180,8 +180,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define raw_spin_lock_nested(lock, subclass) \
 	_raw_spin_lock_nested(lock, subclass)
-# define raw_spin_lock_bh_nested(lock, subclass) \
-	_raw_spin_lock_bh_nested(lock, subclass)
 
 # define raw_spin_lock_nest_lock(lock, nest_lock)			\
 	 do {								\
@@ -197,7 +195,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 # define raw_spin_lock_nested(lock, subclass)		\
 	_raw_spin_lock(((void)(subclass), (lock)))
 # define raw_spin_lock_nest_lock(lock, nest_lock)	_raw_spin_lock(lock)
-# define raw_spin_lock_bh_nested(lock, subclass)	_raw_spin_lock_bh(lock)
 #endif
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
@@ -317,11 +314,6 @@ do {								\
 	raw_spin_lock_nested(spinlock_check(lock), subclass);	\
 } while (0)
 
-#define spin_lock_bh_nested(lock, subclass)			\
-do {								\
-	raw_spin_lock_bh_nested(spinlock_check(lock), subclass);\
-} while (0)
-
 #define spin_lock_nest_lock(lock, nest_lock)				\
 do {									\
 	raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock);	\
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 5344268e6e62..42dfab89e740 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -22,8 +22,6 @@ int in_lock_functions(unsigned long addr);
 void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)		__acquires(lock);
 void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
 								__acquires(lock);
-void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass)
-								__acquires(lock);
 void __lockfunc
 _raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map)
 								__acquires(lock);
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index d3afef9d8dbe..d0d188861ad6 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -57,7 +57,6 @@
 
 #define _raw_spin_lock(lock)			__LOCK(lock)
 #define _raw_spin_lock_nested(lock, subclass)	__LOCK(lock)
-#define _raw_spin_lock_bh_nested(lock, subclass) __LOCK(lock)
 #define _raw_read_lock(lock)			__LOCK(lock)
 #define _raw_write_lock(lock)			__LOCK(lock)
 #define _raw_spin_lock_bh(lock)			__LOCK_BH(lock)
-- 
cgit v1.2.3


From 6c0b2e833f1439ebcbd7258b655623c1aef23ffb Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 11 Jan 2017 16:32:17 +0200
Subject: soc: qcom: smem_state: Fix include for ERR_PTR()

The correct include file for getting errno constants and ERR_PTR() is
linux/err.h, rather than linux/errno.h, so fix the include.

Fixes: e8b123e60084 ("soc: qcom: smem_state: Add stubs for disabled smem_state")
Acked-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
---
 include/linux/soc/qcom/smem_state.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/soc/qcom/smem_state.h b/include/linux/soc/qcom/smem_state.h
index 7b88697929e9..b8478ee7a71f 100644
--- a/include/linux/soc/qcom/smem_state.h
+++ b/include/linux/soc/qcom/smem_state.h
@@ -1,7 +1,7 @@
 #ifndef __QCOM_SMEM_STATE__
 #define __QCOM_SMEM_STATE__
 
-#include <linux/errno.h>
+#include <linux/err.h>
 
 struct device_node;
 struct qcom_smem_state;
-- 
cgit v1.2.3


From 732dbf3a6104a3abfcfcd066dcaf89e5054ce009 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Dec 2016 08:31:34 +0100
Subject: serial: do not accept sysrq characters via serial port

many embedded boards have a disconnected TTL level serial which can
generate some garbage that can lead to spurious false sysrq detects.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 5def8e830fb0..58484fb35cc8 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -450,7 +450,7 @@ extern void uart_handle_cts_change(struct uart_port *uport,
 extern void uart_insert_char(struct uart_port *port, unsigned int status,
 		 unsigned int overrun, unsigned int ch, unsigned int flag);
 
-#ifdef SUPPORT_SYSRQ
+#if defined(SUPPORT_SYSRQ) && defined(CONFIG_MAGIC_SYSRQ_SERIAL)
 static inline int
 uart_handle_sysrq_char(struct uart_port *port, unsigned int ch)
 {
-- 
cgit v1.2.3


From 738b35ccee1bcd7cf4af147edd76e7880533ad9f Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 11 Jan 2017 21:13:02 -0800
Subject: net: core: Make netif_wake_subqueue a wrapper

netif_wake_subqueue() is duplicating the same thing that netif_tx_wake_queue()
does, so make it call it directly after looking up the queue from the index.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ebd9e2c12f44..97ae0ac513ee 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3106,7 +3106,19 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev,
 	return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb));
 }
 
-void netif_wake_subqueue(struct net_device *dev, u16 queue_index);
+/**
+ *	netif_wake_subqueue - allow sending packets on subqueue
+ *	@dev: network device
+ *	@queue_index: sub queue index
+ *
+ * Resume individual transmit queue of a device with multiple transmit queues.
+ */
+static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
+
+	netif_tx_wake_queue(txq);
+}
 
 #ifdef CONFIG_XPS
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
-- 
cgit v1.2.3


From 6b8cc1d11ef75c5b9c530b3d0d148f3c2dd25f93 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 12 Jan 2017 11:51:32 +0100
Subject: bpf: pass original insn directly to convert_ctx_access

Currently, when calling convert_ctx_access() callback for the various
program types, we pass in insn->dst_reg, insn->src_reg, insn->off from
the original instruction. This information is needed to rewrite the
instruction that is based on the user ctx structure into a kernel
representation for the ctx. As we'd like to allow access size beyond
just BPF_W, we'd need also insn->code for that in order to decode the
original access size. Given that, lets just pass insn directly to the
convert_ctx_access() callback and work on that to not clutter the
callback with even more arguments we need to pass when everything is
already contained in insn. So lets go through that once, no functional
change.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 94ea8d2383e6..f8c3560b01db 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -161,9 +161,10 @@ struct bpf_verifier_ops {
 				enum bpf_reg_type *reg_type);
 	int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
 			    const struct bpf_prog *prog);
-	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
-				  int src_reg, int ctx_off,
-				  struct bpf_insn *insn, struct bpf_prog *prog);
+	u32 (*convert_ctx_access)(enum bpf_access_type type,
+				  const struct bpf_insn *src,
+				  struct bpf_insn *dst,
+				  struct bpf_prog *prog);
 };
 
 struct bpf_prog_type_list {
-- 
cgit v1.2.3


From 3a2f5a59a695a73e0cde9a61e0feae5fa730e936 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Tue, 10 Jan 2017 12:28:32 -0500
Subject: security,selinux,smack: kill security_task_wait hook

As reported by yangshukui, a permission denial from security_task_wait()
can lead to a soft lockup in zap_pid_ns_processes() since it only expects
sys_wait4() to return 0 or -ECHILD. Further, security_task_wait() can
in general lead to zombies; in the absence of some way to automatically
reparent a child process upon a denial, the hook is not useful.  Remove
the security hook and its implementations in SELinux and Smack.  Smack
already removed its check from its hook.

Reported-by: yangshukui <yangshukui@huawei.com>
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hooks.h | 7 -------
 include/linux/security.h  | 6 ------
 2 files changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 0dde95900196..6fe7a5cb0be1 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -666,11 +666,6 @@
  *	@sig contains the signal value.
  *	@secid contains the sid of the process where the signal originated
  *	Return 0 if permission is granted.
- * @task_wait:
- *	Check permission before allowing a process to reap a child process @p
- *	and collect its status information.
- *	@p contains the task_struct for process.
- *	Return 0 if permission is granted.
  * @task_prctl:
  *	Check permission before performing a process control operation on the
  *	current process.
@@ -1507,7 +1502,6 @@ union security_list_options {
 	int (*task_movememory)(struct task_struct *p);
 	int (*task_kill)(struct task_struct *p, struct siginfo *info,
 				int sig, u32 secid);
-	int (*task_wait)(struct task_struct *p);
 	int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3,
 				unsigned long arg4, unsigned long arg5);
 	void (*task_to_inode)(struct task_struct *p, struct inode *inode);
@@ -1767,7 +1761,6 @@ struct security_hook_heads {
 	struct list_head task_getscheduler;
 	struct list_head task_movememory;
 	struct list_head task_kill;
-	struct list_head task_wait;
 	struct list_head task_prctl;
 	struct list_head task_to_inode;
 	struct list_head ipc_permission;
diff --git a/include/linux/security.h b/include/linux/security.h
index f4ebac117fa6..d3868f2ebada 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -332,7 +332,6 @@ int security_task_getscheduler(struct task_struct *p);
 int security_task_movememory(struct task_struct *p);
 int security_task_kill(struct task_struct *p, struct siginfo *info,
 			int sig, u32 secid);
-int security_task_wait(struct task_struct *p);
 int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			unsigned long arg4, unsigned long arg5);
 void security_task_to_inode(struct task_struct *p, struct inode *inode);
@@ -980,11 +979,6 @@ static inline int security_task_kill(struct task_struct *p,
 	return 0;
 }
 
-static inline int security_task_wait(struct task_struct *p)
-{
-	return 0;
-}
-
 static inline int security_task_prctl(int option, unsigned long arg2,
 				      unsigned long arg3,
 				      unsigned long arg4,
-- 
cgit v1.2.3


From d8c34b949d8c9f61e099e00f22770e400adf2b76 Mon Sep 17 00:00:00 2001
From: Gideon Israel Dsouza <gidisrael@gmail.com>
Date: Sat, 31 Dec 2016 21:26:23 +0530
Subject: crypto: Replaced gcc specific attributes with macros from compiler.h

Continuing from this commit: 52f5684c8e1e
("kernel: use macros from compiler.h instead of __attribute__((...))")

I submitted 4 total patches. They are part of task I've taken up to
increase compiler portability in the kernel. I've cleaned up the
subsystems under /kernel /mm /block and /security, this patch targets
/crypto.

There is <linux/compiler.h> which provides macros for various gcc specific
constructs. Eg: __weak for __attribute__((weak)). I've cleaned all
instances of gcc specific attributes with the right macros for the crypto
subsystem.

I had to make one additional change into compiler-gcc.h for the case when
one wants to use this: __attribute__((aligned) and not specify an alignment
factor. From the gcc docs, this will result in the largest alignment for
that data type on the target machine so I've named the macro
__aligned_largest. Please advise if another name is more appropriate.

Signed-off-by: Gideon Israel Dsouza <gidisrael@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/compiler-gcc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 0444b1336268..fddd1a5eb322 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -116,6 +116,7 @@
  */
 #define __pure			__attribute__((pure))
 #define __aligned(x)		__attribute__((aligned(x)))
+#define __aligned_largest	__attribute__((aligned))
 #define __printf(a, b)		__attribute__((format(printf, a, b)))
 #define __scanf(a, b)		__attribute__((format(scanf, a, b)))
 #define __attribute_const__	__attribute__((__const__))
-- 
cgit v1.2.3


From 2cf8e2dfdf88363476f23bc600745250b94dbbed Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Thu, 12 Jan 2017 11:17:39 +0000
Subject: regmap: Fixup the kernel-doc comments on functions/structures

Most of the kernel-doc comments in regmap don't actually generate
correctly. This patch fixes up a few common issues, corrects some typos
and adds some missing argument descriptions.

The most common issues being using a : after the function name which
causes the short description to not render correctly and not separating
the long and short descriptions of the function. There are quite a few
instances of arguments not being described or given the wrong name as
well.

This patch doesn't fixup functions/structures that are currently missing
descriptions.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 115 ++++++++++++++++++++++++++++---------------------
 1 file changed, 67 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 9adc7b21903d..38a85d50fefd 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -39,12 +39,13 @@ enum regcache_type {
 };
 
 /**
- * Default value for a register.  We use an array of structs rather
- * than a simple array as many modern devices have very sparse
- * register maps.
+ * struct reg_default - Default value for a register.
  *
  * @reg: Register address.
  * @def: Register default value.
+ *
+ * We use an array of structs rather than a simple array as many modern devices
+ * have very sparse register maps.
  */
 struct reg_default {
 	unsigned int reg;
@@ -52,12 +53,14 @@ struct reg_default {
 };
 
 /**
- * Register/value pairs for sequences of writes with an optional delay in
- * microseconds to be applied after each write.
+ * struct reg_sequence - An individual write from a sequence of writes.
  *
  * @reg: Register address.
  * @def: Register value.
  * @delay_us: Delay to be applied after the register write in microseconds
+ *
+ * Register/value pairs for sequences of writes with an optional delay in
+ * microseconds to be applied after each write.
  */
 struct reg_sequence {
 	unsigned int reg;
@@ -97,6 +100,7 @@ struct reg_sequence {
 
 /**
  * regmap_read_poll_timeout - Poll until a condition is met or a timeout occurs
+ *
  * @map: Regmap to read from
  * @addr: Address to poll
  * @val: Unsigned integer variable to read the value into
@@ -145,8 +149,8 @@ enum regmap_endian {
 };
 
 /**
- * A register range, used for access related checks
- * (readable/writeable/volatile/precious checks)
+ * struct regmap_range - A register range, used for access related checks
+ *                       (readable/writeable/volatile/precious checks)
  *
  * @range_min: address of first register
  * @range_max: address of last register
@@ -158,16 +162,18 @@ struct regmap_range {
 
 #define regmap_reg_range(low, high) { .range_min = low, .range_max = high, }
 
-/*
- * A table of ranges including some yes ranges and some no ranges.
- * If a register belongs to a no_range, the corresponding check function
- * will return false. If a register belongs to a yes range, the corresponding
- * check function will return true. "no_ranges" are searched first.
+/**
+ * struct regmap_access_table - A table of register ranges for access checks
  *
  * @yes_ranges : pointer to an array of regmap ranges used as "yes ranges"
  * @n_yes_ranges: size of the above array
  * @no_ranges: pointer to an array of regmap ranges used as "no ranges"
  * @n_no_ranges: size of the above array
+ *
+ * A table of ranges including some yes ranges and some no ranges.
+ * If a register belongs to a no_range, the corresponding check function
+ * will return false. If a register belongs to a yes range, the corresponding
+ * check function will return true. "no_ranges" are searched first.
  */
 struct regmap_access_table {
 	const struct regmap_range *yes_ranges;
@@ -180,7 +186,7 @@ typedef void (*regmap_lock)(void *);
 typedef void (*regmap_unlock)(void *);
 
 /**
- * Configuration for the register map of a device.
+ * struct regmap_config - Configuration for the register map of a device.
  *
  * @name: Optional name of the regmap. Useful when a device has multiple
  *        register regions.
@@ -313,22 +319,24 @@ struct regmap_config {
 };
 
 /**
- * Configuration for indirectly accessed or paged registers.
- * Registers, mapped to this virtual range, are accessed in two steps:
- *     1. page selector register update;
- *     2. access through data window registers.
+ * struct regmap_range_cfg - Configuration for indirectly accessed or paged
+ *                           registers.
  *
  * @name: Descriptive name for diagnostics
  *
  * @range_min: Address of the lowest register address in virtual range.
  * @range_max: Address of the highest register in virtual range.
  *
- * @page_sel_reg: Register with selector field.
- * @page_sel_mask: Bit shift for selector value.
- * @page_sel_shift: Bit mask for selector value.
+ * @selector_reg: Register with selector field.
+ * @selector_mask: Bit shift for selector value.
+ * @selector_shift: Bit mask for selector value.
  *
  * @window_start: Address of first (lowest) register in data window.
  * @window_len: Number of registers in data window.
+ *
+ * Registers, mapped to this virtual range, are accessed in two steps:
+ *     1. page selector register update;
+ *     2. access through data window registers.
  */
 struct regmap_range_cfg {
 	const char *name;
@@ -371,7 +379,8 @@ typedef struct regmap_async *(*regmap_hw_async_alloc)(void);
 typedef void (*regmap_hw_free_context)(void *context);
 
 /**
- * Description of a hardware bus for the register map infrastructure.
+ * struct regmap_bus - Description of a hardware bus for the register map
+ *                     infrastructure.
  *
  * @fast_io: Register IO is fast. Use a spinlock instead of a mutex
  *	     to perform locking. This field is ignored if custom lock/unlock
@@ -384,6 +393,10 @@ typedef void (*regmap_hw_free_context)(void *context);
  *               must serialise with respect to non-async I/O.
  * @reg_write: Write a single register value to the given register address. This
  *             write operation has to complete when returning from the function.
+ * @reg_update_bits: Update bits operation to be used against volatile
+ *                   registers, intended for devices supporting some mechanism
+ *                   for setting clearing bits without having to
+ *                   read/modify/write.
  * @read: Read operation.  Data is returned in the buffer used to transmit
  *         data.
  * @reg_read: Read a single register value from a given register address.
@@ -513,7 +526,7 @@ struct regmap *__devm_regmap_init_ac97(struct snd_ac97 *ac97,
 #endif
 
 /**
- * regmap_init(): Initialise register map
+ * regmap_init() - Initialise register map
  *
  * @dev: Device that will be interacted with
  * @bus: Bus-specific callbacks to use with device
@@ -531,7 +544,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 		      const struct regmap_config *config);
 
 /**
- * regmap_init_i2c(): Initialise register map
+ * regmap_init_i2c() - Initialise register map
  *
  * @i2c: Device that will be interacted with
  * @config: Configuration for register map
@@ -544,9 +557,9 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 				i2c, config)
 
 /**
- * regmap_init_spi(): Initialise register map
+ * regmap_init_spi() - Initialise register map
  *
- * @spi: Device that will be interacted with
+ * @dev: Device that will be interacted with
  * @config: Configuration for register map
  *
  * The return value will be an ERR_PTR() on error or a valid pointer to
@@ -557,8 +570,9 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 				dev, config)
 
 /**
- * regmap_init_spmi_base(): Create regmap for the Base register space
- * @sdev:	SPMI device that will be interacted with
+ * regmap_init_spmi_base() - Create regmap for the Base register space
+ *
+ * @dev:	SPMI device that will be interacted with
  * @config:	Configuration for register map
  *
  * The return value will be an ERR_PTR() on error or a valid pointer to
@@ -569,8 +583,9 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 				dev, config)
 
 /**
- * regmap_init_spmi_ext(): Create regmap for Ext register space
- * @sdev:	Device that will be interacted with
+ * regmap_init_spmi_ext() - Create regmap for Ext register space
+ *
+ * @dev:	Device that will be interacted with
  * @config:	Configuration for register map
  *
  * The return value will be an ERR_PTR() on error or a valid pointer to
@@ -581,7 +596,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 				dev, config)
 
 /**
- * regmap_init_mmio_clk(): Initialise register map with register clock
+ * regmap_init_mmio_clk() - Initialise register map with register clock
  *
  * @dev: Device that will be interacted with
  * @clk_id: register clock consumer ID
@@ -596,7 +611,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 				dev, clk_id, regs, config)
 
 /**
- * regmap_init_mmio(): Initialise register map
+ * regmap_init_mmio() - Initialise register map
  *
  * @dev: Device that will be interacted with
  * @regs: Pointer to memory-mapped IO region
@@ -609,7 +624,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 	regmap_init_mmio_clk(dev, NULL, regs, config)
 
 /**
- * regmap_init_ac97(): Initialise AC'97 register map
+ * regmap_init_ac97() - Initialise AC'97 register map
  *
  * @ac97: Device that will be interacted with
  * @config: Configuration for register map
@@ -623,7 +638,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 
 /**
- * devm_regmap_init(): Initialise managed register map
+ * devm_regmap_init() - Initialise managed register map
  *
  * @dev: Device that will be interacted with
  * @bus: Bus-specific callbacks to use with device
@@ -640,7 +655,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 				dev, bus, bus_context, config)
 
 /**
- * devm_regmap_init_i2c(): Initialise managed register map
+ * devm_regmap_init_i2c() - Initialise managed register map
  *
  * @i2c: Device that will be interacted with
  * @config: Configuration for register map
@@ -654,9 +669,9 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 				i2c, config)
 
 /**
- * devm_regmap_init_spi(): Initialise register map
+ * devm_regmap_init_spi() - Initialise register map
  *
- * @spi: Device that will be interacted with
+ * @dev: Device that will be interacted with
  * @config: Configuration for register map
  *
  * The return value will be an ERR_PTR() on error or a valid pointer
@@ -668,8 +683,9 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 				dev, config)
 
 /**
- * devm_regmap_init_spmi_base(): Create managed regmap for Base register space
- * @sdev:	SPMI device that will be interacted with
+ * devm_regmap_init_spmi_base() - Create managed regmap for Base register space
+ *
+ * @dev:	SPMI device that will be interacted with
  * @config:	Configuration for register map
  *
  * The return value will be an ERR_PTR() on error or a valid pointer
@@ -681,8 +697,9 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 				dev, config)
 
 /**
- * devm_regmap_init_spmi_ext(): Create managed regmap for Ext register space
- * @sdev:	SPMI device that will be interacted with
+ * devm_regmap_init_spmi_ext() - Create managed regmap for Ext register space
+ *
+ * @dev:	SPMI device that will be interacted with
  * @config:	Configuration for register map
  *
  * The return value will be an ERR_PTR() on error or a valid pointer
@@ -694,7 +711,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 				dev, config)
 
 /**
- * devm_regmap_init_mmio_clk(): Initialise managed register map with clock
+ * devm_regmap_init_mmio_clk() - Initialise managed register map with clock
  *
  * @dev: Device that will be interacted with
  * @clk_id: register clock consumer ID
@@ -710,7 +727,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 				dev, clk_id, regs, config)
 
 /**
- * devm_regmap_init_mmio(): Initialise managed register map
+ * devm_regmap_init_mmio() - Initialise managed register map
  *
  * @dev: Device that will be interacted with
  * @regs: Pointer to memory-mapped IO region
@@ -724,7 +741,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 	devm_regmap_init_mmio_clk(dev, NULL, regs, config)
 
 /**
- * devm_regmap_init_ac97(): Initialise AC'97 register map
+ * devm_regmap_init_ac97() - Initialise AC'97 register map
  *
  * @ac97: Device that will be interacted with
  * @config: Configuration for register map
@@ -799,7 +816,7 @@ bool regmap_reg_in_ranges(unsigned int reg,
 			  unsigned int nranges);
 
 /**
- * Description of an register field
+ * struct reg_field - Description of an register field
  *
  * @reg: Offset of the register within the regmap bank
  * @lsb: lsb of the register field.
@@ -840,7 +857,7 @@ int regmap_fields_update_bits_base(struct regmap_field *field,  unsigned int id,
 				   bool *change, bool async, bool force);
 
 /**
- * Description of an IRQ for the generic regmap irq_chip.
+ * struct regmap_irq - Description of an IRQ for the generic regmap irq_chip.
  *
  * @reg_offset: Offset of the status/mask register within the bank
  * @mask:       Mask used to flag/control the register.
@@ -860,9 +877,7 @@ struct regmap_irq {
 	[_irq] = { .reg_offset = (_off), .mask = (_mask) }
 
 /**
- * Description of a generic regmap irq_chip.  This is not intended to
- * handle every possible interrupt controller, but it should handle a
- * substantial proportion of those that are found in the wild.
+ * struct regmap_irq_chip - Description of a generic regmap irq_chip.
  *
  * @name:        Descriptive name for IRQ controller.
  *
@@ -896,6 +911,10 @@ struct regmap_irq {
  *		     after handling the interrupts in regmap_irq_handler().
  * @irq_drv_data:    Driver specific IRQ data which is passed as parameter when
  *		     driver specific pre/post interrupt handler is called.
+ *
+ * This is not intended to handle every possible interrupt controller, but
+ * it should handle a substantial proportion of those that are found in the
+ * wild.
  */
 struct regmap_irq_chip {
 	const char *name;
-- 
cgit v1.2.3


From 8fb472c09b9df478a062eacc7841448e40fc3c17 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Thu, 12 Jan 2017 15:53:33 +0100
Subject: ipmr: improve hash scalability

Recently we started using ipmr with thousands of entries and easily hit
soft lockups on smaller devices. The reason is that the hash function
uses the high order bits from the src and dst, but those don't change in
many common cases, also the hash table  is only 64 elements so with
thousands it doesn't scale at all.
This patch migrates the hash table to rhashtable, and in particular the
rhl interface which allows for duplicate elements to be chained because
of the MFC_PROXY support (*,G; *,*,oif cases) which allows for multiple
duplicate entries to be added with different interfaces (IMO wrong, but
it's been in for a long time).

And here are some results from tests I've run in a VM:
 mr_table size (default, allocated for all namespaces):
  Before                    After
   49304 bytes               2400 bytes

 Add 65000 routes (the diff is much larger on smaller devices):
  Before                    After
   1m42s                     58s

 Forwarding 256 byte packets with 65000 routes (test done in a VM):
  Before                    After
   3 Mbps / ~1465 pps        122 Mbps / ~59000 pps

As a bonus we no longer see the soft lockups on smaller devices which
showed up even with 2000 entries before.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h | 57 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index f019b62f27b5..d7f63339ef0b 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -3,6 +3,7 @@
 
 #include <linux/in.h>
 #include <linux/pim.h>
+#include <linux/rhashtable.h>
 #include <net/sock.h>
 #include <uapi/linux/mroute.h>
 
@@ -60,7 +61,6 @@ struct vif_device {
 #define VIFF_STATIC 0x8000
 
 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-#define MFC_LINES 64
 
 struct mr_table {
 	struct list_head	list;
@@ -69,8 +69,9 @@ struct mr_table {
 	struct sock __rcu	*mroute_sk;
 	struct timer_list	ipmr_expire_timer;
 	struct list_head	mfc_unres_queue;
-	struct list_head	mfc_cache_array[MFC_LINES];
 	struct vif_device	vif_table[MAXVIFS];
+	struct rhltable		mfc_hash;
+	struct list_head	mfc_cache_list;
 	int			maxvif;
 	atomic_t		cache_resolve_queue_len;
 	bool			mroute_do_assert;
@@ -85,17 +86,48 @@ enum {
 	MFC_STATIC = BIT(0),
 };
 
+struct mfc_cache_cmp_arg {
+	__be32 mfc_mcastgrp;
+	__be32 mfc_origin;
+};
+
+/**
+ * struct mfc_cache - multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_mcastgrp: destination multicast group address
+ * @mfc_origin: source address
+ * @cmparg: used for rhashtable comparisons
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ */
 struct mfc_cache {
-	struct list_head list;
-	__be32 mfc_mcastgrp;			/* Group the entry belongs to 	*/
-	__be32 mfc_origin;			/* Source of packet 		*/
-	vifi_t mfc_parent;			/* Source interface		*/
-	int mfc_flags;				/* Flags on line		*/
+	struct rhlist_head mnode;
+	union {
+		struct {
+			__be32 mfc_mcastgrp;
+			__be32 mfc_origin;
+		};
+		struct mfc_cache_cmp_arg cmparg;
+	};
+	vifi_t mfc_parent;
+	int mfc_flags;
 
 	union {
 		struct {
 			unsigned long expires;
-			struct sk_buff_head unresolved;	/* Unresolved buffers		*/
+			struct sk_buff_head unresolved;
 		} unres;
 		struct {
 			unsigned long last_assert;
@@ -105,18 +137,13 @@ struct mfc_cache {
 			unsigned long pkt;
 			unsigned long wrong_if;
 			unsigned long lastuse;
-			unsigned char ttls[MAXVIFS];	/* TTL thresholds		*/
+			unsigned char ttls[MAXVIFS];
 		} res;
 	} mfc_un;
+	struct list_head list;
 	struct rcu_head	rcu;
 };
 
-#ifdef __BIG_ENDIAN
-#define MFC_HASH(a,b)	(((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1))
-#else
-#define MFC_HASH(a,b)	((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1))
-#endif
-
 struct rtmsg;
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
 		   __be32 saddr, __be32 daddr,
-- 
cgit v1.2.3


From 950b0d91dc108f54bccca5a2f75bb46f2df63d29 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 11 Jan 2017 14:13:34 -0800
Subject: pinctrl: core: Fix regression caused by delayed work for hogs

Commit df61b366af26 ("pinctrl: core: Use delayed work for hogs") caused a
regression at least with sh-pfc that is also a GPIO controller as
noted by Geert Uytterhoeven <geert@linux-m68k.org>.

As the original pinctrl_register() has issues calling pin controller
driver functions early before the controller has finished registering,
we can't just revert commit df61b366af26. That would break the drivers
using GENERIC_PINCTRL_GROUPS or GENERIC_PINMUX_FUNCTIONS.

So let's fix the issue with the following steps as a single patch:

1. Revert the late_init parts of commit df61b366af26.

   The late_init clearly won't work and we have to just give up
   on fixing pinctrl_register() for GENERIC_PINCTRL_GROUPS and
   GENERIC_PINMUX_FUNCTIONS.

2. Split pinctrl_register() into two parts

   By splitting pinctrl_register() into pinctrl_init_controller()
   and pinctrl_create_and_start() we have better control over when
   it's safe to call pinctrl_create().

3. Introduce a new pinctrl_register_and_init() function

   As suggested by Linus Walleij <linus.walleij@linaro.org>, we
   can just introduce a new function for the controllers that need
   pinctrl_create() called later.

4. Convert the four known problem cases to use new function

   Let's convert pinctrl-imx, pinctrl-single, sh-pfc and ti-iodelay
   to use the new function to fix the issues. The rest of the drivers
   can be converted later. Let's also update Documentation/pinctrl.txt
   accordingly because of the known issues with pinctrl_register().

Fixes: df61b366af26 ("pinctrl: core: Use delayed work for hogs")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Gary Bisson <gary.bisson@boundarydevices.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinctrl.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index a42e57da270d..8ce2d87a238b 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -141,12 +141,27 @@ struct pinctrl_desc {
 };
 
 /* External interface to pin controller */
+
+extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc,
+				     struct device *dev, void *driver_data,
+				     struct pinctrl_dev **pctldev);
+
+/* Please use pinctrl_register_and_init() instead */
 extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
 				struct device *dev, void *driver_data);
+
 extern void pinctrl_unregister(struct pinctrl_dev *pctldev);
+
+extern int devm_pinctrl_register_and_init(struct device *dev,
+				struct pinctrl_desc *pctldesc,
+				void *driver_data,
+				struct pinctrl_dev **pctldev);
+
+/* Please use devm_pinctrl_register_and_init() instead */
 extern struct pinctrl_dev *devm_pinctrl_register(struct device *dev,
 				struct pinctrl_desc *pctldesc,
 				void *driver_data);
+
 extern void devm_pinctrl_unregister(struct device *dev,
 				struct pinctrl_dev *pctldev);
 
-- 
cgit v1.2.3


From 4fe0395550aeb6709ea5332f46de3644aef7d328 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 9 Jan 2017 21:37:40 +0100
Subject: PCI/MSI: Remove pci_enable_msi_{exact,range}()

All multi-MSI allocations are now done through pci_irq_alloc_vectors(), so
remove the old pci_enable_msi_range() and pci_enable_msi_exact()
interfaces.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index e2d1a124216a..2159376dc673 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1309,14 +1309,7 @@ void pci_msix_shutdown(struct pci_dev *dev);
 void pci_disable_msix(struct pci_dev *dev);
 void pci_restore_msi_state(struct pci_dev *dev);
 int pci_msi_enabled(void);
-int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec);
-static inline int pci_enable_msi_exact(struct pci_dev *dev, int nvec)
-{
-	int rc = pci_enable_msi_range(dev, nvec, nvec);
-	if (rc < 0)
-		return rc;
-	return 0;
-}
+int pci_enable_msi(struct pci_dev *dev);
 int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 			  int minvec, int maxvec);
 static inline int pci_enable_msix_exact(struct pci_dev *dev,
@@ -1347,10 +1340,7 @@ static inline void pci_msix_shutdown(struct pci_dev *dev) { }
 static inline void pci_disable_msix(struct pci_dev *dev) { }
 static inline void pci_restore_msi_state(struct pci_dev *dev) { }
 static inline int pci_msi_enabled(void) { return 0; }
-static inline int pci_enable_msi_range(struct pci_dev *dev, int minvec,
-				       int maxvec)
-{ return -ENOSYS; }
-static inline int pci_enable_msi_exact(struct pci_dev *dev, int nvec)
+static inline int pci_enable_msi(struct pci_dev *dev)
 { return -ENOSYS; }
 static inline int pci_enable_msix_range(struct pci_dev *dev,
 		      struct msix_entry *entries, int minvec, int maxvec)
@@ -1426,8 +1416,6 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
 static inline void pcie_ecrc_get_policy(char *str) { }
 #endif
 
-#define pci_enable_msi(pdev)	pci_enable_msi_exact(pdev, 1)
-
 #ifdef CONFIG_HT_IRQ
 /* The functions a driver should call */
 int  ht_create_irq(struct pci_dev *dev, int idx);
-- 
cgit v1.2.3


From deed7be78f512d003c6290da0a781479b31b3d74 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jan 2017 22:11:32 -0800
Subject: tcp: record most recent RTT in RACK loss detection

Record the most recent RTT in RACK. It is often identical to the
"ca_rtt_us" values in tcp_clean_rtx_queue. But when the packet has
been retransmitted, RACK choses to believe the ACK is for the
(latest) retransmitted packet if the RTT is over minimum RTT.

This requires passing the arrival time of the most recent ACK to
RACK routines. The timestamp is now recorded in the "ack_time"
in tcp_sacktag_state during the ACK processing.

This patch does not change the RACK algorithm itself. It only adds
the RTT variable to prepare the next main patch.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fc5848dad7a4..1255c592719c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -207,6 +207,7 @@ struct tcp_sock {
 	/* Information of the most recently (s)acked skb */
 	struct tcp_rack {
 		struct skb_mstamp mstamp; /* (Re)sent time of the skb */
+		u32 rtt_us;  /* Associated RTT */
 		u8 advanced; /* mstamp advanced since last lost marking */
 		u8 reord;    /* reordering detected */
 	} rack;
-- 
cgit v1.2.3


From 1d0833df594390876647c54c2c88069d29059665 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jan 2017 22:11:34 -0800
Subject: tcp: use sequence to break TS ties for RACK loss detection

The packets inside a jumbo skb (e.g., TSO) share the same skb
timestamp, even though they are sent sequentially on the wire. Since
RACK is based on time, it can not detect some packets inside the
same skb are lost.  However, we can leverage the packet sequence
numbers as extended timestamps to detect losses. Therefore, when
RACK timestamp is identical to skb's timestamp (i.e., one of the
packets of the skb is acked or sacked), we use the sequence numbers
of the acked and unacked packets to break ties.

We can use the same sequence logic to advance RACK xmit time as
well to detect more losses and avoid timeout.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1255c592719c..970d5f00589f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -208,6 +208,7 @@ struct tcp_sock {
 	struct tcp_rack {
 		struct skb_mstamp mstamp; /* (Re)sent time of the skb */
 		u32 rtt_us;  /* Associated RTT */
+		u32 end_seq; /* Ending TCP sequence of the skb */
 		u8 advanced; /* mstamp advanced since last lost marking */
 		u8 reord;    /* reordering detected */
 	} rack;
-- 
cgit v1.2.3


From 840a3cbe89694fad75578856976f180e852e69aa Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jan 2017 22:11:38 -0800
Subject: tcp: remove forward retransmit feature

Forward retransmit is an esoteric feature in RFC3517 (condition(3)
in the NextSeg()). Basically if a packet is not considered lost by
the current criteria (# of dupacks etc), but the congestion window
has room for more packets, then retransmit this packet.

However it actually conflicts with the rest of recovery design. For
example, when reordering is detected we want to be conservative
in retransmitting packets but forward-retransmit feature would
break that to force more retransmission. Also the implementation is
fairly complicated inside the retransmission logic inducing extra
iterations in the write queue. With RACK losses are being detected
timely and this heuristic is no longer necessary. There this patch
removes the feature.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 970d5f00589f..8e5f4c15d0e5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -307,7 +307,6 @@ struct tcp_sock {
 					 */
 
 	int     lost_cnt_hint;
-	u32     retransmit_high;	/* L-bits may be on up to this seqno */
 
 	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/
 	u32	high_seq;	/* snd_nxt at onset of congestion	*/
-- 
cgit v1.2.3


From bec41a11dd3dc8c54f766b4f494140ca92ba7c10 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jan 2017 22:11:39 -0800
Subject: tcp: remove early retransmit

This patch removes the support of RFC5827 early retransmit (i.e.,
fast recovery on small inflight with <3 dupacks) because it is
subsumed by the new RACK loss detection. More specifically when
RACK receives DUPACKs, it'll arm a reordering timer to start fast
recovery after a quarter of (min)RTT, hence it covers the early
retransmit except RACK does not limit itself to specific inflight
or dupack numbers.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8e5f4c15d0e5..4733368f953a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -224,8 +224,7 @@ struct tcp_sock {
 		repair      : 1,
 		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
 	u8	repair_queue;
-	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit  */
-		syn_data:1,	/* SYN includes data */
+	u8	syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
-- 
cgit v1.2.3


From 4a7f6009441144783e5925551c72e3f2e1b0839b Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jan 2017 22:11:41 -0800
Subject: tcp: remove thin_dupack feature

Thin stream DUPACK is to start fast recovery on only one DUPACK
provided the connection is a thin stream (i.e., low inflight).  But
this older feature is now subsumed with RACK. If a connection
receives only a single DUPACK, RACK would arm a reordering timer
and soon starts fast recovery instead of timeout if no further
ACKs are received.

The socket option (THIN_DUPACK) is kept as a nop for compatibility.
Note that this patch does not change another thin-stream feature
which enables linear RTO. Although it might be good to generalize
that in the future (i.e., linear RTO for the first say 3 retries).

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4733368f953a..6c22332afb75 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -220,7 +220,7 @@ struct tcp_sock {
 		unused:5;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
-		thin_dupack : 1,/* Fast retransmit on first dupack      */
+		unused1	    : 1,
 		repair      : 1,
 		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
 	u8	repair_queue;
-- 
cgit v1.2.3


From a665ece8b471de45bc19af05d52a1eaa5bc06dca Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 12 Jan 2017 13:23:31 +0200
Subject: x86/platform/intel: Remove PMIC GPIO block support

Moorestown support was removed by commit:

  1a8359e411eb ("x86/mid: Remove Intel Moorestown")

Remove this leftover.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: platform-driver-x86@vger.kernel.org
Link: http://lkml.kernel.org/r/20170112112331.93236-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/intel_pmic_gpio.h | 15 ---------------
 1 file changed, 15 deletions(-)
 delete mode 100644 include/linux/intel_pmic_gpio.h

(limited to 'include/linux')

diff --git a/include/linux/intel_pmic_gpio.h b/include/linux/intel_pmic_gpio.h
deleted file mode 100644
index 920109a29191..000000000000
--- a/include/linux/intel_pmic_gpio.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef LINUX_INTEL_PMIC_H
-#define LINUX_INTEL_PMIC_H
-
-struct intel_pmic_gpio_platform_data {
-	/* the first IRQ of the chip */
-	unsigned	irq_base;
-	/* number assigned to the first GPIO */
-	unsigned	gpio_base;
-	/* sram address for gpiointr register, the langwell chip will map
-	 * the PMIC spi GPIO expander's GPIOINTR register in sram.
-	 */
-	unsigned	gpiointr;
-};
-
-#endif
-- 
cgit v1.2.3


From 5b485629ba0d5d027880769ff467c587b24b4bde Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sun, 8 Jan 2017 23:58:09 +0900
Subject: kprobes, extable: Identify kprobes trampolines as kernel text area

Improve __kernel_text_address()/kernel_text_address() to return
true if the given address is on a kprobe's instruction slot
trampoline.

This can help stacktraces to determine the address is on a
text area or not.

To implement this atomically in is_kprobe_*_slot(), also change
the insn_cache page list to an RCU list.

This changes timings a bit (it delays page freeing to the RCU garbage
collection phase), but none of that is in the hot path.

Note: this change can add small overhead to stack unwinders because
it adds 2 additional checks to __kernel_text_address(). However, the
impact should be very small, because kprobe_insn_pages list has 1 entry
per 256 probes(on x86, on arm/arm64 it will be 1024 probes),
and kprobe_optinsn_pages has 1 entry per 32 probes(on x86).
In most use cases, the number of kprobe events may be less
than 20, which means that is_kprobe_*_slot() will check just one entry.

Tested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/148388747896.6869.6354262871751682264.stgit@devbox
[ Improved the changelog and coding style. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kprobes.h | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 8f6849084248..16ddfb8b304a 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -278,9 +278,13 @@ struct kprobe_insn_cache {
 	int nr_garbage;
 };
 
+#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c);
 extern void __free_insn_slot(struct kprobe_insn_cache *c,
 			     kprobe_opcode_t *slot, int dirty);
+/* sleep-less address checking routine  */
+extern bool __is_insn_slot_addr(struct kprobe_insn_cache *c,
+				unsigned long addr);
 
 #define DEFINE_INSN_CACHE_OPS(__name)					\
 extern struct kprobe_insn_cache kprobe_##__name##_slots;		\
@@ -294,6 +298,18 @@ static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\
 {									\
 	__free_insn_slot(&kprobe_##__name##_slots, slot, dirty);	\
 }									\
+									\
+static inline bool is_kprobe_##__name##_slot(unsigned long addr)	\
+{									\
+	return __is_insn_slot_addr(&kprobe_##__name##_slots, addr);	\
+}
+#else /* __ARCH_WANT_KPROBES_INSN_SLOT */
+#define DEFINE_INSN_CACHE_OPS(__name)					\
+static inline bool is_kprobe_##__name##_slot(unsigned long addr)	\
+{									\
+	return 0;							\
+}
+#endif
 
 DEFINE_INSN_CACHE_OPS(insn);
 
@@ -330,7 +346,6 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 					     int write, void __user *buffer,
 					     size_t *length, loff_t *ppos);
 #endif
-
 #endif /* CONFIG_OPTPROBES */
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
@@ -481,6 +496,19 @@ static inline int enable_jprobe(struct jprobe *jp)
 	return enable_kprobe(&jp->kp);
 }
 
+#ifndef CONFIG_KPROBES
+static inline bool is_kprobe_insn_slot(unsigned long addr)
+{
+	return false;
+}
+#endif
+#ifndef CONFIG_OPTPROBES
+static inline bool is_kprobe_optinsn_slot(unsigned long addr)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_KPROBES
 /*
  * Blacklist ganerating macro. Specify functions which is not probed
-- 
cgit v1.2.3


From c31cc6a5187e8b09ccee34f81728a90f80e872e7 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 5 Jan 2017 18:11:43 +0100
Subject: sched/cputime: Allow accounting system time using cpustat index

In order to prepare for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y to delay
cputime accounting to the tick, let's provide APIs to account system
time to precise contexts: hardirq, softirq, pure system, ...

Inspired-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1483636310-6557-4-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel_stat.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 00f776816aa3..14b63bb714d4 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -80,6 +80,8 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
+extern void account_system_index_time(struct task_struct *, cputime_t,
+				      enum cpu_usage_stat);
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
-- 
cgit v1.2.3


From 1213699ab426608ff1925ab263dd6925102bb92a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 5 Jan 2017 18:11:44 +0100
Subject: sched/cputime: Export account_guest_time()

In order to prepare for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y to delay
cputime accounting to the tick, let's allow archs to account cputime
directly to gtime.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1483636310-6557-5-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel_stat.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 14b63bb714d4..cfd6c0c6d4e8 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -79,6 +79,7 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 }
 
 extern void account_user_time(struct task_struct *, cputime_t);
+extern void account_guest_time(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
 extern void account_system_index_time(struct task_struct *, cputime_t,
 				      enum cpu_usage_stat);
-- 
cgit v1.2.3


From c8d7dabf8f91fadd265e6eb87afb201d14ea299b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 5 Jan 2017 18:11:50 +0100
Subject: sched/cputime: Rename vtime_account_user() to vtime_flush()

CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y used to accumulate user time and
account it on ticks and context switches only through the
vtime_account_user() function.

Now this model has been generalized on the 3 archs for all kind of
cputime (system, irq, ...) and all the cputime flushing happens under
vtime_account_user().

So let's rename this function to better reflect its new role.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1483636310-6557-11-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel_stat.h | 2 +-
 include/linux/vtime.h       | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index cfd6c0c6d4e8..c3e38ded2d73 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -89,7 +89,7 @@ extern void account_idle_time(cputime_t);
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline void account_process_tick(struct task_struct *tsk, int user)
 {
-	vtime_account_user(tsk);
+	vtime_flush(tsk);
 }
 #else
 extern void account_process_tick(struct task_struct *, int user);
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index aa9bfea8804a..0681fe25abeb 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -58,27 +58,28 @@ static inline void vtime_task_switch(struct task_struct *prev)
 
 extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
-extern void vtime_account_user(struct task_struct *tsk);
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
-static inline void vtime_account_user(struct task_struct *tsk) { }
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void arch_vtime_task_switch(struct task_struct *tsk);
+extern void vtime_account_user(struct task_struct *tsk);
 extern void vtime_user_enter(struct task_struct *tsk);
 
 static inline void vtime_user_exit(struct task_struct *tsk)
 {
 	vtime_account_user(tsk);
 }
+
 extern void vtime_guest_enter(struct task_struct *tsk);
 extern void vtime_guest_exit(struct task_struct *tsk);
 extern void vtime_init_idle(struct task_struct *tsk, int cpu);
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN  */
+static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_user_enter(struct task_struct *tsk) { }
 static inline void vtime_user_exit(struct task_struct *tsk) { }
 static inline void vtime_guest_enter(struct task_struct *tsk) { }
@@ -93,9 +94,11 @@ static inline void vtime_account_irq_exit(struct task_struct *tsk)
 	/* On hard|softirq exit we always account to hard|softirq cputime */
 	vtime_account_system(tsk);
 }
+extern void vtime_flush(struct task_struct *tsk);
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
 static inline void vtime_account_irq_exit(struct task_struct *tsk) { }
+static inline void vtime_flush(struct task_struct *tsk) { }
 #endif
 
 
-- 
cgit v1.2.3


From 642fa448ae6b3a4e5e8737054a094173405b7643 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Tue, 3 Jan 2017 13:43:14 -0800
Subject: sched/core: Remove set_task_state()

This is a nasty interface and setting the state of a foreign task must
not be done. As of the following commit:

  be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()")

... everyone in the kernel calls set_task_state() with current, allowing
the helper to be removed.

However, as the comment indicates, it is still around for those archs
where computing current is more expensive than using a pointer, at least
in theory. An important arch that is affected is arm64, however this has
been addressed now [1] and performance is up to par making no difference
with either calls.

Of all the callers, if any, it's the locking bits that would care most
about this -- ie: we end up passing a tsk pointer to a lot of the lock
slowpath, and setting ->state on that. The following numbers are based
on two tests: a custom ad-hoc microbenchmark that just measures
latencies (for ~65 million calls) between get_task_state() vs
get_current_state().

Secondly for a higher overview, an unlink microbenchmark was used,
which pounds on a single file with open, close,unlink combos with
increasing thread counts (up to 4x ncpus). While the workload is quite
unrealistic, it does contend a lot on the inode mutex or now rwsem.

[1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com

== 1. x86-64 ==

Avg runtime set_task_state():    601 msecs
Avg runtime set_current_state(): 552 msecs

                                            vanilla                 dirty
Hmean    unlink1-processes-2      36089.26 (  0.00%)    38977.33 (  8.00%)
Hmean    unlink1-processes-5      28555.01 (  0.00%)    29832.55 (  4.28%)
Hmean    unlink1-processes-8      37323.75 (  0.00%)    44974.57 ( 20.50%)
Hmean    unlink1-processes-12     43571.88 (  0.00%)    44283.01 (  1.63%)
Hmean    unlink1-processes-21     34431.52 (  0.00%)    38284.45 ( 11.19%)
Hmean    unlink1-processes-30     34813.26 (  0.00%)    37975.17 (  9.08%)
Hmean    unlink1-processes-48     37048.90 (  0.00%)    39862.78 (  7.59%)
Hmean    unlink1-processes-79     35630.01 (  0.00%)    36855.30 (  3.44%)
Hmean    unlink1-processes-110    36115.85 (  0.00%)    39843.91 ( 10.32%)
Hmean    unlink1-processes-141    32546.96 (  0.00%)    35418.52 (  8.82%)
Hmean    unlink1-processes-172    34674.79 (  0.00%)    36899.21 (  6.42%)
Hmean    unlink1-processes-203    37303.11 (  0.00%)    36393.04 ( -2.44%)
Hmean    unlink1-processes-224    35712.13 (  0.00%)    36685.96 (  2.73%)

== 2. ppc64le ==

Avg runtime set_task_state():  938 msecs
Avg runtime set_current_state: 940 msecs

                                            vanilla                 dirty
Hmean    unlink1-processes-2      19269.19 (  0.00%)    30704.50 ( 59.35%)
Hmean    unlink1-processes-5      20106.15 (  0.00%)    21804.15 (  8.45%)
Hmean    unlink1-processes-8      17496.97 (  0.00%)    17243.28 ( -1.45%)
Hmean    unlink1-processes-12     14224.15 (  0.00%)    17240.21 ( 21.20%)
Hmean    unlink1-processes-21     14155.66 (  0.00%)    15681.23 ( 10.78%)
Hmean    unlink1-processes-30     14450.70 (  0.00%)    15995.83 ( 10.69%)
Hmean    unlink1-processes-48     16945.57 (  0.00%)    16370.42 ( -3.39%)
Hmean    unlink1-processes-79     15788.39 (  0.00%)    14639.27 ( -7.28%)
Hmean    unlink1-processes-110    14268.48 (  0.00%)    14377.40 (  0.76%)
Hmean    unlink1-processes-141    14023.65 (  0.00%)    16271.69 ( 16.03%)
Hmean    unlink1-processes-172    13417.62 (  0.00%)    16067.55 ( 19.75%)
Hmean    unlink1-processes-203    15293.08 (  0.00%)    15440.40 (  0.96%)
Hmean    unlink1-processes-234    13719.32 (  0.00%)    16190.74 ( 18.01%)
Hmean    unlink1-processes-265    16400.97 (  0.00%)    16115.22 ( -1.74%)
Hmean    unlink1-processes-296    14388.60 (  0.00%)    16216.13 ( 12.70%)
Hmean    unlink1-processes-320    15771.85 (  0.00%)    15905.96 (  0.85%)

x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching)
and ppc64 (with paca) show similar improvements in the unlink microbenches.
The small delta for ppc64 (2ms), does not represent the gains on the unlink
runs. In the case of x86, there was a decent amount of variation in the
latency runs, but always within a 20 to 50ms increase), ppc was more constant.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: mark.rutland@arm.com
Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 27 +--------------------------
 1 file changed, 1 insertion(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ad3ec9ec61f7..f4f9d32f12b3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -227,7 +227,7 @@ extern void proc_sched_set_task(struct task_struct *p);
 extern char ___assert_task_state[1 - 2*!!(
 		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
 
-/* Convenience macros for the sake of set_task_state */
+/* Convenience macros for the sake of set_current_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
 #define TASK_TRACED		(TASK_WAKEKILL | __TASK_TRACED)
@@ -254,17 +254,6 @@ extern char ___assert_task_state[1 - 2*!!(
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
-#define __set_task_state(tsk, state_value)			\
-	do {							\
-		(tsk)->task_state_change = _THIS_IP_;		\
-		(tsk)->state = (state_value);			\
-	} while (0)
-#define set_task_state(tsk, state_value)			\
-	do {							\
-		(tsk)->task_state_change = _THIS_IP_;		\
-		smp_store_mb((tsk)->state, (state_value));	\
-	} while (0)
-
 #define __set_current_state(state_value)			\
 	do {							\
 		current->task_state_change = _THIS_IP_;		\
@@ -277,20 +266,6 @@ extern char ___assert_task_state[1 - 2*!!(
 	} while (0)
 
 #else
-
-/*
- * @tsk had better be current, or you get to keep the pieces.
- *
- * The only reason is that computing current can be more expensive than
- * using a pointer that's already available.
- *
- * Therefore, see set_current_state().
- */
-#define __set_task_state(tsk, state_value)		\
-	do { (tsk)->state = (state_value); } while (0)
-#define set_task_state(tsk, state_value)		\
-	smp_store_mb((tsk)->state, (state_value))
-
 /*
  * set_current_state() includes a barrier so that the write of current->state
  * is correctly serialised wrt the caller's subsequent test of whether to
-- 
cgit v1.2.3


From 8f95c90ceb541a38ac16fec48c05142ef1450c25 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 11 Jan 2017 07:22:25 -0800
Subject: sched/wait, RCU: Introduce rcuwait machinery

rcuwait provides support for (single) RCU-safe task wait/wake functionality,
with the caveat that it must not be called after exit_notify(), such that
we avoid racing with rcu delayed_put_task_struct callbacks, task_struct
being rcu unaware in this context -- for which we similarly have
task_rcu_dereference() magic, but with different return semantics, which
can conflict with the wakeup side.

The interfaces are quite straightforward:

  rcuwait_wait_event()
  rcuwait_wake_up()

More details are in the comments, but it's perhaps worth mentioning at least,
that users must provide proper serialization when waiting on a condition, and
avoid corrupting a concurrent waiter. Also care must be taken between the task
and the condition for when calling the wakeup -- we cannot miss wakeups. When
porting users, this is for example, a given when using waitqueues in that
everything is done under the q->lock. As such, it can remove sources of non
preemptable unbounded work for realtime.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Link: http://lkml.kernel.org/r/1484148146-14210-2-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rcuwait.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 include/linux/rcuwait.h

(limited to 'include/linux')

diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
new file mode 100644
index 000000000000..0e93d56c7ab2
--- /dev/null
+++ b/include/linux/rcuwait.h
@@ -0,0 +1,63 @@
+#ifndef _LINUX_RCUWAIT_H_
+#define _LINUX_RCUWAIT_H_
+
+#include <linux/rcupdate.h>
+
+/*
+ * rcuwait provides a way of blocking and waking up a single
+ * task in an rcu-safe manner; where it is forbidden to use
+ * after exit_notify(). task_struct is not properly rcu protected,
+ * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ *
+ * Alternatively we have task_rcu_dereference(), but the return
+ * semantics have different implications which would break the
+ * wakeup side. The only time @task is non-nil is when a user is
+ * blocked (or checking if it needs to) on a condition, and reset
+ * as soon as we know that the condition has succeeded and are
+ * awoken.
+ */
+struct rcuwait {
+	struct task_struct *task;
+};
+
+#define __RCUWAIT_INITIALIZER(name)		\
+	{ .task = NULL, }
+
+static inline void rcuwait_init(struct rcuwait *w)
+{
+	w->task = NULL;
+}
+
+extern void rcuwait_wake_up(struct rcuwait *w);
+
+/*
+ * The caller is responsible for locking around rcuwait_wait_event(),
+ * such that writes to @task are properly serialized.
+ */
+#define rcuwait_wait_event(w, condition)				\
+({									\
+	/*								\
+	 * Complain if we are called after do_exit()/exit_notify(),     \
+	 * as we cannot rely on the rcu critical region for the		\
+	 * wakeup side.							\
+	 */                                                             \
+	WARN_ON(current->exit_state);                                   \
+									\
+	rcu_assign_pointer((w)->task, current);				\
+	for (;;) {							\
+		/*							\
+		 * Implicit barrier (A) pairs with (B) in		\
+		 * rcuwait_trywake().					\
+		 */							\
+		set_current_state(TASK_UNINTERRUPTIBLE);		\
+		if (condition)						\
+			break;						\
+									\
+		schedule();						\
+	}								\
+									\
+	WRITE_ONCE((w)->task, NULL);					\
+	__set_current_state(TASK_RUNNING);				\
+})
+
+#endif /* _LINUX_RCUWAIT_H_ */
-- 
cgit v1.2.3


From 52b94129f274937e4c25dd17b76697664a3c43c9 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 11 Jan 2017 07:22:26 -0800
Subject: locking/percpu-rwsem: Replace waitqueue with rcuwait

The use of any kind of wait queue is an overkill for pcpu-rwsems.
While one option would be to use the less heavy simple (swait)
flavor, this is still too much for what pcpu-rwsems needs. For one,
we do not care about any sort of queuing in that the only (rare) time
writers (and readers, for that matter) are queued is when trying to
acquire the regular contended rw_sem. There cannot be any further
queuing as writers are serialized by the rw_sem in the first place.

Given that percpu_down_write() must not be called after exit_notify(),
we can replace the bulky waitqueue with rcuwait such that a writer
can wait for its turn to take the lock. As such, we can avoid the
queue handling and locking overhead.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Link: http://lkml.kernel.org/r/1484148146-14210-3-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/percpu-rwsem.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 5b2e6159b744..93664f022ecf 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -4,15 +4,15 @@
 #include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
-#include <linux/wait.h>
+#include <linux/rcuwait.h>
 #include <linux/rcu_sync.h>
 #include <linux/lockdep.h>
 
 struct percpu_rw_semaphore {
 	struct rcu_sync		rss;
 	unsigned int __percpu	*read_count;
-	struct rw_semaphore	rw_sem;
-	wait_queue_head_t	writer;
+	struct rw_semaphore	rw_sem; /* slowpath */
+	struct rcuwait          writer; /* blocked writer */
 	int			readers_block;
 };
 
@@ -22,7 +22,7 @@ static struct percpu_rw_semaphore name = {				\
 	.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),	\
 	.read_count = &__percpu_rwsem_rc_##name,			\
 	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
-	.writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer),		\
+	.writer = __RCUWAIT_INITIALIZER(name.writer),			\
 }
 
 extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
-- 
cgit v1.2.3


From e274795ea7b7caa0fd74ef651594382a69e2a951 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 11 Jan 2017 14:17:48 +0100
Subject: locking/mutex: Fix mutex handoff

While reviewing the ww_mutex patches, I noticed that it was still
possible to (incorrectly) succeed for (incorrect) code like:

	mutex_lock(&a);
	mutex_lock(&a);

This was possible if the second mutex_lock() would block (as expected)
but then receive a spurious wakeup. At that point it would find itself
at the front of the queue, request a handoff and instantly claim
ownership and continue, since owner would point to itself.

Avoid this scenario and simplify the code by introducing a third low
bit to signal handoff pickup. So once we request handoff, unlock
clears the handoff bit and sets the pickup bit along with the new
owner.

This also removes the need for the .handoff argument to
__mutex_trylock(), since that becomes superfluous with PICKUP.

In order to guarantee enough low bits, ensure task_struct alignment is
at least L1_CACHE_BYTES (which seems a good ideal regardless).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9d659ae14b54 ("locking/mutex: Add lock handoff to avoid starvation")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mutex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index b97870f2debd..3e1fccb47f11 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -65,7 +65,7 @@ struct mutex {
 
 static inline struct task_struct *__mutex_owner(struct mutex *lock)
 {
-	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03);
+	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
 }
 
 /*
-- 
cgit v1.2.3


From ea9e0fb8fe1bdfca81bd76052a5cce70bb053430 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <Nicolai.Haehnle@amd.com>
Date: Wed, 21 Dec 2016 19:46:32 +0100
Subject: locking/ww_mutex: Set use_ww_ctx even when locking without a context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We will add a new field to struct mutex_waiter.  This field must be
initialized for all waiters if any waiter uses the ww_use_ctx path.

So there is a trade-off: Keep ww_mutex locking without a context on
the faster non-use_ww_ctx path, at the cost of adding the
initialization to all mutex locks (including non-ww_mutexes), or avoid
the additional cost for non-ww_mutex locks, at the cost of adding
additional checks to the use_ww_ctx path.

We take the latter choice.  It may be worth eliminating the users of
ww_mutex_lock(lock, NULL), but there are a lot of them.

Signed-off-by: Nicolai Hähnle <Nicolai.Haehnle@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dri-devel@lists.freedesktop.org
Link: http://lkml.kernel.org/r/1482346000-9927-5-git-send-email-nhaehnle@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ww_mutex.h | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 7b0066814fa0..5f2e8379baff 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -222,11 +222,7 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock,
  */
 static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
-	if (ctx)
-		return __ww_mutex_lock(lock, ctx);
-
-	mutex_lock(&lock->base);
-	return 0;
+	return __ww_mutex_lock(lock, ctx);
 }
 
 /**
@@ -262,10 +258,7 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct
 static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock,
 							   struct ww_acquire_ctx *ctx)
 {
-	if (ctx)
-		return __ww_mutex_lock_interruptible(lock, ctx);
-	else
-		return mutex_lock_interruptible(&lock->base);
+	return __ww_mutex_lock_interruptible(lock, ctx);
 }
 
 /**
-- 
cgit v1.2.3


From c5470b22d1833241cae996d23a8a346ff8ec4d58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <Nicolai.Haehnle@amd.com>
Date: Wed, 21 Dec 2016 19:46:33 +0100
Subject: locking/ww_mutex: Remove the __ww_mutex_lock*() inline wrappers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Keep the documentation in the header file since there is no good place
for it in mutex.c: there are two rather different implementations with
different EXPORT_SYMBOLs for each function.

Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <Nicolai.Haehnle@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dri-devel@lists.freedesktop.org
Link: http://lkml.kernel.org/r/1482346000-9927-6-git-send-email-nhaehnle@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ww_mutex.h | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 5f2e8379baff..c07528522bbc 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -186,11 +186,6 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
 #endif
 }
 
-extern int __must_check __ww_mutex_lock(struct ww_mutex *lock,
-					struct ww_acquire_ctx *ctx);
-extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock,
-						      struct ww_acquire_ctx *ctx);
-
 /**
  * ww_mutex_lock - acquire the w/w mutex
  * @lock: the mutex to be acquired
@@ -220,10 +215,8 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock,
  *
  * A mutex acquired with this function must be released with ww_mutex_unlock.
  */
-static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
-{
-	return __ww_mutex_lock(lock, ctx);
-}
+extern int __must_check ww_mutex_lock(struct ww_mutex *lock,
+				      struct ww_acquire_ctx *ctx);
 
 /**
  * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible
@@ -255,11 +248,8 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct
  *
  * A mutex acquired with this function must be released with ww_mutex_unlock.
  */
-static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock,
-							   struct ww_acquire_ctx *ctx)
-{
-	return __ww_mutex_lock_interruptible(lock, ctx);
-}
+extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock,
+						    struct ww_acquire_ctx *ctx);
 
 /**
  * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex
-- 
cgit v1.2.3


From 6baa5c60a97d7f1a37a4b5ab5fb3a450e56e8b06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <Nicolai.Haehnle@amd.com>
Date: Wed, 21 Dec 2016 19:46:34 +0100
Subject: locking/ww_mutex: Add waiters in stamp order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add regular waiters in stamp order. Keep adding waiters that have no
context in FIFO order and take care not to starve them.

While adding our task as a waiter, back off if we detect that there is
a waiter with a lower stamp in front of us.

Make sure to call lock_contended even when we back off early.

For w/w mutexes, being first in the wait list is only stable when
taking the lock without a context. Therefore, the purpose of the first
flag is split into two: 'first' remains to indicate whether we want to
spin optimistically, while 'handoff' indicates that we should be
prepared to accept a handoff.

For w/w locking with a context, we always accept handoffs after the
first schedule(), to handle the following sequence of events:

 1. Task #0 unlocks and hands off to Task #2 which is first in line

 2. Task #1 adds itself in front of Task #2

 3. Task #2 wakes up and must accept the handoff even though it is no
    longer first in line

Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <Nicolai.Haehnle@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dri-devel@lists.freedesktop.org
Link: http://lkml.kernel.org/r/1482346000-9927-7-git-send-email-nhaehnle@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mutex.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 3e1fccb47f11..e17942ffb3fc 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -20,6 +20,8 @@
 #include <linux/osq_lock.h>
 #include <linux/debug_locks.h>
 
+struct ww_acquire_ctx;
+
 /*
  * Simple, straightforward mutexes with strict semantics:
  *
@@ -75,6 +77,7 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock)
 struct mutex_waiter {
 	struct list_head	list;
 	struct task_struct	*task;
+	struct ww_acquire_ctx	*ww_ctx;
 #ifdef CONFIG_DEBUG_MUTEXES
 	void			*magic;
 #endif
-- 
cgit v1.2.3


From 977625a693283dbb35b2a3f674bc0237f7347348 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <Nicolai.Haehnle@amd.com>
Date: Wed, 21 Dec 2016 19:46:39 +0100
Subject: locking/mutex: Initialize mutex_waiter::ww_ctx with poison when
 debugging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Help catch cases where mutex_lock is used directly on w/w mutexes, which
otherwise result in the w/w tasks reading uninitialized data.

Signed-off-by: Nicolai Hähnle <Nicolai.Haehnle@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dri-devel@lists.freedesktop.org
Link: http://lkml.kernel.org/r/1482346000-9927-12-git-send-email-nhaehnle@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/poison.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/poison.h b/include/linux/poison.h
index 51334edec506..a39540326417 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -80,6 +80,7 @@
 /********** kernel/mutexes **********/
 #define MUTEX_DEBUG_INIT	0x11
 #define MUTEX_DEBUG_FREE	0x22
+#define MUTEX_POISON_WW_CTX	((void *) 0x500 + POISON_POINTER_DELTA)
 
 /********** lib/flex_array.c **********/
 #define FLEX_ARRAY_FREE	0x6c	/* for use-after-free poisoning */
-- 
cgit v1.2.3


From 12907fbb1a691807bb0420a27126e15934cb7954 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Dec 2016 11:44:28 +0100
Subject: sched/clock, clocksource: Add optional cs::mark_unstable() method

PeterZ reported that we'd fail to mark the TSC unstable when the
clocksource watchdog finds it unsuitable.

Allow a clocksource to run a custom action when its being marked
unstable and hook up the TSC unstable code.

Reported-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/clocksource.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index e315d04a2fd9..cfc75848a35d 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -62,6 +62,8 @@ struct module;
  * @archdata:		arch-specific data
  * @suspend:		suspend function for the clocksource, if necessary
  * @resume:		resume function for the clocksource, if necessary
+ * @mark_unstable:	Optional function to inform the clocksource driver that
+ *			the watchdog marked the clocksource unstable
  * @owner:		module reference, must be set by clocksource in modules
  *
  * Note: This struct is not used in hotpathes of the timekeeping code
@@ -93,6 +95,7 @@ struct clocksource {
 	unsigned long flags;
 	void (*suspend)(struct clocksource *cs);
 	void (*resume)(struct clocksource *cs);
+	void (*mark_unstable)(struct clocksource *cs);
 
 	/* private: */
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
-- 
cgit v1.2.3


From 9881b024b7d7671f6a014091bc96506b89081802 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Dec 2016 13:35:52 +0100
Subject: sched/clock: Delay switching sched_clock to stable

Currently we switch to the stable sched_clock if we guess the TSC is
usable, and then switch back to the unstable path if it turns out TSC
isn't stable during SMP bringup after all.

Delay switching to the stable path until after SMP bringup is
complete. This way we'll avoid switching during the time we detect the
worst of the TSC offences.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ad3ec9ec61f7..94a48bb58297 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2515,6 +2515,10 @@ extern u64 sched_clock_cpu(int cpu);
 extern void sched_clock_init(void);
 
 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline void sched_clock_init_late(void)
+{
+}
+
 static inline void sched_clock_tick(void)
 {
 }
@@ -2537,6 +2541,7 @@ static inline u64 local_clock(void)
 	return sched_clock();
 }
 #else
+extern void sched_clock_init_late(void);
 /*
  * Architectures can set this to 1 if they have specified
  * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
-- 
cgit v1.2.3


From 10ab56434f2f633a51e432ee8b7c29e12438e163 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Oct 2016 12:58:10 -0400
Subject: sched/core: Separate out io_schedule_prepare() and
 io_schedule_finish()

Now that IO schedule accounting is done inside __schedule(),
io_schedule() can be split into three steps - prep, schedule, and
finish - where the schedule part doesn't need any special annotation.
This allows marking a sleep as iowait by simply wrapping an existing
blocking function with io_schedule_prepare() and io_schedule_finish().

Because task_struct->in_iowait is single bit, the caller of
io_schedule_prepare() needs to record and the pass its state to
io_schedule_finish() to be safe regarding nesting.  While this isn't
the prettiest, these functions are mostly gonna be used by core
functions and we don't want to use more space for ->in_iowait.

While at it, as it's simple to do now, reimplement io_schedule()
without unnecessarily going through io_schedule_timeout().

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: adilger.kernel@dilger.ca
Cc: jack@suse.com
Cc: kernel-team@fb.com
Cc: mingbo@fb.com
Cc: tytso@mit.edu
Link: http://lkml.kernel.org/r/1477673892-28940-3-git-send-email-tj@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 94a48bb58297..a8daed914eef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -461,12 +461,10 @@ extern signed long schedule_timeout_idle(signed long timeout);
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
 
+extern int __must_check io_schedule_prepare(void);
+extern void io_schedule_finish(int token);
 extern long io_schedule_timeout(long timeout);
-
-static inline void io_schedule(void)
-{
-	io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
-}
+extern void io_schedule(void);
 
 void __noreturn do_task_dead(void);
 
-- 
cgit v1.2.3


From 1460cb65a10f6c7a6e3a1c76513338861a0a43b6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Oct 2016 12:58:11 -0400
Subject: locking/mutex, sched/wait: Add mutex_lock_io()

We sometimes end up propagating IO blocking through mutexes; however,
because there currently is no way of annotating mutex sleeps as
iowait, there are cases where iowait and /proc/stat:procs_blocked
report misleading numbers obscuring the actual state of the system.

This patch adds mutex_lock_io() so that mutex sleeps can be marked as
iowait in those cases.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: adilger.kernel@dilger.ca
Cc: jack@suse.com
Cc: kernel-team@fb.com
Cc: mingbo@fb.com
Cc: tytso@mit.edu
Link: http://lkml.kernel.org/r/1477673892-28940-4-git-send-email-tj@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mutex.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index b97870f2debd..980ba16b8749 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -156,10 +156,12 @@ extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
 					unsigned int subclass);
 extern int __must_check mutex_lock_killable_nested(struct mutex *lock,
 					unsigned int subclass);
+extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass);
 
 #define mutex_lock(lock) mutex_lock_nested(lock, 0)
 #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
 #define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0)
+#define mutex_lock_io(lock) mutex_lock_io_nested(lock, 0)
 
 #define mutex_lock_nest_lock(lock, nest_lock)				\
 do {									\
@@ -171,11 +173,13 @@ do {									\
 extern void mutex_lock(struct mutex *lock);
 extern int __must_check mutex_lock_interruptible(struct mutex *lock);
 extern int __must_check mutex_lock_killable(struct mutex *lock);
+extern void mutex_lock_io(struct mutex *lock);
 
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
 # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
+# define mutex_lock_nest_io(lock, nest_lock) mutex_io(lock)
 #endif
 
 /*
-- 
cgit v1.2.3


From 9e3d6223d2093a8903c8f570a06284453ee59944 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 9 Dec 2016 09:30:11 +0100
Subject: math64, timers: Fix 32bit mul_u64_u32_shr() and friends

It turns out that while GCC-4.4 manages to generate 32x32->64 mult
instructions for the 32bit mul_u64_u32_shr() code, any GCC after that
fails horribly.

Fix this by providing an explicit mul_u32_u32() function which can be
architcture provided.

Reported-by: Chris Metcalf <cmetcalf@mellanox.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Chris Metcalf <cmetcalf@mellanox.com> [for tile]
Cc: Christopher S. Hall <christopher.s.hall@intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Laurent Vivier <lvivier@redhat.com>
Cc: Liav Rehana <liavr@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Parit Bhargava <prarit@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20161209083011.GD15765@worktop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/math64.h | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/math64.h b/include/linux/math64.h
index 6e8b5b270ffe..80690c96c734 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -133,6 +133,16 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
 	return ret;
 }
 
+#ifndef mul_u32_u32
+/*
+ * Many a GCC version messes this up and generates a 64x64 mult :-(
+ */
+static inline u64 mul_u32_u32(u32 a, u32 b)
+{
+	return (u64)a * b;
+}
+#endif
+
 #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
 
 #ifndef mul_u64_u32_shr
@@ -160,9 +170,9 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
 	al = a;
 	ah = a >> 32;
 
-	ret = ((u64)al * mul) >> shift;
+	ret = mul_u32_u32(al, mul) >> shift;
 	if (ah)
-		ret += ((u64)ah * mul) << (32 - shift);
+		ret += mul_u32_u32(ah, mul) << (32 - shift);
 
 	return ret;
 }
@@ -186,10 +196,10 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
 	a0.ll = a;
 	b0.ll = b;
 
-	rl.ll = (u64)a0.l.low * b0.l.low;
-	rm.ll = (u64)a0.l.low * b0.l.high;
-	rn.ll = (u64)a0.l.high * b0.l.low;
-	rh.ll = (u64)a0.l.high * b0.l.high;
+	rl.ll = mul_u32_u32(a0.l.low, b0.l.low);
+	rm.ll = mul_u32_u32(a0.l.low, b0.l.high);
+	rn.ll = mul_u32_u32(a0.l.high, b0.l.low);
+	rh.ll = mul_u32_u32(a0.l.high, b0.l.high);
 
 	/*
 	 * Each of these lines computes a 64-bit intermediate result into "c",
@@ -229,8 +239,8 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
 	} u, rl, rh;
 
 	u.ll = a;
-	rl.ll = (u64)u.l.low * mul;
-	rh.ll = (u64)u.l.high * mul + rl.l.high;
+	rl.ll = mul_u32_u32(u.l.low, mul);
+	rh.ll = mul_u32_u32(u.l.high, mul) + rl.l.high;
 
 	/* Bits 32-63 of the result will be in rh.l.low. */
 	rl.l.high = do_div(rh.ll, divisor);
-- 
cgit v1.2.3


From af2e859edd477fa1ea3d1d106f41a595cff3d162 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 1 Dec 2016 11:47:04 +0000
Subject: locking/ww_mutex: Fix compilation of __WW_MUTEX_INITIALIZER

From conflicting macro parameters, passing the wrong name to
__MUTEX_INITIALIZER and a stray '\', #define __WW_MUTEX_INITIALIZER was
very unhappy.

One unnecessary change was to choose to pass &ww_class instead of
implicitly taking the address of the class within the macro.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 1b375dc30710 ("mutex: Move ww_mutex definitions to ww_mutex.h")
Link: http://lkml.kernel.org/r/20161201114711.28697-2-chris@chris-wilson.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ww_mutex.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index c07528522bbc..083950fd5b0b 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -51,10 +51,10 @@ struct ww_mutex {
 };
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) \
-		, .ww_class = &ww_class
+# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) \
+		, .ww_class = class
 #else
-# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class)
+# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class)
 #endif
 
 #define __WW_CLASS_INITIALIZER(ww_class) \
@@ -63,7 +63,7 @@ struct ww_mutex {
 		, .mutex_name = #ww_class "_mutex" }
 
 #define __WW_MUTEX_INITIALIZER(lockname, class) \
-		{ .base = { \__MUTEX_INITIALIZER(lockname) } \
+		{ .base =  __MUTEX_INITIALIZER(lockname.base) \
 		__WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
 
 #define DEFINE_WW_CLASS(classname) \
-- 
cgit v1.2.3


From 1e24edca0557dba6486d39d3c24c288475432bcf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 14 Nov 2016 17:12:23 +0100
Subject: locking/atomic, kref: Add KREF_INIT()

Since we need to change the implementation, stop exposing internals.

Provide KREF_INIT() to allow static initialization of struct kref.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kref.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index e15828fd71f1..9af255ad1e2f 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -24,6 +24,8 @@ struct kref {
 	atomic_t refcount;
 };
 
+#define KREF_INIT(n)	{ .refcount = ATOMIC_INIT(n), }
+
 /**
  * kref_init - initialize object.
  * @kref: object in question.
-- 
cgit v1.2.3


From 2c935bc57221cc2edc787c72ea0e2d30cdcd3d5e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 14 Nov 2016 17:29:48 +0100
Subject: locking/atomic, kref: Add kref_read()

Since we need to change the implementation, stop exposing internals.

Provide kref_read() to read the current reference count; typically
used for debug messages.

Kills two anti-patterns:

	atomic_read(&kref->refcount)
	kref->refcount.counter

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kref.h         | 5 +++++
 include/linux/sunrpc/cache.h | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 9af255ad1e2f..7c88d865f82f 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -35,6 +35,11 @@ static inline void kref_init(struct kref *kref)
 	atomic_set(&kref->refcount, 1);
 }
 
+static inline int kref_read(const struct kref *kref)
+{
+	return atomic_read(&kref->refcount);
+}
+
 /**
  * kref_get - increment refcount for object.
  * @kref: object.
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 62a60eeacb0a..8a511c0985aa 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -198,7 +198,7 @@ static inline struct cache_head  *cache_get(struct cache_head *h)
 
 static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
 {
-	if (atomic_read(&h->ref.refcount) <= 2 &&
+	if (kref_read(&h->ref) <= 2 &&
 	    h->expiry_time < cd->nextcheck)
 		cd->nextcheck = h->expiry_time;
 	kref_put(&h->ref, cd->cache_put);
-- 
cgit v1.2.3


From bdfafc4ffdd24e491119d81f85ddc4393fa49803 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 14 Nov 2016 17:34:19 +0100
Subject: locking/atomic, kref: Kill kref_sub()

By general sentiment kref_sub() is a bad interface, make it go away.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kref.h | 32 ++++----------------------------
 1 file changed, 4 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 7c88d865f82f..31c49a64cdf9 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -54,9 +54,8 @@ static inline void kref_get(struct kref *kref)
 }
 
 /**
- * kref_sub - subtract a number of refcounts for object.
+ * kref_put - decrement refcount for object.
  * @kref: object.
- * @count: Number of recounts to subtract.
  * @release: pointer to the function that will clean up the object when the
  *	     last reference to the object is released.
  *	     This pointer is required, and it is not acceptable to pass kfree
@@ -65,46 +64,23 @@ static inline void kref_get(struct kref *kref)
  *	     maintainer, and anyone else who happens to notice it.  You have
  *	     been warned.
  *
- * Subtract @count from the refcount, and if 0, call release().
+ * Decrement the refcount, and if 0, call release().
  * Return 1 if the object was removed, otherwise return 0.  Beware, if this
  * function returns 0, you still can not count on the kref from remaining in
  * memory.  Only use the return value if you want to see if the kref is now
  * gone, not present.
  */
-static inline int kref_sub(struct kref *kref, unsigned int count,
-	     void (*release)(struct kref *kref))
+static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
 {
 	WARN_ON(release == NULL);
 
-	if (atomic_sub_and_test((int) count, &kref->refcount)) {
+	if (atomic_dec_and_test(&kref->refcount)) {
 		release(kref);
 		return 1;
 	}
 	return 0;
 }
 
-/**
- * kref_put - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object when the
- *	     last reference to the object is released.
- *	     This pointer is required, and it is not acceptable to pass kfree
- *	     in as this function.  If the caller does pass kfree to this
- *	     function, you will be publicly mocked mercilessly by the kref
- *	     maintainer, and anyone else who happens to notice it.  You have
- *	     been warned.
- *
- * Decrement the refcount, and if 0, call release().
- * Return 1 if the object was removed, otherwise return 0.  Beware, if this
- * function returns 0, you still can not count on the kref from remaining in
- * memory.  Only use the return value if you want to see if the kref is now
- * gone, not present.
- */
-static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
-{
-	return kref_sub(kref, 1, release);
-}
-
 static inline int kref_put_mutex(struct kref *kref,
 				 void (*release)(struct kref *kref),
 				 struct mutex *lock)
-- 
cgit v1.2.3


From 23b19ec3377924eb8f303880102e056e9214ba72 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 14 Jan 2017 12:11:59 +0100
Subject: locking/ww_mutex: Turn off __must_check for now
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the ww_mutex inline wrappers gone there's a lot of dormant
anti-patterns emerging in an x86 allyesconfig build:

  kernel/locking/test-ww_mutex.c:80:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  kernel/locking/test-ww_mutex.c:55:3: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  kernel/locking/test-ww_mutex.c:134:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  kernel/locking/test-ww_mutex.c:213:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  kernel/locking/test-ww_mutex.c:177:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  kernel/locking/test-ww_mutex.c:266:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  drivers/gpu/drm/drm_modeset_lock.c:430:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c:70:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  drivers/gpu/drm/vgem/vgem_fence.c:193:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  drivers/gpu/drm/i915/i915_gem_batch_pool.c:125:4: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  drivers/gpu/drm/i915/i915_gem_execbuffer.c:1302:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  drivers/gpu/drm/radeon/radeon_prime.c:69:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]
  drivers/gpu/drm/nouveau/nouveau_prime.c:70:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result]

... but we cannot just litter the kernel build log with such warnings.

These need to be fixed separately - turn off the warning for now.

Cc: Nicolai Hähnle <nicolai.haehnle@amd.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dri-devel@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ww_mutex.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 083950fd5b0b..5dd9a7682227 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -215,8 +215,7 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
  *
  * A mutex acquired with this function must be released with ww_mutex_unlock.
  */
-extern int __must_check ww_mutex_lock(struct ww_mutex *lock,
-				      struct ww_acquire_ctx *ctx);
+extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx);
 
 /**
  * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible
-- 
cgit v1.2.3


From f21860bac05b609d71757338361d26209ff0759b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 14 Jan 2017 17:11:36 +0100
Subject: locking/mutex, sched/wait: Fix the mutex_lock_io_nested() define

Mike noticed this bogosity:

 > > +# define mutex_lock_nest_io(lock, nest_lock) mutex_io(lock)
 >                                                 ^^^^^^^^^^^^^^ typo

This new locking API is not used yet, so this didn't trigger in testing.

Fix it.

Reported-by: Mike Galbraith <efault@gmx.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: adilger.kernel@dilger.ca
Cc: jack@suse.com
Cc: kernel-team@fb.com
Cc: mingbo@fb.com
Cc: tytso@mit.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mutex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 980ba16b8749..7fffbfcd5430 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -179,7 +179,7 @@ extern void mutex_lock_io(struct mutex *lock);
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
 # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
-# define mutex_lock_nest_io(lock, nest_lock) mutex_io(lock)
+# define mutex_lock_io_nested(lock, subclass) mutex_lock(lock)
 #endif
 
 /*
-- 
cgit v1.2.3


From e12799125074c67cd30bc3d7a5ff5e2f29ea1411 Mon Sep 17 00:00:00 2001
From: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Date: Tue, 22 Nov 2016 19:03:09 +0200
Subject: firmware: qcom: scm: Add empty functions to help compile testing

This will help to compile testing drivers which depends on scm
functions with COMPILE_TEST Kconfig option.

Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 include/linux/qcom_scm.h | 50 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index cc32ab852fbc..7e004fb57fc4 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -13,9 +13,9 @@
 #ifndef __QCOM_SCM_H
 #define __QCOM_SCM_H
 
-extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus);
-extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus);
-
+#define QCOM_SCM_VERSION(major, minor)	(((major) << 16) | ((minor) & 0xFF))
+#define QCOM_SCM_CPU_PWR_DOWN_L2_ON	0x0
+#define QCOM_SCM_CPU_PWR_DOWN_L2_OFF	0x1
 #define QCOM_SCM_HDCP_MAX_REQ_CNT	5
 
 struct qcom_scm_hdcp_req {
@@ -23,27 +23,47 @@ struct qcom_scm_hdcp_req {
 	u32 val;
 };
 
+#if IS_ENABLED(CONFIG_QCOM_SCM)
+extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus);
+extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus);
 extern bool qcom_scm_is_available(void);
-
 extern bool qcom_scm_hdcp_available(void);
 extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
-		u32 *resp);
-
+			     u32 *resp);
 extern bool qcom_scm_pas_supported(u32 peripheral);
 extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata,
-		size_t size);
+				   size_t size);
 extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr,
-		phys_addr_t size);
+				  phys_addr_t size);
 extern int qcom_scm_pas_auth_and_reset(u32 peripheral);
 extern int qcom_scm_pas_shutdown(u32 peripheral);
-
-#define QCOM_SCM_CPU_PWR_DOWN_L2_ON	0x0
-#define QCOM_SCM_CPU_PWR_DOWN_L2_OFF	0x1
-
 extern void qcom_scm_cpu_power_down(u32 flags);
-
-#define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF))
-
 extern u32 qcom_scm_get_version(void);
+#else
+static inline
+int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus)
+{
+	return -ENODEV;
+}
+static inline
+int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus)
+{
+	return -ENODEV;
+}
+static inline bool qcom_scm_is_available(void) { return false; }
+static inline bool qcom_scm_hdcp_available(void) { return false; }
+static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
+				    u32 *resp) { return -ENODEV; }
+static inline bool qcom_scm_pas_supported(u32 peripheral) { return false; }
+static inline int qcom_scm_pas_init_image(u32 peripheral, const void *metadata,
+					  size_t size) { return -ENODEV; }
+static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr,
+					 phys_addr_t size) { return -ENODEV; }
+static inline int
+qcom_scm_pas_auth_and_reset(u32 peripheral) { return -ENODEV; }
+static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; }
+static inline void qcom_scm_cpu_power_down(u32 flags) {}
+static inline u32 qcom_scm_get_version(void) { return 0; }
+#endif
 
 #endif
-- 
cgit v1.2.3


From a811b420b6c13759540070c0e9541b7cd8569168 Mon Sep 17 00:00:00 2001
From: Andy Gross <andy.gross@linaro.org>
Date: Mon, 16 Jan 2017 23:24:15 -0600
Subject: firmware: qcom_scm: Add set remote state API

This patch adds a set remote state SCM API.  This will be used by the
Venus and GPU subsystems to set state on the remote processors.

This work was based on two patch sets by Jordan Crouse and Stanimir
Varbanov.

Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 include/linux/qcom_scm.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 7e004fb57fc4..d32f6f1a5225 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -39,6 +39,7 @@ extern int qcom_scm_pas_auth_and_reset(u32 peripheral);
 extern int qcom_scm_pas_shutdown(u32 peripheral);
 extern void qcom_scm_cpu_power_down(u32 flags);
 extern u32 qcom_scm_get_version(void);
+extern int qcom_scm_set_remote_state(u32 state, u32 id);
 #else
 static inline
 int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus)
@@ -64,6 +65,7 @@ qcom_scm_pas_auth_and_reset(u32 peripheral) { return -ENODEV; }
 static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; }
 static inline void qcom_scm_cpu_power_down(u32 flags) {}
 static inline u32 qcom_scm_get_version(void) { return 0; }
+static inline u32
+qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; }
 #endif
-
 #endif
-- 
cgit v1.2.3


From 5fa23530d4fcc7e84be9a557c58d0e670a15c042 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 16 Jan 2017 10:40:43 +0000
Subject: of: base: add support to find the level of the last cache

It is useful to have helper function just to get the number of cache
levels for a given logical cpu. We can obtain the same by just checking
the level at which the last cache is present. This patch adds support
to find the level of the last cache for a given cpu.

It will be used on ARM64 platform where the device tree provides the
information for the additional non-architected/transparent/external
last level caches that are not integrated with the processors.

Cc: Mark Rutland <mark.rutland@arm.com>
Suggested-by: Rob Herring <robh+dt@kernel.org>
Acked-by: Rob Herring <robh+dt@kernel.org>
Tested-by: Tan Xiaojun <tanxiaojun@huawei.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
[will: use u32 instead of int for cache_level]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/of.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index d72f01009297..21e6323de0f3 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -280,6 +280,7 @@ extern struct device_node *of_get_child_by_name(const struct device_node *node,
 
 /* cache lookup */
 extern struct device_node *of_find_next_cache_node(const struct device_node *);
+extern int of_find_last_cache_level(unsigned int cpu);
 extern struct device_node *of_find_node_with_property(
 	struct device_node *from, const char *prop_name);
 
-- 
cgit v1.2.3


From c51ca6cf545bc51ad38bd50816bde37c647d608d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Sat, 10 Dec 2016 15:13:59 -0700
Subject: block: move existing elevator ops to union

Prep patch for adding MQ ops as well, since doing anon unions with
named initializers doesn't work on older compilers.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
---
 include/linux/elevator.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index b276e9ef0e0b..2a9e966eed03 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -94,7 +94,9 @@ struct elevator_type
 	struct kmem_cache *icq_cache;
 
 	/* fields provided by elevator implementation */
-	struct elevator_ops ops;
+	union {
+		struct elevator_ops sq;
+	} ops;
 	size_t icq_size;	/* see iocontext.h */
 	size_t icq_align;	/* ditto */
 	struct elv_fs_entry *elevator_attrs;
-- 
cgit v1.2.3


From 16a3c2a70cad5ccdc2dc0a4544bff82554807493 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 15 Dec 2016 14:27:46 -0700
Subject: blk-mq: un-export blk_mq_free_hctx_request()

It's only used in blk-mq, kill it from the main exported header
and kill the symbol export as well.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Omar Sandoval <osandov@fb.com>
---
 include/linux/blk-mq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index afc81d77e471..2686f9e7302a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -181,7 +181,6 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
 void blk_mq_insert_request(struct request *, bool, bool, bool);
 void blk_mq_free_request(struct request *rq);
-void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 
 enum {
-- 
cgit v1.2.3


From fd2d332677c687ca90c12a47d6c377c547100b56 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 12 Jan 2017 10:04:45 -0700
Subject: blk-mq: add support for carrying internal tag information in blk_qc_t

No functional change in this patch, just in preparation for having
two types of tags available to the block layer for a single request.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
---
 include/linux/blk_types.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 519ea2c9df61..0e5b1cd5113c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -232,22 +232,29 @@ static inline bool op_is_sync(unsigned int op)
 }
 
 typedef unsigned int blk_qc_t;
-#define BLK_QC_T_NONE	-1U
-#define BLK_QC_T_SHIFT	16
+#define BLK_QC_T_NONE		-1U
+#define BLK_QC_T_SHIFT		16
+#define BLK_QC_T_INTERNAL	(1U << 31)
 
 static inline bool blk_qc_t_valid(blk_qc_t cookie)
 {
 	return cookie != BLK_QC_T_NONE;
 }
 
-static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num)
+static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num,
+				       bool internal)
 {
-	return tag | (queue_num << BLK_QC_T_SHIFT);
+	blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT);
+
+	if (internal)
+		ret |= BLK_QC_T_INTERNAL;
+
+	return ret;
 }
 
 static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
 {
-	return cookie >> BLK_QC_T_SHIFT;
+	return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
 }
 
 static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
@@ -255,6 +262,11 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
 	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
 }
 
+static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
+{
+	return (cookie & BLK_QC_T_INTERNAL) != 0;
+}
+
 struct blk_issue_stat {
 	u64 time;
 };
-- 
cgit v1.2.3


From bd166ef183c263c5ced656d49ef19c7da4adc774 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 17 Jan 2017 06:03:22 -0700
Subject: blk-mq-sched: add framework for MQ capable IO schedulers

This adds a set of hooks that intercepts the blk-mq path of
allocating/inserting/issuing/completing requests, allowing
us to develop a scheduler within that framework.

We reuse the existing elevator scheduler API on the registration
side, but augment that with the scheduler flagging support for
the blk-mq interfce, and with a separate set of ops hooks for MQ
devices.

We split driver and scheduler tags, so we can run the scheduling
independently of device queue depth.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
---
 include/linux/blk-mq.h   |  5 ++++-
 include/linux/blkdev.h   |  4 +++-
 include/linux/elevator.h | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2686f9e7302a..63569eb46d15 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -22,6 +22,7 @@ struct blk_mq_hw_ctx {
 
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
 
+	void			*sched_data;
 	struct request_queue	*queue;
 	struct blk_flush_queue	*fq;
 
@@ -35,6 +36,7 @@ struct blk_mq_hw_ctx {
 	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
+	struct blk_mq_tags	*sched_tags;
 
 	struct srcu_struct	queue_rq_srcu;
 
@@ -156,6 +158,7 @@ enum {
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
+	BLK_MQ_S_SCHED_RESTART	= 2,
 
 	BLK_MQ_MAX_DEPTH	= 10240,
 
@@ -179,13 +182,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
-void blk_mq_insert_request(struct request *, bool, bool, bool);
 void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 
 enum {
 	BLK_MQ_REQ_NOWAIT	= (1 << 0), /* return when out of requests */
 	BLK_MQ_REQ_RESERVED	= (1 << 1), /* allocate from reserved pool */
+	BLK_MQ_REQ_INTERNAL	= (1 << 2), /* allocate internal/sched tag */
 };
 
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2e99d659b0f1..25564857f5f8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -154,6 +154,7 @@ struct request {
 
 	/* the following two fields are internal, NEVER access directly */
 	unsigned int __data_len;	/* total data len */
+	int tag;
 	sector_t __sector;		/* sector cursor */
 
 	struct bio *bio;
@@ -220,9 +221,10 @@ struct request {
 
 	unsigned short ioprio;
 
+	int internal_tag;
+
 	void *special;		/* opaque pointer available for LLD use */
 
-	int tag;
 	int errors;
 
 	/*
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 2a9e966eed03..ecb96fd67c6d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -77,6 +77,34 @@ struct elevator_ops
 	elevator_registered_fn *elevator_registered_fn;
 };
 
+struct blk_mq_alloc_data;
+struct blk_mq_hw_ctx;
+
+struct elevator_mq_ops {
+	int (*init_sched)(struct request_queue *, struct elevator_type *);
+	void (*exit_sched)(struct elevator_queue *);
+
+	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
+	bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
+	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
+	void (*request_merged)(struct request_queue *, struct request *, int);
+	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
+	struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
+	void (*put_request)(struct request *);
+	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
+	void (*dispatch_requests)(struct blk_mq_hw_ctx *, struct list_head *);
+	bool (*has_work)(struct blk_mq_hw_ctx *);
+	void (*completed_request)(struct blk_mq_hw_ctx *, struct request *);
+	void (*started_request)(struct request *);
+	void (*requeue_request)(struct request *);
+	struct request *(*former_request)(struct request_queue *, struct request *);
+	struct request *(*next_request)(struct request_queue *, struct request *);
+	int (*get_rq_priv)(struct request_queue *, struct request *);
+	void (*put_rq_priv)(struct request_queue *, struct request *);
+	void (*init_icq)(struct io_cq *);
+	void (*exit_icq)(struct io_cq *);
+};
+
 #define ELV_NAME_MAX	(16)
 
 struct elv_fs_entry {
@@ -96,12 +124,14 @@ struct elevator_type
 	/* fields provided by elevator implementation */
 	union {
 		struct elevator_ops sq;
+		struct elevator_mq_ops mq;
 	} ops;
 	size_t icq_size;	/* see iocontext.h */
 	size_t icq_align;	/* ditto */
 	struct elv_fs_entry *elevator_attrs;
 	char elevator_name[ELV_NAME_MAX];
 	struct module *elevator_owner;
+	bool uses_mq;
 
 	/* managed by elevator core */
 	char icq_cache_name[ELV_NAME_MAX + 5];	/* elvname + "_io_cq" */
@@ -125,6 +155,7 @@ struct elevator_queue
 	struct kobject kobj;
 	struct mutex sysfs_lock;
 	unsigned int registered:1;
+	unsigned int uses_mq:1;
 	DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
 };
 
@@ -141,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *,
 extern void elv_merged_request(struct request_queue *, struct request *, int);
 extern void elv_bio_merged(struct request_queue *q, struct request *,
 				struct bio *);
+extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
 extern void elv_requeue_request(struct request_queue *, struct request *);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
-- 
cgit v1.2.3


From d34849913819a5e0cbfbe724dbe79df89278c524 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 13 Jan 2017 14:43:58 -0700
Subject: blk-mq-sched: allow setting of default IO scheduler

Add Kconfig entries to manage what devices get assigned an MQ
scheduler, and add a blk-mq flag for drivers to opt out of scheduling.
The latter is useful for admin type queues that still allocate a blk-mq
queue and tag set, but aren't use for normal IO.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
---
 include/linux/blk-mq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 63569eb46d15..8e4df3d6c8cd 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -153,6 +153,7 @@ enum {
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_DEFER_ISSUE	= 1 << 4,
 	BLK_MQ_F_BLOCKING	= 1 << 5,
+	BLK_MQ_F_NO_SCHED	= 1 << 6,
 	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
 	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 
-- 
cgit v1.2.3


From 501db511397fd6efff3aa5b4e8de415b55559550 Mon Sep 17 00:00:00 2001
From: Rolf Neugebauer <rolf.neugebauer@docker.com>
Date: Tue, 17 Jan 2017 18:13:51 +0000
Subject: virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit

This patch part reverts fd2a0437dc33 and e858fae2b0b8 which introduced a
subtle change in how the virtio_net flags are derived from the SKBs
ip_summed field.

With the above commits, the flags are set to VIRTIO_NET_HDR_F_DATA_VALID
when ip_summed == CHECKSUM_UNNECESSARY, thus treating it differently to
ip_summed == CHECKSUM_NONE, which should be the same.

Further, the virtio spec 1.0 / CS04 explicitly says that
VIRTIO_NET_HDR_F_DATA_VALID must not be set by the driver.

Fixes: fd2a0437dc33 ("virtio_net: introduce virtio_net_hdr_{from,to}_skb")
Fixes: e858fae2b0b8 (" virtio_net: use common code for virtio_net_hdr and skb GSO conversion")
Signed-off-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 66204007d7ac..56436472ccc7 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -91,8 +91,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 				skb_checksum_start_offset(skb));
 		hdr->csum_offset = __cpu_to_virtio16(little_endian,
 				skb->csum_offset);
-	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-		hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
 	} /* else everything is zero */
 
 	return 0;
-- 
cgit v1.2.3


From d45ae1f7041ac52ade6c5ec76d96bbed765d67aa Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 17 Jan 2017 12:29:35 -0500
Subject: tracing: Process constants for (un)likely() profiler

When running the likely/unlikely profiler, one of the results did not look
accurate. It noted that the unlikely() in link_path_walk() was 100%
incorrect. When I added a trace_printk() to see what was happening there, it
became 80% correct! Looking deeper into what whas happening, I found that
gcc split that if statement into two paths. One where the if statement
became a constant, the other path a variable. The other path had the if
statement always hit (making the unlikely there, always false), but since
the #define unlikely() has:

  #define unlikely() (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))

Where constants are ignored by the branch profiler, the "constant" path
made by the compiler was ignored, even though it was hit 80% of the time.

By just passing the constant value to the __branch_check__() function and
tracing it out of line (as always correct, as likely/unlikely isn't a factor
for constants), then we get back the accurate readings of branches that were
optimized by gcc causing part of the execution to become constant.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/compiler.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index cf0fa5d86059..bbbe1570de1c 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -107,12 +107,13 @@ struct ftrace_branch_data {
  */
 #if defined(CONFIG_TRACE_BRANCH_PROFILING) \
     && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
-void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
+void ftrace_likely_update(struct ftrace_branch_data *f, int val,
+			  int expect, int is_constant);
 
 #define likely_notrace(x)	__builtin_expect(!!(x), 1)
 #define unlikely_notrace(x)	__builtin_expect(!!(x), 0)
 
-#define __branch_check__(x, expect) ({					\
+#define __branch_check__(x, expect, is_constant) ({			\
 			int ______r;					\
 			static struct ftrace_branch_data		\
 				__attribute__((__aligned__(4)))		\
@@ -122,8 +123,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 				.file = __FILE__,			\
 				.line = __LINE__,			\
 			};						\
-			______r = likely_notrace(x);			\
-			ftrace_likely_update(&______f, ______r, expect); \
+			______r = __builtin_expect(!!(x), expect);	\
+			ftrace_likely_update(&______f, ______r,		\
+					     expect, is_constant);	\
 			______r;					\
 		})
 
@@ -133,10 +135,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
  * written by Daniel Walker.
  */
 # ifndef likely
-#  define likely(x)	(__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1))
+#  define likely(x)	(__branch_check__(x, 1, __builtin_constant_p(x)))
 # endif
 # ifndef unlikely
-#  define unlikely(x)	(__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))
+#  define unlikely(x)	(__branch_check__(x, 0, __builtin_constant_p(x)))
 # endif
 
 #ifdef CONFIG_PROFILE_ALL_BRANCHES
-- 
cgit v1.2.3


From 0a13cd1a05e7a549259d4f803d2ec2efda07ed7c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 14 Nov 2016 18:03:11 +0100
Subject: locking/atomic, kref: Implement kref_put_lock()

Because home-rolling your own is _awesome_, stop doing it. Provide
kref_put_lock(), just like kref_put_mutex() but for a spinlock.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kref.h | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 31c49a64cdf9..9db9685fed84 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -19,6 +19,7 @@
 #include <linux/atomic.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
+#include <linux/spinlock.h>
 
 struct kref {
 	atomic_t refcount;
@@ -86,12 +87,21 @@ static inline int kref_put_mutex(struct kref *kref,
 				 struct mutex *lock)
 {
 	WARN_ON(release == NULL);
-	if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) {
-		mutex_lock(lock);
-		if (unlikely(!atomic_dec_and_test(&kref->refcount))) {
-			mutex_unlock(lock);
-			return 0;
-		}
+
+	if (atomic_dec_and_mutex_lock(&kref->refcount, lock)) {
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
+
+static inline int kref_put_lock(struct kref *kref,
+				void (*release)(struct kref *kref),
+				spinlock_t *lock)
+{
+	WARN_ON(release == NULL);
+
+	if (atomic_dec_and_lock(&kref->refcount, lock)) {
 		release(kref);
 		return 1;
 	}
-- 
cgit v1.2.3


From 84d58132d285b2edef951d6633c1e5224e8b5283 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 11 Jan 2017 06:35:10 -0800
Subject: rpmsg: Introduce "poll" to endpoint ops

This allows rpmsg backends to implement polling of the outgoing buffer,
which provides poll support to user space when using the rpmsg character
device.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/rpmsg.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index 18f9e1ae4b7e..10d6ae8bbb7d 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -41,6 +41,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/poll.h>
 
 #define RPMSG_ADDR_ANY		0xFFFFFFFF
 
@@ -156,6 +157,9 @@ int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst);
 int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
 			     void *data, int len);
 
+unsigned int rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp,
+			poll_table *wait);
+
 #else
 
 static inline int register_rpmsg_device(struct rpmsg_device *dev)
@@ -254,6 +258,15 @@ static inline int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src,
 	return -ENXIO;
 }
 
+static inline unsigned int rpmsg_poll(struct rpmsg_endpoint *ept,
+				      struct file *filp, poll_table *wait)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return 0;
+}
+
 #endif /* IS_ENABLED(CONFIG_RPMSG) */
 
 /* use a macro to avoid include chaining to get THIS_MODULE */
-- 
cgit v1.2.3


From cc16f00f6529aa2378f2b949a6f68e9dc6dec363 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 18 Jan 2017 00:44:42 +0800
Subject: sctp: add support for generating stream reconf ssn reset request
 chunk

This patch is to add asoc strreset_outseq and strreset_inseq for
saving the reconf request sequence, initialize them when create
assoc and process init, and also to define Incoming and Outgoing
SSN Reset Request Parameter described in rfc6525 section 4.1 and
4.2, As they can be in one same chunk as section rfc6525 3.1-3
describes, it makes them in one function.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index fcb4c3646173..a9e790685af3 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -108,6 +108,7 @@ typedef enum {
 	/* Use hex, as defined in ADDIP sec. 3.1 */
 	SCTP_CID_ASCONF			= 0xC1,
 	SCTP_CID_ASCONF_ACK		= 0x80,
+	SCTP_CID_RECONF			= 0x82,
 } sctp_cid_t; /* enum */
 
 
@@ -199,6 +200,13 @@ typedef enum {
 	SCTP_PARAM_SUCCESS_REPORT	= cpu_to_be16(0xc005),
 	SCTP_PARAM_ADAPTATION_LAYER_IND = cpu_to_be16(0xc006),
 
+	/* RE-CONFIG. Section 4 */
+	SCTP_PARAM_RESET_OUT_REQUEST		= cpu_to_be16(0x000d),
+	SCTP_PARAM_RESET_IN_REQUEST		= cpu_to_be16(0x000e),
+	SCTP_PARAM_RESET_TSN_REQUEST		= cpu_to_be16(0x000f),
+	SCTP_PARAM_RESET_RESPONSE		= cpu_to_be16(0x0010),
+	SCTP_PARAM_RESET_ADD_OUT_STREAMS	= cpu_to_be16(0x0011),
+	SCTP_PARAM_RESET_ADD_IN_STREAMS		= cpu_to_be16(0x0012),
 } sctp_param_t; /* enum */
 
 
@@ -710,4 +718,23 @@ struct sctp_infox {
 	struct sctp_association *asoc;
 };
 
+struct sctp_reconf_chunk {
+	sctp_chunkhdr_t chunk_hdr;
+	__u8 params[0];
+} __packed;
+
+struct sctp_strreset_outreq {
+	sctp_paramhdr_t param_hdr;
+	__u32 request_seq;
+	__u32 response_seq;
+	__u32 send_reset_at_tsn;
+	__u16 list_of_streams[0];
+} __packed;
+
+struct sctp_strreset_inreq {
+	sctp_paramhdr_t param_hdr;
+	__u32 request_seq;
+	__u16 list_of_streams[0];
+} __packed;
+
 #endif /* __LINUX_SCTP_H__ */
-- 
cgit v1.2.3


From c61f13eaa1ee17728c41370100d2d45c254ce76f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 13 Jan 2017 11:14:39 -0800
Subject: gcc-plugins: Add structleak for more stack initialization

This plugin detects any structures that contain __user attributes and
makes sure it is being fully initialized so that a specific class of
information exposure is eliminated. (This plugin was originally designed
to block the exposure of siginfo in CVE-2013-2141.)

Ported from grsecurity/PaX. This version adds a verbose option to the
plugin and the Kconfig.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/compiler.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index cf0fa5d86059..91c30cba984e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -27,7 +27,11 @@ extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 # define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member))
 #else /* __CHECKER__ */
-# define __user
+# ifdef STRUCTLEAK_PLUGIN
+#  define __user __attribute__((user))
+# else
+#  define __user
+# endif
 # define __kernel
 # define __safe
 # define __force
-- 
cgit v1.2.3


From 62bc306e2083436675e33b5bdeb6a77907d35971 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 17 Jan 2017 11:07:15 -0500
Subject: audit: log 32-bit socketcalls

32-bit socketcalls were not being logged by audit on x86_64 systems.
Log them.  This is basically a duplicate of the call from
net/socket.c:sys_socketcall(), but it addresses the impedance mismatch
between 32-bit userspace process and 64-bit kernel audit.

See: https://github.com/linux-audit/audit-kernel/issues/14

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/audit.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 9d4443f93db6..2be99b276d29 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -387,6 +387,20 @@ static inline int audit_socketcall(int nargs, unsigned long *args)
 		return __audit_socketcall(nargs, args);
 	return 0;
 }
+
+static inline int audit_socketcall_compat(int nargs, u32 *args)
+{
+	unsigned long a[AUDITSC_ARGS];
+	int i;
+
+	if (audit_dummy_context())
+		return 0;
+
+	for (i = 0; i < nargs; i++)
+		a[i] = (unsigned long)args[i];
+	return __audit_socketcall(nargs, a);
+}
+
 static inline int audit_sockaddr(int len, void *addr)
 {
 	if (unlikely(!audit_dummy_context()))
@@ -513,6 +527,12 @@ static inline int audit_socketcall(int nargs, unsigned long *args)
 {
 	return 0;
 }
+
+static inline int audit_socketcall_compat(int nargs, u32 *args)
+{
+	return 0;
+}
+
 static inline void audit_fd_pair(int fd1, int fd2)
 { }
 static inline int audit_sockaddr(int len, void *addr)
-- 
cgit v1.2.3


From fd61c6ba313de6758aeeab58fe03bd9fbbc8cea9 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 17 Jan 2017 15:51:07 -0800
Subject: net: ipv6: remove nowait arg to rt6_fill_node

All callers of rt6_fill_node pass 0 for nowait arg. Remove the arg and
simplify rt6_fill_node accordingly.

rt6_fill_node passes the nowait of 0 to ip6mr_get_route. Remove the
nowait arg from it as well.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 19a1c0c2993b..ce44e3e96d27 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -116,7 +116,7 @@ struct mfc6_cache {
 
 struct rtmsg;
 extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
-			   struct rtmsg *rtm, int nowait, u32 portid);
+			   struct rtmsg *rtm, u32 portid);
 
 #ifdef CONFIG_IPV6_MROUTE
 extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
-- 
cgit v1.2.3


From d407bd25a204bd66b7346dde24bd3d37ef0e0b05 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 18 Jan 2017 15:14:17 +0100
Subject: bpf: don't trigger OOM killer under pressure with map alloc

This patch adds two helpers, bpf_map_area_alloc() and bpf_map_area_free(),
that are to be used for map allocations. Using kmalloc() for very large
allocations can cause excessive work within the page allocator, so i) fall
back earlier to vmalloc() when the attempt is considered costly anyway,
and even more importantly ii) don't trigger OOM killer with any of the
allocators.

Since this is based on a user space request, for example, when creating
maps with element pre-allocation, we really want such requests to fail
instead of killing other user space processes.

Also, don't spam the kernel log with warnings should any of the allocations
fail under pressure. Given that, we can make backend selection in
bpf_map_area_alloc() generic, and convert all maps over to use this API
for spots with potentially large allocation requests.

Note, replacing the one kmalloc_array() is fine as overflow checks happen
earlier in htab_map_alloc(), since it must also protect the multiplication
for vmalloc() should kmalloc_array() fail.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05cf951df3fe..3ed1f3b1d594 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -247,6 +247,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
+void *bpf_map_area_alloc(size_t size);
+void bpf_map_area_free(void *base);
 
 extern int sysctl_unprivileged_bpf_disabled;
 
-- 
cgit v1.2.3


From 4a7c972644c1151f6dd34ff4b5f7eacb239e22ee Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 18 Jan 2017 17:45:01 +0100
Subject: net: Remove usage of net_device last_rx member

The network stack no longer uses the last_rx member of struct net_device
since the bonding driver switched to use its own private last_rx in
commit 9f242738376d ("bonding: use last_arp_rx in slave_last_rx()").

However, some drivers still (ab)use the field for their own purposes and
some driver just update it without actually using it.

Previously, there was an accompanying comment for the last_rx member
added in commit 4dc89133f49b ("net: add a comment on netdev->last_rx")
which asked drivers not to update is, unless really needed. However,
this commend was removed in commit f8ff080dacec ("bonding: remove
useless updating of slave->dev->last_rx"), so some drivers added later
on still did update last_rx.

Remove all usage of last_rx and switch three drivers (sky2, atp and
smc91c92_cs) which actually read and write it to use their own private
copy in netdev_priv.

Compile-tested with allyesconfig and allmodconfig on x86 and arm.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Veaceslav Falico <vfalico@gmail.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
Cc: Mirko Lindner <mlindner@marvell.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 97ae0ac513ee..3868c32d98af 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1551,7 +1551,6 @@ enum netdev_priv_flags {
  *	@ax25_ptr:	AX.25 specific data
  *	@ieee80211_ptr:	IEEE 802.11 specific data, assign before registering
  *
- *	@last_rx:	Time of last Rx
  *	@dev_addr:	Hw address (before bcast,
  *			because most packets are unicast)
  *
@@ -1777,8 +1776,6 @@ struct net_device {
 /*
  * Cache lines mostly used on receive path (including eth_type_trans())
  */
-	unsigned long		last_rx;
-
 	/* Interface address info used in eth_type_trans() */
 	unsigned char		*dev_addr;
 
-- 
cgit v1.2.3


From d69dece5f5b6bc7a5e39d2b6136ddc69469331fe Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 18 Jan 2017 17:09:05 -0800
Subject: LSM: Add /sys/kernel/security/lsm

I am still tired of having to find indirect ways to determine
what security modules are active on a system. I have added
/sys/kernel/security/lsm, which contains a comma separated
list of the active security modules. No more groping around
in /proc/filesystems or other clever hacks.

Unchanged from previous versions except for being updated
to the latest security next branch.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/lsm_hooks.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 9cf50ad2fe20..0f3c309065d7 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1875,6 +1875,7 @@ struct security_hook_list {
 	struct list_head		list;
 	struct list_head		*head;
 	union security_list_options	hook;
+	char				*lsm;
 };
 
 /*
@@ -1887,15 +1888,10 @@ struct security_hook_list {
 	{ .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } }
 
 extern struct security_hook_heads security_hook_heads;
+extern char *lsm_names;
 
-static inline void security_add_hooks(struct security_hook_list *hooks,
-				      int count)
-{
-	int i;
-
-	for (i = 0; i < count; i++)
-		list_add_tail_rcu(&hooks[i].list, hooks[i].head);
-}
+extern void security_add_hooks(struct security_hook_list *hooks, int count,
+				char *lsm);
 
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 /*
-- 
cgit v1.2.3


From 85b36c931ff328297572a3e6136fac573795ad79 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 18 Jan 2017 09:38:04 -0800
Subject: jump_labels: Move header guard #endif down where it belongs

The ending header guard is misplaced. This has no
functional change, this is just an eye-sore.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@alien8.de
Cc: bp@suse.de
Cc: catalin.marinas@arm.com
Cc: jbaron@akamai.com
Cc: pbonzini@redhat.com
Cc: tony.luck@intel.com
Link: http://lkml.kernel.org/r/20170118173804.16281-1-mcgrof@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jump_label.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a0547c571800..b63d6b7b0db0 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -402,6 +402,6 @@ extern bool ____wrong_branch_error(void);
 #define static_branch_enable(x)		static_key_enable(&(x)->key)
 #define static_branch_disable(x)	static_key_disable(&(x)->key)
 
-#endif	/* _LINUX_JUMP_LABEL_H */
-
 #endif /* __ASSEMBLY__ */
+
+#endif	/* _LINUX_JUMP_LABEL_H */
-- 
cgit v1.2.3


From 739e6f5945d88dcee01590913f6886132a10c215 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 11 Jan 2017 13:37:07 +0100
Subject: gpio: provide lockdep keys for nested/unnested irqchips

The helper function for adding a GPIO chip compiles in a lockdep
key for debugging, the same key is needed for nested chips as
well.

The macro construction is unreadable, replace this with two
static inlines instead.

The _gpiochip_irqchip_add prefixed function is not helpful,
rename it with gpiochip_irqchip_add_key() that tell us what the
function is actually doing.

Fixes: d245b3f9bd36 ("gpio: simplify adding threaded interrupts")
Cc: Roger Quadros <rogerq@ti.com>
Reported-by: Clemens Gruber <clemens.gruber@pqgruber.com>
Reported-by: Roger Quadros <rogerq@ti.com>
Reported-by: Grygorii Strashko <grygorii.strashko@ti.com>
Tested-by: Clemens Gruber <clemens.gruber@pqgruber.com>
Tested-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 70 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index c2748accea71..e973faba69dc 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -274,37 +274,67 @@ void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip,
 		struct irq_chip *irqchip,
 		int parent_irq);
 
-int _gpiochip_irqchip_add(struct gpio_chip *gpiochip,
+int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
+			     struct irq_chip *irqchip,
+			     unsigned int first_irq,
+			     irq_flow_handler_t handler,
+			     unsigned int type,
+			     bool nested,
+			     struct lock_class_key *lock_key);
+
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * Lockdep requires that each irqchip instance be created with a
+ * unique key so as to avoid unnecessary warnings. This upfront
+ * boilerplate static inlines provides such a key for each
+ * unique instance.
+ */
+static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
+				       struct irq_chip *irqchip,
+				       unsigned int first_irq,
+				       irq_flow_handler_t handler,
+				       unsigned int type)
+{
+	static struct lock_class_key key;
+
+	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
+					handler, type, false, &key);
+}
+
+static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
 			  struct irq_chip *irqchip,
 			  unsigned int first_irq,
 			  irq_flow_handler_t handler,
-			  unsigned int type,
-			  bool nested,
-			  struct lock_class_key *lock_key);
+			  unsigned int type)
+{
+
+	static struct lock_class_key key;
+
+	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
+					handler, type, true, &key);
+}
+#else
+static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
+				       struct irq_chip *irqchip,
+				       unsigned int first_irq,
+				       irq_flow_handler_t handler,
+				       unsigned int type)
+{
+	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
+					handler, type, false, NULL);
+}
 
-/* FIXME: I assume threaded IRQchips do not have the lockdep problem */
 static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
 			  struct irq_chip *irqchip,
 			  unsigned int first_irq,
 			  irq_flow_handler_t handler,
 			  unsigned int type)
 {
-	return _gpiochip_irqchip_add(gpiochip, irqchip, first_irq,
-				     handler, type, true, NULL);
+	return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
+					handler, type, true, NULL);
 }
-
-#ifdef CONFIG_LOCKDEP
-#define gpiochip_irqchip_add(...)				\
-(								\
-	({							\
-		static struct lock_class_key _key;		\
-		_gpiochip_irqchip_add(__VA_ARGS__, false, &_key); \
-	})							\
-)
-#else
-#define gpiochip_irqchip_add(...)				\
-	_gpiochip_irqchip_add(__VA_ARGS__, false, NULL)
-#endif
+#endif /* CONFIG_LOCKDEP */
 
 #endif /* CONFIG_GPIOLIB_IRQCHIP */
 
-- 
cgit v1.2.3


From f95bd041203b9f27a14e6ab733b9598049d7ffef Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 13 Jan 2017 11:00:25 +0100
Subject: memory: aemif: allow passing device lookup table as platform data

TI aemif driver creates its own subnodes of the device tree in order
to guarantee that all child devices are probed after the AEMIF timing
parameters are configured.

Some devices (e.g. da850) use struct of_dev_auxdata for clock lookup
but nodes created from within the aemif driver can't access the lookup
table.

Create a platform data structure that holds a pointer to
of_dev_auxdata so that we can use it with of_platform_populate().

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Sekhar Nori <nsekhar@ti.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/platform_data/ti-aemif.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 include/linux/platform_data/ti-aemif.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/ti-aemif.h b/include/linux/platform_data/ti-aemif.h
new file mode 100644
index 000000000000..ac72e115093c
--- /dev/null
+++ b/include/linux/platform_data/ti-aemif.h
@@ -0,0 +1,23 @@
+/*
+ * TI DaVinci AEMIF platform glue.
+ *
+ * Copyright (C) 2017 BayLibre SAS
+ *
+ * Author:
+ *   Bartosz Golaszewski <bgolaszewski@baylibre.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __TI_DAVINCI_AEMIF_DATA_H__
+#define __TI_DAVINCI_AEMIF_DATA_H__
+
+#include <linux/of_platform.h>
+
+struct aemif_platform_data {
+	struct of_dev_auxdata *dev_lookup;
+};
+
+#endif /* __TI_DAVINCI_AEMIF_DATA_H__ */
-- 
cgit v1.2.3


From 6d2c5d6c46dd3d9924831efd6c913fdf4d484985 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 16 Jan 2017 17:50:02 +0100
Subject: kmod: make usermodehelper path a const string

This is in preparation for making it so that usermode helper programs
can't be changed, if desired, by userspace.  We will tackle the mess of
cleaning up the write-ability of argv and env later, that's going to
take more work, for much less gain...

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kmod.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index fcfd2bf14d3f..c4e441e00db5 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -56,7 +56,7 @@ struct file;
 struct subprocess_info {
 	struct work_struct work;
 	struct completion *complete;
-	char *path;
+	const char *path;
 	char **argv;
 	char **envp;
 	int wait;
@@ -67,10 +67,11 @@ struct subprocess_info {
 };
 
 extern int
-call_usermodehelper(char *path, char **argv, char **envp, int wait);
+call_usermodehelper(const char *path, char **argv, char **envp, int wait);
 
 extern struct subprocess_info *
-call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask,
+call_usermodehelper_setup(const char *path, char **argv, char **envp,
+			  gfp_t gfp_mask,
 			  int (*init)(struct subprocess_info *info, struct cred *new),
 			  void (*cleanup)(struct subprocess_info *), void *data);
 
-- 
cgit v1.2.3


From 219fb0c1436e4893a290ba270bc0e644d02465a3 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Wed, 11 Jan 2017 16:43:37 +0200
Subject: serial: sh-sci: Remove the platform data dma slave rx/tx channel IDs

Only SH platforms still use platform data for the sh-sci, and none of
them declare DMA channels connected to the SCI. Remove the corresponding
platform data fields and simplify the driver accordingly.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_sci.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 9f2bfd055742..1a894c47bfc0 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -71,9 +71,6 @@ struct plat_sci_port {
 	unsigned char	regtype;
 
 	struct plat_sci_port_ops	*ops;
-
-	unsigned int	dma_slave_tx;
-	unsigned int	dma_slave_rx;
 };
 
 #endif /* __LINUX_SERIAL_SCI_H */
-- 
cgit v1.2.3


From d5cb1319a91d4f1328b1c70b82c5899acd96af85 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Wed, 11 Jan 2017 16:43:38 +0200
Subject: serial: sh-sci: Remove manual break debouncing

The sh-sci driver implements manual break debouncing for a few SH
platforms by reading the value of the RX pin port register. This feature
is optional and the driver considers all negative or zero values of the
platform data port_reg field as invalid. As the four platforms that set
the field to a register address all use an address higher than
0x7fffffff, the driver will always consider the value as invalid and
never perform debouncing. The feature is unused, remove it.

Debouncing could be implemented properly in the future using the pinctrl
and GPIO APIs if desired.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_sci.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 1a894c47bfc0..b4419931bf4c 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -9,8 +9,6 @@
  * Generic header for SuperH (H)SCI(F) (used by sh/sh64 and related parts)
  */
 
-#define SCIx_NOT_SUPPORTED	(-1)
-
 /* Serial Control Register (@ = not supported by all parts) */
 #define SCSCR_TIE	BIT(7)	/* Transmit Interrupt Enable */
 #define SCSCR_RIE	BIT(6)	/* Receive Interrupt Enable */
@@ -41,8 +39,6 @@ enum {
 	SCIx_NR_REGTYPES,
 };
 
-struct device;
-
 struct plat_sci_port_ops {
 	void (*init_pins)(struct uart_port *, unsigned int cflag);
 };
@@ -66,7 +62,6 @@ struct plat_sci_port {
 	/*
 	 * Platform overrides if necessary, defaults otherwise.
 	 */
-	int		port_reg;
 	unsigned char	regshift;
 	unsigned char	regtype;
 
-- 
cgit v1.2.3


From 97ed9790c514066bfae67f22e084b505ed5af436 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Wed, 11 Jan 2017 16:43:39 +0200
Subject: serial: sh-sci: Remove unused platform data capabilities field

The field isn't set by any platform but is only used internally in the
driver to hold data parsed from DT. Move it to the sci_port structure.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_sci.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index b4419931bf4c..f9a4526f4ec5 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -43,18 +43,12 @@ struct plat_sci_port_ops {
 	void (*init_pins)(struct uart_port *, unsigned int cflag);
 };
 
-/*
- * Port-specific capabilities
- */
-#define SCIx_HAVE_RTSCTS	BIT(0)
-
 /*
  * Platform device specific platform_data struct
  */
 struct plat_sci_port {
 	unsigned int	type;			/* SCI / SCIF / IRDA / HSCIF */
 	upf_t		flags;			/* UPF_* flags */
-	unsigned long	capabilities;		/* Port features/capabilities */
 
 	unsigned int	sampling_rate;
 	unsigned int	scscr;			/* SCSCR initialization */
-- 
cgit v1.2.3


From dfc80387aefb78161f83732804c6d01c89c24595 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Wed, 11 Jan 2017 16:43:40 +0200
Subject: serial: sh-sci: Compute the regshift value for SCI ports

SCI instances found in SH SoCs have different spacing between registers
depending on the SoC. The platform data contains a regshift field that
tells the driver by how many bits to shift the register offset to
compute its address. We can compute the regshift value automatically
based on the memory resource size, there's no need to pass the value
through platform data.

Fix the sh7750 SCI and sh7760 SIM port memory resources length to ensure
proper computation of the regshift value.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_sci.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index f9a4526f4ec5..e598eaef3962 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -56,7 +56,6 @@ struct plat_sci_port {
 	/*
 	 * Platform overrides if necessary, defaults otherwise.
 	 */
-	unsigned char	regshift;
 	unsigned char	regtype;
 
 	struct plat_sci_port_ops	*ops;
-- 
cgit v1.2.3


From 9ed90d20449b01beb71a4e125d291a36c80c4ad4 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 16 Jan 2017 16:54:28 -0600
Subject: tty: move the non-file related parts of tty_release to new
 tty_release_struct

For in-kernel tty users, we need to be able to create and destroy
'struct tty' that are not associated with a file. The creation side is
fine, but tty_release() needs to be split into the file handle portion
and the struct tty portion. Introduce a new function, tty_release_struct,
to handle just the destroying of a struct tty.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-By: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 40144f382516..86c7853282b7 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -528,6 +528,7 @@ extern int tty_alloc_file(struct file *file);
 extern void tty_add_file(struct tty_struct *tty, struct file *file);
 extern void tty_free_file(struct file *file);
 extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx);
+extern void tty_release_struct(struct tty_struct *tty, int idx);
 extern int tty_release(struct inode *inode, struct file *filp);
 extern void tty_init_termios(struct tty_struct *tty);
 extern int tty_standard_install(struct tty_driver *driver,
-- 
cgit v1.2.3


From 134e6a034cb004ed5acd3048792de70ced1c6cf5 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 19 Jan 2017 08:57:14 -0500
Subject: tracing: Show number of constants profiled in likely profiler

Now that constants are traced, it is useful to see the number of constants
that are traced in the likely/unlikely profiler in order to know if they
should be ignored or not.

The likely/unlikely will display a number after the "correct" number if a
"constant" count exists.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/compiler.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index bbbe1570de1c..a73cc9afa784 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -101,13 +101,18 @@ struct ftrace_branch_data {
 	};
 };
 
+struct ftrace_likely_data {
+	struct ftrace_branch_data	data;
+	unsigned long			constant;
+};
+
 /*
  * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
  * to disable branch tracing on a per file basis.
  */
 #if defined(CONFIG_TRACE_BRANCH_PROFILING) \
     && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
-void ftrace_likely_update(struct ftrace_branch_data *f, int val,
+void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 			  int expect, int is_constant);
 
 #define likely_notrace(x)	__builtin_expect(!!(x), 1)
@@ -115,13 +120,13 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val,
 
 #define __branch_check__(x, expect, is_constant) ({			\
 			int ______r;					\
-			static struct ftrace_branch_data		\
+			static struct ftrace_likely_data		\
 				__attribute__((__aligned__(4)))		\
 				__attribute__((section("_ftrace_annotated_branch"))) \
 				______f = {				\
-				.func = __func__,			\
-				.file = __FILE__,			\
-				.line = __LINE__,			\
+				.data.func = __func__,			\
+				.data.file = __FILE__,			\
+				.data.line = __LINE__,			\
 			};						\
 			______r = __builtin_expect(!!(x), expect);	\
 			ftrace_likely_update(&______f, ______r,		\
-- 
cgit v1.2.3


From 579b2a65d245c093d3e63845c320b9321f112b75 Mon Sep 17 00:00:00 2001
From: Mitchel Humpherys <mitchelh@codeaurora.org>
Date: Fri, 6 Jan 2017 18:58:08 +0530
Subject: iommu: add IOMMU_PRIV attribute

Add the IOMMU_PRIV attribute, which is used to indicate privileged
mappings.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0ff5111f6959..69e2417a2965 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -31,6 +31,13 @@
 #define IOMMU_CACHE	(1 << 2) /* DMA cache coherency */
 #define IOMMU_NOEXEC	(1 << 3)
 #define IOMMU_MMIO	(1 << 4) /* e.g. things like MSI doorbells */
+/*
+ * This is to make the IOMMU API setup privileged
+ * mapppings accessible by the master only at higher
+ * privileged execution level and inaccessible at
+ * less privileged levels.
+ */
+#define IOMMU_PRIV	(1 << 5)
 
 struct iommu_ops;
 struct iommu_group;
-- 
cgit v1.2.3


From b2fb366425ceb85dca56afa538257ec5a2c4f6d1 Mon Sep 17 00:00:00 2001
From: Mitchel Humpherys <mitchelh@codeaurora.org>
Date: Fri, 6 Jan 2017 18:58:11 +0530
Subject: common: DMA-mapping: add DMA_ATTR_PRIVILEGED attribute

This patch adds the DMA_ATTR_PRIVILEGED attribute to the DMA-mapping
subsystem.

Some advanced peripherals such as remote processors and GPUs perform
accesses to DMA buffers in both privileged "supervisor" and unprivileged
"user" modes.  This attribute is used to indicate to the DMA-mapping
subsystem that the buffer is fully accessible at the elevated privilege
level (and ideally inaccessible or at least read-only at the
lesser-privileged levels).

Cc: linux-doc@vger.kernel.org
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/dma-mapping.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 10c5a17b1f51..c24721a33b4c 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -62,6 +62,13 @@
  */
 #define DMA_ATTR_NO_WARN	(1UL << 8)
 
+/*
+ * DMA_ATTR_PRIVILEGED: used to indicate that the buffer is fully
+ * accessible at an elevated privilege level (and ideally inaccessible or
+ * at least read-only at lesser-privileged levels).
+ */
+#define DMA_ATTR_PRIVILEGED		(1UL << 9)
+
 /*
  * A dma_addr_t can hold any valid DMA or bus address for the platform.
  * It can be given to a device to use as a DMA source or target.  A CPU cannot
-- 
cgit v1.2.3


From 737c85ca1c3af4f97acb61cd53415ec039b31111 Mon Sep 17 00:00:00 2001
From: Mitchel Humpherys <mitchelh@codeaurora.org>
Date: Fri, 6 Jan 2017 18:58:12 +0530
Subject: arm64/dma-mapping: Implement DMA_ATTR_PRIVILEGED

The newly added DMA_ATTR_PRIVILEGED is useful for creating mappings that
are only accessible to privileged DMA engines.  Implement it in
dma-iommu.c so that the ARM64 DMA IOMMU mapper can make use of it.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/dma-iommu.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 7f7e9a7e3839..c5511e1d5560 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -34,7 +34,8 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 		u64 size, struct device *dev);
 
 /* General helpers for DMA-API <-> IOMMU-API interaction */
-int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);
+int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
+		     unsigned long attrs);
 
 /*
  * These implement the bulk of the relevant DMA mapping callbacks, but require
-- 
cgit v1.2.3


From c92d781f1a5ea19708b1e1e2b85a3fbd4a738b30 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 16 Jan 2017 16:54:31 -0600
Subject: tty: constify tty_ldisc_receive_buf buffer pointer

This is needed to work with the client operations which uses const ptrs.

Really, the flags pointer could be const, too, but this would be a tree
wide fix.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-By: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 86c7853282b7..21c0fabfed60 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -657,7 +657,7 @@ extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty);
 extern void tty_ldisc_release(struct tty_struct *tty);
 extern void tty_ldisc_init(struct tty_struct *tty);
 extern void tty_ldisc_deinit(struct tty_struct *tty);
-extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p,
+extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p,
 				 char *f, int count);
 
 /* n_tty.c */
-- 
cgit v1.2.3


From 4567d686f5c6d955e57a3afa1741944c1e7f4033 Mon Sep 17 00:00:00 2001
From: Volodymyr Bendiuga <volodymyr.bendiuga@gmail.com>
Date: Thu, 19 Jan 2017 17:05:04 +0100
Subject: phy: increase size of MII_BUS_ID_SIZE and bus_id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some bus names are pretty long and do not fit into
17 chars. Increase therefore MII_BUS_ID_SIZE and
phy_fixup.bus_id to larger number. Now mii_bus.id
can host larger name.

Signed-off-by: Volodymyr Bendiuga <volodymyr.bendiuga@gmail.com>
Signed-off-by: Magnus Öberg <magnus.oberg@westermo.se>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index f7d95f644eed..5c9d2529685f 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -158,11 +158,7 @@ static inline const char *phy_modes(phy_interface_t interface)
 /* Used when trying to connect to a specific phy (mii bus id:phy device id) */
 #define PHY_ID_FMT "%s:%02x"
 
-/*
- * Need to be a little smaller than phydev->dev.bus_id to leave room
- * for the ":%02x"
- */
-#define MII_BUS_ID_SIZE	(20 - 3)
+#define MII_BUS_ID_SIZE	61
 
 /* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit
    IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips. */
@@ -632,7 +628,7 @@ struct phy_driver {
 /* A Structure for boards to register fixups with the PHY Lib */
 struct phy_fixup {
 	struct list_head list;
-	char bus_id[20];
+	char bus_id[MII_BUS_ID_SIZE + 3];
 	u32 phy_uid;
 	u32 phy_uid_mask;
 	int (*run)(struct phy_device *phydev);
-- 
cgit v1.2.3


From f9a1ef720e9e32bc6a4a382c15ac77d62749c79e Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Mon, 10 Oct 2016 16:05:53 +0300
Subject: net/mlx5: Add MTPPS and MTPPSE registers infrastructure

Implement query and set functionality for MTPPS and MTPPSE registers.
MTPPS (Management Pulse Per Second) provides the device PPS capabilities,
configures the PPS in and out modules and holds the PPS in time stamp.
Query MTPPS is supported only when HCA_CAP.pps is set and modify is supported
when HCA_CAP.pps_modify is set.

MTPPSE (Management Pulse Per Second Event) configures the different event
generation modes for PPS. Supported when HCA_CAP.pps is set.

Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   | 18 +++++++++++++
 include/linux/mlx5/driver.h   |  3 +++
 include/linux/mlx5/mlx5_ifc.h | 60 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 80 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 7c5265db02a9..6ac8eb5a89ca 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -289,6 +289,7 @@ enum mlx5_event {
 	MLX5_EVENT_TYPE_GPIO_EVENT	   = 0x15,
 	MLX5_EVENT_TYPE_PORT_MODULE_EVENT  = 0x16,
 	MLX5_EVENT_TYPE_REMOTE_CONFIG	   = 0x19,
+	MLX5_EVENT_TYPE_PPS_EVENT          = 0x25,
 
 	MLX5_EVENT_TYPE_DB_BF_CONGESTION   = 0x1a,
 	MLX5_EVENT_TYPE_STALL_EVENT	   = 0x1b,
@@ -569,6 +570,22 @@ struct mlx5_eqe_port_module {
 	u8        error_type;
 } __packed;
 
+struct mlx5_eqe_pps {
+	u8		rsvd0[3];
+	u8		pin;
+	u8		rsvd1[4];
+	union {
+		struct {
+			__be32		time_sec;
+			__be32		time_nsec;
+		};
+		struct {
+			__be64		time_stamp;
+		};
+	};
+	u8		rsvd2[12];
+} __packed;
+
 union ev_data {
 	__be32				raw[7];
 	struct mlx5_eqe_cmd		cmd;
@@ -583,6 +600,7 @@ union ev_data {
 	struct mlx5_eqe_page_fault	page_fault;
 	struct mlx5_eqe_vport_change	vport_change;
 	struct mlx5_eqe_port_module	port_module;
+	struct mlx5_eqe_pps		pps;
 } __packed;
 
 struct mlx5_eqe {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3a309f6a4a15..ebbc8834063b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -125,6 +125,8 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MLCR		 = 0x902b,
+	MLX5_REG_MTPPS		 = 0x9053,
+	MLX5_REG_MTPPSE		 = 0x9054,
 };
 
 enum mlx5_dcbx_oper_mode {
@@ -172,6 +174,7 @@ enum mlx5_dev_event {
 	MLX5_DEV_EVENT_PKEY_CHANGE,
 	MLX5_DEV_EVENT_GUID_CHANGE,
 	MLX5_DEV_EVENT_CLIENT_REREG,
+	MLX5_DEV_EVENT_PPS,
 };
 
 enum mlx5_port_status {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 37327f6ba9cb..f5292f3b0301 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -835,7 +835,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         port_type[0x2];
 	u8         num_ports[0x8];
 
-	u8         reserved_at_1c0[0x3];
+	u8         reserved_at_1c0[0x1];
+	u8         pps[0x1];
+	u8         pps_modify[0x1];
 	u8         log_max_msg[0x5];
 	u8         reserved_at_1c8[0x4];
 	u8         max_tc[0x4];
@@ -7821,6 +7823,60 @@ struct mlx5_ifc_initial_seg_bits {
 	u8         reserved_at_80a0[0x17fc0];
 };
 
+struct mlx5_ifc_mtpps_reg_bits {
+	u8         reserved_at_0[0xc];
+	u8         cap_number_of_pps_pins[0x4];
+	u8         reserved_at_10[0x4];
+	u8         cap_max_num_of_pps_in_pins[0x4];
+	u8         reserved_at_18[0x4];
+	u8         cap_max_num_of_pps_out_pins[0x4];
+
+	u8         reserved_at_20[0x24];
+	u8         cap_pin_3_mode[0x4];
+	u8         reserved_at_48[0x4];
+	u8         cap_pin_2_mode[0x4];
+	u8         reserved_at_50[0x4];
+	u8         cap_pin_1_mode[0x4];
+	u8         reserved_at_58[0x4];
+	u8         cap_pin_0_mode[0x4];
+
+	u8         reserved_at_60[0x4];
+	u8         cap_pin_7_mode[0x4];
+	u8         reserved_at_68[0x4];
+	u8         cap_pin_6_mode[0x4];
+	u8         reserved_at_70[0x4];
+	u8         cap_pin_5_mode[0x4];
+	u8         reserved_at_78[0x4];
+	u8         cap_pin_4_mode[0x4];
+
+	u8         reserved_at_80[0x80];
+
+	u8         enable[0x1];
+	u8         reserved_at_101[0xb];
+	u8         pattern[0x4];
+	u8         reserved_at_110[0x4];
+	u8         pin_mode[0x4];
+	u8         pin[0x8];
+
+	u8         reserved_at_120[0x20];
+
+	u8         time_stamp[0x40];
+
+	u8         out_pulse_duration[0x10];
+	u8         out_periodic_adjustment[0x10];
+
+	u8         reserved_at_1a0[0x60];
+};
+
+struct mlx5_ifc_mtppse_reg_bits {
+	u8         reserved_at_0[0x18];
+	u8         pin[0x8];
+	u8         event_arm[0x1];
+	u8         reserved_at_21[0x1b];
+	u8         event_generation_mode[0x4];
+	u8         reserved_at_40[0x40];
+};
+
 union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
 	struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
@@ -7865,6 +7921,8 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_pvlc_reg_bits pvlc_reg;
 	struct mlx5_ifc_slrg_reg_bits slrg_reg;
 	struct mlx5_ifc_sltp_reg_bits sltp_reg;
+	struct mlx5_ifc_mtpps_reg_bits mtpps_reg;
+	struct mlx5_ifc_mtppse_reg_bits mtppse_reg;
 	u8         reserved_at_0[0x60e0];
 };
 
-- 
cgit v1.2.3


From 105433659d394b70dc3b6ceb65d0c1673e14cee1 Mon Sep 17 00:00:00 2001
From: Mohamad Haj Yahia <mohamad@mellanox.com>
Date: Sun, 9 Oct 2016 16:25:43 +0300
Subject: net/mlx5: Add support to s-tag in mlx5 firmware interface

Add svlan_tag and rename vlan_tag to cvlan_tag in flow table entry
match param.

Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f5292f3b0301..6f19e4b8574f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -365,8 +365,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
 	u8         ip_protocol[0x8];
 	u8         ip_dscp[0x6];
 	u8         ip_ecn[0x2];
-	u8         vlan_tag[0x1];
-	u8         reserved_at_91[0x1];
+	u8         cvlan_tag[0x1];
+	u8         svlan_tag[0x1];
 	u8         frag[0x1];
 	u8         reserved_at_93[0x4];
 	u8         tcp_flags[0x9];
@@ -398,9 +398,11 @@ struct mlx5_ifc_fte_match_set_misc_bits {
 	u8         inner_second_cfi[0x1];
 	u8         inner_second_vid[0xc];
 
-	u8         outer_second_vlan_tag[0x1];
-	u8         inner_second_vlan_tag[0x1];
-	u8         reserved_at_62[0xe];
+	u8         outer_second_cvlan_tag[0x1];
+	u8         inner_second_cvlan_tag[0x1];
+	u8         outer_second_svlan_tag[0x1];
+	u8         inner_second_svlan_tag[0x1];
+	u8         reserved_at_64[0xc];
 	u8         gre_protocol[0x10];
 
 	u8         gre_key_h[0x18];
-- 
cgit v1.2.3


From cfdcbceaeffc669b70d904d80a2df9c86c232566 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Thu, 8 Dec 2016 15:52:00 +0200
Subject: net/mlx5: Expose PCAM, MCAM registers infrastructure

PCAM: Ports capabilities mask register.
MCAM: Management capabilities mask register.

PCAM and MCAM registers will provide information regarding firmware
support for different features, in order to avoid cases where new driver
combined with old firmware results in syndromes (for ex. PCIe counters
before this patchset).

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   | 16 ++++++++++++
 include/linux/mlx5/driver.h   |  2 ++
 include/linux/mlx5/mlx5_ifc.h | 60 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 77 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 6ac8eb5a89ca..79f38e67fe2f 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -970,6 +970,22 @@ enum mlx5_cap_type {
 	MLX5_CAP_NUM
 };
 
+enum mlx5_pcam_reg_groups {
+	MLX5_PCAM_REGS_5000_TO_507F                 = 0x0,
+};
+
+enum mlx5_pcam_feature_groups {
+	MLX5_PCAM_FEATURE_ENHANCED_FEATURES         = 0x0,
+};
+
+enum mlx5_mcam_reg_groups {
+	MLX5_MCAM_REGS_FIRST_128                    = 0x0,
+};
+
+enum mlx5_mcam_feature_groups {
+	MLX5_MCAM_FEATURE_ENHANCED_FEATURES         = 0x0,
+};
+
 /* GET Dev Caps macros */
 #define MLX5_CAP_GEN(mdev, cap) \
 	MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ebbc8834063b..60c2b156da8c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -121,12 +121,14 @@ enum {
 	MLX5_REG_PVLC		 = 0x500f,
 	MLX5_REG_PCMR		 = 0x5041,
 	MLX5_REG_PMLP		 = 0x5002,
+	MLX5_REG_PCAM		 = 0x507f,
 	MLX5_REG_NODE_DESC	 = 0x6001,
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MLCR		 = 0x902b,
 	MLX5_REG_MTPPS		 = 0x9053,
 	MLX5_REG_MTPPSE		 = 0x9054,
+	MLX5_REG_MCAM		 = 0x907f,
 };
 
 enum mlx5_dcbx_oper_mode {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6f19e4b8574f..e8061a95326a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -826,7 +826,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         nic_flow_table[0x1];
 	u8         eswitch_flow_table[0x1];
 	u8	   early_vf_enable[0x1];
-	u8         reserved_at_1a9[0x2];
+	u8         mcam_reg[0x1];
+	u8         pcam_reg[0x1];
 	u8         local_ca_ack_delay[0x5];
 	u8         port_module_event[0x1];
 	u8         reserved_at_1b1[0x1];
@@ -7481,6 +7482,63 @@ struct mlx5_ifc_peir_reg_bits {
 	u8         error_type[0x8];
 };
 
+struct mlx5_ifc_pcam_enhanced_features_bits {
+	u8         reserved_at_0[0x7e];
+
+	u8         ppcnt_discard_group[0x1];
+	u8         ppcnt_statistical_group[0x1];
+};
+
+struct mlx5_ifc_pcam_reg_bits {
+	u8         reserved_at_0[0x8];
+	u8         feature_group[0x8];
+	u8         reserved_at_10[0x8];
+	u8         access_reg_group[0x8];
+
+	u8         reserved_at_20[0x20];
+
+	union {
+		u8         reserved_at_0[0x80];
+	} port_access_reg_cap_mask;
+
+	u8         reserved_at_c0[0x80];
+
+	union {
+		struct mlx5_ifc_pcam_enhanced_features_bits enhanced_features;
+		u8         reserved_at_0[0x80];
+	} feature_cap_mask;
+
+	u8         reserved_at_1c0[0xc0];
+};
+
+struct mlx5_ifc_mcam_enhanced_features_bits {
+	u8         reserved_at_0[0x7f];
+
+	u8         pcie_performance_group[0x1];
+};
+
+struct mlx5_ifc_mcam_reg_bits {
+	u8         reserved_at_0[0x8];
+	u8         feature_group[0x8];
+	u8         reserved_at_10[0x8];
+	u8         access_reg_group[0x8];
+
+	u8         reserved_at_20[0x20];
+
+	union {
+		u8         reserved_at_0[0x80];
+	} mng_access_reg_cap_mask;
+
+	u8         reserved_at_c0[0x80];
+
+	union {
+		struct mlx5_ifc_mcam_enhanced_features_bits enhanced_features;
+		u8         reserved_at_0[0x80];
+	} mng_feature_cap_mask;
+
+	u8         reserved_at_1c0[0x80];
+};
+
 struct mlx5_ifc_pcap_reg_bits {
 	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-- 
cgit v1.2.3


From 71862561f3a62015a11de16d1c306481e8415c08 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Thu, 8 Dec 2016 16:03:31 +0200
Subject: net/mlx5: Query and cache PCAM, MCAM registers on initialization

On load_one, we now cache our capabilities registers internally, similar
to QUERY_HCA_CAP. Capabilities can later be queried using macros
introduced in this patch.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h | 6 ++++++
 include/linux/mlx5/driver.h | 4 ++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 79f38e67fe2f..f21528abfb74 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1081,6 +1081,12 @@ enum mlx5_mcam_feature_groups {
 #define MLX5_CAP_QOS(mdev, cap)\
 	MLX5_GET(qos_cap, mdev->hca_caps_cur[MLX5_CAP_QOS], cap)
 
+#define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
+	MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
+
+#define MLX5_CAP_MCAM_FEATURE(mdev, fld) \
+	MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld)
+
 enum {
 	MLX5_CMD_STAT_OK			= 0x0,
 	MLX5_CMD_STAT_INT_ERR			= 0x1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 60c2b156da8c..69c4661d391e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -739,6 +739,10 @@ struct mlx5_core_dev {
 	struct mlx5_port_caps	port_caps[MLX5_MAX_PORTS];
 	u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
 	u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
+	struct {
+		u32 pcam[MLX5_ST_SZ_DW(pcam_reg)];
+		u32 mcam[MLX5_ST_SZ_DW(mcam_reg)];
+	} caps;
 	phys_addr_t		iseg_base;
 	struct mlx5_init_seg __iomem *iseg;
 	enum mlx5_device_state	state;
-- 
cgit v1.2.3


From d8dc0508c50f838f8abcf023cd74ca9a0f86b5a8 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Tue, 27 Sep 2016 17:04:51 +0300
Subject: net/mlx5: Add PPCNT physical layer statistical group infrastructure

Add the needed infrastructure for future use of PPCNT physical layer
statistical group.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/device.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index f21528abfb74..5ad1d3ca47dc 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1115,6 +1115,7 @@ enum {
 	MLX5_PER_PRIORITY_COUNTERS_GROUP      = 0x10,
 	MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11,
 	MLX5_PHYSICAL_LAYER_COUNTERS_GROUP    = 0x12,
+	MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16,
 	MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
 };
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index e8061a95326a..f4860fa719d9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1384,6 +1384,42 @@ struct mlx5_ifc_phys_layer_cntrs_bits {
 	u8         reserved_at_640[0x180];
 };
 
+struct mlx5_ifc_phys_layer_statistical_cntrs_bits {
+	u8         time_since_last_clear_high[0x20];
+
+	u8         time_since_last_clear_low[0x20];
+
+	u8         phy_received_bits_high[0x20];
+
+	u8         phy_received_bits_low[0x20];
+
+	u8         phy_symbol_errors_high[0x20];
+
+	u8         phy_symbol_errors_low[0x20];
+
+	u8         phy_corrected_bits_high[0x20];
+
+	u8         phy_corrected_bits_low[0x20];
+
+	u8         phy_corrected_bits_lane0_high[0x20];
+
+	u8         phy_corrected_bits_lane0_low[0x20];
+
+	u8         phy_corrected_bits_lane1_high[0x20];
+
+	u8         phy_corrected_bits_lane1_low[0x20];
+
+	u8         phy_corrected_bits_lane2_high[0x20];
+
+	u8         phy_corrected_bits_lane2_low[0x20];
+
+	u8         phy_corrected_bits_lane3_high[0x20];
+
+	u8         phy_corrected_bits_lane3_low[0x20];
+
+	u8         reserved_at_200[0x5c0];
+};
+
 struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits {
 	u8	   symbol_error_counter[0x10];
 
@@ -2928,6 +2964,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
 	struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
 	struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout;
 	struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
+	struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs;
 	u8         reserved_at_0[0x7c0];
 };
 
-- 
cgit v1.2.3


From 8ed1a6306dc7892b63be7cdb1e3b1123265f42ff Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Thu, 17 Nov 2016 13:46:01 +0200
Subject: net/mlx5: Add MPCNT register infrastructure

Add the needed infrastructure for future use of MPCNT register.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   |  4 ++++
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 5ad1d3ca47dc..1cf97dce8215 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1119,6 +1119,10 @@ enum {
 	MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
 };
 
+enum {
+	MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP       = 0x0,
+};
+
 static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
 {
 	if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 69c4661d391e..f4d6d390a9cf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -126,6 +126,7 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MLCR		 = 0x902b,
+	MLX5_REG_MPCNT		 = 0x9051,
 	MLX5_REG_MTPPS		 = 0x9053,
 	MLX5_REG_MTPPSE		 = 0x9054,
 	MLX5_REG_MCAM		 = 0x907f,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f4860fa719d9..d96ebc319d63 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1802,6 +1802,30 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
 	u8         reserved_at_4c0[0x300];
 };
 
+struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits {
+	u8         life_time_counter_high[0x20];
+
+	u8         life_time_counter_low[0x20];
+
+	u8         rx_errors[0x20];
+
+	u8         tx_errors[0x20];
+
+	u8         l0_to_recovery_eieos[0x20];
+
+	u8         l0_to_recovery_ts[0x20];
+
+	u8         l0_to_recovery_framing[0x20];
+
+	u8         l0_to_recovery_retrain[0x20];
+
+	u8         crc_error_dllp[0x20];
+
+	u8         crc_error_tlp[0x20];
+
+	u8         reserved_at_140[0x680];
+};
+
 struct mlx5_ifc_cmd_inter_comp_event_bits {
 	u8         command_completion_vector[0x20];
 
@@ -2968,6 +2992,11 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
 	u8         reserved_at_0[0x7c0];
 };
 
+union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits {
+	struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout;
+	u8         reserved_at_0[0x7c0];
+};
+
 union mlx5_ifc_event_auto_bits {
 	struct mlx5_ifc_comp_event_bits comp_event;
 	struct mlx5_ifc_dct_events_bits dct_events;
@@ -7290,6 +7319,18 @@ struct mlx5_ifc_ppcnt_reg_bits {
 	union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
 
+struct mlx5_ifc_mpcnt_reg_bits {
+	u8         reserved_at_0[0x8];
+	u8         pcie_index[0x8];
+	u8         reserved_at_10[0xa];
+	u8         grp[0x6];
+
+	u8         clr[0x1];
+	u8         reserved_at_21[0x1f];
+
+	union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set;
+};
+
 struct mlx5_ifc_ppad_reg_bits {
 	u8         reserved_at_0[0x3];
 	u8         single_mac[0x1];
@@ -8006,6 +8047,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
 	struct mlx5_ifc_ppad_reg_bits ppad_reg;
 	struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
+	struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg;
 	struct mlx5_ifc_pplm_reg_bits pplm_reg;
 	struct mlx5_ifc_pplr_reg_bits pplr_reg;
 	struct mlx5_ifc_ppsc_reg_bits ppsc_reg;
-- 
cgit v1.2.3


From 701052c578195e6e02a22647fa6fd1c90c31dafd Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 14 Dec 2016 17:40:41 +0200
Subject: net/mlx5: Move cached hca caps to designated caps struct

The caps structure consists of hca caps and port/management caps,
all under one roof.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h | 34 +++++++++++++++++-----------------
 include/linux/mlx5/driver.h |  4 ++--
 2 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 1cf97dce8215..7b6cd67a263f 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -988,36 +988,36 @@ enum mlx5_mcam_feature_groups {
 
 /* GET Dev Caps macros */
 #define MLX5_CAP_GEN(mdev, cap) \
-	MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap)
+	MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
 
 #define MLX5_CAP_GEN_MAX(mdev, cap) \
-	MLX5_GET(cmd_hca_cap, mdev->hca_caps_max[MLX5_CAP_GENERAL], cap)
+	MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap)
 
 #define MLX5_CAP_ETH(mdev, cap) \
 	MLX5_GET(per_protocol_networking_offload_caps,\
-		 mdev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap)
+		 mdev->caps.hca_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap)
 
 #define MLX5_CAP_ETH_MAX(mdev, cap) \
 	MLX5_GET(per_protocol_networking_offload_caps,\
-		 mdev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], cap)
+		 mdev->caps.hca_max[MLX5_CAP_ETHERNET_OFFLOADS], cap)
 
 #define MLX5_CAP_ROCE(mdev, cap) \
-	MLX5_GET(roce_cap, mdev->hca_caps_cur[MLX5_CAP_ROCE], cap)
+	MLX5_GET(roce_cap, mdev->caps.hca_cur[MLX5_CAP_ROCE], cap)
 
 #define MLX5_CAP_ROCE_MAX(mdev, cap) \
-	MLX5_GET(roce_cap, mdev->hca_caps_max[MLX5_CAP_ROCE], cap)
+	MLX5_GET(roce_cap, mdev->caps.hca_max[MLX5_CAP_ROCE], cap)
 
 #define MLX5_CAP_ATOMIC(mdev, cap) \
-	MLX5_GET(atomic_caps, mdev->hca_caps_cur[MLX5_CAP_ATOMIC], cap)
+	MLX5_GET(atomic_caps, mdev->caps.hca_cur[MLX5_CAP_ATOMIC], cap)
 
 #define MLX5_CAP_ATOMIC_MAX(mdev, cap) \
-	MLX5_GET(atomic_caps, mdev->hca_caps_max[MLX5_CAP_ATOMIC], cap)
+	MLX5_GET(atomic_caps, mdev->caps.hca_max[MLX5_CAP_ATOMIC], cap)
 
 #define MLX5_CAP_FLOWTABLE(mdev, cap) \
-	MLX5_GET(flow_table_nic_cap, mdev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], cap)
+	MLX5_GET(flow_table_nic_cap, mdev->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap)
 
 #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \
-	MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap)
+	MLX5_GET(flow_table_nic_cap, mdev->caps.hca_max[MLX5_CAP_FLOW_TABLE], cap)
 
 #define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \
 	MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap)
@@ -1039,11 +1039,11 @@ enum mlx5_mcam_feature_groups {
 
 #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
 	MLX5_GET(flow_table_eswitch_cap, \
-		 mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+		 mdev->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
 
 #define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \
 	MLX5_GET(flow_table_eswitch_cap, \
-		 mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+		 mdev->caps.hca_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
 
 #define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \
 	MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap)
@@ -1065,21 +1065,21 @@ enum mlx5_mcam_feature_groups {
 
 #define MLX5_CAP_ESW(mdev, cap) \
 	MLX5_GET(e_switch_cap, \
-		 mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap)
+		 mdev->caps.hca_cur[MLX5_CAP_ESWITCH], cap)
 
 #define MLX5_CAP_ESW_MAX(mdev, cap) \
 	MLX5_GET(e_switch_cap, \
-		 mdev->hca_caps_max[MLX5_CAP_ESWITCH], cap)
+		 mdev->caps.hca_max[MLX5_CAP_ESWITCH], cap)
 
 #define MLX5_CAP_ODP(mdev, cap)\
-	MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap)
+	MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap)
 
 #define MLX5_CAP_VECTOR_CALC(mdev, cap) \
 	MLX5_GET(vector_calc_cap, \
-		 mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap)
+		 mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap)
 
 #define MLX5_CAP_QOS(mdev, cap)\
-	MLX5_GET(qos_cap, mdev->hca_caps_cur[MLX5_CAP_QOS], cap)
+	MLX5_GET(qos_cap, mdev->caps.hca_cur[MLX5_CAP_QOS], cap)
 
 #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
 	MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f4d6d390a9cf..1bc4641734da 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -738,9 +738,9 @@ struct mlx5_core_dev {
 	char			board_id[MLX5_BOARD_ID_LEN];
 	struct mlx5_cmd		cmd;
 	struct mlx5_port_caps	port_caps[MLX5_MAX_PORTS];
-	u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
-	u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
 	struct {
+		u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
+		u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
 		u32 pcam[MLX5_ST_SZ_DW(pcam_reg)];
 		u32 mcam[MLX5_ST_SZ_DW(mcam_reg)];
 	} caps;
-- 
cgit v1.2.3


From acb04058de49458010c44bb35b849d45113fd668 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 19 Jan 2017 14:36:33 +0100
Subject: sched/clock: Fix hotplug crash

Mike reported that he could trigger the WARN_ON_ONCE() in
set_sched_clock_stable() using hotplug.

This exposed a fundamental problem with the interface, we should never
mark the TSC stable if we ever find it to be unstable. Therefore
set_sched_clock_stable() is a broken interface.

The reason it existed is that not having it is a pain, it means all
relevant architecture code needs to call clear_sched_clock_stable()
where appropriate.

Of the three architectures that select HAVE_UNSTABLE_SCHED_CLOCK ia64
and parisc are trivial in that they never called
set_sched_clock_stable(), so add an unconditional call to
clear_sched_clock_stable() to them.

For x86 the story is a lot more involved, and what this patch tries to
do is ensure we preserve the status quo. So even is Cyrix or Transmeta
have usable TSC they never called set_sched_clock_stable() so they now
get an explicit mark unstable.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9881b024b7d7 ("sched/clock: Delay switching sched_clock to stable")
Link: http://lkml.kernel.org/r/20170119133633.GB6536@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a8daed914eef..69e6852fede1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2547,7 +2547,6 @@ extern void sched_clock_init_late(void);
  * is reliable after all:
  */
 extern int sched_clock_stable(void);
-extern void set_sched_clock_stable(void);
 extern void clear_sched_clock_stable(void);
 
 extern void sched_clock_tick(void);
-- 
cgit v1.2.3


From e326ce013a8e851193eb337aafb1aa396c533a61 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 20 Jan 2017 03:25:34 +0100
Subject: Revert "PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag"

Revert commit 08b98d329165 (PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0
flag) as it caused system suspend (in the default configuration) to fail
on Dell XPS13 (9360) with the Kaby Lake processor.

Fixes: 08b98d329165 (PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag)
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/suspend.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 0c729c3c8549..d9718378a8be 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -194,8 +194,6 @@ struct platform_freeze_ops {
 };
 
 #ifdef CONFIG_SUSPEND
-extern suspend_state_t mem_sleep_default;
-
 /**
  * suspend_set_ops - set platform dependent suspend operations
  * @ops: The new suspend operations to set.
-- 
cgit v1.2.3


From 9c829c097f2f1cbebcfff69a7254b1df9852fe4e Mon Sep 17 00:00:00 2001
From: Stephen Boyd <stephen.boyd@linaro.org>
Date: Wed, 28 Dec 2016 14:56:47 -0800
Subject: of: device: Support loading a module with OF based modalias

In the case of ULPI devices, we want to be able to load the
driver before registering the device so that we don't get stuck
in a loop waiting for the phy module to appear and failing usb
controller probe. Currently we request the ulpi module via the
ulpi ids, but in the DT case we might need to request it with the
OF based modalias instead. Add a common function that allows
anyone to request a module with the OF based modalias.

Acked-by: Rob Herring <robh@kernel.org>
Cc: <devicetree@vger.kernel.org>
Signed-off-by: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Peter Chen <peter.chen@nxp.com>
---
 include/linux/of_device.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index cc7dd687a89d..e9afbcc8de12 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -37,6 +37,7 @@ extern const void *of_device_get_match_data(const struct device *dev);
 
 extern ssize_t of_device_get_modalias(struct device *dev,
 					char *str, ssize_t len);
+extern int of_device_request_module(struct device *dev);
 
 extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env);
 extern int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env);
@@ -78,6 +79,11 @@ static inline int of_device_get_modalias(struct device *dev,
 	return -ENODEV;
 }
 
+static inline int of_device_request_module(struct device *dev)
+{
+	return -ENODEV;
+}
+
 static inline int of_device_uevent_modalias(struct device *dev,
 				   struct kobj_uevent_env *env)
 {
-- 
cgit v1.2.3


From a89b94b53371bbfa582787c2fa3378000ea4263d Mon Sep 17 00:00:00 2001
From: Stephen Boyd <stephen.boyd@linaro.org>
Date: Wed, 28 Dec 2016 14:56:51 -0800
Subject: usb: chipidea: Handle extcon events properly

We're currently emulating the vbus and id interrupts in the OTGSC
read API, but we also need to make sure that if we're handling
the events with extcon that we don't enable the interrupts for
those events in the hardware. Therefore, properly emulate this
register if we're using extcon, but don't enable the interrupts.
This allows me to get my cable connect/disconnect working
properly without getting spurious interrupts on my device that
uses an extcon for these two events.

Acked-by: Peter Chen <peter.chen@nxp.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Ivan T. Ivanov" <iivanov.xz@gmail.com>
Fixes: 3ecb3e09b042 ("usb: chipidea: Use extcon framework for VBUS and ID detect")
Signed-off-by: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Peter Chen <peter.chen@nxp.com>
---
 include/linux/usb/chipidea.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index 5dd75fa47dd8..f9be467d6695 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -14,6 +14,7 @@ struct ci_hdrc;
  * struct ci_hdrc_cable - structure for external connector cable state tracking
  * @state: current state of the line
  * @changed: set to true when extcon event happen
+ * @enabled: set to true if we've enabled the vbus or id interrupt
  * @edev: device which generate events
  * @ci: driver state of the chipidea device
  * @nb: hold event notification callback
@@ -22,6 +23,7 @@ struct ci_hdrc;
 struct ci_hdrc_cable {
 	bool				state;
 	bool				changed;
+	bool				enabled;
 	struct extcon_dev		*edev;
 	struct ci_hdrc			*ci;
 	struct notifier_block		nb;
-- 
cgit v1.2.3


From 8feb3680bd0363a8d784fa0d065e0a6cdc9e0cff Mon Sep 17 00:00:00 2001
From: Stephen Boyd <stephen.boyd@linaro.org>
Date: Wed, 28 Dec 2016 14:56:52 -0800
Subject: usb: chipidea: Add platform flag for wrapper phy management

The ULPI phy on qcom platforms needs to be initialized and
powered on after a USB reset and before we toggle the run/stop
bit. Otherwise, the phy locks up and doesn't work properly.
Therefore, add a flag to skip any phy power management in the
core layer, leaving it up to the glue driver to manage.

Acked-by: Peter Chen <peter.chen@nxp.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Peter Chen <peter.chen@nxp.com>
---
 include/linux/usb/chipidea.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index f9be467d6695..d07b162073f7 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -57,6 +57,7 @@ struct ci_hdrc_platform_data {
 #define CI_HDRC_OVERRIDE_AHB_BURST	BIT(9)
 #define CI_HDRC_OVERRIDE_TX_BURST	BIT(10)
 #define CI_HDRC_OVERRIDE_RX_BURST	BIT(11)
+#define CI_HDRC_OVERRIDE_PHY_CONTROL	BIT(12) /* Glue layer manages phy */
 	enum usb_dr_mode	dr_mode;
 #define CI_HDRC_CONTROLLER_RESET_EVENT		0
 #define CI_HDRC_CONTROLLER_STOPPED_EVENT	1
-- 
cgit v1.2.3


From 5cc49268995a1f063a7a569299393e4cf9d06923 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <stephen.boyd@linaro.org>
Date: Fri, 20 Jan 2017 15:11:55 +0800
Subject: usb: chipidea: Consolidate extcon notifiers

The two extcon notifiers are almost the same except for the
variable name for the cable structure and the id notifier inverts
the cable->state logic. Make it the same and replace two
functions with one to save some lines. This also makes it so that
the id cable state is true when the id pin is pulled low, so we
change the name of ->state to ->connected to properly reflect
that we're interested in the cable being connected.

Acked-by: Peter Chen <peter.chen@nxp.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Ivan T. Ivanov" <iivanov.xz@gmail.com>
Signed-off-by: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Peter Chen <peter.chen@nxp.com>
---
 include/linux/usb/chipidea.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index d07b162073f7..7e3daa37cf60 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -12,7 +12,7 @@ struct ci_hdrc;
 
 /**
  * struct ci_hdrc_cable - structure for external connector cable state tracking
- * @state: current state of the line
+ * @connected: true if cable is connected, false otherwise
  * @changed: set to true when extcon event happen
  * @enabled: set to true if we've enabled the vbus or id interrupt
  * @edev: device which generate events
@@ -21,7 +21,7 @@ struct ci_hdrc;
  * @conn: used for notification registration
  */
 struct ci_hdrc_cable {
-	bool				state;
+	bool				connected;
 	bool				changed;
 	bool				enabled;
 	struct extcon_dev		*edev;
-- 
cgit v1.2.3


From 11893dae63da0f5b251cf7f9a24d64c8ff4771ff Mon Sep 17 00:00:00 2001
From: Stephen Boyd <stephen.boyd@linaro.org>
Date: Wed, 28 Dec 2016 14:57:06 -0800
Subject: usb: chipidea: msm: Handle phy power states

The ULPI phy on qcom platforms needs to be initialized and
powered on after a USB reset and before we toggle the run/stop
bit. Otherwise, the phy locks up and doesn't work properly. Hook
the phy initialization into the RESET event and the phy power off
into the STOPPED event.

Acked-by: Peter Chen <peter.chen@nxp.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Peter Chen <peter.chen@nxp.com>
---
 include/linux/usb/chipidea.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index 7e3daa37cf60..c5fdfcf99828 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -61,7 +61,7 @@ struct ci_hdrc_platform_data {
 	enum usb_dr_mode	dr_mode;
 #define CI_HDRC_CONTROLLER_RESET_EVENT		0
 #define CI_HDRC_CONTROLLER_STOPPED_EVENT	1
-	void	(*notify_event) (struct ci_hdrc *ci, unsigned event);
+	int	(*notify_event) (struct ci_hdrc *ci, unsigned event);
 	struct regulator	*reg_vbus;
 	struct usb_otg_caps	ci_otg_caps;
 	bool			tpl_support;
-- 
cgit v1.2.3


From ee48c726d0b014ac8dd5ec803fb63ce0596a42cf Mon Sep 17 00:00:00 2001
From: Ramiro Oliveira <Ramiro.Oliveira@synopsys.com>
Date: Fri, 13 Jan 2017 17:57:40 +0000
Subject: reset: Change shared flag from int to bool

Since the new parameter being added is going to be a bool this patch
changes the shared flag from int to bool to match the new parameter.

Signed-off-by: Ramiro Oliveira <Ramiro.Oliveira@synopsys.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/reset.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/reset.h b/include/linux/reset.h
index 5daff15722d3..ec1d1fd28f5f 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -13,10 +13,10 @@ int reset_control_deassert(struct reset_control *rstc);
 int reset_control_status(struct reset_control *rstc);
 
 struct reset_control *__of_reset_control_get(struct device_node *node,
-				     const char *id, int index, int shared);
+				     const char *id, int index, bool shared);
 void reset_control_put(struct reset_control *rstc);
 struct reset_control *__devm_reset_control_get(struct device *dev,
-				     const char *id, int index, int shared);
+				     const char *id, int index, bool shared);
 
 int __must_check device_reset(struct device *dev);
 
@@ -69,14 +69,14 @@ static inline int device_reset_optional(struct device *dev)
 
 static inline struct reset_control *__of_reset_control_get(
 					struct device_node *node,
-					const char *id, int index, int shared)
+					const char *id, int index, bool shared)
 {
 	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct reset_control *__devm_reset_control_get(
 					struct device *dev,
-					const char *id, int index, int shared)
+					const char *id, int index, bool shared)
 {
 	return ERR_PTR(-ENOTSUPP);
 }
@@ -132,19 +132,19 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id)
 static inline struct reset_control *reset_control_get_shared(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1);
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true);
 }
 
 static inline struct reset_control *reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false);
 }
 
 static inline struct reset_control *reset_control_get_optional_shared(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1);
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true);
 }
 
 /**
@@ -185,7 +185,7 @@ static inline struct reset_control *of_reset_control_get_exclusive(
 static inline struct reset_control *of_reset_control_get_shared(
 				struct device_node *node, const char *id)
 {
-	return __of_reset_control_get(node, id, 0, 1);
+	return __of_reset_control_get(node, id, 0, true);
 }
 
 /**
@@ -202,7 +202,7 @@ static inline struct reset_control *of_reset_control_get_shared(
 static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 					struct device_node *node, int index)
 {
-	return __of_reset_control_get(node, NULL, index, 0);
+	return __of_reset_control_get(node, NULL, index, false);
 }
 
 /**
@@ -230,7 +230,7 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 static inline struct reset_control *of_reset_control_get_shared_by_index(
 					struct device_node *node, int index)
 {
-	return __of_reset_control_get(node, NULL, index, 1);
+	return __of_reset_control_get(node, NULL, index, true);
 }
 
 /**
@@ -252,7 +252,7 @@ __must_check devm_reset_control_get_exclusive(struct device *dev,
 #ifndef CONFIG_RESET_CONTROLLER
 	WARN_ON(1);
 #endif
-	return __devm_reset_control_get(dev, id, 0, 0);
+	return __devm_reset_control_get(dev, id, 0, false);
 }
 
 /**
@@ -267,19 +267,19 @@ __must_check devm_reset_control_get_exclusive(struct device *dev,
 static inline struct reset_control *devm_reset_control_get_shared(
 					struct device *dev, const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, 1);
+	return __devm_reset_control_get(dev, id, 0, true);
 }
 
 static inline struct reset_control *devm_reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, 0);
+	return __devm_reset_control_get(dev, id, 0, false);
 }
 
 static inline struct reset_control *devm_reset_control_get_optional_shared(
 					struct device *dev, const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, 1);
+	return __devm_reset_control_get(dev, id, 0, true);
 }
 
 /**
@@ -297,7 +297,7 @@ static inline struct reset_control *devm_reset_control_get_optional_shared(
 static inline struct reset_control *
 devm_reset_control_get_exclusive_by_index(struct device *dev, int index)
 {
-	return __devm_reset_control_get(dev, NULL, index, 0);
+	return __devm_reset_control_get(dev, NULL, index, false);
 }
 
 /**
@@ -313,7 +313,7 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index)
 static inline struct reset_control *
 devm_reset_control_get_shared_by_index(struct device *dev, int index)
 {
-	return __devm_reset_control_get(dev, NULL, index, 1);
+	return __devm_reset_control_get(dev, NULL, index, true);
 }
 
 /*
-- 
cgit v1.2.3


From bb475230b8e59a547ab66ac3b02572df21a580e9 Mon Sep 17 00:00:00 2001
From: Ramiro Oliveira <Ramiro.Oliveira@synopsys.com>
Date: Fri, 13 Jan 2017 17:57:41 +0000
Subject: reset: make optional functions really optional

The *_get_optional_* functions weren't really optional so this patch
makes them really optional.

These *_get_optional_* functions will now return NULL instead of an error
if no matching reset phandle is found in the DT, and all the
reset_control_* functions now accept NULL rstc pointers.

Signed-off-by: Ramiro Oliveira <Ramiro.Oliveira@synopsys.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/reset.h | 45 ++++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/reset.h b/include/linux/reset.h
index ec1d1fd28f5f..86b4ed75359e 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -13,10 +13,12 @@ int reset_control_deassert(struct reset_control *rstc);
 int reset_control_status(struct reset_control *rstc);
 
 struct reset_control *__of_reset_control_get(struct device_node *node,
-				     const char *id, int index, bool shared);
+				     const char *id, int index, bool shared,
+				     bool optional);
 void reset_control_put(struct reset_control *rstc);
 struct reset_control *__devm_reset_control_get(struct device *dev,
-				     const char *id, int index, bool shared);
+				     const char *id, int index, bool shared,
+				     bool optional);
 
 int __must_check device_reset(struct device *dev);
 
@@ -69,14 +71,15 @@ static inline int device_reset_optional(struct device *dev)
 
 static inline struct reset_control *__of_reset_control_get(
 					struct device_node *node,
-					const char *id, int index, bool shared)
+					const char *id, int index, bool shared,
+					bool optional)
 {
 	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct reset_control *__devm_reset_control_get(
-					struct device *dev,
-					const char *id, int index, bool shared)
+					struct device *dev, const char *id,
+					int index, bool shared, bool optional)
 {
 	return ERR_PTR(-ENOTSUPP);
 }
@@ -104,7 +107,8 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id)
 #ifndef CONFIG_RESET_CONTROLLER
 	WARN_ON(1);
 #endif
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false,
+									false);
 }
 
 /**
@@ -132,19 +136,22 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id)
 static inline struct reset_control *reset_control_get_shared(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true);
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true,
+									false);
 }
 
 static inline struct reset_control *reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false);
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false,
+									true);
 }
 
 static inline struct reset_control *reset_control_get_optional_shared(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true);
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true,
+									true);
 }
 
 /**
@@ -160,7 +167,7 @@ static inline struct reset_control *reset_control_get_optional_shared(
 static inline struct reset_control *of_reset_control_get_exclusive(
 				struct device_node *node, const char *id)
 {
-	return __of_reset_control_get(node, id, 0, 0);
+	return __of_reset_control_get(node, id, 0, false, false);
 }
 
 /**
@@ -185,7 +192,7 @@ static inline struct reset_control *of_reset_control_get_exclusive(
 static inline struct reset_control *of_reset_control_get_shared(
 				struct device_node *node, const char *id)
 {
-	return __of_reset_control_get(node, id, 0, true);
+	return __of_reset_control_get(node, id, 0, true, false);
 }
 
 /**
@@ -202,7 +209,7 @@ static inline struct reset_control *of_reset_control_get_shared(
 static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 					struct device_node *node, int index)
 {
-	return __of_reset_control_get(node, NULL, index, false);
+	return __of_reset_control_get(node, NULL, index, false, false);
 }
 
 /**
@@ -230,7 +237,7 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 static inline struct reset_control *of_reset_control_get_shared_by_index(
 					struct device_node *node, int index)
 {
-	return __of_reset_control_get(node, NULL, index, true);
+	return __of_reset_control_get(node, NULL, index, true, false);
 }
 
 /**
@@ -252,7 +259,7 @@ __must_check devm_reset_control_get_exclusive(struct device *dev,
 #ifndef CONFIG_RESET_CONTROLLER
 	WARN_ON(1);
 #endif
-	return __devm_reset_control_get(dev, id, 0, false);
+	return __devm_reset_control_get(dev, id, 0, false, false);
 }
 
 /**
@@ -267,19 +274,19 @@ __must_check devm_reset_control_get_exclusive(struct device *dev,
 static inline struct reset_control *devm_reset_control_get_shared(
 					struct device *dev, const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, true);
+	return __devm_reset_control_get(dev, id, 0, true, false);
 }
 
 static inline struct reset_control *devm_reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, false);
+	return __devm_reset_control_get(dev, id, 0, false, true);
 }
 
 static inline struct reset_control *devm_reset_control_get_optional_shared(
 					struct device *dev, const char *id)
 {
-	return __devm_reset_control_get(dev, id, 0, true);
+	return __devm_reset_control_get(dev, id, 0, true, true);
 }
 
 /**
@@ -297,7 +304,7 @@ static inline struct reset_control *devm_reset_control_get_optional_shared(
 static inline struct reset_control *
 devm_reset_control_get_exclusive_by_index(struct device *dev, int index)
 {
-	return __devm_reset_control_get(dev, NULL, index, false);
+	return __devm_reset_control_get(dev, NULL, index, false, false);
 }
 
 /**
@@ -313,7 +320,7 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index)
 static inline struct reset_control *
 devm_reset_control_get_shared_by_index(struct device *dev, int index)
 {
-	return __devm_reset_control_get(dev, NULL, index, true);
+	return __devm_reset_control_get(dev, NULL, index, true, false);
 }
 
 /*
-- 
cgit v1.2.3


From a62a77881b1b6708ffeddd9bf0529494f7b199e3 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Mon, 16 Jan 2017 11:17:57 +0100
Subject: brcmfmac: add support for BCM43455 with modalias sdio:c00v02D0dA9BF
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCM43455 is a more recent revision of the BCM4345. Some of the BCM43455
got a dedicated SDIO device ID which is currently not supported by
brcmfmac.
Adding the new sdio_device_id to brcmfmac is enough to get the BCM43455
supported because the chip itself is already supported (due to BCM4345
support in the driver).

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Tested-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/mmc/sdio_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index d43ef96bf075..71b113e1223f 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -36,6 +36,7 @@
 #define SDIO_DEVICE_ID_BROADCOM_43362		0xa962
 #define SDIO_DEVICE_ID_BROADCOM_43430		0xa9a6
 #define SDIO_DEVICE_ID_BROADCOM_4345		0x4345
+#define SDIO_DEVICE_ID_BROADCOM_43455		0xa9bf
 #define SDIO_DEVICE_ID_BROADCOM_4354		0x4354
 #define SDIO_DEVICE_ID_BROADCOM_4356		0x4356
 
-- 
cgit v1.2.3


From 6391a4481ba0796805d6581e42f9f0418c099e34 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 20 Jan 2017 14:32:42 +0800
Subject: virtio-net: restore VIRTIO_HDR_F_DATA_VALID on receiving

Commit 501db511397f ("virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on
xmit") in fact disables VIRTIO_HDR_F_DATA_VALID on receiving path too,
fixing this by adding a hint (has_data_valid) and set it only on the
receiving path.

Cc: Rolf Neugebauer <rolf.neugebauer@docker.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 56436472ccc7..5209b5ed2a64 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -56,7 +56,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 
 static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 					  struct virtio_net_hdr *hdr,
-					  bool little_endian)
+					  bool little_endian,
+					  bool has_data_valid)
 {
 	memset(hdr, 0, sizeof(*hdr));   /* no info leak */
 
@@ -91,6 +92,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 				skb_checksum_start_offset(skb));
 		hdr->csum_offset = __cpu_to_virtio16(little_endian,
 				skb->csum_offset);
+	} else if (has_data_valid &&
+		   skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
 	} /* else everything is zero */
 
 	return 0;
-- 
cgit v1.2.3


From 76640b84bd7a9d68c70d8bc8ecd02cdc4bd8855e Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 19 Jan 2017 14:48:41 +0100
Subject: soc: samsung: pmu: Provide global function to get PMU regmap

PMU is something like a SoC wide service, so add a helper function to get
PMU regmap. This will be used by other Exynos device drivers. This way it
can be avoided to model this dependency in device tree (as phandles to PMU
node) for almost every device in the SoC.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Tomasz Figa <tomasz.figa@gmail.com>
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 include/linux/soc/samsung/exynos-pmu.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h
index e2e9de1acc5b..e57eb4b6cc5a 100644
--- a/include/linux/soc/samsung/exynos-pmu.h
+++ b/include/linux/soc/samsung/exynos-pmu.h
@@ -12,6 +12,8 @@
 #ifndef __LINUX_SOC_EXYNOS_PMU_H
 #define __LINUX_SOC_EXYNOS_PMU_H
 
+struct regmap;
+
 enum sys_powerdown {
 	SYS_AFTR,
 	SYS_LPA,
@@ -20,5 +22,13 @@ enum sys_powerdown {
 };
 
 extern void exynos_sys_powerdown_conf(enum sys_powerdown mode);
+#ifdef CONFIG_EXYNOS_PMU
+extern struct regmap *exynos_get_pmu_regmap(void);
+#else
+static inline struct regmap *exynos_get_pmu_regmap(void)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
 
 #endif /* __LINUX_SOC_EXYNOS_PMU_H */
-- 
cgit v1.2.3


From 582a686f52d2e002da64093fe4b9aa5befcaf4e4 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 18 Jan 2017 14:04:37 +0100
Subject: device: bus_type: Introduce num_vf callback

This allows for bus types to implement their own method of retrieving
the number of virtual functions a NIC on that type of bus supports.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/device.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 491b4c0ca633..6d73b70a4a5d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -88,6 +88,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  *
  * @suspend:	Called when a device on this bus wants to go to sleep mode.
  * @resume:	Called to bring a device on this bus out of sleep mode.
+ * @num_vf:	Called to find out how many virtual functions a device on this
+ *		bus supports.
  * @pm:		Power management operations of this bus, callback the specific
  *		device driver's pm-ops.
  * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
@@ -127,6 +129,8 @@ struct bus_type {
 	int (*suspend)(struct device *dev, pm_message_t state);
 	int (*resume)(struct device *dev);
 
+	int (*num_vf)(struct device *dev);
+
 	const struct dev_pm_ops *pm;
 
 	const struct iommu_ops *iommu_ops;
-- 
cgit v1.2.3


From 9af15c38254d81c9991eba89335ca7c537d7f2c3 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 18 Jan 2017 14:04:39 +0100
Subject: device: Implement a bus agnostic dev_num_vf routine

Now that pci_bus_type has num_vf callback set, dev_num_vf can be
implemented in a bus type independent way and the check for whether a
PCI device is being handled in rtnetlink can be dropped.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/device.h | 7 +++++++
 include/linux/pci.h    | 2 --
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 6d73b70a4a5d..bd684fc8ec1d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1144,6 +1144,13 @@ extern int device_online(struct device *dev);
 extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
 extern void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
 
+static inline int dev_num_vf(struct device *dev)
+{
+	if (dev->bus && dev->bus->num_vf)
+		return dev->bus->num_vf(dev);
+	return 0;
+}
+
 /*
  * Root device objects for grouping under /sys/devices
  */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e2d1a124216a..adbc859fe7c4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -885,7 +885,6 @@ void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type);
 void pci_sort_breadthfirst(void);
 #define dev_is_pci(d) ((d)->bus == &pci_bus_type)
 #define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_physfn : false))
-#define dev_num_vf(d) ((dev_is_pci(d) ? pci_num_vf(to_pci_dev(d)) : 0))
 
 /* Generic PCI functions exported to card drivers */
 
@@ -1630,7 +1629,6 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; }
 
 #define dev_is_pci(d) (false)
 #define dev_is_pf(d) (false)
-#define dev_num_vf(d) (0)
 #endif /* CONFIG_PCI */
 
 /* Include architecture-dependent settings and functions */
-- 
cgit v1.2.3


From 9f8197980d87a28ec3d0b3b986f770e7e7878485 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 19 Jan 2017 10:59:13 +0000
Subject: delay: Add explanation of udelay() inaccuracy

There seems to be some misunderstanding that udelay() and friends will
always guarantee the specified delay.  This is a false understanding.
When udelay() is based on CPU cycles, it can return early for many
reasons which are detailed by Linus' reply to me in a thread in 2011:

  http://lists.openwall.net/linux-kernel/2011/01/12/372

However, a udelay test module was created in 2014 which allows udelay()
to only be 0.5% fast, which is outside of the CPU-cycles udelay()
results I measured back in 2011, which were deemed to be in the "we
don't care" region.

test_udelay() should be fixed to reflect the real allowable tolerance
on udelay(), rather than 0.5%.

Cc: David Riley <davidriley@chromium.org>
Cc: John Stultz <john.stultz@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/delay.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index a6ecb34cf547..2ecb3c46b20a 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -5,6 +5,17 @@
  * Copyright (C) 1993 Linus Torvalds
  *
  * Delay routines, using a pre-computed "loops_per_jiffy" value.
+ *
+ * Please note that ndelay(), udelay() and mdelay() may return early for
+ * several reasons:
+ *  1. computed loops_per_jiffy too low (due to the time taken to
+ *     execute the timer interrupt.)
+ *  2. cache behaviour affecting the time it takes to execute the
+ *     loop function.
+ *  3. CPU clock rate changes.
+ *
+ * Please see this thread:
+ *   http://lists.openwall.net/linux-kernel/2011/01/09/56
  */
 
 #include <linux/kernel.h>
-- 
cgit v1.2.3


From bcc9a76d5ac426bc45c9e863b1830347827ca77a Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Sat, 21 Jan 2017 21:33:35 -0500
Subject: locking/rwsem: Reinit wake_q after use

In __rwsem_down_write_failed_common(), the same wake_q variable name
is defined twice, with the inner wake_q hiding the one in outer scope.
We can either use different names for the two wake_q's.

Even better, we can use the same wake_q twice, if necessary.

To enable the latter change, we need to define a new helper function
wake_q_init() to enable reinitalization of wake_q after use.

Signed-off-by: Waiman Long <longman@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1485052415-9611-1-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f4f9d32f12b3..4e62b378bd65 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1021,6 +1021,12 @@ struct wake_q_head {
 #define DEFINE_WAKE_Q(name)				\
 	struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
 
+static inline void wake_q_init(struct wake_q_head *head)
+{
+	head->first = WAKE_Q_TAIL;
+	head->lastp = &head->first;
+}
+
 extern void wake_q_add(struct wake_q_head *head,
 		       struct task_struct *task);
 extern void wake_up_q(struct wake_q_head *head);
-- 
cgit v1.2.3


From 582d0ac397ca02a6a3fb653b13154ac87b884beb Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 20 Jan 2017 12:36:33 -0800
Subject: net: phy: bcm7xxx: Add entry for BCM7278

Add support for the BCM7278 28nm process Gigabit Ethernet PHY.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 4f7d8be9ddbf..295fb3e73de5 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -24,6 +24,7 @@
 #define PHY_ID_BCM57780			0x03625d90
 
 #define PHY_ID_BCM7250			0xae025280
+#define PHY_ID_BCM7278			0xae0251a0
 #define PHY_ID_BCM7364			0xae025260
 #define PHY_ID_BCM7366			0x600d8490
 #define PHY_ID_BCM7346			0x600d8650
-- 
cgit v1.2.3


From fdbe574eb69312a7fbe09674d69c01b80e4ed9dc Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 19 Jan 2017 20:57:46 +0000
Subject: iommu/dma: Allow MSI-only cookies

IOMMU domain users such as VFIO face a similar problem to DMA API ops
with regard to mapping MSI messages in systems where the MSI write is
subject to IOMMU translation. With the relevant infrastructure now in
place for managed DMA domains, it's actually really simple for other
users to piggyback off that and reap the benefits without giving up
their own IOVA management, and without having to reinvent their own
wheel in the MSI layer.

Allow such users to opt into automatic MSI remapping by dedicating a
region of their IOVA space to a managed cookie, and extend the mapping
routine to implement a trivial linear allocator in such cases, to avoid
the needless overhead of a full-blown IOVA domain.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/dma-iommu.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 7f7e9a7e3839..28df844a23b6 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -27,6 +27,7 @@ int iommu_dma_init(void);
 
 /* Domain management interface for IOMMU drivers */
 int iommu_get_dma_cookie(struct iommu_domain *domain);
+int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
 void iommu_put_dma_cookie(struct iommu_domain *domain);
 
 /* Setup call for arch DMA mapping code */
@@ -86,6 +87,11 @@ static inline int iommu_get_dma_cookie(struct iommu_domain *domain)
 	return -ENODEV;
 }
 
+static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
+{
+	return -ENODEV;
+}
+
 static inline void iommu_put_dma_cookie(struct iommu_domain *domain)
 {
 }
-- 
cgit v1.2.3


From e5b5234a36ca283158721d3d2e0cddfa324abdf9 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:47 +0000
Subject: iommu: Rename iommu_dm_regions into iommu_resv_regions

We want to extend the callbacks used for dm regions and
use them for reserved regions. Reserved regions can be
- directly mapped regions
- regions that cannot be iommu mapped (PCI host bridge windows, ...)
- MSI regions (because they belong to another address space or because
  they are not translated by the IOMMU and need special handling)

So let's rename the struct and also the callbacks.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0ff5111f6959..bfecb8b74078 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -118,13 +118,13 @@ enum iommu_attr {
 };
 
 /**
- * struct iommu_dm_region - descriptor for a direct mapped memory region
+ * struct iommu_resv_region - descriptor for a reserved memory region
  * @list: Linked list pointers
  * @start: System physical start address of the region
  * @length: Length of the region in bytes
  * @prot: IOMMU Protection flags (READ/WRITE/...)
  */
-struct iommu_dm_region {
+struct iommu_resv_region {
 	struct list_head	list;
 	phys_addr_t		start;
 	size_t			length;
@@ -150,9 +150,9 @@ struct iommu_dm_region {
  * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
- * @get_dm_regions: Request list of direct mapping requirements for a device
- * @put_dm_regions: Free list of direct mapping requirements for a device
- * @apply_dm_region: Temporary helper call-back for iova reserved ranges
+ * @get_resv_regions: Request list of reserved regions for a device
+ * @put_resv_regions: Free list of reserved regions for a device
+ * @apply_resv_region: Temporary helper call-back for iova reserved ranges
  * @domain_window_enable: Configure and enable a particular window for a domain
  * @domain_window_disable: Disable a particular window for a domain
  * @domain_set_windows: Set the number of windows for a domain
@@ -184,11 +184,12 @@ struct iommu_ops {
 	int (*domain_set_attr)(struct iommu_domain *domain,
 			       enum iommu_attr attr, void *data);
 
-	/* Request/Free a list of direct mapping requirements for a device */
-	void (*get_dm_regions)(struct device *dev, struct list_head *list);
-	void (*put_dm_regions)(struct device *dev, struct list_head *list);
-	void (*apply_dm_region)(struct device *dev, struct iommu_domain *domain,
-				struct iommu_dm_region *region);
+	/* Request/Free a list of reserved regions for a device */
+	void (*get_resv_regions)(struct device *dev, struct list_head *list);
+	void (*put_resv_regions)(struct device *dev, struct list_head *list);
+	void (*apply_resv_region)(struct device *dev,
+				  struct iommu_domain *domain,
+				  struct iommu_resv_region *region);
 
 	/* Window handling functions */
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
@@ -233,8 +234,8 @@ extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t io
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
 
-extern void iommu_get_dm_regions(struct device *dev, struct list_head *list);
-extern void iommu_put_dm_regions(struct device *dev, struct list_head *list);
+extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
+extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
 
 extern int iommu_attach_group(struct iommu_domain *domain,
@@ -443,12 +444,12 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 {
 }
 
-static inline void iommu_get_dm_regions(struct device *dev,
+static inline void iommu_get_resv_regions(struct device *dev,
 					struct list_head *list)
 {
 }
 
-static inline void iommu_put_dm_regions(struct device *dev,
+static inline void iommu_put_resv_regions(struct device *dev,
 					struct list_head *list)
 {
 }
-- 
cgit v1.2.3


From d30ddcaa7b028049cdfee3a40248002d07b2bbf3 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:48 +0000
Subject: iommu: Add a new type field in iommu_resv_region

We introduce a new field to differentiate the reserved region
types and specialize the apply_resv_region implementation.

Legacy direct mapped regions have IOMMU_RESV_DIRECT type.
We introduce 2 new reserved memory types:
- IOMMU_RESV_MSI will characterize MSI regions that are mapped
- IOMMU_RESV_RESERVED characterize regions that cannot by mapped.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bfecb8b74078..233a6bf093bf 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -117,18 +117,25 @@ enum iommu_attr {
 	DOMAIN_ATTR_MAX,
 };
 
+/* These are the possible reserved region types */
+#define IOMMU_RESV_DIRECT	(1 << 0)
+#define IOMMU_RESV_RESERVED	(1 << 1)
+#define IOMMU_RESV_MSI		(1 << 2)
+
 /**
  * struct iommu_resv_region - descriptor for a reserved memory region
  * @list: Linked list pointers
  * @start: System physical start address of the region
  * @length: Length of the region in bytes
  * @prot: IOMMU Protection flags (READ/WRITE/...)
+ * @type: Type of the reserved region
  */
 struct iommu_resv_region {
 	struct list_head	list;
 	phys_addr_t		start;
 	size_t			length;
 	int			prot;
+	int			type;
 };
 
 #ifdef CONFIG_IOMMU_API
-- 
cgit v1.2.3


From 2b20cbba3390a55c511acba2f0f517dd27a528b2 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:49 +0000
Subject: iommu: iommu_alloc_resv_region

Introduce a new helper serving the purpose to allocate a reserved
region. This will be used in iommu driver implementing reserved
region callbacks.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 233a6bf093bf..f6bb55d3e606 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -244,6 +244,8 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain,
 extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
 extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
+extern struct iommu_resv_region *
+iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type);
 
 extern int iommu_attach_group(struct iommu_domain *domain,
 			      struct iommu_group *group);
-- 
cgit v1.2.3


From 6c65fb318e8bbf21e939e651028b955324f1d873 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:51 +0000
Subject: iommu: iommu_get_group_resv_regions

Introduce iommu_get_group_resv_regions whose role consists in
enumerating all devices from the group and collecting their
reserved regions. The list is sorted and overlaps between
regions of the same type are handled by merging the regions.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/iommu.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f6bb55d3e606..bec3730dc009 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -246,6 +246,8 @@ extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
 extern struct iommu_resv_region *
 iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type);
+extern int iommu_get_group_resv_regions(struct iommu_group *group,
+					struct list_head *head);
 
 extern int iommu_attach_group(struct iommu_domain *domain,
 			      struct iommu_group *group);
@@ -463,6 +465,12 @@ static inline void iommu_put_resv_regions(struct device *dev,
 {
 }
 
+static inline int iommu_get_group_resv_regions(struct iommu_group *group,
+					       struct list_head *head)
+{
+	return -ENODEV;
+}
+
 static inline int iommu_request_dm_for_dev(struct device *dev)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From d0f949e220fdf5ed1033e9f6e80749b05f2e7b70 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Date: Fri, 20 Jan 2017 10:15:03 +0100
Subject: mfd: Add STM32 Timers driver

This hardware block could at used at same time for PWM generation
and IIO timers.
PWM and IIO timer configuration are mixed in the same registers
so we need a multi fonction driver to be able to share those registers.

version 7:
- rebase on v4.10-rc2

version 6:
- rename files to stm32-timers
- rename functions to stm32_timers_xxx

version 5:
- fix Lee comments about detect function
- add missing dependency on REGMAP_MMIO

version 4:
- add a function to detect Auto Reload Register (ARR) size
- rename the structure shared with other drivers

version 2:
- rename driver "stm32-gptimer" to be align with SoC documentation
- only keep one compatible
- use of_platform_populate() instead of devm_mfd_add_devices()

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@st.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/stm32-timers.h | 71 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 include/linux/mfd/stm32-timers.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h
new file mode 100644
index 000000000000..d0300045f04a
--- /dev/null
+++ b/include/linux/mfd/stm32-timers.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) STMicroelectronics 2016
+ *
+ * Author: Benjamin Gaignard <benjamin.gaignard@st.com>
+ *
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#ifndef _LINUX_STM32_GPTIMER_H_
+#define _LINUX_STM32_GPTIMER_H_
+
+#include <linux/clk.h>
+#include <linux/regmap.h>
+
+#define TIM_CR1		0x00	/* Control Register 1      */
+#define TIM_CR2		0x04	/* Control Register 2      */
+#define TIM_SMCR	0x08	/* Slave mode control reg  */
+#define TIM_DIER	0x0C	/* DMA/interrupt register  */
+#define TIM_SR		0x10	/* Status register	   */
+#define TIM_EGR		0x14	/* Event Generation Reg    */
+#define TIM_CCMR1	0x18	/* Capt/Comp 1 Mode Reg    */
+#define TIM_CCMR2	0x1C	/* Capt/Comp 2 Mode Reg    */
+#define TIM_CCER	0x20	/* Capt/Comp Enable Reg    */
+#define TIM_PSC		0x28	/* Prescaler               */
+#define TIM_ARR		0x2c	/* Auto-Reload Register    */
+#define TIM_CCR1	0x34	/* Capt/Comp Register 1    */
+#define TIM_CCR2	0x38	/* Capt/Comp Register 2    */
+#define TIM_CCR3	0x3C	/* Capt/Comp Register 3    */
+#define TIM_CCR4	0x40	/* Capt/Comp Register 4    */
+#define TIM_BDTR	0x44	/* Break and Dead-Time Reg */
+
+#define TIM_CR1_CEN	BIT(0)	/* Counter Enable	   */
+#define TIM_CR1_ARPE	BIT(7)	/* Auto-reload Preload Ena */
+#define TIM_CR2_MMS	(BIT(4) | BIT(5) | BIT(6)) /* Master mode selection */
+#define TIM_SMCR_SMS	(BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */
+#define TIM_SMCR_TS	(BIT(4) | BIT(5) | BIT(6)) /* Trigger selection */
+#define TIM_DIER_UIE	BIT(0)	/* Update interrupt	   */
+#define TIM_SR_UIF	BIT(0)	/* Update interrupt flag   */
+#define TIM_EGR_UG	BIT(0)	/* Update Generation       */
+#define TIM_CCMR_PE	BIT(3)	/* Channel Preload Enable  */
+#define TIM_CCMR_M1	(BIT(6) | BIT(5))  /* Channel PWM Mode 1 */
+#define TIM_CCER_CC1E	BIT(0)	/* Capt/Comp 1  out Ena    */
+#define TIM_CCER_CC1P	BIT(1)	/* Capt/Comp 1  Polarity   */
+#define TIM_CCER_CC1NE	BIT(2)	/* Capt/Comp 1N out Ena    */
+#define TIM_CCER_CC1NP	BIT(3)	/* Capt/Comp 1N Polarity   */
+#define TIM_CCER_CC2E	BIT(4)	/* Capt/Comp 2  out Ena    */
+#define TIM_CCER_CC3E	BIT(8)	/* Capt/Comp 3  out Ena    */
+#define TIM_CCER_CC4E	BIT(12)	/* Capt/Comp 4  out Ena    */
+#define TIM_CCER_CCXE	(BIT(0) | BIT(4) | BIT(8) | BIT(12))
+#define TIM_BDTR_BKE	BIT(12) /* Break input enable	   */
+#define TIM_BDTR_BKP	BIT(13) /* Break input polarity	   */
+#define TIM_BDTR_AOE	BIT(14)	/* Automatic Output Enable */
+#define TIM_BDTR_MOE	BIT(15)	/* Main Output Enable      */
+#define TIM_BDTR_BKF	(BIT(16) | BIT(17) | BIT(18) | BIT(19))
+#define TIM_BDTR_BK2F	(BIT(20) | BIT(21) | BIT(22) | BIT(23))
+#define TIM_BDTR_BK2E	BIT(24) /* Break 2 input enable	   */
+#define TIM_BDTR_BK2P	BIT(25) /* Break 2 input polarity  */
+
+#define MAX_TIM_PSC		0xFFFF
+#define TIM_CR2_MMS_SHIFT	4
+#define TIM_SMCR_TS_SHIFT	4
+#define TIM_BDTR_BKF_MASK	0xF
+#define TIM_BDTR_BKF_SHIFT	16
+#define TIM_BDTR_BK2F_SHIFT	20
+
+struct stm32_timers {
+	struct clk *clk;
+	struct regmap *regmap;
+	u32 max_arr;
+};
+#endif
-- 
cgit v1.2.3


From 631a9639ac413da6242cb15558ebd661cf633622 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:57 +0000
Subject: irqdomain: Add irq domain MSI and MSI_REMAP flags

We introduce two new enum values for the irq domain flag:
- IRQ_DOMAIN_FLAG_MSI indicates the irq domain corresponds to
  an MSI domain
- IRQ_DOMAIN_FLAG_MSI_REMAP indicates the irq domain has MSI
  remapping capabilities.

Those values will be useful to check all MSI irq domains have
MSI remapping support when assessing the safety of IRQ assignment
to a guest.

irq_domain_hierarchical_is_msi_remap() allows to check if an
irq domain or any parent implements MSI remapping.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/irqdomain.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index ffb84604c1de..bc2f5719dace 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -183,6 +183,12 @@ enum {
 	/* Irq domain is an IPI domain with single virq */
 	IRQ_DOMAIN_FLAG_IPI_SINGLE	= (1 << 3),
 
+	/* Irq domain implements MSIs */
+	IRQ_DOMAIN_FLAG_MSI		= (1 << 4),
+
+	/* Irq domain implements MSI remapping */
+	IRQ_DOMAIN_FLAG_MSI_REMAP	= (1 << 5),
+
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
@@ -446,6 +452,19 @@ static inline bool irq_domain_is_ipi_single(struct irq_domain *domain)
 {
 	return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE;
 }
+
+static inline bool irq_domain_is_msi(struct irq_domain *domain)
+{
+	return domain->flags & IRQ_DOMAIN_FLAG_MSI;
+}
+
+static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
+{
+	return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP;
+}
+
+extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain);
+
 #else	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 static inline void irq_domain_activate_irq(struct irq_data *data) { }
 static inline void irq_domain_deactivate_irq(struct irq_data *data) { }
@@ -477,6 +496,22 @@ static inline bool irq_domain_is_ipi_single(struct irq_domain *domain)
 {
 	return false;
 }
+
+static inline bool irq_domain_is_msi(struct irq_domain *domain)
+{
+	return false;
+}
+
+static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
+{
+	return false;
+}
+
+static inline bool
+irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
+{
+	return false;
+}
 #endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 
 #else /* CONFIG_IRQ_DOMAIN */
-- 
cgit v1.2.3


From c7b41f0af38f53e46050b56a5b0e96710097b83c Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 19 Jan 2017 20:57:59 +0000
Subject: irqdomain: irq_domain_check_msi_remap

This new function checks whether all MSI irq domains
implement IRQ remapping. This is useful to understand
whether VFIO passthrough is safe with respect to interrupts.

On ARM typically an MSI controller can sit downstream
to the IOMMU without preventing VFIO passthrough.
As such any assigned device can write into the MSI doorbell.
In case the MSI controller implements IRQ remapping, assigned
devices will not be able to trigger interrupts towards the
host. On the contrary, the assignment must be emphasized as
unsafe with respect to interrupts.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Tomasz Nowicki <tomasz.nowicki@caviumnetworks.com>
Tested-by: Bharat Bhushan <bharat.bhushan@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/irqdomain.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index bc2f5719dace..188eced6813e 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -222,6 +222,7 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
 					 void *host_data);
 extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
 						   enum irq_domain_bus_token bus_token);
+extern bool irq_domain_check_msi_remap(void);
 extern void irq_set_default_host(struct irq_domain *host);
 extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
 				  irq_hw_number_t hwirq, int node,
-- 
cgit v1.2.3


From d78973c32a210b0057b51880f7119bf2b61d5a65 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Mon, 12 Dec 2016 18:01:45 -0800
Subject: llist: Clarify comments about when locking is needed

llist.h comments are confusing about when locking is needed versus when it
isn't. Clarify these comments by being more descriptive about why locking is
needed for llist_del_first.

Cc: Ingo Molnar <mingo@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Huang Ying <ying.huang@intel.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/llist.h | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/llist.h b/include/linux/llist.h
index fd4ca0b4fe0f..171baa90f6f6 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -3,28 +3,33 @@
 /*
  * Lock-less NULL terminated single linked list
  *
- * If there are multiple producers and multiple consumers, llist_add
- * can be used in producers and llist_del_all can be used in
- * consumers.  They can work simultaneously without lock.  But
- * llist_del_first can not be used here.  Because llist_del_first
- * depends on list->first->next does not changed if list->first is not
- * changed during its operation, but llist_del_first, llist_add,
- * llist_add (or llist_del_all, llist_add, llist_add) sequence in
- * another consumer may violate that.
- *
- * If there are multiple producers and one consumer, llist_add can be
- * used in producers and llist_del_all or llist_del_first can be used
- * in the consumer.
- *
- * This can be summarized as follow:
+ * Cases where locking is not needed:
+ * If there are multiple producers and multiple consumers, llist_add can be
+ * used in producers and llist_del_all can be used in consumers simultaneously
+ * without locking. Also a single consumer can use llist_del_first while
+ * multiple producers simultaneously use llist_add, without any locking.
+ *
+ * Cases where locking is needed:
+ * If we have multiple consumers with llist_del_first used in one consumer, and
+ * llist_del_first or llist_del_all used in other consumers, then a lock is
+ * needed.  This is because llist_del_first depends on list->first->next not
+ * changing, but without lock protection, there's no way to be sure about that
+ * if a preemption happens in the middle of the delete operation and on being
+ * preempted back, the list->first is the same as before causing the cmpxchg in
+ * llist_del_first to succeed. For example, while a llist_del_first operation
+ * is in progress in one consumer, then a llist_del_first, llist_add,
+ * llist_add (or llist_del_all, llist_add, llist_add) sequence in another
+ * consumer may cause violations.
+ *
+ * This can be summarized as follows:
  *
  *           |   add    | del_first |  del_all
  * add       |    -     |     -     |     -
  * del_first |          |     L     |     L
  * del_all   |          |           |     -
  *
- * Where "-" stands for no lock is needed, while "L" stands for lock
- * is needed.
+ * Where, a particular row's operation can happen concurrently with a column's
+ * operation, with "-" being no lock needed, while "L" being lock is needed.
  *
  * The list entries deleted via llist_del_all can be traversed with
  * traversing function such as llist_for_each etc.  But the list
-- 
cgit v1.2.3


From 02a5c550b2738f2bfea8e1e00aa75944d71c9e18 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 2 Nov 2016 17:25:06 -0700
Subject: rcu: Abstract extended quiescent state determination

This commit is the fourth step towards full abstraction of all accesses
to the ->dynticks counter, implementing previously open-coded checks and
comparisons in new rcu_dynticks_in_eqs() and rcu_dynticks_in_eqs_since()
functions.  This abstraction will ease changes to the ->dynticks counter
operation.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcutiny.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ac81e4063b40..4f9b2fa2173d 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,6 +27,12 @@
 
 #include <linux/cache.h>
 
+struct rcu_dynticks;
+static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
+{
+	return 0;
+}
+
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
-- 
cgit v1.2.3


From 1cce1eea0aff51201753fcaca421df825b0813b6 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <n.borisov.lkml@gmail.com>
Date: Wed, 14 Dec 2016 15:56:33 +0200
Subject: inotify: Convert to using per-namespace limits

This patchset converts inotify to using the newly introduced
per-userns sysctl infrastructure.

Currently the inotify instances/watches are being accounted in the
user_struct structure. This means that in setups where multiple
users in unprivileged containers map to the same underlying
real user (i.e. pointing to the same user_struct) the inotify limits
are going to be shared as well, allowing one user(or application) to exhaust
all others limits.

Fix this by switching the inotify sysctls to using the
per-namespace/per-user limits. This will allow the server admin to
set sensible global limits, which can further be tuned inside every
individual user namespace. Additionally, in order to preserve the
sysctl ABI make the existing inotify instances/watches sysctls
modify the values of the initial user namespace.

Signed-off-by: Nikolay Borisov <n.borisov.lkml@gmail.com>
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/fsnotify_backend.h | 3 ++-
 include/linux/sched.h            | 4 ----
 include/linux/user_namespace.h   | 4 ++++
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0cf34d6cc253..c8f2738113f4 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -16,6 +16,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/atomic.h>
+#include <linux/user_namespace.h>
 
 /*
  * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
@@ -170,7 +171,7 @@ struct fsnotify_group {
 		struct inotify_group_private_data {
 			spinlock_t	idr_lock;
 			struct idr      idr;
-			struct user_struct      *user;
+			struct ucounts *ucounts;
 		} inotify_data;
 #endif
 #ifdef CONFIG_FANOTIFY
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4d1905245c7a..d2334229167f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -868,10 +868,6 @@ struct user_struct {
 	atomic_t __count;	/* reference count */
 	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
-#ifdef CONFIG_INOTIFY_USER
-	atomic_t inotify_watches; /* How many inotify watches does this user have? */
-	atomic_t inotify_devs;	/* How many inotify devs does this user have opened? */
-#endif
 #ifdef CONFIG_FANOTIFY
 	atomic_t fanotify_listeners;
 #endif
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index eb209d4523f5..363e0e8082a9 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -32,6 +32,10 @@ enum ucount_type {
 	UCOUNT_NET_NAMESPACES,
 	UCOUNT_MNT_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
+#ifdef CONFIG_INOTIFY_USER
+	UCOUNT_INOTIFY_INSTANCES,
+	UCOUNT_INOTIFY_WATCHES,
+#endif
 	UCOUNT_COUNTS,
 };
 
-- 
cgit v1.2.3


From 9227dd2a84a765fcfef1677ff17de0958b192eda Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jan 2017 17:26:31 +1300
Subject: exec: Remove LSM_UNSAFE_PTRACE_CAP

With previous changes every location that tests for
LSM_UNSAFE_PTRACE_CAP also tests for LSM_UNSAFE_PTRACE making the
LSM_UNSAFE_PTRACE_CAP redundant, so remove it.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/security.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index c2125e9093e8..9d9ee90f1f35 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -140,8 +140,7 @@ struct request_sock;
 /* bprm->unsafe reasons */
 #define LSM_UNSAFE_SHARE	1
 #define LSM_UNSAFE_PTRACE	2
-#define LSM_UNSAFE_PTRACE_CAP	4
-#define LSM_UNSAFE_NO_NEW_PRIVS	8
+#define LSM_UNSAFE_NO_NEW_PRIVS	4
 
 #ifdef CONFIG_MMU
 extern int mmap_min_addr_handler(struct ctl_table *table, int write,
-- 
cgit v1.2.3


From 1331b62c97217459780e040e8a66bb609f2acd20 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 2 Jan 2017 16:01:57 +0100
Subject: rfkill: remove rfkill-regulator

There are no users of this ("vrfkill") in the tree, so it's just
dead code - remove it.

This also isn't really how rfkill is supposed to be used - it's
intended as a signalling mechanism to/from the device, which the
driver (and partially cfg80211) will handle - having a separate
rfkill instance for a regulator is confusing, the driver should
use the regulator instead to turn off the device when requested.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/rfkill-regulator.h | 48 ----------------------------------------
 1 file changed, 48 deletions(-)
 delete mode 100644 include/linux/rfkill-regulator.h

(limited to 'include/linux')

diff --git a/include/linux/rfkill-regulator.h b/include/linux/rfkill-regulator.h
deleted file mode 100644
index aca36bc83315..000000000000
--- a/include/linux/rfkill-regulator.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * rfkill-regulator.c - Regulator consumer driver for rfkill
- *
- * Copyright (C) 2009  Guiming Zhuo <gmzhuo@gmail.com>
- * Copyright (C) 2011  Antonio Ospite <ospite@studenti.unina.it>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#ifndef __LINUX_RFKILL_REGULATOR_H
-#define __LINUX_RFKILL_REGULATOR_H
-
-/*
- * Use "vrfkill" as supply id when declaring the regulator consumer:
- *
- * static struct regulator_consumer_supply pcap_regulator_V6_consumers [] = {
- * 	{ .dev_name = "rfkill-regulator.0", .supply = "vrfkill" },
- * };
- *
- * If you have several regulator driven rfkill, you can append a numerical id to
- * .dev_name as done above, and use the same id when declaring the platform
- * device:
- *
- * static struct rfkill_regulator_platform_data ezx_rfkill_bt_data = {
- * 	.name  = "ezx-bluetooth",
- * 	.type  = RFKILL_TYPE_BLUETOOTH,
- * };
- *
- * static struct platform_device a910_rfkill = {
- * 	.name  = "rfkill-regulator",
- * 	.id    = 0,
- * 	.dev   = {
- * 		.platform_data = &ezx_rfkill_bt_data,
- * 	},
- * };
- */
-
-#include <linux/rfkill.h>
-
-struct rfkill_regulator_platform_data {
-	char *name;             /* the name for the rfkill switch */
-	enum rfkill_type type;  /* the type as specified in rfkill.h */
-};
-
-#endif /* __LINUX_RFKILL_REGULATOR_H */
-- 
cgit v1.2.3


From 12a6075cabc0d9ffbc0366b44daa22f278606312 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 10 Jan 2017 18:52:06 +0100
Subject: can: dev: add CAN interface termination API

This patch adds a netlink interface to configure the CAN bus termination of
CAN interfaces.

Inside the driver an array of supported termination values is defined:

const u16 drvname_termination[] = { 60, 120, CAN_TERMINATION_DISABLED };

struct drvname_priv *priv;
priv = netdev_priv(dev);

priv->termination_const = drvname_termination;
priv->termination_const_cnt = ARRAY_SIZE(drvname_termination);
priv->termination = CAN_TERMINATION_DISABLED;

And the funtion to set the value has to be defined:

priv->do_set_termination = drvname_set_termination;

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Reviewed-by: Ramesh Shanmugasundaram <Ramesh.shanmugasundaram@bp.renesas.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 5f5270941ba0..f6a57f322f00 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -38,6 +38,9 @@ struct can_priv {
 	struct can_bittiming bittiming, data_bittiming;
 	const struct can_bittiming_const *bittiming_const,
 		*data_bittiming_const;
+	const u16 *termination_const;
+	unsigned int termination_const_cnt;
+	u16 termination;
 	struct can_clock clock;
 
 	enum can_state state;
@@ -53,6 +56,7 @@ struct can_priv {
 	int (*do_set_bittiming)(struct net_device *dev);
 	int (*do_set_data_bittiming)(struct net_device *dev);
 	int (*do_set_mode)(struct net_device *dev, enum can_mode mode);
+	int (*do_set_termination)(struct net_device *dev, u16 term);
 	int (*do_get_state)(const struct net_device *dev,
 			    enum can_state *state);
 	int (*do_get_berr_counter)(const struct net_device *dev,
-- 
cgit v1.2.3


From 431af779256cd6cb8328ac23c5696bae63c33a51 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 11 Jan 2017 17:05:35 +0100
Subject: can: dev: add CAN interface API for fixed bitrates

Some CAN interfaces only support fixed fixed bitrates. This patch adds a
netlink interface to get the list of the CAN interface's fixed bitrates and
data bitrates.

Inside the driver arrays of supported data- bitrate values are defined.

const u32 drvname_bitrate[] = { 20000, 50000, 100000 };
const u32 drvname_data_bitrate[] = { 200000, 500000, 1000000 };

struct drvname_priv *priv;
priv = netdev_priv(dev);

priv->bitrate_const = drvname_bitrate;
priv->bitrate_const_cnt = ARRAY_SIZE(drvname_bitrate);
priv->data_bitrate_const = drvname_data_bitrate;
priv->data_bitrate_const_cnt = ARRAY_SIZE(drvname_data_bitrate);

Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index f6a57f322f00..141b05aade81 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -41,6 +41,10 @@ struct can_priv {
 	const u16 *termination_const;
 	unsigned int termination_const_cnt;
 	u16 termination;
+	const u32 *bitrate_const;
+	unsigned int bitrate_const_cnt;
+	const u32 *data_bitrate_const;
+	unsigned int data_bitrate_const_cnt;
 	struct can_clock clock;
 
 	enum can_state state;
-- 
cgit v1.2.3


From 5299709d0a87342dadc1fc9850484fadeb488bf8 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 20 Jan 2017 13:04:01 -0800
Subject: treewide: Constify most dma_map_ops structures

Most dma_map_ops structures are never modified. Constify these
structures such that these can be write-protected. This patch
has been generated as follows:

git grep -l 'struct dma_map_ops' |
  xargs -d\\n sed -i \
    -e 's/struct dma_map_ops/const struct dma_map_ops/g' \
    -e 's/const struct dma_map_ops {/struct dma_map_ops {/g' \
    -e 's/^const struct dma_map_ops;$/struct dma_map_ops;/' \
    -e 's/const const struct dma_map_ops /const struct dma_map_ops /g';
sed -i -e 's/const \(struct dma_map_ops intel_dma_ops\)/\1/' \
  $(git grep -l 'struct dma_map_ops intel_dma_ops');
sed -i -e 's/const \(struct dma_map_ops dma_iommu_ops\)/\1/' \
  $(git grep -l 'struct dma_map_ops' | grep ^arch/powerpc);
sed -i -e '/^struct vmd_dev {$/,/^};$/ s/const \(struct dma_map_ops[[:blank:]]dma_ops;\)/\1/' \
       -e '/^static void vmd_setup_dma_ops/,/^}$/ s/const \(struct dma_map_ops \*dest\)/\1/' \
       -e 's/const \(struct dma_map_ops \*dest = \&vmd->dma_ops\)/\1/' \
    drivers/pci/host/*.c
sed -i -e '/^void __init pci_iommu_alloc(void)$/,/^}$/ s/dma_ops->/intel_dma_ops./' arch/ia64/kernel/pci-dma.c
sed -i -e 's/static const struct dma_map_ops sn_dma_ops/static struct dma_map_ops sn_dma_ops/' arch/ia64/sn/pci/pci_dma.c
sed -i -e 's/(const struct dma_map_ops \*)//' drivers/misc/mic/bus/vop_bus.c

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Juergen Gross <jgross@suse.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Russell King <linux@armlinux.org.uk>
Cc: x86@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/dma-mapping.h | 42 +++++++++++++++++++++---------------------
 include/linux/mic_bus.h     |  2 +-
 2 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 10c5a17b1f51..f1da68b82c63 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -127,7 +127,7 @@ struct dma_map_ops {
 	int is_phys;
 };
 
-extern struct dma_map_ops dma_noop_ops;
+extern const struct dma_map_ops dma_noop_ops;
 
 #define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
 
@@ -170,8 +170,8 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
  * dma dependent code.  Code that depends on the dma-mapping
  * API needs to set 'depends on HAS_DMA' in its Kconfig
  */
-extern struct dma_map_ops bad_dma_ops;
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+extern const struct dma_map_ops bad_dma_ops;
+static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
 {
 	return &bad_dma_ops;
 }
@@ -182,7 +182,7 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
 					      enum dma_data_direction dir,
 					      unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 	dma_addr_t addr;
 
 	kmemcheck_mark_initialized(ptr, size);
@@ -201,7 +201,7 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
 					  enum dma_data_direction dir,
 					  unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->unmap_page)
@@ -217,7 +217,7 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
 				   int nents, enum dma_data_direction dir,
 				   unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 	int i, ents;
 	struct scatterlist *s;
 
@@ -235,7 +235,7 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg
 				      int nents, enum dma_data_direction dir,
 				      unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	debug_dma_unmap_sg(dev, sg, nents, dir);
@@ -249,7 +249,7 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev,
 					    enum dma_data_direction dir,
 					    unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 	dma_addr_t addr;
 
 	kmemcheck_mark_initialized(page_address(page) + offset, size);
@@ -265,7 +265,7 @@ static inline void dma_unmap_page_attrs(struct device *dev,
 					enum dma_data_direction dir,
 					unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->unmap_page)
@@ -279,7 +279,7 @@ static inline dma_addr_t dma_map_resource(struct device *dev,
 					  enum dma_data_direction dir,
 					  unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 	dma_addr_t addr;
 
 	BUG_ON(!valid_dma_direction(dir));
@@ -300,7 +300,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
 				      size_t size, enum dma_data_direction dir,
 				      unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->unmap_resource)
@@ -312,7 +312,7 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
 					   size_t size,
 					   enum dma_data_direction dir)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->sync_single_for_cpu)
@@ -324,7 +324,7 @@ static inline void dma_sync_single_for_device(struct device *dev,
 					      dma_addr_t addr, size_t size,
 					      enum dma_data_direction dir)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->sync_single_for_device)
@@ -364,7 +364,7 @@ static inline void
 dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		    int nelems, enum dma_data_direction dir)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->sync_sg_for_cpu)
@@ -376,7 +376,7 @@ static inline void
 dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		       int nelems, enum dma_data_direction dir)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->sync_sg_for_device)
@@ -421,7 +421,7 @@ static inline int
 dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr,
 	       dma_addr_t dma_addr, size_t size, unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 	BUG_ON(!ops);
 	if (ops->mmap)
 		return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
@@ -439,7 +439,7 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr,
 		      dma_addr_t dma_addr, size_t size,
 		      unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 	BUG_ON(!ops);
 	if (ops->get_sgtable)
 		return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
@@ -457,7 +457,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
 				       dma_addr_t *dma_handle, gfp_t flag,
 				       unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 	void *cpu_addr;
 
 	BUG_ON(!ops);
@@ -479,7 +479,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
 				     void *cpu_addr, dma_addr_t dma_handle,
 				     unsigned long attrs)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!ops);
 	WARN_ON(irqs_disabled());
@@ -537,7 +537,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 #ifndef HAVE_ARCH_DMA_SUPPORTED
 static inline int dma_supported(struct device *dev, u64 mask)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	if (!ops)
 		return 0;
@@ -550,7 +550,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
 #ifndef HAVE_ARCH_DMA_SET_MASK
 static inline int dma_set_mask(struct device *dev, u64 mask)
 {
-	struct dma_map_ops *ops = get_dma_ops(dev);
+	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	if (ops->set_dma_mask)
 		return ops->set_dma_mask(dev, mask);
diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h
index 27d7c95fd0da..504d54c71bdb 100644
--- a/include/linux/mic_bus.h
+++ b/include/linux/mic_bus.h
@@ -90,7 +90,7 @@ struct mbus_hw_ops {
 };
 
 struct mbus_device *
-mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
+mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
 		     struct mbus_hw_ops *hw_ops, int index,
 		     void __iomem *mmio_va);
 void mbus_unregister_device(struct mbus_device *mbdev);
-- 
cgit v1.2.3


From 5657933dbb6e25feaf5d8df8c88f96cdade693a3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 20 Jan 2017 13:04:02 -0800
Subject: treewide: Move dma_ops from struct dev_archdata into struct device

Some but not all architectures provide set_dma_ops(). Move dma_ops
from struct dev_archdata into struct device such that it becomes
possible on all architectures to configure dma_ops per device.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Juergen Gross <jgross@suse.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Russell King <linux@armlinux.org.uk>
Cc: x86@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 491b4c0ca633..46a567261ccc 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -921,6 +921,7 @@ struct device {
 #ifdef CONFIG_NUMA
 	int		numa_node;	/* NUMA node this device is close to */
 #endif
+	const struct dma_map_ops *dma_ops;
 	u64		*dma_mask;	/* dma mask (if dma'able device) */
 	u64		coherent_dma_mask;/* Like dma_mask, but for
 					     alloc_coherent mappings as
-- 
cgit v1.2.3


From ca6e8e1031419549f67291ca31b43126f07cecdb Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 20 Jan 2017 13:04:03 -0800
Subject: treewide: Consolidate set_dma_ops() implementations

Now that all set_dma_ops() implementations are identical (ignoring
BUG_ON() statements), remove the architecture specific definitions
and add a definition in <linux/dma-mapping.h>.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/dma-mapping.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f1da68b82c63..e97f23e8b2d9 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -164,6 +164,11 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
 
 #ifdef CONFIG_HAS_DMA
 #include <asm/dma-mapping.h>
+static inline void set_dma_ops(struct device *dev,
+			       const struct dma_map_ops *dma_ops)
+{
+	dev->dma_ops = dma_ops;
+}
 #else
 /*
  * Define the dma api to allow compilation but not linking of
-- 
cgit v1.2.3


From 815dd18788fe0d41899f51b91d0560279cf16b0d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 20 Jan 2017 13:04:04 -0800
Subject: treewide: Consolidate get_dma_ops() implementations

Introduce a new architecture-specific get_arch_dma_ops() function
that takes a struct bus_type * argument. Add get_dma_ops() in
<linux/dma-mapping.h>.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Juergen Gross <jgross@suse.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Russell King <linux@armlinux.org.uk>
Cc: x86@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/dma-mapping.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index e97f23e8b2d9..ab8710888ddf 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -164,6 +164,13 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
 
 #ifdef CONFIG_HAS_DMA
 #include <asm/dma-mapping.h>
+static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+	if (dev && dev->dma_ops)
+		return dev->dma_ops;
+	return get_arch_dma_ops(dev ? dev->bus : NULL);
+}
+
 static inline void set_dma_ops(struct device *dev,
 			       const struct dma_map_ops *dma_ops)
 {
-- 
cgit v1.2.3


From 551199aca1c3102ebed390566d681cc1290284ca Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 20 Jan 2017 13:04:07 -0800
Subject: lib/dma-virt: Add dma_virt_ops

Several RDMA drivers (hfi1, qib and rxe) expect that ib_sge.addr
is a virtual address. Provide DMA mapping operations that are
suitable for these drivers.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/dma-mapping.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ab8710888ddf..426c43d4fdbf 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -128,6 +128,7 @@ struct dma_map_ops {
 };
 
 extern const struct dma_map_ops dma_noop_ops;
+extern const struct dma_map_ops dma_virt_ops;
 
 #define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
 
-- 
cgit v1.2.3


From 6db6f0eae6052b70885562e1733896647ec1d807 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 21 Jan 2017 21:01:32 +0100
Subject: bridge: multicast to unicast
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements an optional, per bridge port flag and feature to deliver
multicast packets to any host on the according port via unicast
individually. This is done by copying the packet per host and
changing the multicast destination MAC to a unicast one accordingly.

multicast-to-unicast works on top of the multicast snooping feature of
the bridge. Which means unicast copies are only delivered to hosts which
are interested in it and signalized this via IGMP/MLD reports
previously.

This feature is intended for interface types which have a more reliable
and/or efficient way to deliver unicast packets than broadcast ones
(e.g. wifi).

However, it should only be enabled on interfaces where no IGMPv2/MLDv1
report suppression takes place. This feature is disabled by default.

The initial patch and idea is from Felix Fietkau.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
[linus.luessing@c0d3.blue: various bug + style fixes, commit message]
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index c6587c01d951..debc9d5904e5 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -46,6 +46,7 @@ struct br_ip_list {
 #define BR_LEARNING_SYNC	BIT(9)
 #define BR_PROXYARP_WIFI	BIT(10)
 #define BR_MCAST_FLOOD		BIT(11)
+#define BR_MULTICAST_TO_UNICAST	BIT(12)
 
 #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
 
-- 
cgit v1.2.3


From 059aa734824165507c65fd30a55ff000afd14983 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 22 Jan 2017 14:04:29 -0500
Subject: nfs: Don't increment lock sequence ID after NFS4ERR_MOVED

Xuan Qi reports that the Linux NFSv4 client failed to lock a file
that was migrated. The steps he observed on the wire:

1. The client sent a LOCK request to the source server
2. The source server replied NFS4ERR_MOVED
3. The client switched to the destination server
4. The client sent the same LOCK request to the destination
   server with a bumped lock sequence ID
5. The destination server rejected the LOCK request with
   NFS4ERR_BAD_SEQID

RFC 3530 section 8.1.5 provides a list of NFS errors which do not
bump a lock sequence ID.

However, RFC 3530 is now obsoleted by RFC 7530. In RFC 7530 section
9.1.7, this list has been updated by the addition of NFS4ERR_MOVED.

Reported-by: Xuan Qi <xuan.qi@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@vger.kernel.org # v3.7+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/nfs4.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index bca536341d1a..1b1ca04820a3 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -282,7 +282,7 @@ enum nfsstat4 {
 
 static inline bool seqid_mutating_err(u32 err)
 {
-	/* rfc 3530 section 8.1.5: */
+	/* See RFC 7530, section 9.1.7 */
 	switch (err) {
 	case NFS4ERR_STALE_CLIENTID:
 	case NFS4ERR_STALE_STATEID:
@@ -291,6 +291,7 @@ static inline bool seqid_mutating_err(u32 err)
 	case NFS4ERR_BADXDR:
 	case NFS4ERR_RESOURCE:
 	case NFS4ERR_NOFILEHANDLE:
+	case NFS4ERR_MOVED:
 		return false;
 	};
 	return true;
-- 
cgit v1.2.3


From b70f43a16182c7dbc0779bc5bd9f621711f13eb3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 22 Jan 2017 21:17:32 -0800
Subject: net: phy: Fix typo for MDIO module boilerplate comment

The module boilerplate macro is named mdio_module_driver and not
module_mdio_driver, fix that.

Fixes: a9049e0c513c ("mdio: Add support for mdio drivers.")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index b6587a4b32e7..55a80d73cfc1 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -265,7 +265,7 @@ bool mdiobus_is_registered_device(struct mii_bus *bus, int addr);
 struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr);
 
 /**
- * module_mdio_driver() - Helper macro for registering mdio drivers
+ * mdio_module_driver() - Helper macro for registering mdio drivers
  *
  * Helper macro for MDIO drivers which do not do anything special in module
  * init/exit. Each module may only use this macro once, and calling it
-- 
cgit v1.2.3


From c9497c98901c689bf6c357f812bf864ed8f50ace Mon Sep 17 00:00:00 2001
From: Mohamad Haj Yahia <mohamad@mellanox.com>
Date: Thu, 15 Dec 2016 14:02:53 +0200
Subject: net/mlx5: Add support for setting VF min rate

Add support for SRIOV VF min rate guarantee by using the TSAR BW share
weights mechanism.

The TSAR BW share vport attribute represents the weight of that vport
among the other vports weights which means that the actual vport BW
percentage is the same vport weight percentage among the total vports
weights sum.

Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index d96ebc319d63..a919dfb920ae 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -547,7 +547,9 @@ struct mlx5_ifc_e_switch_cap_bits {
 struct mlx5_ifc_qos_cap_bits {
 	u8         packet_pacing[0x1];
 	u8         esw_scheduling[0x1];
-	u8         reserved_at_2[0x1e];
+	u8         esw_bw_share[0x1];
+	u8         esw_rate_limit[0x1];
+	u8         reserved_at_4[0x1c];
 
 	u8         reserved_at_20[0x20];
 
-- 
cgit v1.2.3


From 8c7245a60ef8f8c4a427349690c5a141cfed6217 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 30 Nov 2016 20:23:51 +0200
Subject: net/mlx5: Push min-inline mode resolution helper into the core

So we can use that from the IB driver too in downstream patches.

This patch doesn't change any functionality.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/vport.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index ec35157ea725..656c70b65dd2 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -51,6 +51,7 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 *addr);
 int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 				    u16 vport, u8 *min_inline);
+void mlx5_query_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline);
 int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 min_inline);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
-- 
cgit v1.2.3


From c929ea0b910355e1876c64431f3d5802f95b3d75 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Fri, 20 Jan 2017 16:48:39 +0800
Subject: SUNRPC: cleanup ida information when removing sunrpc module

After removing sunrpc module, I get many kmemleak information as,
unreferenced object 0xffff88003316b1e0 (size 544):
  comm "gssproxy", pid 2148, jiffies 4294794465 (age 4200.081s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffffb0cfb58a>] kmemleak_alloc+0x4a/0xa0
    [<ffffffffb03507fe>] kmem_cache_alloc+0x15e/0x1f0
    [<ffffffffb0639baa>] ida_pre_get+0xaa/0x150
    [<ffffffffb0639cfd>] ida_simple_get+0xad/0x180
    [<ffffffffc06054fb>] nlmsvc_lookup_host+0x4ab/0x7f0 [lockd]
    [<ffffffffc0605e1d>] lockd+0x4d/0x270 [lockd]
    [<ffffffffc06061e5>] param_set_timeout+0x55/0x100 [lockd]
    [<ffffffffc06cba24>] svc_defer+0x114/0x3f0 [sunrpc]
    [<ffffffffc06cbbe7>] svc_defer+0x2d7/0x3f0 [sunrpc]
    [<ffffffffc06c71da>] rpc_show_info+0x8a/0x110 [sunrpc]
    [<ffffffffb044a33f>] proc_reg_write+0x7f/0xc0
    [<ffffffffb038e41f>] __vfs_write+0xdf/0x3c0
    [<ffffffffb0390f1f>] vfs_write+0xef/0x240
    [<ffffffffb0392fbd>] SyS_write+0xad/0x130
    [<ffffffffb0d06c37>] entry_SYSCALL_64_fastpath+0x1a/0xa9
    [<ffffffffffffffff>] 0xffffffffffffffff

I found, the ida information (dynamic memory) isn't cleanup.

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Fixes: 2f048db4680a ("SUNRPC: Add an identifier for struct rpc_clnt")
Cc: stable@vger.kernel.org # v3.12+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/clnt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 85cc819676e8..333ad11b3dd9 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -216,5 +216,6 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *);
 void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *);
 bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
 			const struct sockaddr *sap);
+void rpc_cleanup_clids(void);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
-- 
cgit v1.2.3


From 8a1f780e7f28c7c1d640118242cf68d528c456cd Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Date: Tue, 24 Jan 2017 15:17:45 -0800
Subject: memory_hotplug: make zone_can_shift() return a boolean value

online_{kernel|movable} is used to change the memory zone to
ZONE_{NORMAL|MOVABLE} and online the memory.

To check that memory zone can be changed, zone_can_shift() is used.
Currently the function returns minus integer value, plus integer
value and 0. When the function returns minus or plus integer value,
it means that the memory zone can be changed to ZONE_{NORNAL|MOVABLE}.

But when the function returns 0, there are two meanings.

One of the meanings is that the memory zone does not need to be changed.
For example, when memory is in ZONE_NORMAL and onlined by online_kernel
the memory zone does not need to be changed.

Another meaning is that the memory zone cannot be changed. When memory
is in ZONE_NORMAL and onlined by online_movable, the memory zone may
not be changed to ZONE_MOVALBE due to memory online limitation(see
Documentation/memory-hotplug.txt). In this case, memory must not be
onlined.

The patch changes the return type of zone_can_shift() so that memory
online operation fails when memory zone cannot be changed as follows:

Before applying patch:
   # grep -A 35 "Node 2" /proc/zoneinfo
   Node 2, zone   Normal
   <snip>
      node_scanned  0
           spanned  8388608
           present  7864320
           managed  7864320
   # echo online_movable > memory4097/state
   # grep -A 35 "Node 2" /proc/zoneinfo
   Node 2, zone   Normal
   <snip>
      node_scanned  0
           spanned  8388608
           present  8388608
           managed  8388608

   online_movable operation succeeded. But memory is onlined as
   ZONE_NORMAL, not ZONE_MOVABLE.

After applying patch:
   # grep -A 35 "Node 2" /proc/zoneinfo
   Node 2, zone   Normal
   <snip>
      node_scanned  0
           spanned  8388608
           present  7864320
           managed  7864320
   # echo online_movable > memory4097/state
   bash: echo: write error: Invalid argument
   # grep -A 35 "Node 2" /proc/zoneinfo
   Node 2, zone   Normal
   <snip>
      node_scanned  0
           spanned  8388608
           present  7864320
           managed  7864320

   online_movable operation failed because of failure of changing
   the memory zone from ZONE_NORMAL to ZONE_MOVABLE

Fixes: df429ac03936 ("memory-hotplug: more general validation of zone during online")
Link: http://lkml.kernel.org/r/2f9c3837-33d7-b6e5-59c0-6ca4372b2d84@gmail.com
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Reviewed-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 01033fadea47..c1784c0b4f35 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -284,7 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
 		unsigned long map_offset);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 					  unsigned long pnum);
-extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-			  enum zone_type target);
+extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+			  enum zone_type target, int *zone_shift);
 
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
-- 
cgit v1.2.3


From b94f51183b0617e7b9b4fb4137d4cf1cab7547c2 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Tue, 24 Jan 2017 15:17:53 -0800
Subject: kernel/watchdog: prevent false hardlockup on overloaded system

On an overloaded system, it is possible that a change in the watchdog
threshold can be delayed long enough to trigger a false positive.

This can easily be achieved by having a cpu spinning indefinitely on a
task, while another cpu updates watchdog threshold.

What happens is while trying to park the watchdog threads, the hrtimers
on the other cpus trigger and reprogram themselves with the new slower
watchdog threshold.  Meanwhile, the nmi watchdog is still programmed
with the old faster threshold.

Because the one cpu is blocked, it prevents the thread parking on the
other cpus from completing, which is needed to shutdown the nmi watchdog
and reprogram it correctly.  As a result, a false positive from the nmi
watchdog is reported.

Fix this by setting a park_in_progress flag to block all lockups until
the parking is complete.

Fix provided by Ulrich Obergfell.

[akpm@linux-foundation.org: s/park_in_progress/watchdog_park_in_progress/]
Link: http://lkml.kernel.org/r/1481041033-192236-1-git-send-email-dzickus@redhat.com
Signed-off-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nmi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index aacca824a6ae..0a3fadc32693 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -110,6 +110,7 @@ extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
 extern unsigned long *watchdog_cpumask_bits;
+extern atomic_t watchdog_park_in_progress;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
-- 
cgit v1.2.3


From ea57485af8f4221312a5a95d63c382b45e7840dc Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Tue, 24 Jan 2017 15:18:32 -0800
Subject: mm, page_alloc: fix check for NULL preferred_zone

Patch series "fix premature OOM regression in 4.7+ due to cpuset races".

This is v2 of my attempt to fix the recent report based on LTP cpuset
stress test [1].  The intention is to go to stable 4.9 LTSS with this,
as triggering repeated OOMs is not nice.  That's why the patches try to
be not too intrusive.

Unfortunately why investigating I found that modifying the testcase to
use per-VMA policies instead of per-task policies will bring the OOM's
back, but that seems to be much older and harder to fix problem.  I have
posted a RFC [2] but I believe that fixing the recent regressions has a
higher priority.

Longer-term we might try to think how to fix the cpuset mess in a better
and less error prone way.  I was for example very surprised to learn,
that cpuset updates change not only task->mems_allowed, but also
nodemask of mempolicies.  Until now I expected the parameter to
alloc_pages_nodemask() to be stable.  I wonder why do we then treat
cpusets specially in get_page_from_freelist() and distinguish HARDWALL
etc, when there's unconditional intersection between mempolicy and
cpuset.  I would expect the nodemask adjustment for saving overhead in
g_p_f(), but that clearly doesn't happen in the current form.  So we
have both crazy complexity and overhead, AFAICS.

[1] https://lkml.kernel.org/r/CAFpQJXUq-JuEP=QPidy4p_=FN0rkH5Z-kfB4qBvsf6jMS87Edg@mail.gmail.com
[2] https://lkml.kernel.org/r/7c459f26-13a6-a817-e508-b65b903a8378@suse.cz

This patch (of 4):

Since commit c33d6c06f60f ("mm, page_alloc: avoid looking up the first
zone in a zonelist twice") we have a wrong check for NULL preferred_zone,
which can theoretically happen due to concurrent cpuset modification.  We
check the zoneref pointer which is never NULL and we should check the zone
pointer.  Also document this in first_zones_zonelist() comment per Michal
Hocko.

Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice")
Link: http://lkml.kernel.org/r/20170120103843.24587-2-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Ganapatrao Kulkarni <gpkulkarni@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 36d9896fbc1e..f4aac87adcc3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
  * @zonelist - The zonelist to search for a suitable zone
  * @highest_zoneidx - The zone index of the highest zone to return
  * @nodes - An optional nodemask to filter the zonelist with
- * @zone - The first suitable zone found is returned via this parameter
+ * @return - Zoneref pointer for the first suitable zone found (see below)
  *
  * This function returns the first zone at or below a given zone index that is
  * within the allowed nodemask. The zoneref returned is a cursor that can be
  * used to iterate the zonelist with next_zones_zonelist by advancing it by
  * one before calling.
+ *
+ * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is
+ * never NULL). This may happen either genuinely, or due to concurrent nodemask
+ * update due to cpuset modification.
  */
 static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 					enum zone_type highest_zoneidx,
-- 
cgit v1.2.3


From 08d62f58aa2587132a930afbe8664379b430e2dd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 17 Jan 2017 13:57:26 +0200
Subject: dmaengine: dw: register IRQ and DMA pool with instance ID

It is really useful not only for debugging to have an IRQ line and DMA
pool labeled with driver and its instance ID. Do this for DesignWare DMA
driver.

All current users of this IP would be enhanced later on.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dma/dw.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index ccfd0c3777df..b63b25814d77 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -23,6 +23,7 @@ struct dw_dma;
 /**
  * struct dw_dma_chip - representation of DesignWare DMA controller hardware
  * @dev:		struct device of the DMA controller
+ * @id:			instance ID
  * @irq:		irq line
  * @regs:		memory mapped I/O space
  * @clk:		hclk clock
@@ -31,6 +32,7 @@ struct dw_dma;
  */
 struct dw_dma_chip {
 	struct device	*dev;
+	int		id;
 	int		irq;
 	void __iomem	*regs;
 	struct clk	*clk;
-- 
cgit v1.2.3


From 199244d69458770770890f8b5988a1b6cad701ad Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 17 Jan 2017 13:57:31 +0200
Subject: dmaengine: dw: add support of iDMA 32-bit hardware

iDMA 32-bit is Intel designed DMA controller that behaves like Synopsys
Designware DMA. This patch adds a support of the new Intel hardware.

Due to iDMA 32-bit has no autoconfiguration the platform code must
provide a platform data to dw_dma_probe().

By default full FIFO (1024 bytes) is assigned to channel 0. Here we
slice FIFO on equal parts between channels for iDMA 32-bit case.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/platform_data/dma-dw.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index e69e415d0d98..896cb71a382c 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -41,6 +41,7 @@ struct dw_dma_slave {
  * @is_private: The device channels should be marked as private and not for
  *	by the general purpose DMA channel allocator.
  * @is_memcpy: The device channels do support memory-to-memory transfers.
+ * @is_idma32: The type of the DMA controller is iDMA32
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
@@ -53,6 +54,7 @@ struct dw_dma_platform_data {
 	unsigned int	nr_channels;
 	bool		is_private;
 	bool		is_memcpy;
+	bool		is_idma32;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
 	unsigned char	chan_allocation_order;
-- 
cgit v1.2.3


From 728bbe75c82fdc6bdf157652a4351856ed08afc4 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Thu, 12 Jan 2017 14:52:19 -0600
Subject: misc: sram: Introduce support code for protect-exec sram type

Some platforms, like many ARM SoCs, require the ability to run code from
on-chip memory like SRAM for tasks like reconfiguring the SDRAM
controller or entering low-power sleep modes. In order to do this we
must be able to allocate memory that the code can be copied to but then
change the mapping to be read-only and executable so that no memory is
both writable and executable at the same time to avoid opening any
unneccesary security holes.

By using the existing "pool" partition type that the SRAM driver allows
we can create a memory space that will already be exposed by the
genalloc framework to allow for allocating memory but we must extend
this to meet the executable requirements. By making use of various
set_memory_* APIs we can change the attributes of pages to make them
writable for code upload but then read-only and executable when we want
to actually run code.  Because SRAM is a shared resource we need a
centralized manager of these set memory calls. Because the SRAM driver
itself is responsible for allocating the memory we can introduce a
sram_copy_exec API for the driver that works like memcpy but also
manages the page attributes and locking to allow multiple users of the
same SRAM space to all copy their code over independent of other each
before starting execution.

It is maintained in a separate file from the core SRAM driver to allow
it to be selectively built depending on whether or not a platform has
the appropriate set_memory_* APIs. A future patch will integrate it with
the core SRAM driver.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sram.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 include/linux/sram.h

(limited to 'include/linux')

diff --git a/include/linux/sram.h b/include/linux/sram.h
new file mode 100644
index 000000000000..c97dcbe8ce25
--- /dev/null
+++ b/include/linux/sram.h
@@ -0,0 +1,27 @@
+/*
+ * Generic SRAM Driver Interface
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef __LINUX_SRAM_H__
+#define __LINUX_SRAM_H__
+
+struct gen_pool;
+
+#ifdef CONFIG_SRAM_EXEC
+int sram_exec_copy(struct gen_pool *pool, void *dst, void *src, size_t size);
+#else
+static inline int sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
+				 size_t size)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_SRAM_EXEC */
+#endif /* __LINUX_SRAM_H__ */
-- 
cgit v1.2.3


From bcd375f7f71f7106c97516bf5395149954ef8810 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20Sch=C3=B6lling?= <manuel.schoelling@gmx.de>
Date: Fri, 13 Jan 2017 21:07:56 +0100
Subject: console: Add callback to flush scrollback buffer to consw struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This new callback is in preparation for persistent scrollback buffer
support for VGA consoles.
With a single scrollback buffer for all consoles, we could flush the
buffer just by invocating consw->con_switch(). But when each VGA console
has its own scrollback buffer, we need a new callback to tell the
video console driver which buffer to flush.

Signed-off-by: Manuel Schölling <manuel.schoelling@gmx.de>
Reviewed-by: Andrey Utkin <andrey_utkin@fastmail.com>
Tested-by: Andrey Utkin <andrey_utkin@fastmail.com>
Tested-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/console.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/console.h b/include/linux/console.h
index 9c26c6685587..5949d1855589 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -72,6 +72,10 @@ struct consw {
 	void	(*con_invert_region)(struct vc_data *, u16 *, int);
 	u16    *(*con_screen_pos)(struct vc_data *, int);
 	unsigned long (*con_getxy)(struct vc_data *, unsigned long, int *, int *);
+	/*
+	 * Flush the video console driver's scrollback buffer
+	 */
+	void	(*con_flush_scrollback)(struct vc_data *);
 	/*
 	 * Prepare the console for the debugger.  This includes, but is not
 	 * limited to, unblanking the console, loading an appropriate
-- 
cgit v1.2.3


From 93fbe91b552194af970256ce72934745d01df435 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Date: Fri, 20 Jan 2017 10:15:07 +0100
Subject: iio: Add STM32 timer trigger driver

Timers IPs can be used to generate triggers for other IPs like
DAC or ADC.
Each trigger may result of timer internals signals like counter enable,
reset or edge, this configuration could be done through "master_mode"
device attribute.

Since triggers could be used by DAC or ADC their names are defined
in include/ nux/iio/timer/stm32-timer-trigger.h and is_stm32_iio_timer_trigger
function could be used to check if the trigger is valid or not.

"trgo" trigger have a "sampling_frequency" attribute which allow to configure
timer sampling frequency.

version 8:
- change kernel version from 4.10 to 4.11 in ABI documentation

version 7:
- remove all iio_device related code
- move driver into trigger directory

version 5:
- simplify tables of triggers
- only create an IIO device when needed

version 4:
- get triggers configuration from "reg" in DT
- add tables of triggers
- sampling frequency is enable/disable when writing in trigger
  sampling_frequency attribute
- no more use of interruptions

version 3:
- change compatible to "st,stm32-timer-trigger"
- fix attributes access right
- use string instead of int for master_mode and slave_mode
- document device attributes in sysfs-bus-iio-timer-stm32

version 2:
- keep only one compatible
- use st,input-triggers-names and st,output-triggers-names
  to know which triggers are accepted and/or create by the device

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@st.com>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/iio/timer/stm32-timer-trigger.h | 62 +++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 include/linux/iio/timer/stm32-timer-trigger.h

(limited to 'include/linux')

diff --git a/include/linux/iio/timer/stm32-timer-trigger.h b/include/linux/iio/timer/stm32-timer-trigger.h
new file mode 100644
index 000000000000..55535aef2e6c
--- /dev/null
+++ b/include/linux/iio/timer/stm32-timer-trigger.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) STMicroelectronics 2016
+ *
+ * Author: Benjamin Gaignard <benjamin.gaignard@st.com>
+ *
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#ifndef _STM32_TIMER_TRIGGER_H_
+#define _STM32_TIMER_TRIGGER_H_
+
+#define TIM1_TRGO	"tim1_trgo"
+#define TIM1_CH1	"tim1_ch1"
+#define TIM1_CH2	"tim1_ch2"
+#define TIM1_CH3	"tim1_ch3"
+#define TIM1_CH4	"tim1_ch4"
+
+#define TIM2_TRGO	"tim2_trgo"
+#define TIM2_CH1	"tim2_ch1"
+#define TIM2_CH2	"tim2_ch2"
+#define TIM2_CH3	"tim2_ch3"
+#define TIM2_CH4	"tim2_ch4"
+
+#define TIM3_TRGO	"tim3_trgo"
+#define TIM3_CH1	"tim3_ch1"
+#define TIM3_CH2	"tim3_ch2"
+#define TIM3_CH3	"tim3_ch3"
+#define TIM3_CH4	"tim3_ch4"
+
+#define TIM4_TRGO	"tim4_trgo"
+#define TIM4_CH1	"tim4_ch1"
+#define TIM4_CH2	"tim4_ch2"
+#define TIM4_CH3	"tim4_ch3"
+#define TIM4_CH4	"tim4_ch4"
+
+#define TIM5_TRGO	"tim5_trgo"
+#define TIM5_CH1	"tim5_ch1"
+#define TIM5_CH2	"tim5_ch2"
+#define TIM5_CH3	"tim5_ch3"
+#define TIM5_CH4	"tim5_ch4"
+
+#define TIM6_TRGO	"tim6_trgo"
+
+#define TIM7_TRGO	"tim7_trgo"
+
+#define TIM8_TRGO	"tim8_trgo"
+#define TIM8_CH1	"tim8_ch1"
+#define TIM8_CH2	"tim8_ch2"
+#define TIM8_CH3	"tim8_ch3"
+#define TIM8_CH4	"tim8_ch4"
+
+#define TIM9_TRGO	"tim9_trgo"
+#define TIM9_CH1	"tim9_ch1"
+#define TIM9_CH2	"tim9_ch2"
+
+#define TIM12_TRGO	"tim12_trgo"
+#define TIM12_CH1	"tim12_ch1"
+#define TIM12_CH2	"tim12_ch2"
+
+bool is_stm32_timer_trigger(struct iio_trigger *trig);
+
+#endif
-- 
cgit v1.2.3


From 2acae0d5b0f73a8fb4b180bd13491feb96e55fc6 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 25 Jan 2017 02:28:16 +0100
Subject: trace: add variant without spacing in trace_print_hex_seq

For upcoming tracepoint support for BPF, we want to dump the program's
tag. Format should be similar to __print_hex(), but without spacing.
Add a __print_hex_str() variant for exactly that purpose that reuses
trace_print_hex_seq().

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/trace_events.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index be007610ceb0..cfa475a0e9ca 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -33,7 +33,8 @@ const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
 				    unsigned int bitmask_size);
 
 const char *trace_print_hex_seq(struct trace_seq *p,
-				const unsigned char *buf, int len);
+				const unsigned char *buf, int len,
+				bool spacing);
 
 const char *trace_print_array_seq(struct trace_seq *p,
 				   const void *buf, int count,
-- 
cgit v1.2.3


From a67edbf4fb6deadcfe57a04a134abed4a5ba3bb5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 25 Jan 2017 02:28:18 +0100
Subject: bpf: add initial bpf tracepoints

This work adds a number of tracepoints to paths that are either
considered slow-path or exception-like states, where monitoring or
inspecting them would be desirable.

For bpf(2) syscall, tracepoints have been placed for main commands
when they succeed. In XDP case, tracepoint is for exceptions, that
is, f.e. on abnormal BPF program exit such as unknown or XDP_ABORTED
return code, or when error occurs during XDP_TX action and the packet
could not be forwarded.

Both have been split into separate event headers, and can be further
extended. Worst case, if they unexpectedly should get into our way in
future, they can also removed [1]. Of course, these tracepoints (like
any other) can be analyzed by eBPF itself, etc. Example output:

  # ./perf record -a -e bpf:* sleep 10
  # ./perf script
  sock_example  6197 [005]   283.980322:      bpf:bpf_map_create: map type=ARRAY ufd=4 key=4 val=8 max=256 flags=0
  sock_example  6197 [005]   283.980721:       bpf:bpf_prog_load: prog=a5ea8fa30ea6849c type=SOCKET_FILTER ufd=5
  sock_example  6197 [005]   283.988423:   bpf:bpf_prog_get_type: prog=a5ea8fa30ea6849c type=SOCKET_FILTER
  sock_example  6197 [005]   283.988443: bpf:bpf_map_lookup_elem: map type=ARRAY ufd=4 key=[06 00 00 00] val=[00 00 00 00 00 00 00 00]
  [...]
  sock_example  6197 [005]   288.990868: bpf:bpf_map_lookup_elem: map type=ARRAY ufd=4 key=[01 00 00 00] val=[14 00 00 00 00 00 00 00]
       swapper     0 [005]   289.338243:    bpf:bpf_prog_put_rcu: prog=a5ea8fa30ea6849c type=SOCKET_FILTER

  [1] https://lwn.net/Articles/705270/

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf_trace.h | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 include/linux/bpf_trace.h

(limited to 'include/linux')

diff --git a/include/linux/bpf_trace.h b/include/linux/bpf_trace.h
new file mode 100644
index 000000000000..b22efbdd2eb4
--- /dev/null
+++ b/include/linux/bpf_trace.h
@@ -0,0 +1,7 @@
+#ifndef __LINUX_BPF_TRACE_H__
+#define __LINUX_BPF_TRACE_H__
+
+#include <trace/events/bpf.h>
+#include <trace/events/xdp.h>
+
+#endif /* __LINUX_BPF_TRACE_H__ */
-- 
cgit v1.2.3


From 19f6d3f3c8422d65b5e3d2162e30ef07c6e21ea2 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Mon, 23 Jan 2017 10:59:22 -0800
Subject: net/tcp-fastopen: Add new API support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds a new socket option, TCP_FASTOPEN_CONNECT, as an
alternative way to perform Fast Open on the active side (client). Prior
to this patch, a client needs to replace the connect() call with
sendto(MSG_FASTOPEN). This can be cumbersome for applications who want
to use Fast Open: these socket operations are often done in lower layer
libraries used by many other applications. Changing these libraries
and/or the socket call sequences are not trivial. A more convenient
approach is to perform Fast Open by simply enabling a socket option when
the socket is created w/o changing other socket calls sequence:
  s = socket()
    create a new socket
  setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN_CONNECT …);
    newly introduced sockopt
    If set, new functionality described below will be used.
    Return ENOTSUPP if TFO is not supported or not enabled in the
    kernel.

  connect()
    With cookie present, return 0 immediately.
    With no cookie, initiate 3WHS with TFO cookie-request option and
    return -1 with errno = EINPROGRESS.

  write()/sendmsg()
    With cookie present, send out SYN with data and return the number of
    bytes buffered.
    With no cookie, and 3WHS not yet completed, return -1 with errno =
    EINPROGRESS.
    No MSG_FASTOPEN flag is needed.

  read()
    Return -1 with errno = EWOULDBLOCK/EAGAIN if connect() is called but
    write() is not called yet.
    Return -1 with errno = EWOULDBLOCK/EAGAIN if connection is
    established but no msg is received yet.
    Return number of bytes read if socket is established and there is
    msg received.

The new API simplifies life for applications that always perform a write()
immediately after a successful connect(). Such applications can now take
advantage of Fast Open by merely making one new setsockopt() call at the time
of creating the socket. Nothing else about the application's socket call
sequence needs to change.

Signed-off-by: Wei Wang <weiwan@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 5371b3d70cfe..f88f4649ba6f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -222,7 +222,8 @@ struct tcp_sock {
 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
 	u8	chrono_type:2,	/* current chronograph type */
 		rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
-		unused:5;
+		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
+		unused:4;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		unused1	    : 1,
-- 
cgit v1.2.3


From d6f8cfa3dea294eabf8f302e90176dd6381fb66e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 25 Jan 2017 11:39:49 +0100
Subject: net: phy: leds: Break dependency of phy.h on phy_led_triggers.h

<linux/phy.h> includes <linux/phy_led_triggers.h>, which is not really
needed.  Drop the include from <linux/phy.h>, and add it to all users
that didn't include it explicitly.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index f7d95f644eed..7fc1105605bf 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -25,7 +25,6 @@
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/mod_devicetable.h>
-#include <linux/phy_led_triggers.h>
 
 #include <linux/atomic.h>
 
-- 
cgit v1.2.3


From 3c880eb0205222bb062970085ebedc73ec8dfd14 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 25 Jan 2017 11:39:50 +0100
Subject: net: phy: leds: Fix truncated LED trigger names

Commit 4567d686f5c6d955 ("phy: increase size of MII_BUS_ID_SIZE and
bus_id") increased the size of MII bus IDs, but forgot to update the
private definition in <linux/phy_led_triggers.h>.
This may cause:
  1. Truncation of LED trigger names,
  2. Duplicate LED trigger names,
  3. Failures registering LED triggers,
  4. Crashes due to bad error handling in the LED trigger failure path.

To fix this, and prevent the definitions going out of sync again in the
future, let the PHY LED trigger code use the existing MII_BUS_ID_SIZE
definition.

Example:
  - Before I had triggers "ee700000.etherne:01:100Mbps" and
    "ee700000.etherne:01:10Mbps",
  - After the increase of MII_BUS_ID_SIZE, both became
    "ee700000.ethernet-ffffffff:01:" => FAIL,
  - Now, the triggers are "ee700000.ethernet-ffffffff:01:100Mbps" and
    "ee700000.ethernet-ffffffff:01:10Mbps", which are unique again.

Fixes: 4567d686f5c6d955 ("phy: increase size of MII_BUS_ID_SIZE and bus_id")
Fixes: 2e0bc452f4721520 ("net: phy: leds: add support for led triggers on phy link state change")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy_led_triggers.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h
index a2daea0a37d2..b37b05bfd1a6 100644
--- a/include/linux/phy_led_triggers.h
+++ b/include/linux/phy_led_triggers.h
@@ -18,11 +18,11 @@ struct phy_device;
 #ifdef CONFIG_LED_TRIGGER_PHY
 
 #include <linux/leds.h>
+#include <linux/phy.h>
 
 #define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE	10
-#define PHY_MII_BUS_ID_SIZE	(20 - 3)
 
-#define PHY_LINK_LED_TRIGGER_NAME_SIZE (PHY_MII_BUS_ID_SIZE + \
+#define PHY_LINK_LED_TRIGGER_NAME_SIZE (MII_BUS_ID_SIZE + \
 				       FIELD_SIZEOF(struct mdio_device, addr)+\
 				       PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE)
 
-- 
cgit v1.2.3


From f2c4689640e9a34bc45c013032185ed4ce47e7ff Mon Sep 17 00:00:00 2001
From: Lance Roy <ldr709@gmail.com>
Date: Mon, 23 Jan 2017 13:35:18 -0800
Subject: srcu: Implement more-efficient reader counts

SRCU uses two per-cpu counters: a nesting counter to count the number of
active critical sections, and a sequence counter to ensure that the nesting
counters don't change while they are being added together in
srcu_readers_active_idx_check().

This patch instead uses per-cpu lock and unlock counters. Because both
counters only increase and srcu_readers_active_idx_check() reads the unlock
counter before the lock counter, this achieves the same end without having
to increment two different counters in srcu_read_lock(). This also saves a
smp_mb() in srcu_readers_active_idx_check().

Possible bug: There is no guarantee that the lock counter won't overflow
during srcu_readers_active_idx_check(), as there are no memory barriers
around srcu_flip() (see comment in srcu_readers_active_idx_check() for
details). However, this problem was already present before this patch.

Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Lance Roy <ldr709@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index dc8eb63c6568..a598cf3ac70c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -33,9 +33,9 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
-struct srcu_struct_array {
-	unsigned long c[2];
-	unsigned long seq[2];
+struct srcu_array {
+	unsigned long lock_count[2];
+	unsigned long unlock_count[2];
 };
 
 struct rcu_batch {
@@ -46,7 +46,7 @@ struct rcu_batch {
 
 struct srcu_struct {
 	unsigned long completed;
-	struct srcu_struct_array __percpu *per_cpu_ref;
+	struct srcu_array __percpu *per_cpu_ref;
 	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
 	bool running;
 	/* callbacks just queued */
@@ -118,7 +118,7 @@ void process_srcu(struct work_struct *work);
  * See include/linux/percpu-defs.h for the rules on per-CPU variables.
  */
 #define __DEFINE_SRCU(name, is_static)					\
-	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
 	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
 #define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
 #define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
-- 
cgit v1.2.3


From d85b62f18d543c663cbdd6061054efeb9e66cee7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 28 Nov 2016 12:08:49 -0800
Subject: srcu: Force full grace-period ordering

If a process invokes synchronize_srcu(), is delayed just the right amount
of time, and thus does not sleep when waiting for the grace period to
complete, there is no ordering between the end of the grace period and
the code following the synchronize_srcu().  Similarly, there can be a
lack of ordering between the end of the SRCU grace period and callback
invocation.

This commit adds the necessary ordering.

Reported-by: Lance Roy <ldr709@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Further smp_mb() adjustment per email with Lance Roy. ]
---
 include/linux/rcupdate.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 01f71e1d2e94..6ade6a52d9d4 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1161,5 +1161,17 @@ do { \
 		ftrace_dump(oops_dump_mode); \
 } while (0)
 
+/*
+ * Place this after a lock-acquisition primitive to guarantee that
+ * an UNLOCK+LOCK pair acts as a full barrier.  This guarantee applies
+ * if the UNLOCK and LOCK are executed by the same CPU or if the
+ * UNLOCK and LOCK operate on the same lock variable.
+ */
+#ifdef CONFIG_PPC
+#define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
+#else /* #ifdef CONFIG_PPC */
+#define smp_mb__after_unlock_lock()	do { } while (0)
+#endif /* #else #ifdef CONFIG_PPC */
+
 
 #endif /* __LINUX_RCUPDATE_H */
-- 
cgit v1.2.3


From 85b4685da52f6680635370c2cfb9a42f04ac9652 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Wed, 25 Jan 2017 21:00:25 +0100
Subject: net: phy: broadcom: use auxctl reading helper in BCM54612E code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Starting with commit 5b4e29005123 ("net: phy: broadcom: add
bcm54xx_auxctl_read") we have a reading helper so use it and avoid code
duplication.
It also means we don't need MII_BCM54XX_AUXCTL_SHDWSEL_MISC define as
it's the same as MII_BCM54XX_AUXCTL_SHDWSEL_MISC just for reading needs
(same value shifted by 12 bits).

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 295fb3e73de5..34e61004b9dc 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -111,7 +111,6 @@
 #define MII_BCM54XX_AUXCTL_MISC_WREN	0x8000
 #define MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW	0x0100
 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX	0x0200
-#define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC	0x7000
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC	0x0007
 #define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT	12
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN	(1 << 8)
-- 
cgit v1.2.3


From 8293c7bcdef1b866610bf21dee3eb0d181cee753 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Wed, 25 Jan 2017 21:00:26 +0100
Subject: net: phy: broadcom: drop duplicated define for RGMII SKEW delay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We had two defines for the same bit (both were used with the
MII_BCM54XX_AUXCTL_SHDWSEL_MISC register).

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 34e61004b9dc..f9cb73df127e 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -109,7 +109,6 @@
 #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA	0x0800
 
 #define MII_BCM54XX_AUXCTL_MISC_WREN	0x8000
-#define MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW	0x0100
 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX	0x0200
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC	0x0007
 #define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT	12
-- 
cgit v1.2.3


From 5e7bfa6cb0a94c673f6d565e58353366d88e2aa5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Wed, 25 Jan 2017 21:00:27 +0100
Subject: net: phy: bcm-phy-lib: clean up remaining AUXCTL register defines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1) Use 0x%02x format for register number. This follows some other
   defines and makes it easier to distinct register from values.
2) Put register define above values and sort the values. It makes
   reading header code easier.
3) Use 0x%04x format for all values. It's about consistency with other
   values (and most of the header) not a personal preference.
4) Separate define for reading shift value with an extre empty line.
   It's user for all AUXCTL registers in a bcm54xx_auxctl_read.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index f9cb73df127e..cf93f1399d3e 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -104,17 +104,17 @@
 /*
  * AUXILIARY CONTROL SHADOW ACCESS REGISTERS.  (PHY REG 0x18)
  */
-#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL	0x0000
+#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL	0x00
 #define MII_BCM54XX_AUXCTL_ACTL_TX_6DB		0x0400
 #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA	0x0800
 
-#define MII_BCM54XX_AUXCTL_MISC_WREN	0x8000
-#define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX	0x0200
-#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC	0x0007
-#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT	12
-#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN	(1 << 8)
-#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN	(1 << 4)
+#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC			0x07
+#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN	0x0010
+#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN	0x0100
+#define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX		0x0200
+#define MII_BCM54XX_AUXCTL_MISC_WREN			0x8000
 
+#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT	12
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK	0x0007
 
 /*
-- 
cgit v1.2.3


From a264d10ff45c688293d9112fddd8d29c819e0853 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 9 Jan 2017 16:02:28 +0200
Subject: gpiolib: Convert fwnode_get_named_gpiod() to configure GPIO

Make fwnode_get_named_gpiod() consistent with the rest of
gpiod_get() like API, i.e. configure GPIO pin immediately after
request.

Besides obvious clean up it will help to configure pins based
on firmware provided resources.

Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index fb0fde686cb1..930d10049d8d 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -135,10 +135,12 @@ int desc_to_gpio(const struct gpio_desc *desc);
 struct fwnode_handle;
 
 struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
-					 const char *propname);
+					 const char *propname,
+					 enum gpiod_flags dflags);
 struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
 					    const char *con_id,
-					    struct fwnode_handle *child);
+					    struct fwnode_handle *child,
+					    enum gpiod_flags flags);
 #else /* CONFIG_GPIOLIB */
 
 static inline int gpiod_count(struct device *dev, const char *con_id)
@@ -411,14 +413,19 @@ static inline int desc_to_gpio(const struct gpio_desc *desc)
 /* Child properties interface */
 struct fwnode_handle;
 
-static inline struct gpio_desc *fwnode_get_named_gpiod(
-	struct fwnode_handle *fwnode, const char *propname)
+static inline
+struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
+					 const char *propname,
+					 enum gpiod_flags dflags)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
-static inline struct gpio_desc *devm_get_gpiod_from_child(
-	struct device *dev, const char *con_id, struct fwnode_handle *child)
+static inline
+struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
+					    const char *con_id,
+					    struct fwnode_handle *child,
+					    enum gpiod_flags flags)
 {
 	return ERR_PTR(-ENOSYS);
 }
-- 
cgit v1.2.3


From b2987d7438e0ca949d81774ca8b43d370a1f9947 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@systec-electronic.com>
Date: Thu, 12 Jan 2017 17:39:24 +0100
Subject: gpio: Pass GPIO label down to gpiod_request

Currently all users of fwnode_get_named_gpiod() have no way to
specify a label for the GPIO. So GPIOs listed in debugfs are shown
with label "?". With this change a proper label is used.

Also adjust all users so they can pass a label, properly retrieved
from device tree properties.

Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 930d10049d8d..80bad7ebde04 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -136,11 +136,13 @@ struct fwnode_handle;
 
 struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
 					 const char *propname,
-					 enum gpiod_flags dflags);
+					 enum gpiod_flags dflags,
+					 const char *label);
 struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
 					    const char *con_id,
 					    struct fwnode_handle *child,
-					    enum gpiod_flags flags);
+					    enum gpiod_flags flags,
+					    const char *label);
 #else /* CONFIG_GPIOLIB */
 
 static inline int gpiod_count(struct device *dev, const char *con_id)
@@ -416,7 +418,8 @@ struct fwnode_handle;
 static inline
 struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
 					 const char *propname,
-					 enum gpiod_flags dflags)
+					 enum gpiod_flags dflags,
+					 const char *label)
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -425,7 +428,8 @@ static inline
 struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
 					    const char *con_id,
 					    struct fwnode_handle *child,
-					    enum gpiod_flags flags)
+					    enum gpiod_flags flags,
+					    const char *label)
 {
 	return ERR_PTR(-ENOSYS);
 }
-- 
cgit v1.2.3


From 53d333ac93911dab22d12a78b0a6414f9afb0117 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 17 Jan 2017 21:49:12 +0530
Subject: gpio: davinci: Remove unwanted blank line

Remove redundant blank line.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/platform_data/gpio-davinci.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index 44ca5303849c..18127c4aa4ba 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -26,7 +26,6 @@ struct davinci_gpio_platform_data {
 	u32	gpio_unbanked;
 };
 
-
 struct davinci_gpio_controller {
 	struct gpio_chip	chip;
 	struct irq_domain	*irq_domain;
-- 
cgit v1.2.3


From b5cf3fd827d2e11355c126b44ea625650ebf4d39 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Fri, 13 Jan 2017 09:50:12 +0530
Subject: gpio: davinci: Redesign driver to accommodate ngpios in one gpio chip

The Davinci GPIO driver is implemented to work with one monolithic
Davinci GPIO platform device which may have up to Y(144) gpios.
The Davinci GPIO driver instantiates number of GPIO chips with
max 32 gpio pins per each during initialization and one IRQ domain.
So, the current GPIO's  opjects structure is:

<platform device> Davinci GPIO controller
 |- <gpio0_chip0> ------|
 ...                    |--- irq_domain (hwirq [0..143])
 |- <gpio0_chipN> ------|

Current driver creates one chip for every 32 GPIOs in a controller.
This was a limitation earlier now there is no need for that. Hence
redesigning the driver to create one gpio chip for all the ngpio
in the controller.

|- <gpio0_chip0> ------|--- irq_domain (hwirq [0..143]).

The previous discussion on this can be found here:
https://www.spinics.net/lists/linux-omap/msg132869.html

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/platform_data/gpio-davinci.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index 18127c4aa4ba..c62a9438976d 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -21,19 +21,27 @@
 
 #include <asm-generic/gpio.h>
 
+#define MAX_REGS_BANKS		5
+
 struct davinci_gpio_platform_data {
 	u32	ngpio;
 	u32	gpio_unbanked;
 };
 
+struct davinci_gpio_irq_data {
+	void __iomem			*regs;
+	struct davinci_gpio_controller	*chip;
+	int				bank_num;
+};
+
 struct davinci_gpio_controller {
 	struct gpio_chip	chip;
 	struct irq_domain	*irq_domain;
 	/* Serialize access to GPIO registers */
 	spinlock_t		lock;
-	void __iomem		*regs;
+	void __iomem		*regs[MAX_REGS_BANKS];
 	int			gpio_unbanked;
-	unsigned		gpio_irq;
+	unsigned int		base_irq;
 };
 
 /*
-- 
cgit v1.2.3


From 8e11047b8f3cc0dc6df956cf01915077a574168e Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 17 Jan 2017 21:49:14 +0530
Subject: gpio: davinci: Add support for multiple GPIO controllers

Update GPIO driver to support Multiple GPIO controllers by updating
the base of subsequent GPIO chips with total of previous chips
gpio count so that gpio_add_chip gets unique numbers.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/platform_data/gpio-davinci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index c62a9438976d..90ae19ca828f 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -42,6 +42,7 @@ struct davinci_gpio_controller {
 	void __iomem		*regs[MAX_REGS_BANKS];
 	int			gpio_unbanked;
 	unsigned int		base_irq;
+	unsigned int		base;
 };
 
 /*
-- 
cgit v1.2.3


From 58957d2edfa19e9b8f80385ba042495058e5e60e Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 23 Jan 2017 15:34:32 +0300
Subject: pinctrl: Widen the generic pinconf argument from 16 to 24 bits

The current pinconf packed format allows only 16-bit argument limiting
the maximum value 65535. For most types this is enough. However,
debounce time can be in range of hundreths of milliseconds in case of
mechanical switches so we cannot represent the worst case using the
current format.

In order to support larger values change the packed format so that the
lower 8 bits are used as type which leaves 24 bits for the argument.
This allows representing values up to 16777215 and debounce times up to
16 seconds.

We also convert the existing users to use 32-bit integer when extracting
argument from the packed configuration value.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinconf-generic.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 12343caa114e..9a09107c890e 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -92,6 +92,8 @@
  * @PIN_CONFIG_END: this is the last enumerator for pin configurations, if
  *	you need to pass in custom configurations to the pin controller, use
  *	PIN_CONFIG_END+1 as the base offset.
+ * @PIN_CONFIG_MAX: this is the maximum configuration value that can be
+ *	presented using the packed format.
  */
 enum pin_config_param {
 	PIN_CONFIG_BIAS_BUS_HOLD,
@@ -112,7 +114,8 @@ enum pin_config_param {
 	PIN_CONFIG_OUTPUT,
 	PIN_CONFIG_POWER_SOURCE,
 	PIN_CONFIG_SLEW_RATE,
-	PIN_CONFIG_END = 0x7FFF,
+	PIN_CONFIG_END = 0x7F,
+	PIN_CONFIG_MAX = 0xFF,
 };
 
 #ifdef CONFIG_DEBUG_FS
@@ -130,27 +133,27 @@ struct pin_config_item {
 /*
  * Helpful configuration macro to be used in tables etc.
  */
-#define PIN_CONF_PACKED(p, a) ((a << 16) | ((unsigned long) p & 0xffffUL))
+#define PIN_CONF_PACKED(p, a) ((a << 8) | ((unsigned long) p & 0xffUL))
 
 /*
  * The following inlines stuffs a configuration parameter and data value
  * into and out of an unsigned long argument, as used by the generic pin config
- * system. We put the parameter in the lower 16 bits and the argument in the
- * upper 16 bits.
+ * system. We put the parameter in the lower 8 bits and the argument in the
+ * upper 24 bits.
  */
 
 static inline enum pin_config_param pinconf_to_config_param(unsigned long config)
 {
-	return (enum pin_config_param) (config & 0xffffUL);
+	return (enum pin_config_param) (config & 0xffUL);
 }
 
-static inline u16 pinconf_to_config_argument(unsigned long config)
+static inline u32 pinconf_to_config_argument(unsigned long config)
 {
-	return (enum pin_config_param) ((config >> 16) & 0xffffUL);
+	return (u32) ((config >> 8) & 0xffffffUL);
 }
 
 static inline unsigned long pinconf_to_config_packed(enum pin_config_param param,
-						     u16 argument)
+						     u32 argument)
 {
 	return PIN_CONF_PACKED(param, argument);
 }
-- 
cgit v1.2.3


From 15381bc7c7f52d56f87c56dd7c948ad78704b852 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 23 Jan 2017 15:34:33 +0300
Subject: pinctrl: Allow configuration of pins from gpiolib based drivers

When a GPIO driver is backed by a pinctrl driver the GPIO driver
sometimes needs to call the pinctrl driver to configure certain things,
like whether the pin is used as input or output. In addition to this
there are other configurations applicable to GPIOs such as setting
debounce time of the GPIO.

To support this we introduce a new function pinctrl_gpio_set_config()
that can be used by gpiolib based driver to pass configuration requests
to the backing pinctrl driver.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/consumer.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index d7e5d608faa7..a0f2aba72fa9 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -29,6 +29,7 @@ extern int pinctrl_request_gpio(unsigned gpio);
 extern void pinctrl_free_gpio(unsigned gpio);
 extern int pinctrl_gpio_direction_input(unsigned gpio);
 extern int pinctrl_gpio_direction_output(unsigned gpio);
+extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config);
 
 extern struct pinctrl * __must_check pinctrl_get(struct device *dev);
 extern void pinctrl_put(struct pinctrl *p);
@@ -80,6 +81,11 @@ static inline int pinctrl_gpio_direction_output(unsigned gpio)
 	return 0;
 }
 
+static inline int pinctrl_gpio_set_config(unsigned gpio, unsigned long config)
+{
+	return 0;
+}
+
 static inline struct pinctrl * __must_check pinctrl_get(struct device *dev)
 {
 	return NULL;
-- 
cgit v1.2.3


From 2956b5d94a76b596fa5057c2b3ca915cb27d7652 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 23 Jan 2017 15:34:34 +0300
Subject: pinctrl / gpio: Introduce .set_config() callback for GPIO chips

Currently we already have two pin configuration related callbacks
available for GPIO chips .set_single_ended() and .set_debounce(). In
future we expect to have even more, which does not scale well if we need
to add yet another callback to the GPIO chip structure for each possible
configuration parameter.

Better solution is to reuse what we already have available in the
generic pinconf.

To support this, we introduce a new .set_config() callback for GPIO
chips. The callback takes a single packed pin configuration value as
parameter. This can then be extended easily beyond what is currently
supported by just adding new types to the generic pinconf enum.

If the GPIO driver is backed up by a pinctrl driver the GPIO driver can
just assign gpiochip_generic_config() (introduced in this patch) to
.set_config and that will take care configuration requests are directed
to the pinctrl driver.

We then convert the existing drivers over .set_config() and finally
remove the .set_single_ended() and .set_debounce() callbacks.

Suggested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h             | 37 +++++++--------------------------
 include/linux/pinctrl/pinconf-generic.h | 33 +++++++++++++----------------
 2 files changed, 23 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index c2748accea71..db2022910caf 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -8,6 +8,7 @@
 #include <linux/irqdomain.h>
 #include <linux/lockdep.h>
 #include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinconf-generic.h>
 
 struct gpio_desc;
 struct of_phandle_args;
@@ -18,18 +19,6 @@ struct module;
 
 #ifdef CONFIG_GPIOLIB
 
-/**
- * enum single_ended_mode - mode for single ended operation
- * @LINE_MODE_PUSH_PULL: normal mode for a GPIO line, drive actively high/low
- * @LINE_MODE_OPEN_DRAIN: set line to be open drain
- * @LINE_MODE_OPEN_SOURCE: set line to be open source
- */
-enum single_ended_mode {
-	LINE_MODE_PUSH_PULL,
-	LINE_MODE_OPEN_DRAIN,
-	LINE_MODE_OPEN_SOURCE,
-};
-
 /**
  * struct gpio_chip - abstract a GPIO controller
  * @label: a functional name for the GPIO device, such as a part
@@ -48,16 +37,8 @@ enum single_ended_mode {
  * @get: returns value for signal "offset", 0=low, 1=high, or negative error
  * @set: assigns output value for signal "offset"
  * @set_multiple: assigns output values for multiple signals defined by "mask"
- * @set_debounce: optional hook for setting debounce time for specified gpio in
- *	interrupt triggered gpio chips
- * @set_single_ended: optional hook for setting a line as open drain, open
- *	source, or non-single ended (restore from open drain/source to normal
- *	push-pull mode) this should be implemented if the hardware supports
- *	open drain or open source settings. The GPIOlib will otherwise try
- *	to emulate open drain/source by not actively driving lines high/low
- *	if a consumer request this. The driver may return -ENOTSUPP if e.g.
- *	it supports just open drain but not open source and is called
- *	with LINE_MODE_OPEN_SOURCE as mode argument.
+ * @set_config: optional hook for all kinds of settings. Uses the same
+ *	packed config format as generic pinconf.
  * @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
  *	implementation may not sleep
  * @dbg_show: optional routine to show contents in debugfs; default code
@@ -150,13 +131,9 @@ struct gpio_chip {
 	void			(*set_multiple)(struct gpio_chip *chip,
 						unsigned long *mask,
 						unsigned long *bits);
-	int			(*set_debounce)(struct gpio_chip *chip,
-						unsigned offset,
-						unsigned debounce);
-	int			(*set_single_ended)(struct gpio_chip *chip,
-						unsigned offset,
-						enum single_ended_mode mode);
-
+	int			(*set_config)(struct gpio_chip *chip,
+					      unsigned offset,
+					      unsigned long config);
 	int			(*to_irq)(struct gpio_chip *chip,
 						unsigned offset);
 
@@ -310,6 +287,8 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
 
 int gpiochip_generic_request(struct gpio_chip *chip, unsigned offset);
 void gpiochip_generic_free(struct gpio_chip *chip, unsigned offset);
+int gpiochip_generic_config(struct gpio_chip *chip, unsigned offset,
+			    unsigned long config);
 
 #ifdef CONFIG_PINCTRL
 
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 9a09107c890e..7620eb127cff 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -12,12 +12,6 @@
 #ifndef __LINUX_PINCTRL_PINCONF_GENERIC_H
 #define __LINUX_PINCTRL_PINCONF_GENERIC_H
 
-/*
- * You shouldn't even be able to compile with these enums etc unless you're
- * using generic pin config. That is why this is defined out.
- */
-#ifdef CONFIG_GENERIC_PINCONF
-
 /**
  * enum pin_config_param - possible pin configuration parameters
  * @PIN_CONFIG_BIAS_BUS_HOLD: the pin will be set to weakly latch so that it
@@ -118,18 +112,6 @@ enum pin_config_param {
 	PIN_CONFIG_MAX = 0xFF,
 };
 
-#ifdef CONFIG_DEBUG_FS
-#define PCONFDUMP(a, b, c, d) { .param = a, .display = b, .format = c, \
-				.has_arg = d }
-
-struct pin_config_item {
-	const enum pin_config_param param;
-	const char * const display;
-	const char * const format;
-	bool has_arg;
-};
-#endif /* CONFIG_DEBUG_FS */
-
 /*
  * Helpful configuration macro to be used in tables etc.
  */
@@ -158,6 +140,21 @@ static inline unsigned long pinconf_to_config_packed(enum pin_config_param param
 	return PIN_CONF_PACKED(param, argument);
 }
 
+#ifdef CONFIG_GENERIC_PINCONF
+
+#ifdef CONFIG_DEBUG_FS
+#define PCONFDUMP(a, b, c, d) {					\
+	.param = a, .display = b, .format = c, .has_arg = d	\
+	}
+
+struct pin_config_item {
+	const enum pin_config_param param;
+	const char * const display;
+	const char * const format;
+	bool has_arg;
+};
+#endif /* CONFIG_DEBUG_FS */
+
 #ifdef CONFIG_OF
 
 #include <linux/device.h>
-- 
cgit v1.2.3


From 228c8c6b1f4376788e9d5ab00d50b10228eb40d3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 26 Jan 2017 17:15:44 +0100
Subject: wireless: define cipher/AKM suites using a macro

The spec writes cipher/AKM suites as something like 00-0F-AC:9,
but the part after the colon isn't hex, it's decimal, so that
we've already had a few mistakes (in other code, or unmerged
patches) to e.g. write 0x000FAC10 instead of 0x000FAC0A.

Use a macro to avoid that problem.

Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 87d1937e4671..02768de209d6 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2324,31 +2324,33 @@ enum ieee80211_sa_query_action {
 };
 
 
+#define SUITE(oui, id)	(((oui) << 8) | (id))
+
 /* cipher suite selectors */
-#define WLAN_CIPHER_SUITE_USE_GROUP	0x000FAC00
-#define WLAN_CIPHER_SUITE_WEP40		0x000FAC01
-#define WLAN_CIPHER_SUITE_TKIP		0x000FAC02
-/* reserved: 				0x000FAC03 */
-#define WLAN_CIPHER_SUITE_CCMP		0x000FAC04
-#define WLAN_CIPHER_SUITE_WEP104	0x000FAC05
-#define WLAN_CIPHER_SUITE_AES_CMAC	0x000FAC06
-#define WLAN_CIPHER_SUITE_GCMP		0x000FAC08
-#define WLAN_CIPHER_SUITE_GCMP_256	0x000FAC09
-#define WLAN_CIPHER_SUITE_CCMP_256	0x000FAC0A
-#define WLAN_CIPHER_SUITE_BIP_GMAC_128	0x000FAC0B
-#define WLAN_CIPHER_SUITE_BIP_GMAC_256	0x000FAC0C
-#define WLAN_CIPHER_SUITE_BIP_CMAC_256	0x000FAC0D
-
-#define WLAN_CIPHER_SUITE_SMS4		0x00147201
+#define WLAN_CIPHER_SUITE_USE_GROUP	SUITE(0x000FAC, 0)
+#define WLAN_CIPHER_SUITE_WEP40		SUITE(0x000FAC, 1)
+#define WLAN_CIPHER_SUITE_TKIP		SUITE(0x000FAC, 2)
+/* reserved: 				SUITE(0x000FAC, 3) */
+#define WLAN_CIPHER_SUITE_CCMP		SUITE(0x000FAC, 4)
+#define WLAN_CIPHER_SUITE_WEP104	SUITE(0x000FAC, 5)
+#define WLAN_CIPHER_SUITE_AES_CMAC	SUITE(0x000FAC, 6)
+#define WLAN_CIPHER_SUITE_GCMP		SUITE(0x000FAC, 8)
+#define WLAN_CIPHER_SUITE_GCMP_256	SUITE(0x000FAC, 9)
+#define WLAN_CIPHER_SUITE_CCMP_256	SUITE(0x000FAC, 10)
+#define WLAN_CIPHER_SUITE_BIP_GMAC_128	SUITE(0x000FAC, 11)
+#define WLAN_CIPHER_SUITE_BIP_GMAC_256	SUITE(0x000FAC, 12)
+#define WLAN_CIPHER_SUITE_BIP_CMAC_256	SUITE(0x000FAC, 13)
+
+#define WLAN_CIPHER_SUITE_SMS4		SUITE(0x001472, 1)
 
 /* AKM suite selectors */
-#define WLAN_AKM_SUITE_8021X		0x000FAC01
-#define WLAN_AKM_SUITE_PSK		0x000FAC02
-#define WLAN_AKM_SUITE_8021X_SHA256	0x000FAC05
-#define WLAN_AKM_SUITE_PSK_SHA256	0x000FAC06
-#define WLAN_AKM_SUITE_TDLS		0x000FAC07
-#define WLAN_AKM_SUITE_SAE		0x000FAC08
-#define WLAN_AKM_SUITE_FT_OVER_SAE	0x000FAC09
+#define WLAN_AKM_SUITE_8021X		SUITE(0x000FAC, 1)
+#define WLAN_AKM_SUITE_PSK		SUITE(0x000FAC, 2)
+#define WLAN_AKM_SUITE_8021X_SHA256	SUITE(0x000FAC, 5)
+#define WLAN_AKM_SUITE_PSK_SHA256	SUITE(0x000FAC, 6)
+#define WLAN_AKM_SUITE_TDLS		SUITE(0x000FAC, 7)
+#define WLAN_AKM_SUITE_SAE		SUITE(0x000FAC, 8)
+#define WLAN_AKM_SUITE_FT_OVER_SAE	SUITE(0x000FAC, 9)
 
 #define WLAN_MAX_KEY_LEN		32
 
-- 
cgit v1.2.3


From 65e251a4634c5644efca6f7e15803f0962d8943d Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Tue, 3 Jan 2017 17:34:56 +0000
Subject: iommu: Drop the of_iommu_{set/get}_ops() interface

With the introduction of the new iommu_{register/get}_instance()
interface in commit e4f10ffe4c9b ("iommu: Make of_iommu_set/get_ops() DT
agnostic") (based on struct fwnode_handle as look-up token, so firmware
agnostic) to register IOMMU instances with the core IOMMU layer there is
no reason to keep the old OF based interface around any longer.

Convert all the IOMMU drivers (and OF IOMMU core code) that rely on the
of_iommu_{set/get}_ops() to the new kernel interface to register/retrieve
IOMMU instances and remove the of_iommu_{set/get}_ops() remaining glue
code in order to complete the interface rework.

Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Sricharan R <sricharan@codeaurora.org>
Tested-by: Yong Wu <yong.wu@mediatek.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/of_iommu.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 6a7fc5051099..13394ac83c66 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -31,17 +31,6 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 #endif	/* CONFIG_OF_IOMMU */
 
-static inline void of_iommu_set_ops(struct device_node *np,
-				    const struct iommu_ops *ops)
-{
-	iommu_register_instance(&np->fwnode, ops);
-}
-
-static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
-{
-	return iommu_get_instance(&np->fwnode);
-}
-
 extern struct of_device_id __iommu_of_table;
 
 typedef int (*of_iommu_init_fn)(struct device_node *);
-- 
cgit v1.2.3


From 1141d9d08184565b71a943a50ffe712b2dfc697f Mon Sep 17 00:00:00 2001
From: Irina Tirdea <irina.tirdea@intel.com>
Date: Mon, 23 Jan 2017 12:07:42 -0600
Subject: clk: x86: Add Atom PMC platform clocks

The BayTrail and CherryTrail platforms provide platform clocks
through their Power Management Controller (PMC).

The SoC supports up to 6 clocks (PMC_PLT_CLK[0..5]) with a
frequency of either 19.2 MHz (PLL) or 25 MHz (XTAL) for BayTrail
and a frequency of 19.2 MHz (XTAL) for CherryTrail. These clocks
are available for general system use, where appropriate, and each
have Control & Frequency register fields associated with them.

Port from legacy by Pierre Bossart, integration in clock framework
by Irina Tirdea

Signed-off-by: Irina Tirdea <irina.tirdea@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 include/linux/platform_data/x86/clk-pmc-atom.h | 44 ++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 include/linux/platform_data/x86/clk-pmc-atom.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/x86/clk-pmc-atom.h b/include/linux/platform_data/x86/clk-pmc-atom.h
new file mode 100644
index 000000000000..3ab892208343
--- /dev/null
+++ b/include/linux/platform_data/x86/clk-pmc-atom.h
@@ -0,0 +1,44 @@
+/*
+ * Intel Atom platform clocks for BayTrail and CherryTrail SoC.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Irina Tirdea <irina.tirdea@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __PLATFORM_DATA_X86_CLK_PMC_ATOM_H
+#define __PLATFORM_DATA_X86_CLK_PMC_ATOM_H
+
+/**
+ * struct pmc_clk - PMC platform clock configuration
+ *
+ * @name:	identified, typically pmc_plt_clk_<x>, x=[0..5]
+ * @freq:	in Hz, 19.2MHz  and 25MHz (Baytrail only) supported
+ * @parent_name: one of 'xtal' or 'osc'
+ */
+struct pmc_clk {
+	const char *name;
+	unsigned long freq;
+	const char *parent_name;
+};
+
+/**
+ * struct pmc_clk_data - common PMC clock configuration
+ *
+ * @base:	PMC clock register base offset
+ * @clks:	pointer to set of registered clocks, typically 0..5
+ */
+struct pmc_clk_data {
+	void __iomem *base;
+	const struct pmc_clk *clks;
+};
+
+#endif /* __PLATFORM_DATA_X86_CLK_PMC_ATOM_H */
-- 
cgit v1.2.3


From 80a7581f38c0b2e83dc883a2125340b90b5635ec Mon Sep 17 00:00:00 2001
From: Irina Tirdea <irina.tirdea@intel.com>
Date: Mon, 23 Jan 2017 12:07:43 -0600
Subject: arch/x86/platform/atom: Move pmc_atom to drivers/platform/x86

The pmc_atom driver does not contain any architecture specific
code. It only enables the SoC Power Management Controller driver
for BayTrail and CherryTrail platforms.

Move the pmc_atom driver from arch/x86/platform/atom to
drivers/platform/x86. Also clean-up and reorder include files by
alphabetical order in pmc_atom.h

Signed-off-by: Irina Tirdea <irina.tirdea@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 include/linux/platform_data/x86/pmc_atom.h | 158 +++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 include/linux/platform_data/x86/pmc_atom.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h
new file mode 100644
index 000000000000..aa8744c77c6d
--- /dev/null
+++ b/include/linux/platform_data/x86/pmc_atom.h
@@ -0,0 +1,158 @@
+/*
+ * Intel Atom SOC Power Management Controller Header File
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef PMC_ATOM_H
+#define PMC_ATOM_H
+
+/* ValleyView Power Control Unit PCI Device ID */
+#define	PCI_DEVICE_ID_VLV_PMC	0x0F1C
+/* CherryTrail Power Control Unit PCI Device ID */
+#define	PCI_DEVICE_ID_CHT_PMC	0x229C
+
+/* PMC Memory mapped IO registers */
+#define	PMC_BASE_ADDR_OFFSET	0x44
+#define	PMC_BASE_ADDR_MASK	0xFFFFFE00
+#define	PMC_MMIO_REG_LEN	0x100
+#define	PMC_REG_BIT_WIDTH	32
+
+/* BIOS uses FUNC_DIS to disable specific function */
+#define	PMC_FUNC_DIS		0x34
+#define	PMC_FUNC_DIS_2		0x38
+
+/* CHT specific bits in FUNC_DIS2 register */
+#define	BIT_FD_GMM		BIT(3)
+#define	BIT_FD_ISH		BIT(4)
+
+/* S0ix wake event control */
+#define	PMC_S0IX_WAKE_EN	0x3C
+
+#define	BIT_LPC_CLOCK_RUN		BIT(4)
+#define	BIT_SHARED_IRQ_GPSC		BIT(5)
+#define	BIT_ORED_DEDICATED_IRQ_GPSS	BIT(18)
+#define	BIT_ORED_DEDICATED_IRQ_GPSC	BIT(19)
+#define	BIT_SHARED_IRQ_GPSS		BIT(20)
+
+#define	PMC_WAKE_EN_SETTING	~(BIT_LPC_CLOCK_RUN | \
+				BIT_SHARED_IRQ_GPSC | \
+				BIT_ORED_DEDICATED_IRQ_GPSS | \
+				BIT_ORED_DEDICATED_IRQ_GPSC | \
+				BIT_SHARED_IRQ_GPSS)
+
+/* The timers acumulate time spent in sleep state */
+#define	PMC_S0IR_TMR		0x80
+#define	PMC_S0I1_TMR		0x84
+#define	PMC_S0I2_TMR		0x88
+#define	PMC_S0I3_TMR		0x8C
+#define	PMC_S0_TMR		0x90
+/* Sleep state counter is in units of of 32us */
+#define	PMC_TMR_SHIFT		5
+
+/* Power status of power islands */
+#define	PMC_PSS			0x98
+
+#define PMC_PSS_BIT_GBE			BIT(0)
+#define PMC_PSS_BIT_SATA		BIT(1)
+#define PMC_PSS_BIT_HDA			BIT(2)
+#define PMC_PSS_BIT_SEC			BIT(3)
+#define PMC_PSS_BIT_PCIE		BIT(4)
+#define PMC_PSS_BIT_LPSS		BIT(5)
+#define PMC_PSS_BIT_LPE			BIT(6)
+#define PMC_PSS_BIT_DFX			BIT(7)
+#define PMC_PSS_BIT_USH_CTRL		BIT(8)
+#define PMC_PSS_BIT_USH_SUS		BIT(9)
+#define PMC_PSS_BIT_USH_VCCS		BIT(10)
+#define PMC_PSS_BIT_USH_VCCA		BIT(11)
+#define PMC_PSS_BIT_OTG_CTRL		BIT(12)
+#define PMC_PSS_BIT_OTG_VCCS		BIT(13)
+#define PMC_PSS_BIT_OTG_VCCA_CLK	BIT(14)
+#define PMC_PSS_BIT_OTG_VCCA		BIT(15)
+#define PMC_PSS_BIT_USB			BIT(16)
+#define PMC_PSS_BIT_USB_SUS		BIT(17)
+
+/* CHT specific bits in PSS register */
+#define	PMC_PSS_BIT_CHT_UFS		BIT(7)
+#define	PMC_PSS_BIT_CHT_UXD		BIT(11)
+#define	PMC_PSS_BIT_CHT_UXD_FD		BIT(12)
+#define	PMC_PSS_BIT_CHT_UX_ENG		BIT(15)
+#define	PMC_PSS_BIT_CHT_USB_SUS		BIT(16)
+#define	PMC_PSS_BIT_CHT_GMM		BIT(17)
+#define	PMC_PSS_BIT_CHT_ISH		BIT(18)
+#define	PMC_PSS_BIT_CHT_DFX_MASTER	BIT(26)
+#define	PMC_PSS_BIT_CHT_DFX_CLUSTER1	BIT(27)
+#define	PMC_PSS_BIT_CHT_DFX_CLUSTER2	BIT(28)
+#define	PMC_PSS_BIT_CHT_DFX_CLUSTER3	BIT(29)
+#define	PMC_PSS_BIT_CHT_DFX_CLUSTER4	BIT(30)
+#define	PMC_PSS_BIT_CHT_DFX_CLUSTER5	BIT(31)
+
+/* These registers reflect D3 status of functions */
+#define	PMC_D3_STS_0		0xA0
+
+#define	BIT_LPSS1_F0_DMA	BIT(0)
+#define	BIT_LPSS1_F1_PWM1	BIT(1)
+#define	BIT_LPSS1_F2_PWM2	BIT(2)
+#define	BIT_LPSS1_F3_HSUART1	BIT(3)
+#define	BIT_LPSS1_F4_HSUART2	BIT(4)
+#define	BIT_LPSS1_F5_SPI	BIT(5)
+#define	BIT_LPSS1_F6_XXX	BIT(6)
+#define	BIT_LPSS1_F7_XXX	BIT(7)
+#define	BIT_SCC_EMMC		BIT(8)
+#define	BIT_SCC_SDIO		BIT(9)
+#define	BIT_SCC_SDCARD		BIT(10)
+#define	BIT_SCC_MIPI		BIT(11)
+#define	BIT_HDA			BIT(12)
+#define	BIT_LPE			BIT(13)
+#define	BIT_OTG			BIT(14)
+#define	BIT_USH			BIT(15)
+#define	BIT_GBE			BIT(16)
+#define	BIT_SATA		BIT(17)
+#define	BIT_USB_EHCI		BIT(18)
+#define	BIT_SEC			BIT(19)
+#define	BIT_PCIE_PORT0		BIT(20)
+#define	BIT_PCIE_PORT1		BIT(21)
+#define	BIT_PCIE_PORT2		BIT(22)
+#define	BIT_PCIE_PORT3		BIT(23)
+#define	BIT_LPSS2_F0_DMA	BIT(24)
+#define	BIT_LPSS2_F1_I2C1	BIT(25)
+#define	BIT_LPSS2_F2_I2C2	BIT(26)
+#define	BIT_LPSS2_F3_I2C3	BIT(27)
+#define	BIT_LPSS2_F4_I2C4	BIT(28)
+#define	BIT_LPSS2_F5_I2C5	BIT(29)
+#define	BIT_LPSS2_F6_I2C6	BIT(30)
+#define	BIT_LPSS2_F7_I2C7	BIT(31)
+
+#define	PMC_D3_STS_1		0xA4
+#define	BIT_SMB			BIT(0)
+#define	BIT_OTG_SS_PHY		BIT(1)
+#define	BIT_USH_SS_PHY		BIT(2)
+#define	BIT_DFX			BIT(3)
+
+/* CHT specific bits in PMC_D3_STS_1 register */
+#define	BIT_STS_GMM		BIT(1)
+#define	BIT_STS_ISH		BIT(2)
+
+/* PMC I/O Registers */
+#define	ACPI_BASE_ADDR_OFFSET	0x40
+#define	ACPI_BASE_ADDR_MASK	0xFFFFFE00
+#define	ACPI_MMIO_REG_LEN	0x100
+
+#define	PM1_CNT			0x4
+#define	SLEEP_TYPE_MASK		0xFFFFECFF
+#define	SLEEP_TYPE_S5		0x1C00
+#define	SLEEP_ENABLE		0x2000
+
+extern int pmc_atom_read(int offset, u32 *value);
+extern int pmc_atom_write(int offset, u32 value);
+
+#endif /* PMC_ATOM_H */
-- 
cgit v1.2.3


From 2e6f38cef8838a5edd6051c3110ecf06f37ec544 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 23 Jan 2017 12:07:45 -0600
Subject: platform/x86: fix typo in comment

s/Acumulate/Accumulate/

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---
 include/linux/platform_data/x86/pmc_atom.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h
index aa8744c77c6d..e4905fe69c38 100644
--- a/include/linux/platform_data/x86/pmc_atom.h
+++ b/include/linux/platform_data/x86/pmc_atom.h
@@ -50,7 +50,7 @@
 				BIT_ORED_DEDICATED_IRQ_GPSC | \
 				BIT_SHARED_IRQ_GPSS)
 
-/* The timers acumulate time spent in sleep state */
+/* The timers accumulate time spent in sleep state */
 #define	PMC_S0IR_TMR		0x80
 #define	PMC_S0I1_TMR		0x84
 #define	PMC_S0I2_TMR		0x88
-- 
cgit v1.2.3


From 4cb3e3782776f82400a5d135909dda466b813d45 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Wed, 25 Jan 2017 21:34:51 +0200
Subject: soc: samsung: pmu: Remove unused and duplicated defines

The exynos-regs-pmu.h was never a complete list of PMU registers.  It
contained a lot of holes for registers which were not used.  However, a
lot of unused defines came along with porting the code from vendor
kernel.  Few of defines were also duplicated.

Remove them so the file will be slightly smaller.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 include/linux/soc/samsung/exynos-regs-pmu.h | 72 +++--------------------------
 1 file changed, 7 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index d30186e2b609..9793502a6a57 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -7,7 +7,13 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
-*/
+ *
+ *
+ * Notice:
+ * This is not a list of all Exynos Power Management Unit SFRs.
+ * There are too many of them, not mentioning subtle differences
+ * between SoCs. For now, put here only the used registers.
+ */
 
 #ifndef __LINUX_SOC_EXYNOS_REGS_PMU_H
 #define __LINUX_SOC_EXYNOS_REGS_PMU_H __FILE__
@@ -38,7 +44,6 @@
 #define EXYNOS_CORE_PO_RESET(n)			((1 << 4) << n)
 #define EXYNOS_WAKEUP_FROM_LOWPWR		(1 << 28)
 #define EXYNOS_SWRESET				0x0400
-#define EXYNOS5440_SWRESET			0x00C4
 
 #define S5P_WAKEUP_STAT				0x0600
 #define S5P_EINT_WAKEUP_MASK			0x0604
@@ -136,12 +141,6 @@
 #define EXYNOS_COMMON_OPTION(_nr)		\
 			(EXYNOS_COMMON_CONFIGURATION(_nr) + 0x8)
 
-#define EXYNOS_CORE_LOCAL_PWR_EN		0x3
-
-#define EXYNOS_ARM_COMMON_STATUS		0x2504
-#define EXYNOS_COMMON_OPTION(_nr)		\
-			(EXYNOS_COMMON_CONFIGURATION(_nr) + 0x8)
-
 #define EXYNOS_ARM_L2_CONFIGURATION		0x2600
 #define EXYNOS_L2_CONFIGURATION(_nr)		\
 			(EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80))
@@ -149,18 +148,10 @@
 			(EXYNOS_L2_CONFIGURATION(_nr) + 0x4)
 #define EXYNOS_L2_OPTION(_nr)			\
 			(EXYNOS_L2_CONFIGURATION(_nr) + 0x8)
-#define EXYNOS_L2_COMMON_PWR_EN			0x3
-
-#define EXYNOS_ARM_CORE_X_STATUS_OFFSET		0x4
-
-#define EXYNOS5_APLL_SYSCLK_CONFIGURATION	0x2A00
-#define EXYNOS5_APLL_SYSCLK_STATUS		0x2A04
 
 #define EXYNOS5_ARM_L2_OPTION			0x2608
 #define EXYNOS5_USE_RETENTION			BIT(4)
 
-#define EXYNOS5_L2RSTDISABLE_VALUE		BIT(3)
-
 #define S5P_PAD_RET_MAUDIO_OPTION		0x3028
 #define S5P_PAD_RET_MMC2_OPTION			0x30c8
 #define S5P_PAD_RET_GPIO_OPTION			0x3108
@@ -411,7 +402,6 @@
 #define EXYNOS5_SATA_MEM_SYS_PWR_REG				0x11FC
 #define EXYNOS5_PAD_RETENTION_DRAM_SYS_PWR_REG			0x1200
 #define EXYNOS5_PAD_RETENTION_MAU_SYS_PWR_REG			0x1204
-#define EXYNOS5_PAD_RETENTION_EFNAND_SYS_PWR_REG		0x1208
 #define EXYNOS5_PAD_RETENTION_GPIO_SYS_PWR_REG			0x1220
 #define EXYNOS5_PAD_RETENTION_UART_SYS_PWR_REG			0x1224
 #define EXYNOS5_PAD_RETENTION_MMCA_SYS_PWR_REG			0x1228
@@ -485,7 +475,6 @@
 #define EXYNOS5420_SWRESET_KFC_SEL				0x3
 
 /* Only for EXYNOS5420 */
-#define EXYNOS5420_ISP_ARM_OPTION				0x2488
 #define EXYNOS5420_L2RSTDISABLE_VALUE				BIT(3)
 
 #define EXYNOS5420_LPI_MASK					0x0004
@@ -494,9 +483,6 @@
 #define EXYNOS5420_ATB_KFC					BIT(13)
 #define EXYNOS5420_ATB_ISP_ARM					BIT(19)
 #define EXYNOS5420_EMULATION					BIT(31)
-#define ATB_ISP_ARM						BIT(12)
-#define ATB_KFC							BIT(13)
-#define ATB_NOC							BIT(14)
 
 #define EXYNOS5420_ARM_INTR_SPREAD_ENABLE			0x0100
 #define EXYNOS5420_ARM_INTR_SPREAD_USE_STANDBYWFI		0x0104
@@ -510,11 +496,6 @@
 #define EXYNOS5420_KFC_CORE_RESET(_nr)				\
 	((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr))
 
-#define EXYNOS5420_BB_CON1					0x0784
-#define EXYNOS5420_BB_SEL_EN					BIT(31)
-#define EXYNOS5420_BB_PMOS_EN					BIT(7)
-#define EXYNOS5420_BB_1300X					0XF
-
 #define EXYNOS5420_ARM_CORE2_SYS_PWR_REG			0x1020
 #define EXYNOS5420_DIS_IRQ_ARM_CORE2_LOCAL_SYS_PWR_REG		0x1024
 #define EXYNOS5420_DIS_IRQ_ARM_CORE2_CENTRAL_SYS_PWR_REG	0x1028
@@ -546,15 +527,6 @@
 #define EXYNOS5420_SPLL_SYSCLK_SYS_PWR_REG                      0x1178
 #define EXYNOS5420_INTRAM_MEM_SYS_PWR_REG                       0x11B8
 #define EXYNOS5420_INTROM_MEM_SYS_PWR_REG                       0x11BC
-#define EXYNOS5420_ONENANDXL_MEM_SYS_PWR			0x11C0
-#define EXYNOS5420_USBDEV_MEM_SYS_PWR				0x11CC
-#define EXYNOS5420_USBDEV1_MEM_SYS_PWR				0x11D0
-#define EXYNOS5420_SDMMC_MEM_SYS_PWR				0x11D4
-#define EXYNOS5420_CSSYS_MEM_SYS_PWR				0x11D8
-#define EXYNOS5420_SECSS_MEM_SYS_PWR				0x11DC
-#define EXYNOS5420_ROTATOR_MEM_SYS_PWR				0x11E0
-#define EXYNOS5420_INTRAM_MEM_SYS_PWR				0x11E4
-#define EXYNOS5420_INTROM_MEM_SYS_PWR				0x11E8
 #define EXYNOS5420_PAD_RETENTION_JTAG_SYS_PWR_REG		0x1208
 #define EXYNOS5420_PAD_RETENTION_DRAM_SYS_PWR_REG		0x1210
 #define EXYNOS5420_PAD_RETENTION_UART_SYS_PWR_REG		0x1214
@@ -605,13 +577,7 @@
 #define EXYNOS5420_CMU_RESET_MSC_SYS_PWR_REG			0x159C
 #define EXYNOS5420_CMU_RESET_FSYS_SYS_PWR_REG			0x15A0
 #define EXYNOS5420_SFR_AXI_CGDIS1				0x15E4
-#define EXYNOS_ARM_CORE2_CONFIGURATION				0x2100
-#define EXYNOS5420_ARM_CORE2_OPTION				0x2108
-#define EXYNOS_ARM_CORE3_CONFIGURATION				0x2180
-#define EXYNOS5420_ARM_CORE3_OPTION				0x2188
-#define EXYNOS5420_ARM_COMMON_STATUS				0x2504
 #define EXYNOS5420_ARM_COMMON_OPTION				0x2508
-#define EXYNOS5420_KFC_COMMON_STATUS				0x2584
 #define EXYNOS5420_KFC_COMMON_OPTION				0x2588
 #define EXYNOS5420_LOGIC_RESET_DURATION3			0x2D1C
 
@@ -626,33 +592,9 @@
 #define EXYNOS_PAD_RET_DRAM_OPTION				0x3008
 #define EXYNOS_PAD_RET_MAUDIO_OPTION				0x3028
 #define EXYNOS_PAD_RET_JTAG_OPTION				0x3048
-#define EXYNOS_PAD_RET_GPIO_OPTION				0x3108
-#define EXYNOS_PAD_RET_UART_OPTION				0x3128
-#define EXYNOS_PAD_RET_MMCA_OPTION				0x3148
-#define EXYNOS_PAD_RET_MMCB_OPTION				0x3168
 #define EXYNOS_PAD_RET_EBIA_OPTION				0x3188
 #define EXYNOS_PAD_RET_EBIB_OPTION				0x31A8
 
-#define EXYNOS_PS_HOLD_CONTROL					0x330C
-
-/* For SYS_PWR_REG */
-#define EXYNOS_SYS_PWR_CFG					BIT(0)
-
-#define EXYNOS5420_MFC_CONFIGURATION				0x4060
-#define EXYNOS5420_MFC_STATUS					0x4064
-#define EXYNOS5420_MFC_OPTION					0x4068
-#define EXYNOS5420_G3D_CONFIGURATION				0x4080
-#define EXYNOS5420_G3D_STATUS					0x4084
-#define EXYNOS5420_G3D_OPTION					0x4088
-#define EXYNOS5420_DISP0_CONFIGURATION				0x40A0
-#define EXYNOS5420_DISP0_STATUS					0x40A4
-#define EXYNOS5420_DISP0_OPTION					0x40A8
-#define EXYNOS5420_DISP1_CONFIGURATION				0x40C0
-#define EXYNOS5420_DISP1_STATUS					0x40C4
-#define EXYNOS5420_DISP1_OPTION					0x40C8
-#define EXYNOS5420_MAU_CONFIGURATION				0x40E0
-#define EXYNOS5420_MAU_STATUS					0x40E4
-#define EXYNOS5420_MAU_OPTION					0x40E8
 #define EXYNOS5420_FSYS2_OPTION					0x4168
 #define EXYNOS5420_PSGEN_OPTION					0x4188
 
-- 
cgit v1.2.3


From ee55ae6194a5439bde3a3b8ee0abda63c610e740 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Wed, 25 Jan 2017 21:09:44 +0200
Subject: soc: samsung: pmu: Remove duplicated define for ARM_L2_OPTION
 register

The register ARM_L2_OPTION (0x2608 in Exynos4 and Exynos5 PMU) was
defined twice.  Both names were used in the Exynos542x code.  Simplify
this.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 include/linux/soc/samsung/exynos-regs-pmu.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index 9793502a6a57..9786c62d7159 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -149,8 +149,7 @@
 #define EXYNOS_L2_OPTION(_nr)			\
 			(EXYNOS_L2_CONFIGURATION(_nr) + 0x8)
 
-#define EXYNOS5_ARM_L2_OPTION			0x2608
-#define EXYNOS5_USE_RETENTION			BIT(4)
+#define EXYNOS_L2_USE_RETENTION			BIT(4)
 
 #define S5P_PAD_RET_MAUDIO_OPTION		0x3028
 #define S5P_PAD_RET_MMC2_OPTION			0x30c8
-- 
cgit v1.2.3


From 4f497a4310646f14ab518ab6108dbef07f4606af Mon Sep 17 00:00:00 2001
From: Corentin LABBE <clabbe.montjoie@gmail.com>
Date: Thu, 15 Dec 2016 11:15:25 +0100
Subject: PM / QoS: Remove unneeded linux/miscdevice.h include

pm_qos.h does not use any miscdevice, so this patch
remove this unnecessary inclusion.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_qos.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 0f65d36c2a75..d4d34791e463 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -6,7 +6,6 @@
  */
 #include <linux/plist.h>
 #include <linux/notifier.h>
-#include <linux/miscdevice.h>
 #include <linux/device.h>
 #include <linux/workqueue.h>
 
-- 
cgit v1.2.3


From 3aa26a3b2ea63c4d09420e74421370655aa1cf8f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 2 Jan 2017 14:41:02 +0530
Subject: PM / OPP: Rename dev_pm_opp_get_suspend_opp() and return OPP rate

There is only one user of dev_pm_opp_get_suspend_opp() and that uses it
to get the OPP rate for the suspend_opp.

Rename dev_pm_opp_get_suspend_opp() as dev_pm_opp_get_suspend_opp_freq()
and return the rate directly from it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_opp.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 0edd88f93904..7483ff291a8e 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -88,7 +88,7 @@ int dev_pm_opp_get_opp_count(struct device *dev);
 unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev);
 unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev);
 unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev);
-struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev);
+unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev);
 
 struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					      unsigned long freq,
@@ -159,9 +159,9 @@ static inline unsigned long dev_pm_opp_get_max_transition_latency(struct device
 	return 0;
 }
 
-static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
+static inline unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev)
 {
-	return NULL;
+	return 0;
 }
 
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
-- 
cgit v1.2.3


From dc2c9ad52af45ebecf9743aacb1916ebb2f1e848 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 2 Jan 2017 14:41:03 +0530
Subject: PM / OPP: Don't expose srcu_head to register notifiers

Let the OPP core provide helpers to register notifiers for any device,
instead of exposing srcu_head outside of the core.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_opp.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 7483ff291a8e..66a02deeb03f 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -108,7 +108,9 @@ int dev_pm_opp_enable(struct device *dev, unsigned long freq);
 
 int dev_pm_opp_disable(struct device *dev, unsigned long freq);
 
-struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
+int dev_pm_opp_register_notifier(struct device *dev, struct notifier_block *nb);
+int dev_pm_opp_unregister_notifier(struct device *dev, struct notifier_block *nb);
+
 int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
 				unsigned int count);
 void dev_pm_opp_put_supported_hw(struct device *dev);
@@ -202,10 +204,14 @@ static inline int dev_pm_opp_disable(struct device *dev, unsigned long freq)
 	return 0;
 }
 
-static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
-							struct device *dev)
+static inline int dev_pm_opp_register_notifier(struct device *dev, struct notifier_block *nb)
 {
-	return ERR_PTR(-ENOTSUPP);
+	return -ENOTSUPP;
+}
+
+static inline int dev_pm_opp_unregister_notifier(struct device *dev, struct notifier_block *nb)
+{
+	return -ENOTSUPP;
 }
 
 static inline int dev_pm_opp_set_supported_hw(struct device *dev,
-- 
cgit v1.2.3


From 6a4bc2b4c3c9afd643e8da44dd2318a5c5f22cd0 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 26 Jan 2017 14:44:17 -0800
Subject: net: Fix ndo_setup_tc comment

Commit 16e5cc647173 ("net: rework setup_tc ndo op to consume
general tc operand") changed the ndo_setup_tc() signature, but did not
update the comments in netdevice.h, so do that now.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3868c32d98af..d63cacb67ea6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -964,11 +964,12 @@ struct netdev_xdp {
  *      with PF and querying it may introduce a theoretical security risk.
  * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
- * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
- * 	Called to setup 'tc' number of traffic classes in the net device. This
- * 	is always called from the stack with the rtnl lock held and netif tx
- * 	queues stopped. This allows the netdevice to perform queue management
- * 	safely.
+ * int (*ndo_setup_tc)(struct net_device *dev, u32 handle,
+ *		       __be16 protocol, struct tc_to_netdev *tc);
+ *	Called to setup any 'tc' scheduler, classifier or action on @dev.
+ *	This is always called from the stack with the rtnl lock held and netif
+ *	tx queues stopped. This allows the netdevice to perform queue
+ *	management safely.
  *
  *	Fiber Channel over Ethernet (FCoE) offload functions.
  * int (*ndo_fcoe_enable)(struct net_device *dev);
-- 
cgit v1.2.3


From 07e4fead45e6e1932f0b960655ab554b6aab6a08 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 25 Jan 2017 08:06:40 -0800
Subject: blk-mq: create debugfs directory tree

In preparation for putting blk-mq debugging information in debugfs,
create a directory tree mirroring the one in sysfs:

    # tree -d /sys/kernel/debug/block
    /sys/kernel/debug/block
    |-- nvme0n1
    |   `-- mq
    |       |-- 0
    |       |   `-- cpu0
    |       |-- 1
    |       |   `-- cpu1
    |       |-- 2
    |       |   `-- cpu2
    |       `-- 3
    |           `-- cpu3
    `-- vda
        `-- mq
            `-- 0
                |-- cpu0
                |-- cpu1
                |-- cpu2
                `-- cpu3

Also add the scaffolding for the actual files that will go in here,
either under the hardware queue or software queue directories.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 25564857f5f8..0ee283f3cffe 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -571,6 +571,11 @@ struct request_queue {
 	struct list_head	tag_set_list;
 	struct bio_set		*bio_split;
 
+#ifdef CONFIG_DEBUG_FS
+	struct dentry		*debugfs_dir;
+	struct dentry		*mq_debugfs_dir;
+#endif
+
 	bool			mq_sysfs_init_done;
 };
 
-- 
cgit v1.2.3


From 24af1ccfe12adddbe17d11801e1689791a4cc282 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 25 Jan 2017 14:32:13 -0800
Subject: sbitmap: add helpers for dumping to a seq_file

This is useful debugging information that will be used in the blk-mq
debugfs directory.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>

Changed 'weight' to 'busy'.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/sbitmap.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index f017fd6e69c4..d4e0a204c118 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -258,6 +258,26 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
 
 unsigned int sbitmap_weight(const struct sbitmap *sb);
 
+/**
+ * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
+ * @sb: Bitmap to show.
+ * @m: struct seq_file to write to.
+ *
+ * This is intended for debugging. The format may change at any time.
+ */
+void sbitmap_show(struct sbitmap *sb, struct seq_file *m);
+
+/**
+ * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct
+ * seq_file.
+ * @sb: Bitmap to show.
+ * @m: struct seq_file to write to.
+ *
+ * This is intended for debugging. The output isn't guaranteed to be internally
+ * consistent.
+ */
+void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m);
+
 /**
  * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific
  * memory node.
@@ -370,4 +390,14 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
  */
 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
 
+/**
+ * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
+ * seq_file.
+ * @sbq: Bitmap queue to show.
+ * @m: struct seq_file to write to.
+ *
+ * This is intended for debugging. The format may change at any time.
+ */
+void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
+
 #endif /* __LINUX_SCALE_BITMAP_H */
-- 
cgit v1.2.3


From 50e1dab86aa2c10cbca2f754aae9542169403141 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 26 Jan 2017 14:42:34 -0700
Subject: blk-mq-sched: fix starvation for multiple hardware queues and shared
 tags

If we have both multiple hardware queues and shared tag map between
devices, we need to ensure that we propagate the hardware queue
restart bit higher up. This is because we can get into a situation
where we don't have any IO pending on a hardware queue, yet we fail
getting a tag to start new IO. If that happens, it's not enough to
mark the hardware queue as needing a restart, we need to bubble
that up to the higher level queue as well.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Tested-by: Hannes Reinecke <hare@suse.com>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0ee283f3cffe..883b8abe4305 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -607,6 +607,7 @@ struct request_queue {
 #define QUEUE_FLAG_FLUSH_NQ    25	/* flush not queueuable */
 #define QUEUE_FLAG_DAX         26	/* device supports DAX */
 #define QUEUE_FLAG_STATS       27	/* track rq completion times */
+#define QUEUE_FLAG_RESTART     28	/* queue needs restart at completion */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
-- 
cgit v1.2.3


From c13660a08c8b3bb49def4374bfd414aaaa564662 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 26 Jan 2017 12:40:07 -0700
Subject: blk-mq-sched: change ->dispatch_requests() to ->dispatch_request()

When we invoke dispatch_requests(), the scheduler empties everything
into the passed in list. This isn't always a good thing, since it
means that we remove items that we could have potentially merged
with.

Change the function to dispatch single requests at the time. If
we do that, we can backoff exactly at the point where the device
can't consume more IO, and leave the rest with the scheduler for
better merging and future dispatch decision making.

Signed-off-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Tested-by: Hannes Reinecke <hare@suse.com>
---
 include/linux/elevator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index ecb96fd67c6d..b5825c4f06f7 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -92,7 +92,7 @@ struct elevator_mq_ops {
 	struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
 	void (*put_request)(struct request *);
 	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
-	void (*dispatch_requests)(struct blk_mq_hw_ctx *, struct list_head *);
+	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
 	bool (*has_work)(struct blk_mq_hw_ctx *);
 	void (*completed_request)(struct blk_mq_hw_ctx *, struct request *);
 	void (*started_request)(struct request *);
-- 
cgit v1.2.3


From d35a00b8e33dab7385f724e713ae71c8be0a49f4 Mon Sep 17 00:00:00 2001
From: Felix Jia <felix.jia@alliedtelesis.co.nz>
Date: Thu, 26 Jan 2017 16:59:17 +1300
Subject: net/ipv6: allow sysctl to change link-local address generation mode

The address generation mode for IPv6 link-local can only be configured
by netlink messages. This patch adds the ability to change the address
generation mode via sysctl.

v1 -> v2
Removed the rtnl lock and switch to use RCU lock to iterate through
the netdev list.

v2 -> v3
Removed the addrgenmode variable from the idev structure and use the
systcl storage for the flag.

Simplifed the logic for sysctl handling by removing the supported
for all operation.

Added support for more types of tunnel interfaces for link-local
address generation.

Based the patches from net-next.

v3 -> v4
Removed unnecessary whitespace changes.

Signed-off-by: Felix Jia <felix.jia@alliedtelesis.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 671d014e6429..71be5b330d21 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -69,6 +69,7 @@ struct ipv6_devconf {
 	__s32		seg6_require_hmac;
 #endif
 	__u32		enhanced_dad;
+	__u32		addr_gen_mode;
 
 	struct ctl_table_header *sysctl_header;
 };
-- 
cgit v1.2.3


From f73f44eb00cb136990cfb7d40e436c13d7669ec8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Jan 2017 08:30:47 -0700
Subject: block: add a op_is_flush helper

This centralizes the checks for bios that needs to be go into the flush
state machine.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0e5b1cd5113c..37c9a43c5e78 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -220,6 +220,15 @@ static inline bool op_is_write(unsigned int op)
 	return (op & 1);
 }
 
+/*
+ * Check if the bio or request is one that needs special treatment in the
+ * flush state machine.
+ */
+static inline bool op_is_flush(unsigned int op)
+{
+	return op & (REQ_FUA | REQ_PREFLUSH);
+}
+
 /*
  * Reads are always treated as synchronous, as are requests with the FUA or
  * PREFLUSH flag.  Other operations may be marked as synchronous using the
-- 
cgit v1.2.3


From 9d162ed69f51cbd9ee5a0c7e82aba7acc96362ff Mon Sep 17 00:00:00 2001
From: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Date: Fri, 27 Jan 2017 08:46:23 +0100
Subject: net: phy: micrel: add support for KSZ8795

This is adds support for the PHYs in the KSZ8795 5port managed switch.

It will allow to detect the link between the switch and the soc
and uses the same read_status functions as the KSZ8873MLL switch.

Signed-off-by: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/micrel_phy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 257173e0095e..f541da68d1e7 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -35,6 +35,8 @@
 #define PHY_ID_KSZ886X		0x00221430
 #define PHY_ID_KSZ8863		0x00221435
 
+#define PHY_ID_KSZ8795		0x00221550
+
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK	0x00000001
 #define MICREL_PHY_FXEN		0x00000002
-- 
cgit v1.2.3


From 0fc9ae107669760c2a8658cb5b5876dbe525e08d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Fri, 27 Jan 2017 14:07:01 +0100
Subject: net: phy: broadcom: add support for BCM54210E
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's Broadcom PHY simply described as single-port
RGMII 10/100/1000BASE-T PHY. It requires disabling delay skew and GTXCLK
bits.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index cf93f1399d3e..5881d1fdc1fb 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -17,6 +17,7 @@
 #define PHY_ID_BCM5482			0x0143bcb0
 #define PHY_ID_BCM5411			0x00206070
 #define PHY_ID_BCM5421			0x002060e0
+#define PHY_ID_BCM54210E		0x600d84a0
 #define PHY_ID_BCM5464			0x002060b0
 #define PHY_ID_BCM5461			0x002060c0
 #define PHY_ID_BCM54612E		0x03625e60
-- 
cgit v1.2.3


From 2b368b234ec43abbf46f2983478c438d36e58134 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 27 Jan 2017 15:26:32 +0100
Subject: net: wan: Remove unused stats member from struct frad_local

The stats member of struct frad_locl is used neither by the dlci nor the
sdla driver, so it might as well be removed.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_frad.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h
index 4316aa173dde..46df7e565d6f 100644
--- a/include/linux/if_frad.h
+++ b/include/linux/if_frad.h
@@ -66,8 +66,6 @@ struct dlci_local
 
 struct frad_local
 {
-   struct net_device_stats stats;
-
    /* devices which this FRAD is slaved to */
    struct net_device     *master[CONFIG_DLCI_MAX];
    short             dlci[CONFIG_DLCI_MAX];
-- 
cgit v1.2.3


From f617f27653c4d9f5b2aa43d567ac0405df889944 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Fri, 27 Jan 2017 15:02:26 +0000
Subject: net: implement netif_cond_dbg macro

For reporting things that may or may not be serious, depending on some
 condition, netif_cond_dbg will check the condition and print the report
 at either dbg (if the condition is true) or the specified level.

Suggested-by: Jon Cooper <jcooper@solarflare.com>
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d63cacb67ea6..9511e5a1cfc6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4347,6 +4347,15 @@ do {								\
 })
 #endif
 
+/* if @cond then downgrade to debug, else print at @level */
+#define netif_cond_dbg(priv, type, netdev, cond, level, fmt, args...)     \
+	do {                                                              \
+		if (cond)                                                 \
+			netif_dbg(priv, type, netdev, fmt, ##args);       \
+		else                                                      \
+			netif_ ## level(priv, type, netdev, fmt, ##args); \
+	} while (0)
+
 #if defined(VERBOSE_DEBUG)
 #define netif_vdbg	netif_dbg
 #else
-- 
cgit v1.2.3


From f5b98461cb8167ba362ad9f74c41d126b7becea7 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 6 Jan 2017 19:32:01 +0100
Subject: random: use chacha20 for get_random_int/long

Now that our crng uses chacha20, we can rely on its speedy
characteristics for replacing MD5, while simultaneously achieving a
higher security guarantee. Before the idea was to use these functions if
you wanted random integers that aren't stupidly insecure but aren't
necessarily secure either, a vague gray zone, that hopefully was "good
enough" for its users. With chacha20, we can strengthen this claim,
since either we're using an rdrand-like instruction, or we're using the
same crng as /dev/urandom. And it's faster than what was before.

We could have chosen to replace this with a SipHash-derived function,
which might be slightly faster, but at the cost of having yet another
RNG construction in the kernel. By moving to chacha20, we have a single
RNG to analyze and verify, and we also already get good performance
improvements on all platforms.

Implementation-wise, rather than use a generic buffer for both
get_random_int/long and memcpy based on the size needs, we use a
specific buffer for 32-bit reads and for 64-bit reads. This way, we're
guaranteed to always have aligned accesses on all platforms. While
slightly more verbose in C, the assembly this generates is a lot
simpler than otherwise.

Finally, on 32-bit platforms where longs and ints are the same size,
we simply alias get_random_int to get_random_long.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Suggested-by: Theodore Ts'o <tytso@mit.edu>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/random.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 7bd2403e4fef..16ab429735a7 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -37,7 +37,6 @@ extern void get_random_bytes(void *buf, int nbytes);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern void get_random_bytes_arch(void *buf, int nbytes);
-extern int random_int_secret_init(void);
 
 #ifndef MODULE
 extern const struct file_operations random_fops, urandom_fops;
-- 
cgit v1.2.3


From c440408cf6901eeb2c09563397e24a9097907078 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 22 Jan 2017 16:34:08 +0100
Subject: random: convert get_random_int/long into get_random_u32/u64

Many times, when a user wants a random number, he wants a random number
of a guaranteed size. So, thinking of get_random_int and get_random_long
in terms of get_random_u32 and get_random_u64 makes it much easier to
achieve this. It also makes the code simpler.

On 32-bit platforms, get_random_int and get_random_long are both aliased
to get_random_u32. On 64-bit platforms, int->u32 and long->u64.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/random.h | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index 16ab429735a7..ed5c3838780d 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -42,8 +42,21 @@ extern void get_random_bytes_arch(void *buf, int nbytes);
 extern const struct file_operations random_fops, urandom_fops;
 #endif
 
-unsigned int get_random_int(void);
-unsigned long get_random_long(void);
+u32 get_random_u32(void);
+u64 get_random_u64(void);
+static inline unsigned int get_random_int(void)
+{
+	return get_random_u32();
+}
+static inline unsigned long get_random_long(void)
+{
+#if BITS_PER_LONG == 64
+	return get_random_u64();
+#else
+	return get_random_u32();
+#endif
+}
+
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);
-- 
cgit v1.2.3


From b18b6a9cef7f30e9a8b7738d5fc8d568cf660855 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Sat, 21 Jan 2017 00:09:08 -0500
Subject: timers: Omit POSIX timer stuff from task_struct when disabled

When CONFIG_POSIX_TIMERS is disabled, it is preferable to remove related
structures from struct task_struct and struct signal_struct as they
won't contain anything useful and shouldn't be relied upon by mistake.
Code still referencing those structures is also disabled here.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/init_task.h | 40 +++++++++++++++++++++++++---------------
 include/linux/sched.h     | 13 ++++++++++---
 2 files changed, 35 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 325f649d77ff..3a85d61f7614 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -42,6 +42,27 @@ extern struct fs_struct init_fs;
 #define INIT_PREV_CPUTIME(x)
 #endif
 
+#ifdef CONFIG_POSIX_TIMERS
+#define INIT_POSIX_TIMERS(s)						\
+	.posix_timers = LIST_HEAD_INIT(s.posix_timers),
+#define INIT_CPU_TIMERS(s)						\
+	.cpu_timers = {							\
+		LIST_HEAD_INIT(s.cpu_timers[0]),			\
+		LIST_HEAD_INIT(s.cpu_timers[1]),			\
+		LIST_HEAD_INIT(s.cpu_timers[2]),								\
+	},
+#define INIT_CPUTIMER(s)						\
+	.cputimer	= { 						\
+		.cputime_atomic	= INIT_CPUTIME_ATOMIC,			\
+		.running	= false,				\
+		.checking_timer = false,				\
+	},
+#else
+#define INIT_POSIX_TIMERS(s)
+#define INIT_CPU_TIMERS(s)
+#define INIT_CPUTIMER(s)
+#endif
+
 #define INIT_SIGNALS(sig) {						\
 	.nr_threads	= 1,						\
 	.thread_head	= LIST_HEAD_INIT(init_task.thread_node),	\
@@ -49,14 +70,10 @@ extern struct fs_struct init_fs;
 	.shared_pending	= { 						\
 		.list = LIST_HEAD_INIT(sig.shared_pending.list),	\
 		.signal =  {{0}}},					\
-	.posix_timers	 = LIST_HEAD_INIT(sig.posix_timers),		\
-	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
+	INIT_POSIX_TIMERS(sig)						\
+	INIT_CPU_TIMERS(sig)						\
 	.rlim		= INIT_RLIMITS,					\
-	.cputimer	= { 						\
-		.cputime_atomic	= INIT_CPUTIME_ATOMIC,			\
-		.running	= false,				\
-		.checking_timer = false,				\
-	},								\
+	INIT_CPUTIMER(sig)						\
 	INIT_PREV_CPUTIME(sig)						\
 	.cred_guard_mutex =						\
 		 __MUTEX_INITIALIZER(sig.cred_guard_mutex),		\
@@ -247,7 +264,7 @@ extern struct task_group root_task_group;
 	.blocked	= {{0}},					\
 	.alloc_lock	= __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),		\
 	.journal_info	= NULL,						\
-	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
+	INIT_CPU_TIMERS(tsk)						\
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),	\
 	.timer_slack_ns = 50000, /* 50 usec default slack */		\
 	.pids = {							\
@@ -274,13 +291,6 @@ extern struct task_group root_task_group;
 }
 
 
-#define INIT_CPU_TIMERS(cpu_timers)					\
-{									\
-	LIST_HEAD_INIT(cpu_timers[0]),					\
-	LIST_HEAD_INIT(cpu_timers[1]),					\
-	LIST_HEAD_INIT(cpu_timers[2]),					\
-}
-
 /* Attach to the init_task data structure for proper alignment */
 #define __init_task_data __attribute__((__section__(".data..init_task")))
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4d1905245c7a..e8f6af59726d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,13 +734,14 @@ struct signal_struct {
 	unsigned int		is_child_subreaper:1;
 	unsigned int		has_child_subreaper:1;
 
+#ifdef CONFIG_POSIX_TIMERS
+
 	/* POSIX.1b Interval Timers */
 	int			posix_timer_id;
 	struct list_head	posix_timers;
 
 	/* ITIMER_REAL timer for the process */
 	struct hrtimer real_timer;
-	struct pid *leader_pid;
 	ktime_t it_real_incr;
 
 	/*
@@ -759,12 +760,16 @@ struct signal_struct {
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
 
+	struct list_head cpu_timers[3];
+
+#endif
+
+	struct pid *leader_pid;
+
 #ifdef CONFIG_NO_HZ_FULL
 	atomic_t tick_dep_mask;
 #endif
 
-	struct list_head cpu_timers[3];
-
 	struct pid *tty_old_pgrp;
 
 	/* boolean value for session group leader */
@@ -1681,8 +1686,10 @@ struct task_struct {
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;
 
+#ifdef CONFIG_POSIX_TIMERS
 	struct task_cputime cputime_expires;
 	struct list_head cpu_timers[3];
+#endif
 
 /* process credentials */
 	const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
-- 
cgit v1.2.3


From 5ea708d15a928f7a479987704203616d3274c03b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Jan 2017 14:52:44 +0300
Subject: block: simplify blk_init_allocated_queue

Return an errno value instead of the passed in queue so that the callers
don't have to keep track of two queues, and move the assignment of the
request_fn and lock to the caller as passing them as argument doesn't
simplify anything.  While we're at it also remove two pointless NULL
assignments, given that the request structure is zeroed on allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 05675b1dfd20..6b1efc5760ea 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1137,8 +1137,7 @@ extern void blk_unprep_request(struct request *);
 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
 					spinlock_t *lock, int node_id);
 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
-extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
-						      request_fn_proc *, spinlock_t *);
+extern int blk_init_allocated_queue(struct request_queue *);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
-- 
cgit v1.2.3


From 6d247d7f71d1fa4b66a5f4da7b1daa21510d529b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Jan 2017 09:51:45 -0700
Subject: block: allow specifying size for extra command data

This mirrors the blk-mq capabilities to allocate extra drivers-specific
data behind struct request by setting a cmd_size field, as well as having
a constructor / destructor for it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6b1efc5760ea..461b7cf6af1d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -273,6 +273,8 @@ typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
 typedef int (lld_busy_fn) (struct request_queue *q);
 typedef int (bsg_job_fn) (struct bsg_job *);
+typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t);
+typedef void (exit_rq_fn)(struct request_queue *, struct request *);
 
 enum blk_eh_timer_return {
 	BLK_EH_NOT_HANDLED,
@@ -408,6 +410,8 @@ struct request_queue {
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
+	init_rq_fn		*init_rq_fn;
+	exit_rq_fn		*exit_rq_fn;
 
 	const struct blk_mq_ops	*mq_ops;
 
@@ -577,6 +581,9 @@ struct request_queue {
 #endif
 
 	bool			mq_sysfs_init_done;
+
+	size_t			cmd_size;
+	void			*rq_alloc_data;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
-- 
cgit v1.2.3


From 48b77ad6084481ef9330a5d2bee289966da0975b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Jan 2017 09:35:54 +0100
Subject: block: cleanup tracing

A couple tweaks to the tracing code:

 - trace the request size for all requests
 - trace request sector and nr_sectors only for fs requests, enforced by
   helpers
 - drop SCSI CDB tracing - we have SCSI tracing for this and are going
   to me the CDB out of the generic struct request soon.

With this the tracing code stops to know about BLOCK_PC requests entirely,
it's just FS vs passthrough requests now, where the latter includes any
driver-private requests.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blktrace_api.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index e417f080219a..a143a67a6f33 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -110,16 +110,16 @@ struct compat_blk_user_trace_setup {
 
 #endif
 
-#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
+extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
 
-static inline int blk_cmd_buf_len(struct request *rq)
+static inline sector_t blk_rq_trace_sector(struct request *rq)
 {
-	return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? rq->cmd_len * 3 : 1;
+	return (rq->cmd_type != REQ_TYPE_FS) ? 0 : blk_rq_pos(rq);
 }
 
-extern void blk_dump_cmd(char *buf, struct request *rq);
-extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
-
-#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
+static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq)
+{
+	return (rq->cmd_type != REQ_TYPE_FS) ? 0 : blk_rq_sectors(rq);
+}
 
 #endif
-- 
cgit v1.2.3


From eb8db831be80692bf4bda3dfc55001daf64ec299 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 22 Jan 2017 18:32:46 +0100
Subject: dm: always defer request allocation to the owner of the request_queue

DM already calls blk_mq_alloc_request on the request_queue of the
underlying device if it is a blk-mq device.  But now that we allow drivers
to allocate additional data and initialize it ahead of time we need to do
the same for all drivers.   Doing so and using the new cmd_size
infrastructure in the block layer greatly simplifies the dm-rq and mpath
code, and should also make arbitrary combinations of SQ and MQ devices
with SQ or MQ device mapper tables easily possible as a further step.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/device-mapper.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ef7962e84444..a7e6903866fd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -55,8 +55,6 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
  * = 2: The target wants to push back the io
  */
 typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
-typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
-				  union map_info *map_context);
 typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti,
 					    struct request *rq,
 					    union map_info *map_context,
@@ -163,7 +161,6 @@ struct target_type {
 	dm_ctr_fn ctr;
 	dm_dtr_fn dtr;
 	dm_map_fn map;
-	dm_map_request_fn map_rq;
 	dm_clone_and_map_request_fn clone_and_map_rq;
 	dm_release_clone_request_fn release_clone_rq;
 	dm_endio_fn end_io;
-- 
cgit v1.2.3


From 8ae94eb65be9425af4d57a4f4cfebfdf03081e93 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Jan 2017 15:25:02 +0300
Subject: block/bsg: move queue creation into bsg_setup_queue

Simply the boilerplate code needed for bsg nodes a bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bsg-lib.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 657a718c27d2..e34dde2da0ef 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -66,9 +66,8 @@ struct bsg_job {
 
 void bsg_job_done(struct bsg_job *job, int result,
 		  unsigned int reply_payload_rcv_len);
-int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
-		    bsg_job_fn *job_fn, int dd_job_size);
-void bsg_request_fn(struct request_queue *q);
+struct request_queue *bsg_setup_queue(struct device *dev, char *name,
+		bsg_job_fn *job_fn, int dd_job_size);
 void bsg_job_put(struct bsg_job *job);
 int __must_check bsg_job_get(struct bsg_job *job);
 
-- 
cgit v1.2.3


From 82ed4db499b8598f16f8871261bff088d6b0597f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Jan 2017 09:46:29 +0100
Subject: block: split scsi_request out of struct request

And require all drivers that want to support BLOCK_PC to allocate it
as the first thing of their private data.  To support this the legacy
IDE and BSG code is switched to set cmd_size on their queues to let
the block layer allocate the additional space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 13 -------------
 include/linux/ide.h    |  8 +++++++-
 2 files changed, 7 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 461b7cf6af1d..e4c5f284fe2d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -128,8 +128,6 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_NOMERGE_FLAGS \
 	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
 
-#define BLK_MAX_CDB	16
-
 /*
  * Try to put the fields that are referenced together in the same cacheline.
  *
@@ -227,17 +225,7 @@ struct request {
 
 	int errors;
 
-	/*
-	 * when request is used as a packet command carrier
-	 */
-	unsigned char __cmd[BLK_MAX_CDB];
-	unsigned char *cmd;
-	unsigned short cmd_len;
-
 	unsigned int extra_len;	/* length of alignment and padding */
-	unsigned int sense_len;
-	unsigned int resid_len;	/* residual count */
-	void *sense;
 
 	unsigned long deadline;
 	struct list_head timeout_list;
@@ -925,7 +913,6 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
-extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a633898f36ac..086fbe172817 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -20,6 +20,7 @@
 #include <linux/mutex.h>
 /* for request_sense */
 #include <linux/cdrom.h>
+#include <scsi/scsi_cmnd.h>
 #include <asm/byteorder.h>
 #include <asm/io.h>
 
@@ -52,6 +53,11 @@ enum ata_cmd_type_bits {
 	((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \
 	 (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME)
 
+struct ide_request {
+	struct scsi_request sreq;
+	u8 sense[SCSI_SENSE_BUFFERSIZE];
+};
+
 /* Error codes returned in rq->errors to the higher part of the driver. */
 enum {
 	IDE_DRV_ERROR_GENERAL	= 101,
@@ -579,7 +585,7 @@ struct ide_drive_s {
 
 	/* current sense rq and buffer */
 	bool sense_rq_armed;
-	struct request sense_rq;
+	struct request *sense_rq;
 	struct request_sense sense_data;
 };
 
-- 
cgit v1.2.3


From 15f2e88ddd4bc9b2c6b6236162993b5caa80abb9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 16 Dec 2016 14:46:09 -0500
Subject: radix tree: Add some implicit includes

We were using spinlock_t and INIT_LIST_HEAD without including spinlock.h
or list.h.  They were being implicitly included through some other header
file, but that's fragile.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/radix-tree.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 52bda854593b..13d8d741ca34 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -22,11 +22,13 @@
 #define _LINUX_RADIX_TREE_H
 
 #include <linux/bitops.h>
-#include <linux/preempt.h>
-#include <linux/types.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/preempt.h>
 #include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
 
 /*
  * The bottom two bits of the slot determine how the remaining bits in the
-- 
cgit v1.2.3


From 35534c869c62f59203c1822769bbef14e894a9e9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 19 Dec 2016 17:43:19 -0500
Subject: radix tree: constify some pointers

If we're just getting the value of a tag, or looking up an entry,
we won't modify the radix tree, so we can declare these functions as
taking a const pointer.  Mostly for documentation purposes, though it
might help code generation.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/radix-tree.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 13d8d741ca34..32683c7c2e0d 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -125,7 +125,7 @@ do {									\
 	(root)->rnode = NULL;						\
 } while (0)
 
-static inline bool radix_tree_empty(struct radix_tree_root *root)
+static inline bool radix_tree_empty(const struct radix_tree_root *root)
 {
 	return root->rnode == NULL;
 }
@@ -294,10 +294,10 @@ static inline int radix_tree_insert(struct radix_tree_root *root,
 {
 	return __radix_tree_insert(root, index, 0, entry);
 }
-void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
+void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index,
 			  struct radix_tree_node **nodep, void ***slotp);
-void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
-void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+void *radix_tree_lookup(const struct radix_tree_root *, unsigned long);
+void **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long);
 typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *);
 void __radix_tree_replace(struct radix_tree_root *root,
 			  struct radix_tree_node *node,
@@ -316,10 +316,10 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
 void radix_tree_clear_tags(struct radix_tree_root *root,
 			   struct radix_tree_node *node,
 			   void **slot);
-unsigned int radix_tree_gang_lookup(struct radix_tree_root *root,
+unsigned int radix_tree_gang_lookup(const struct radix_tree_root *,
 			void **results, unsigned long first_index,
 			unsigned int max_items);
-unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
+unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *,
 			void ***results, unsigned long *indices,
 			unsigned long first_index, unsigned int max_items);
 int radix_tree_preload(gfp_t gfp_mask);
@@ -330,19 +330,19 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
 void *radix_tree_tag_clear(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
-int radix_tree_tag_get(struct radix_tree_root *root,
+int radix_tree_tag_get(const struct radix_tree_root *,
 			unsigned long index, unsigned int tag);
 void radix_tree_iter_tag_set(struct radix_tree_root *root,
 		const struct radix_tree_iter *iter, unsigned int tag);
 unsigned int
-radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results,
 		unsigned long first_index, unsigned int max_items,
 		unsigned int tag);
 unsigned int
-radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
+radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, void ***results,
 		unsigned long first_index, unsigned int max_items,
 		unsigned int tag);
-int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag);
 
 static inline void radix_tree_preload_end(void)
 {
@@ -395,7 +395,7 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
  * Also it fills @iter with data about chunk: position in the tree (index),
  * its end (next_index), and constructs a bit mask for tagged iterating (tags).
  */
-void **radix_tree_next_chunk(struct radix_tree_root *root,
+void **radix_tree_next_chunk(const struct radix_tree_root *,
 			     struct radix_tree_iter *iter, unsigned flags);
 
 /**
-- 
cgit v1.2.3


From 966d2b04e070bc040319aaebfec09e0144dc3341 Mon Sep 17 00:00:00 2001
From: Douglas Miller <dougmill@linux.vnet.ibm.com>
Date: Sat, 28 Jan 2017 06:42:20 -0600
Subject: percpu-refcount: fix reference leak during percpu-atomic transition

percpu_ref_tryget() and percpu_ref_tryget_live() should return
"true" IFF they acquire a reference. But the return value from
atomic_long_inc_not_zero() is a long and may have high bits set,
e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines
is bool so the reference may actually be acquired but the routines
return "false" which results in a reference leak since the caller
assumes it does not need to do a corresponding percpu_ref_put().

This was seen when performing CPU hotplug during I/O, as hangs in
blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start)
raced with percpu_ref_tryget (blk_mq_timeout_work).
Sample stack trace:

__switch_to+0x2c0/0x450
__schedule+0x2f8/0x970
schedule+0x48/0xc0
blk_mq_freeze_queue_wait+0x94/0x120
blk_mq_queue_reinit_work+0xb8/0x180
blk_mq_queue_reinit_prepare+0x84/0xa0
cpuhp_invoke_callback+0x17c/0x600
cpuhp_up_callbacks+0x58/0x150
_cpu_up+0xf0/0x1c0
do_cpu_up+0x120/0x150
cpu_subsys_online+0x64/0xe0
device_online+0xb4/0x120
online_store+0xb4/0xc0
dev_attr_store+0x68/0xa0
sysfs_kf_write+0x80/0xb0
kernfs_fop_write+0x17c/0x250
__vfs_write+0x6c/0x1e0
vfs_write+0xd0/0x270
SyS_write+0x6c/0x110
system_call+0x38/0xe0

Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS,
and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests.
However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0
and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set.

The fix is to make the tryget routines use an actual boolean internally instead
of the atomic long result truncated to a int.

Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints
Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751
Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints")
Cc: stable@vger.kernel.org # v3.18+
---
 include/linux/percpu-refcount.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 1c7eec09e5eb..3a481a49546e 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -204,7 +204,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
 static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 {
 	unsigned long __percpu *percpu_count;
-	int ret;
+	bool ret;
 
 	rcu_read_lock_sched();
 
@@ -238,7 +238,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
 	unsigned long __percpu *percpu_count;
-	int ret = false;
+	bool ret = false;
 
 	rcu_read_lock_sched();
 
-- 
cgit v1.2.3


From d732248fdb5c5434f2ab0c258ce25a7e2ff2521a Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Tue, 24 Jan 2017 14:41:41 +0100
Subject: iio: cros_ec: Add cros_ec barometer driver

Handle the barometer sensor presented by the ChromeOS EC Sensor hub.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Enric Balletbo Serra <enric.balletbo@collabora.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/mfd/cros_ec_commands.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 1683003603f3..098c3501ad2c 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -1441,7 +1441,8 @@ enum motionsensor_type {
 	MOTIONSENSE_TYPE_PROX = 3,
 	MOTIONSENSE_TYPE_LIGHT = 4,
 	MOTIONSENSE_TYPE_ACTIVITY = 5,
-	MOTIONSENSE_TYPE_MAX
+	MOTIONSENSE_TYPE_BARO = 6,
+	MOTIONSENSE_TYPE_MAX,
 };
 
 /* List of motion sensor locations. */
-- 
cgit v1.2.3


From cefae80249f6bf3fdbc7c3a00bc43deb4c1bccf0 Mon Sep 17 00:00:00 2001
From: Luis Oliveira <Luis.Oliveira@synopsys.com>
Date: Thu, 26 Jan 2017 17:45:32 +0000
Subject: i2c: core: helper function to detect slave mode

This function has the purpose of mode detection by checking the
device nodes for a reg matching with the I2C_OWN_SLAVE_ADDREESS flag.
Currently only checks using OF functions (ACPI slave not supported yet).

Signed-off-by: Luis Oliveira <lolivei@synopsys.com>
Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 4b45ec46161f..f0ba4bac7452 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -282,6 +282,7 @@ enum i2c_slave_event {
 
 extern int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb);
 extern int i2c_slave_unregister(struct i2c_client *client);
+extern bool i2c_detect_slave_mode(struct device *dev);
 
 static inline int i2c_slave_event(struct i2c_client *client,
 				  enum i2c_slave_event event, u8 *val)
-- 
cgit v1.2.3


From 0a595ebaaa6b53a2226d3fee2a2fd616ea5ba378 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 14 Dec 2016 10:12:56 -0800
Subject: f2fs: support IO alignment for DATA and NODE writes

This patch implements IO alignment by filling dummy blocks in DATA and NODE
write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs,
we can eliminate underlying dummy page problem which FTL conducts in order to
close MLC or TLC partial written pages.

Note that,
 - it requires "-o mode=lfs".
 - IO size should be power of 2, not exceed BIO_MAX_PAGES, 256.
 - read IO is still 4KB.
 - do checkpoint at fsync, if dummy NODE page was written.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index cea41a124a80..f0748524ca8c 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -36,6 +36,12 @@
 #define F2FS_NODE_INO(sbi)	(sbi->node_ino_num)
 #define F2FS_META_INO(sbi)	(sbi->meta_ino_num)
 
+#define F2FS_IO_SIZE(sbi)	(1 << (sbi)->write_io_size_bits) /* Blocks */
+#define F2FS_IO_SIZE_KB(sbi)	(1 << ((sbi)->write_io_size_bits + 2)) /* KB */
+#define F2FS_IO_SIZE_BYTES(sbi)	(1 << ((sbi)->write_io_size_bits + 12)) /* B */
+#define F2FS_IO_SIZE_BITS(sbi)	((sbi)->write_io_size_bits) /* power of 2 */
+#define F2FS_IO_SIZE_MASK(sbi)	(F2FS_IO_SIZE(sbi) - 1)
+
 /* This flag is used by node and meta inodes, and by recovery */
 #define GFP_F2FS_ZERO		(GFP_NOFS | __GFP_ZERO)
 #define GFP_F2FS_HIGH_ZERO	(GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM)
-- 
cgit v1.2.3


From 0cb8eb30d425d2d2ae28ab630596c44a158784c4 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 29 Jan 2017 14:42:52 +0100
Subject: leds: class: Add new optional brightness_hw_changed attribute

Some LEDs may have their brightness level changed autonomously
(outside of kernel control) by hardware / firmware. This commit
adds support for an optional brightness_hw_changed attribute to
signal such changes to userspace (if a driver can detect them):

What:		/sys/class/leds/<led>/brightness_hw_changed
Date:		January 2017
KernelVersion:	4.11
Description:
		Last hardware set brightness level for this LED. Some LEDs
		may be changed autonomously by hardware/firmware. Only LEDs
		where this happens and the driver can detect this, will
		have this file.

		This file supports poll() to detect when the hardware
		changes the brightness.

		Reading this file will return the last brightness level set
		by the hardware, this may be different from the current
		brightness.

Drivers which want to support this, simply add LED_BRIGHT_HW_CHANGED to
their flags field and call led_classdev_notify_brightness_hw_changed()
with the hardware set brightness when they detect a hardware / firmware
triggered brightness change.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 include/linux/leds.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index bb50d0151e75..38c0bd7ca107 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -13,6 +13,7 @@
 #define __LINUX_LEDS_H_INCLUDED
 
 #include <linux/device.h>
+#include <linux/kernfs.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
@@ -47,6 +48,7 @@ struct led_classdev {
 #define LED_DEV_CAP_FLASH	(1 << 18)
 #define LED_HW_PLUGGABLE	(1 << 19)
 #define LED_PANIC_INDICATOR	(1 << 20)
+#define LED_BRIGHT_HW_CHANGED	(1 << 21)
 
 	/* set_brightness_work / blink_timer flags, atomic, private. */
 	unsigned long		work_flags;
@@ -111,6 +113,11 @@ struct led_classdev {
 	bool			activated;
 #endif
 
+#ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED
+	int			 brightness_hw_changed;
+	struct kernfs_node	*brightness_hw_changed_kn;
+#endif
+
 	/* Ensures consistent access to the LED Flash Class device */
 	struct mutex		led_access;
 };
@@ -423,4 +430,12 @@ static inline void ledtrig_cpu(enum cpu_led_event evt)
 }
 #endif
 
+#ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED
+extern void led_classdev_notify_brightness_hw_changed(
+	struct led_classdev *led_cdev, enum led_brightness brightness);
+#else
+static inline void led_classdev_notify_brightness_hw_changed(
+	struct led_classdev *led_cdev, enum led_brightness brightness) { }
+#endif
+
 #endif		/* __LINUX_LEDS_H_INCLUDED */
-- 
cgit v1.2.3


From f1712c73714088a7252d276a57126d56c7d37e64 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 27 Jan 2017 08:11:44 -0800
Subject: can: Fix kernel panic at security_sock_rcv_skb

Zhang Yanmin reported crashes [1] and provided a patch adding a
synchronize_rcu() call in can_rx_unregister()

The main problem seems that the sockets themselves are not RCU
protected.

If CAN uses RCU for delivery, then sockets should be freed only after
one RCU grace period.

Recent kernels could use sock_set_flag(sk, SOCK_RCU_FREE), but let's
ease stable backports with the following fix instead.

[1]
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffff81495e25>] selinux_socket_sock_rcv_skb+0x65/0x2a0

Call Trace:
 <IRQ>
 [<ffffffff81485d8c>] security_sock_rcv_skb+0x4c/0x60
 [<ffffffff81d55771>] sk_filter+0x41/0x210
 [<ffffffff81d12913>] sock_queue_rcv_skb+0x53/0x3a0
 [<ffffffff81f0a2b3>] raw_rcv+0x2a3/0x3c0
 [<ffffffff81f06eab>] can_rcv_filter+0x12b/0x370
 [<ffffffff81f07af9>] can_receive+0xd9/0x120
 [<ffffffff81f07beb>] can_rcv+0xab/0x100
 [<ffffffff81d362ac>] __netif_receive_skb_core+0xd8c/0x11f0
 [<ffffffff81d36734>] __netif_receive_skb+0x24/0xb0
 [<ffffffff81d37f67>] process_backlog+0x127/0x280
 [<ffffffff81d36f7b>] net_rx_action+0x33b/0x4f0
 [<ffffffff810c88d4>] __do_softirq+0x184/0x440
 [<ffffffff81f9e86c>] do_softirq_own_stack+0x1c/0x30
 <EOI>
 [<ffffffff810c76fb>] do_softirq.part.18+0x3b/0x40
 [<ffffffff810c8bed>] do_softirq+0x1d/0x20
 [<ffffffff81d30085>] netif_rx_ni+0xe5/0x110
 [<ffffffff8199cc87>] slcan_receive_buf+0x507/0x520
 [<ffffffff8167ef7c>] flush_to_ldisc+0x21c/0x230
 [<ffffffff810e3baf>] process_one_work+0x24f/0x670
 [<ffffffff810e44ed>] worker_thread+0x9d/0x6f0
 [<ffffffff810e4450>] ? rescuer_thread+0x480/0x480
 [<ffffffff810ebafc>] kthread+0x12c/0x150
 [<ffffffff81f9ccef>] ret_from_fork+0x3f/0x70

Reported-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/can/core.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index a0875001b13c..df08a41d5be5 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -45,10 +45,9 @@ struct can_proto {
 extern int  can_proto_register(const struct can_proto *cp);
 extern void can_proto_unregister(const struct can_proto *cp);
 
-extern int  can_rx_register(struct net_device *dev, canid_t can_id,
-			    canid_t mask,
-			    void (*func)(struct sk_buff *, void *),
-			    void *data, char *ident);
+int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
+		    void (*func)(struct sk_buff *, void *),
+		    void *data, char *ident, struct sock *sk);
 
 extern void can_rx_unregister(struct net_device *dev, canid_t can_id,
 			      canid_t mask,
-- 
cgit v1.2.3


From 40be0dda0725886b623d67868db3219a2e74683b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Sat, 28 Jan 2017 15:15:42 +0100
Subject: net: add devm version of alloc_etherdev_mqs function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds devm_alloc_etherdev_mqs function and devm_alloc_etherdev
macro. These can be used for simpler netdev allocation without having to
care about calling free_netdev.

Thanks to this change drivers, their error paths and removal paths may
get simpler by a bit.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 42add77ae47d..c62b709b1ce0 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -54,6 +54,11 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
 
+struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
+					   unsigned int txqs,
+					   unsigned int rxqs);
+#define devm_alloc_etherdev(dev, sizeof_priv) devm_alloc_etherdev_mqs(dev, sizeof_priv, 1, 1)
+
 struct sk_buff **eth_gro_receive(struct sk_buff **head,
 				 struct sk_buff *skb);
 int eth_gro_complete(struct sk_buff *skb, int nhoff);
-- 
cgit v1.2.3


From f067a982cefa8df5642212bb0c7e25974831f781 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 23 Jan 2017 10:11:42 +0530
Subject: PM / OPP: Add 'struct kref' to OPP table

Add kref to struct opp_table for easier accounting of the OPP table.

Note that the new routine dev_pm_opp_get_opp_table() takes the reference
from under the opp_table_lock, which guarantees that the OPP table
doesn't get freed unless dev_pm_opp_put_opp_table() is called for the
OPP table.

Two separate release mechanisms are added: locked and unlocked. In
unlocked version the routines aren't required to take/drop
opp_table_lock as the callers have already done that. This is required
to avoid breaking git bisect, otherwise we may get lockdeps between
commits. Once all the users of OPP table are updated the unlocked
version shall be removed.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_opp.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 66a02deeb03f..d867c6b25f9a 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -78,6 +78,9 @@ struct dev_pm_set_opp_data {
 
 #if defined(CONFIG_PM_OPP)
 
+struct opp_table *dev_pm_opp_get_opp_table(struct device *dev);
+void dev_pm_opp_put_opp_table(struct opp_table *opp_table);
+
 unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
 
 unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp);
@@ -126,6 +129,13 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 void dev_pm_opp_remove_table(struct device *dev);
 void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask);
 #else
+static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
+static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {}
+
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
 	return 0;
-- 
cgit v1.2.3


From fa30184d192ec78d443cf6d3abc37d9eb3b9253e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 23 Jan 2017 10:11:43 +0530
Subject: PM / OPP: Return opp_table from dev_pm_opp_set_*() routines

Now that we have proper kernel reference infrastructure in place for OPP
tables, use it to guarantee that the OPP table isn't freed while being
used by the callers of dev_pm_opp_set_*() APIs.

Make them all return the pointer to the OPP table after taking its
reference and put the reference back with dev_pm_opp_put_*() APIs.

Now that the OPP table wouldn't get freed while these routines are
executing after dev_pm_opp_get_opp_table() is called, there is no need
to take opp_table_lock. Drop them as well.

Remove the rcu specific comments from these routines as they aren't
relevant anymore.

Note that prototypes of dev_pm_opp_{set|put}_regulators() were already
updated by another patch.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_opp.h | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index d867c6b25f9a..99787cbcaab2 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -114,15 +114,14 @@ int dev_pm_opp_disable(struct device *dev, unsigned long freq);
 int dev_pm_opp_register_notifier(struct device *dev, struct notifier_block *nb);
 int dev_pm_opp_unregister_notifier(struct device *dev, struct notifier_block *nb);
 
-int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
-				unsigned int count);
-void dev_pm_opp_put_supported_hw(struct device *dev);
-int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
-void dev_pm_opp_put_prop_name(struct device *dev);
+struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count);
+void dev_pm_opp_put_supported_hw(struct opp_table *opp_table);
+struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name);
+void dev_pm_opp_put_prop_name(struct opp_table *opp_table);
 struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count);
 void dev_pm_opp_put_regulators(struct opp_table *opp_table);
-int dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
-void dev_pm_opp_register_put_opp_helper(struct device *dev);
+struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
+void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
@@ -224,29 +223,29 @@ static inline int dev_pm_opp_unregister_notifier(struct device *dev, struct noti
 	return -ENOTSUPP;
 }
 
-static inline int dev_pm_opp_set_supported_hw(struct device *dev,
-					      const u32 *versions,
-					      unsigned int count)
+static inline struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev,
+							    const u32 *versions,
+							    unsigned int count)
 {
-	return -ENOTSUPP;
+	return ERR_PTR(-ENOTSUPP);
 }
 
-static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
+static inline void dev_pm_opp_put_supported_hw(struct opp_table *opp_table) {}
 
-static inline int dev_pm_opp_register_set_opp_helper(struct device *dev,
+static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev,
 			int (*set_opp)(struct dev_pm_set_opp_data *data))
 {
-	return -ENOTSUPP;
+	return ERR_PTR(-ENOTSUPP);
 }
 
-static inline void dev_pm_opp_register_put_opp_helper(struct device *dev) {}
+static inline void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table) {}
 
-static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
+static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 {
-	return -ENOTSUPP;
+	return ERR_PTR(-ENOTSUPP);
 }
 
-static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
+static inline void dev_pm_opp_put_prop_name(struct opp_table *opp_table) {}
 
 static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count)
 {
-- 
cgit v1.2.3


From 7034764a1e4a6edbb60914e89aad8384e3fe5d17 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 23 Jan 2017 10:11:46 +0530
Subject: PM / OPP: Add 'struct kref' to struct dev_pm_opp

Add kref to struct dev_pm_opp for easier accounting of the OPPs.

Note that the OPPs are freed under the opp_table->lock mutex only.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_opp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 99787cbcaab2..731d548657aa 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -102,6 +102,7 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 
 struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 					     unsigned long *freq);
+void dev_pm_opp_put(struct dev_pm_opp *opp);
 
 int dev_pm_opp_add(struct device *dev, unsigned long freq,
 		   unsigned long u_volt);
@@ -193,6 +194,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 	return ERR_PTR(-ENOTSUPP);
 }
 
+static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {}
+
 static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
 					unsigned long u_volt)
 {
-- 
cgit v1.2.3


From b526a314263ea217b8fa9758dca5dc245fd49997 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 22 Jan 2017 17:14:08 +0100
Subject: pwm: Try to load modules during pwm_get()

Add a module name string to the pwm_lookup struct and if specified try
to load the module using request_module() if pwmchip_find_by_name() is
unable to find the PWM chip.

This is a last resort to work around drivers that can't - and can't be
made to - deal with deferred probe.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
[thierry.reding@gmail.com: rename new macro, reword commit message]
[thierry.reding@gmail.com: add comment explaining use-case]
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 include/linux/pwm.h | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index eae215ef1b2c..08fad7c6a471 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -603,18 +603,25 @@ struct pwm_lookup {
 	const char *con_id;
 	unsigned int period;
 	enum pwm_polarity polarity;
+	const char *module; /* optional, may be NULL */
 };
 
-#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \
-	{						\
-		.provider = _provider,			\
-		.index = _index,			\
-		.dev_id = _dev_id,			\
-		.con_id = _con_id,			\
-		.period = _period,			\
-		.polarity = _polarity			\
+#define PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id,	\
+			       _period, _polarity, _module)		\
+	{								\
+		.provider = _provider,					\
+		.index = _index,					\
+		.dev_id = _dev_id,					\
+		.con_id = _con_id,					\
+		.period = _period,					\
+		.polarity = _polarity,					\
+		.module = _module,					\
 	}
 
+#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \
+	PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id, _period, \
+			       _polarity, NULL)
+
 #if IS_ENABLED(CONFIG_PWM)
 void pwm_add_table(struct pwm_lookup *table, size_t num);
 void pwm_remove_table(struct pwm_lookup *table, size_t num);
-- 
cgit v1.2.3


From 61babe943938bb41fcd7e2b1dec7d1537ea3892f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 21 Nov 2016 18:32:08 -0800
Subject: mtd: nand: fix nand.h kernel-doc warnings

Fix kernel-doc warnings in <linux/mtd/nand.h>:

..//include/linux/mtd/nand.h:658: warning: No description found for parameter 'tCEH_min'
..//include/linux/mtd/nand.h:877: warning: No description found for parameter 'data_interface'

Fixes: eee64b700e26 ("mtd: nand: Introduce nand_data_interface")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 include/linux/mtd/nand.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index c5f3a012ae62..f67915c7726f 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -615,7 +615,7 @@ struct nand_buffers {
  * @tALS_min: ALE setup time
  * @tAR_min: ALE to RE# delay
  * @tCEA_max: CE# access time
- * @tCEH_min:
+ * @tCEH_min: CE# high hold time
  * @tCH_min:  CE# hold time
  * @tCHZ_max: CE# high to output hi-Z
  * @tCLH_min: CLE hold time
@@ -801,6 +801,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  *			supported, 0 otherwise.
  * @jedec_params:	[INTERN] holds the JEDEC parameter page when JEDEC is
  *			supported, 0 otherwise.
+ * @data_interface:	[INTERN] NAND interface timing information
  * @read_retries:	[INTERN] the number of read retry modes supported
  * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
  * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
-- 
cgit v1.2.3


From 4404d7d821c33ac8105f1d52deb60f736d7c6a06 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 18 Dec 2016 12:34:55 +0100
Subject: mtd: nand: fsmc: remove stale non-DT probe path

The FSMC driver has an execution path and a header file in
<linux/mtd/fsmc.h> that serves to support passing in platform
data through board files, albeit no upstream users of this
mechanism exist.

The header file also contains function headers for functions that
do not exist in the kernel.

Delete this and move the platform data struct, parsing and
handling into the driver, assume we are using OF and make the
driver depend on OF, remove the ifdefs making that optional.

Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Stefan Roese <sr@denx.de>
Cc: Vipin Kumar <vipin.kumar@st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Stefan Roese <sr@denx.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 include/linux/mtd/fsmc.h | 156 -----------------------------------------------
 1 file changed, 156 deletions(-)
 delete mode 100644 include/linux/mtd/fsmc.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
deleted file mode 100644
index ad3c3488073c..000000000000
--- a/include/linux/mtd/fsmc.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * incude/mtd/fsmc.h
- *
- * ST Microelectronics
- * Flexible Static Memory Controller (FSMC)
- * platform data interface and header file
- *
- * Copyright © 2010 ST Microelectronics
- * Vipin Kumar <vipin.kumar@st.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef __MTD_FSMC_H
-#define __MTD_FSMC_H
-
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/mtd/physmap.h>
-#include <linux/types.h>
-#include <linux/mtd/partitions.h>
-#include <asm/param.h>
-
-#define FSMC_NAND_BW8		1
-#define FSMC_NAND_BW16		2
-
-#define FSMC_MAX_NOR_BANKS	4
-#define FSMC_MAX_NAND_BANKS	4
-
-#define FSMC_FLASH_WIDTH8	1
-#define FSMC_FLASH_WIDTH16	2
-
-/* fsmc controller registers for NOR flash */
-#define CTRL			0x0
-	/* ctrl register definitions */
-	#define BANK_ENABLE		(1 << 0)
-	#define MUXED			(1 << 1)
-	#define NOR_DEV			(2 << 2)
-	#define WIDTH_8			(0 << 4)
-	#define WIDTH_16		(1 << 4)
-	#define RSTPWRDWN		(1 << 6)
-	#define WPROT			(1 << 7)
-	#define WRT_ENABLE		(1 << 12)
-	#define WAIT_ENB		(1 << 13)
-
-#define CTRL_TIM		0x4
-	/* ctrl_tim register definitions */
-
-#define FSMC_NOR_BANK_SZ	0x8
-#define FSMC_NOR_REG_SIZE	0x40
-
-#define FSMC_NOR_REG(base, bank, reg)		(base + \
-						FSMC_NOR_BANK_SZ * (bank) + \
-						reg)
-
-/* fsmc controller registers for NAND flash */
-#define PC			0x00
-	/* pc register definitions */
-	#define FSMC_RESET		(1 << 0)
-	#define FSMC_WAITON		(1 << 1)
-	#define FSMC_ENABLE		(1 << 2)
-	#define FSMC_DEVTYPE_NAND	(1 << 3)
-	#define FSMC_DEVWID_8		(0 << 4)
-	#define FSMC_DEVWID_16		(1 << 4)
-	#define FSMC_ECCEN		(1 << 6)
-	#define FSMC_ECCPLEN_512	(0 << 7)
-	#define FSMC_ECCPLEN_256	(1 << 7)
-	#define FSMC_TCLR_1		(1)
-	#define FSMC_TCLR_SHIFT		(9)
-	#define FSMC_TCLR_MASK		(0xF)
-	#define FSMC_TAR_1		(1)
-	#define FSMC_TAR_SHIFT		(13)
-	#define FSMC_TAR_MASK		(0xF)
-#define STS			0x04
-	/* sts register definitions */
-	#define FSMC_CODE_RDY		(1 << 15)
-#define COMM			0x08
-	/* comm register definitions */
-	#define FSMC_TSET_0		0
-	#define FSMC_TSET_SHIFT		0
-	#define FSMC_TSET_MASK		0xFF
-	#define FSMC_TWAIT_6		6
-	#define FSMC_TWAIT_SHIFT	8
-	#define FSMC_TWAIT_MASK		0xFF
-	#define FSMC_THOLD_4		4
-	#define FSMC_THOLD_SHIFT	16
-	#define FSMC_THOLD_MASK		0xFF
-	#define FSMC_THIZ_1		1
-	#define FSMC_THIZ_SHIFT		24
-	#define FSMC_THIZ_MASK		0xFF
-#define ATTRIB			0x0C
-#define IOATA			0x10
-#define ECC1			0x14
-#define ECC2			0x18
-#define ECC3			0x1C
-#define FSMC_NAND_BANK_SZ	0x20
-
-#define FSMC_NAND_REG(base, bank, reg)		(base + FSMC_NOR_REG_SIZE + \
-						(FSMC_NAND_BANK_SZ * (bank)) + \
-						reg)
-
-#define FSMC_BUSY_WAIT_TIMEOUT	(1 * HZ)
-
-struct fsmc_nand_timings {
-	uint8_t tclr;
-	uint8_t tar;
-	uint8_t thiz;
-	uint8_t thold;
-	uint8_t twait;
-	uint8_t tset;
-};
-
-enum access_mode {
-	USE_DMA_ACCESS = 1,
-	USE_WORD_ACCESS,
-};
-
-/**
- * fsmc_nand_platform_data - platform specific NAND controller config
- * @nand_timings: timing setup for the physical NAND interface
- * @partitions: partition table for the platform, use a default fallback
- * if this is NULL
- * @nr_partitions: the number of partitions in the previous entry
- * @options: different options for the driver
- * @width: bus width
- * @bank: default bank
- * @select_bank: callback to select a certain bank, this is
- * platform-specific. If the controller only supports one bank
- * this may be set to NULL
- */
-struct fsmc_nand_platform_data {
-	struct fsmc_nand_timings *nand_timings;
-	struct mtd_partition	*partitions;
-	unsigned int		nr_partitions;
-	unsigned int		options;
-	unsigned int		width;
-	unsigned int		bank;
-
-	enum access_mode	mode;
-
-	void			(*select_bank)(uint32_t bank, uint32_t busw);
-
-	/* priv structures for dma accesses */
-	void			*read_dma_priv;
-	void			*write_dma_priv;
-};
-
-extern int __init fsmc_nor_init(struct platform_device *pdev,
-		unsigned long base, uint32_t bank, uint32_t width);
-extern void __init fsmc_init_board_info(struct platform_device *pdev,
-		struct mtd_partition *partitions, unsigned int nr_partitions,
-		unsigned int width);
-
-#endif /* __MTD_FSMC_H */
-- 
cgit v1.2.3


From 058fe1c0440e68a1ba3c2270ae43e9f0298b27d8 Mon Sep 17 00:00:00 2001
From: David Carrillo-Cisneros <davidcc@google.com>
Date: Wed, 18 Jan 2017 11:24:53 -0800
Subject: perf/core: Make cgroup switch visit only cpuctxs with cgroup events

This patch follows from a conversation in CQM/CMT's last series about
speeding up the context switch for cgroup events:

  https://patchwork.kernel.org/patch/9478617/

This is a low-hanging fruit optimization. It replaces the iteration over
the "pmus" list in cgroup switch by an iteration over a new list that
contains only cpuctxs with at least one cgroup event.

This is necessary because the number of PMUs have increased over the years
e.g modern x86 server systems have well above 50 PMUs.

The iteration over the full PMU list is unneccessary and can be costly in
heavy cache contention scenarios.

Below are some instrumentation measurements with 10, 50 and 90 percentiles
of the total cost of context switch before and after this optimization for
a simple array read/write microbenchark.

  Contention
    Level    Nr events      Before (us)            After (us)       Median
  L2    L3     types      (10%, 50%, 90%)       (10%, 50%, 90%     Speedup
  --------------------------------------------------------------------------
  Low   Low       1       (1.72, 2.42, 5.85)    (1.35, 1.64, 5.46)     29%
  High  Low       1       (2.08, 4.56, 19.8)    (1720, 2.20, 13.7)     51%
  High  High      1       (2.86, 10.4, 12.7)    (2.54, 4.32, 12.1)     58%

  Low   Low       2       (1.98, 3.20, 6.89)    (1.68, 2.41, 8.89)     24%
  High  Low       2       (2.48, 5.28, 22.4)    (2150, 3.69, 14.6)     30%
  High  High      2       (3.32, 8.09, 13.9)    (2.80, 5.15, 13.7)     36%

where:

  1 event type  = cycles
  2 event types = cycles,intel_cqm/llc_occupancy/

   Contention L2 Low: workset  <  L2 cache size.
                 High:  "     >>  L2   "     " .
   Contention L3 Low: workset of task on all sockets  <  L3 cache size.
                 High:   "     "   "   "   "    "    >>  L3   "     " .

   Median Speedup is (50%ile Before - 50%ile After) /  50%ile Before

Unsurprisingly, the benefits of this optimization decrease with the number
of cpuctxs with a cgroup events, yet, is never detrimental.

Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20170118192454.58008-2-davidcc@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 78ed8105e64d..dfa725723f28 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -788,6 +788,7 @@ struct perf_cpu_context {
 	struct pmu			*unique_pmu;
 #ifdef CONFIG_CGROUP_PERF
 	struct perf_cgroup		*cgrp;
+	struct list_head		cgrp_cpuctx_entry;
 #endif
 
 	struct list_head		sched_cb_entry;
-- 
cgit v1.2.3


From 1fd7e416995401ec082fc0fe6090a223969beda5 Mon Sep 17 00:00:00 2001
From: David Carrillo-Cisneros <davidcc@google.com>
Date: Wed, 18 Jan 2017 11:24:54 -0800
Subject: perf/core: Remove perf_cpu_context::unique_pmu

cpuctx->unique_pmu was originally introduced as a way to identify cpuctxs
with shared pmus in order to avoid visiting the same cpuctx more than once
in a for_each_pmu loop.

cpuctx->unique_pmu == cpuctx->pmu in non-software task contexts since they
have only one pmu per cpuctx. Since perf_pmu_sched_task() is only called in
hw contexts, this patch replaces cpuctx->unique_pmu by cpuctx->pmu in it.

The change above, together with the previous patch in this series, removed
the remaining uses of cpuctx->unique_pmu, so we remove it altogether.

Signed-off-by: David Carrillo-Cisneros <davidcc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Vince Weaver <vince@deater.net>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20170118192454.58008-3-davidcc@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index dfa725723f28..5c58e93c130c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -785,7 +785,6 @@ struct perf_cpu_context {
 	ktime_t				hrtimer_interval;
 	unsigned int			hrtimer_active;
 
-	struct pmu			*unique_pmu;
 #ifdef CONFIG_CGROUP_PERF
 	struct perf_cgroup		*cgrp;
 	struct list_head		cgrp_cpuctx_entry;
-- 
cgit v1.2.3


From 5c34153704898ed7ec0f8c0dceb651cbe4b713fd Mon Sep 17 00:00:00 2001
From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Date: Thu, 26 Jan 2017 19:50:49 +0530
Subject: irqchip/gic-v3: Add missing system register definitions

Define register definitions for ICH_VMCR_EL2, ICC_CTLR_EL1 and
ICH_VTR_EL2, ICC_BPR0_EL1, ICC_BPR1_EL1 registers.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 43 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index e808f8ae6f14..7f6d904a62c6 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -352,8 +352,30 @@
 /*
  * CPU interface registers
  */
-#define ICC_CTLR_EL1_EOImode_drop_dir	(0U << 1)
-#define ICC_CTLR_EL1_EOImode_drop	(1U << 1)
+#define ICC_CTLR_EL1_EOImode_SHIFT	(1)
+#define ICC_CTLR_EL1_EOImode_drop_dir	(0U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_drop	(1U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_MASK	(1 << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_CBPR_SHIFT		0
+#define ICC_CTLR_EL1_CBPR_MASK		(1 << ICC_CTLR_EL1_CBPR_SHIFT)
+#define ICC_CTLR_EL1_PRI_BITS_SHIFT	8
+#define ICC_CTLR_EL1_PRI_BITS_MASK	(0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
+#define ICC_CTLR_EL1_ID_BITS_SHIFT	11
+#define ICC_CTLR_EL1_ID_BITS_MASK	(0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
+#define ICC_CTLR_EL1_SEIS_SHIFT		14
+#define ICC_CTLR_EL1_SEIS_MASK		(0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
+#define ICC_CTLR_EL1_A3V_SHIFT		15
+#define ICC_CTLR_EL1_A3V_MASK		(0x1 << ICC_CTLR_EL1_A3V_SHIFT)
+#define ICC_PMR_EL1_SHIFT		0
+#define ICC_PMR_EL1_MASK		(0xff << ICC_PMR_EL1_SHIFT)
+#define ICC_BPR0_EL1_SHIFT		0
+#define ICC_BPR0_EL1_MASK		(0x7 << ICC_BPR0_EL1_SHIFT)
+#define ICC_BPR1_EL1_SHIFT		0
+#define ICC_BPR1_EL1_MASK		(0x7 << ICC_BPR1_EL1_SHIFT)
+#define ICC_IGRPEN0_EL1_SHIFT		0
+#define ICC_IGRPEN0_EL1_MASK		(1 << ICC_IGRPEN0_EL1_SHIFT)
+#define ICC_IGRPEN1_EL1_SHIFT		0
+#define ICC_IGRPEN1_EL1_MASK		(1 << ICC_IGRPEN1_EL1_SHIFT)
 #define ICC_SRE_EL1_SRE			(1U << 0)
 
 /*
@@ -384,12 +406,29 @@
 
 #define ICH_VMCR_CTLR_SHIFT		0
 #define ICH_VMCR_CTLR_MASK		(0x21f << ICH_VMCR_CTLR_SHIFT)
+#define ICH_VMCR_CBPR_SHIFT		4
+#define ICH_VMCR_CBPR_MASK		(1 << ICH_VMCR_CBPR_SHIFT)
+#define ICH_VMCR_EOIM_SHIFT		9
+#define ICH_VMCR_EOIM_MASK		(1 << ICH_VMCR_EOIM_SHIFT)
 #define ICH_VMCR_BPR1_SHIFT		18
 #define ICH_VMCR_BPR1_MASK		(7 << ICH_VMCR_BPR1_SHIFT)
 #define ICH_VMCR_BPR0_SHIFT		21
 #define ICH_VMCR_BPR0_MASK		(7 << ICH_VMCR_BPR0_SHIFT)
 #define ICH_VMCR_PMR_SHIFT		24
 #define ICH_VMCR_PMR_MASK		(0xffUL << ICH_VMCR_PMR_SHIFT)
+#define ICH_VMCR_ENG0_SHIFT		0
+#define ICH_VMCR_ENG0_MASK		(1 << ICH_VMCR_ENG0_SHIFT)
+#define ICH_VMCR_ENG1_SHIFT		1
+#define ICH_VMCR_ENG1_MASK		(1 << ICH_VMCR_ENG1_SHIFT)
+
+#define ICH_VTR_PRI_BITS_SHIFT		29
+#define ICH_VTR_PRI_BITS_MASK		(7 << ICH_VTR_PRI_BITS_SHIFT)
+#define ICH_VTR_ID_BITS_SHIFT		23
+#define ICH_VTR_ID_BITS_MASK		(7 << ICH_VTR_ID_BITS_SHIFT)
+#define ICH_VTR_SEIS_SHIFT		22
+#define ICH_VTR_SEIS_MASK		(1 << ICH_VTR_SEIS_SHIFT)
+#define ICH_VTR_A3V_SHIFT		21
+#define ICH_VTR_A3V_MASK		(1 << ICH_VTR_A3V_SHIFT)
 
 #define ICC_IAR1_EL1_SPURIOUS		0x3ff
 
-- 
cgit v1.2.3


From 5fb247d79c04240dce86c842976cde1edde7f7ed Mon Sep 17 00:00:00 2001
From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Date: Thu, 26 Jan 2017 19:50:50 +0530
Subject: KVM: arm/arm64: vgic: Introduce VENG0 and VENG1 fields to vmcr struct

ICC_VMCR_EL2 supports virtual access to ICC_IGRPEN1_EL1.Enable
and ICC_IGRPEN0_EL1.Enable fields. Add grpen0 and grpen1 member
variables to struct vmcr to support read and write of these fields.

Also refactor vgic_set_vmcr and vgic_get_vmcr() code.
Drop ICH_VMCR_CTLR_SHIFT and ICH_VMCR_CTLR_MASK macros and instead
use ICH_VMCR_EOI* and ICH_VMCR_CBPR* macros.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 7f6d904a62c6..170e00a40826 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -404,8 +404,6 @@
 #define ICH_HCR_EN			(1 << 0)
 #define ICH_HCR_UIE			(1 << 1)
 
-#define ICH_VMCR_CTLR_SHIFT		0
-#define ICH_VMCR_CTLR_MASK		(0x21f << ICH_VMCR_CTLR_SHIFT)
 #define ICH_VMCR_CBPR_SHIFT		4
 #define ICH_VMCR_CBPR_MASK		(1 << ICH_VMCR_CBPR_SHIFT)
 #define ICH_VMCR_EOIM_SHIFT		9
-- 
cgit v1.2.3


From 9d93dc1c96ec446bef9c34a189ea24556f1af89a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 25 Jan 2017 13:47:43 +0000
Subject: arm/arm64: KVM: Get rid of KVM_MEMSLOT_INCOHERENT

KVM_MEMSLOT_INCOHERENT is not used anymore, as we've killed its
only use in the arm/arm64 MMU code. Let's remove the last artifacts.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/kvm_host.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1c5190dab2c1..cda457bcedc1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -45,7 +45,6 @@
  * include/linux/kvm_h.
  */
 #define KVM_MEMSLOT_INVALID	(1UL << 16)
-#define KVM_MEMSLOT_INCOHERENT	(1UL << 17)
 
 /* Two fragments for cross MMIO pages. */
 #define KVM_MAX_MMIO_FRAGMENTS	2
-- 
cgit v1.2.3


From 08d85f3ea99f1eeafc4e8507936190e86a16ee8c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 17 Jan 2017 16:00:48 +0000
Subject: irqdomain: Avoid activating interrupts more than once

Since commit f3b0946d629c ("genirq/msi: Make sure PCI MSIs are
activated early"), we can end-up activating a PCI/MSI twice (once
at allocation time, and once at startup time).

This is normally of no consequences, except that there is some
HW out there that may misbehave if activate is used more than once
(the GICv3 ITS, for example, uses the activate callback
to issue the MAPVI command, and the architecture spec says that
"If there is an existing mapping for the EventID-DeviceID
combination, behavior is UNPREDICTABLE").

While this could be worked around in each individual driver, it may
make more sense to tackle the issue at the core level. In order to
avoid getting in that situation, let's have a per-interrupt flag
to remember if we have already activated that interrupt or not.

Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early")
Reported-and-tested-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1484668848-24361-1-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index e79875574b39..39e3254e5769 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -184,6 +184,7 @@ struct irq_data {
  *
  * IRQD_TRIGGER_MASK		- Mask for the trigger type bits
  * IRQD_SETAFFINITY_PENDING	- Affinity setting is pending
+ * IRQD_ACTIVATED		- Interrupt has already been activated
  * IRQD_NO_BALANCING		- Balancing disabled for this IRQ
  * IRQD_PER_CPU			- Interrupt is per cpu
  * IRQD_AFFINITY_SET		- Interrupt affinity was set
@@ -202,6 +203,7 @@ struct irq_data {
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
 	IRQD_SETAFFINITY_PENDING	= (1 <<  8),
+	IRQD_ACTIVATED			= (1 <<  9),
 	IRQD_NO_BALANCING		= (1 << 10),
 	IRQD_PER_CPU			= (1 << 11),
 	IRQD_AFFINITY_SET		= (1 << 12),
@@ -312,6 +314,21 @@ static inline bool irqd_affinity_is_managed(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED;
 }
 
+static inline bool irqd_is_activated(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_ACTIVATED;
+}
+
+static inline void irqd_set_activated(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_ACTIVATED;
+}
+
+static inline void irqd_clr_activated(struct irq_data *d)
+{
+	__irqd_to_state(d) &= ~IRQD_ACTIVATED;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
-- 
cgit v1.2.3


From a92def1becf33e91fc460c7ae575aa9210ba8f40 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Mon, 19 Dec 2016 18:48:29 -0200
Subject: [media] ir-rx51: port to rc-core

This driver was written using lirc since rc-core did not support
transmitter-only hardware at that time. Now that it does, port
this driver.

Compile tested only.

Signed-off-by: Sean Young <sean@mess.org>
Cc: Timo Kokkonen <timo.t.kokkonen@iki.fi>
Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/linux/platform_data/media/ir-rx51.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/media/ir-rx51.h b/include/linux/platform_data/media/ir-rx51.h
index 812d87307877..2c94ab568bfa 100644
--- a/include/linux/platform_data/media/ir-rx51.h
+++ b/include/linux/platform_data/media/ir-rx51.h
@@ -1,7 +1,7 @@
-#ifndef _LIRC_RX51_H
-#define _LIRC_RX51_H
+#ifndef _IR_RX51_H
+#define _IR_RX51_H
 
-struct lirc_rx51_platform_data {
+struct ir_rx51_platform_data {
 	int(*set_max_mpu_wakeup_lat)(struct device *dev, long t);
 };
 
-- 
cgit v1.2.3


From ddeaa6379d50a530f9f57b3d12f7940e079b052c Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 15 Oct 2016 13:59:03 -0700
Subject: sunrpc & nfs: Add and use dprintk_cont macros

Allow line continuations to work properly with KERN_CONT.

Signed-off-by: Joe Perches <joe@perches.com>
[Anna: Add fallback dprintk_cont() for when CONFIG_SUNRPC_DEBUG=n]
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/debug.h | 58 ++++++++++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 59a7889e15db..8da0f37f3bdc 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -20,33 +20,55 @@ extern unsigned int		nfsd_debug;
 extern unsigned int		nlm_debug;
 #endif
 
-#define dprintk(args...)	dfprintk(FACILITY, ## args)
-#define dprintk_rcu(args...)	dfprintk_rcu(FACILITY, ## args)
+#define dprintk(fmt, ...)						\
+	dfprintk(FACILITY, fmt, ##__VA_ARGS__)
+#define dprintk_cont(fmt, ...)						\
+	dfprintk_cont(FACILITY, fmt, ##__VA_ARGS__)
+#define dprintk_rcu(fmt, ...)						\
+	dfprintk_rcu(FACILITY, fmt, ##__VA_ARGS__)
+#define dprintk_rcu_cont(fmt, ...)					\
+	dfprintk_rcu_cont(FACILITY, fmt, ##__VA_ARGS__)
 
 #undef ifdebug
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 # define ifdebug(fac)		if (unlikely(rpc_debug & RPCDBG_##fac))
 
-# define dfprintk(fac, args...)	\
-	do { \
-		ifdebug(fac) \
-			printk(KERN_DEFAULT args); \
-	} while (0)
-
-# define dfprintk_rcu(fac, args...)	\
-	do { \
-		ifdebug(fac) { \
-			rcu_read_lock(); \
-			printk(KERN_DEFAULT args); \
-			rcu_read_unlock(); \
-		} \
-	} while (0)
+# define dfprintk(fac, fmt, ...)					\
+do {									\
+	ifdebug(fac)							\
+		printk(KERN_DEFAULT fmt, ##__VA_ARGS__);		\
+} while (0)
+
+# define dfprintk_cont(fac, fmt, ...)					\
+do {									\
+	ifdebug(fac)							\
+		printk(KERN_CONT fmt, ##__VA_ARGS__);			\
+} while (0)
+
+# define dfprintk_rcu(fac, fmt, ...)					\
+do {									\
+	ifdebug(fac) {							\
+		rcu_read_lock();					\
+		printk(KERN_DEFAULT fmt, ##__VA_ARGS__);		\
+		rcu_read_unlock();					\
+	}								\
+} while (0)
+
+# define dfprintk_rcu_cont(fac, fmt, ...)				\
+do {									\
+	ifdebug(fac) {							\
+		rcu_read_lock();					\
+		printk(KERN_CONT fmt, ##__VA_ARGS__);			\
+		rcu_read_unlock();					\
+	}								\
+} while (0)
 
 # define RPC_IFDEBUG(x)		x
 #else
 # define ifdebug(fac)		if (0)
-# define dfprintk(fac, args...)	do {} while (0)
-# define dfprintk_rcu(fac, args...)	do {} while (0)
+# define dfprintk(fac, fmt, ...)	do {} while (0)
+# define dfprintk_cont(fac, fmt, ...)	do {} while (0)
+# define dfprintk_rcu(fac, fmt, ...)	do {} while (0)
 # define RPC_IFDEBUG(x)
 #endif
 
-- 
cgit v1.2.3


From 297e1cf29eac13d3e5bb896d18c64a50b6bb48eb Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@mellanox.com>
Date: Sun, 29 Jan 2017 18:56:17 +0200
Subject: net/mlx4_en: Adding support of turning off link autonegotiation via
 ethtool

This feature will allow the user to disable auto negotiation
on the port for mlx4 devices while setting the speed is limited
to 1GbE speeds.
Other speeds will not be accepted in autoneg off mode.

This functionality is permitted providing that the firmware
is compatible with this feature.
The above is determined by querying a new dedicated capability
bit in the device.

Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6533c16e27ad..c3ac945b2759 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1539,8 +1539,13 @@ enum mlx4_ptys_proto {
 	MLX4_PTYS_EN = 1<<2,
 };
 
+enum mlx4_ptys_flags {
+	MLX4_PTYS_AN_DISABLE_CAP   = 1 << 5,
+	MLX4_PTYS_AN_DISABLE_ADMIN = 1 << 6,
+};
+
 struct mlx4_ptys_reg {
-	u8 resrvd1;
+	u8 flags;
 	u8 local_port;
 	u8 resrvd2;
 	u8 proto_mask;
-- 
cgit v1.2.3


From 40fb4fc1e18bc641a0d0887ac21943fd194c1fa9 Mon Sep 17 00:00:00 2001
From: Shaker Daibes <shakerd@mellanox.com>
Date: Sun, 29 Jan 2017 18:56:18 +0200
Subject: net/mlx4_en: Pass user MTU value to Firmware at set port command

When starting the port, driver will inform Firmware about the actual MTU
which does not include implicit headers, such as FCS or VLAN tags.

Signed-off-by: Shaker Daibes <shakerd@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index c3ac945b2759..7e66e4f62858 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1374,6 +1374,7 @@ int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port);
 int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
 int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
+int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu);
 int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 			   u8 promisc);
 int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time);
-- 
cgit v1.2.3


From 9e9fc6f00a54f7064dc681ac187be6498d566a4f Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Wed, 25 Jan 2017 14:06:27 +0530
Subject: cpuidle:powernv: Add helper function to populate powernv idle states.

In the current code for powernv_add_idle_states, there is a lot of code
duplication while initializing an idle state in powernv_states table.

Add an inline helper function to populate the powernv_states[] table
for a given idle state. Invoke this for populating the "Nap",
"Fastsleep" and the stop states in powernv_add_idle_states.

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 include/linux/cpuidle.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index da346f2817a8..fc1e5d7fc1c7 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -62,6 +62,7 @@ struct cpuidle_state {
 };
 
 /* Idle State Flags */
+#define CPUIDLE_FLAG_NONE       (0x00)
 #define CPUIDLE_FLAG_COUPLED	(0x02) /* state applies to multiple cpus */
 #define CPUIDLE_FLAG_TIMER_STOP (0x04)  /* timer is stopped on this state */
 
-- 
cgit v1.2.3


From 608d792192d7136d354bc1b44585c441164dc54e Mon Sep 17 00:00:00 2001
From: Sarangdhar Joshi <spjoshi@codeaurora.org>
Date: Mon, 23 Jan 2017 17:53:18 -0800
Subject: remoteproc: Add RPROC_DELETED state

Add new state RPROC_DELETED to handle synchronization
between rproc_del() and other operations on rproc. This
state represents the rproc device that has been "deleted".

CC: Loic Pallardy <loic.pallardy@st.com>
CC: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Sarangdhar Joshi <spjoshi@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/remoteproc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 8265d351c9f0..e691f64fc7be 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -346,6 +346,7 @@ struct rproc_ops {
  *			a message.
  * @RPROC_RUNNING:	device is up and running
  * @RPROC_CRASHED:	device has crashed; need to start recovery
+ * @RPROC_DELETED:	device is deleted
  * @RPROC_LAST:		just keep this one at the end
  *
  * Please note that the values of these states are used as indices
@@ -359,7 +360,8 @@ enum rproc_state {
 	RPROC_SUSPENDED	= 1,
 	RPROC_RUNNING	= 2,
 	RPROC_CRASHED	= 3,
-	RPROC_LAST	= 4,
+	RPROC_DELETED	= 4,
+	RPROC_LAST	= 5,
 };
 
 /**
-- 
cgit v1.2.3


From 2099c77d4af7d1582db6d4437014cf18fe62e74b Mon Sep 17 00:00:00 2001
From: Sarangdhar Joshi <spjoshi@codeaurora.org>
Date: Mon, 23 Jan 2017 17:53:19 -0800
Subject: remoteproc: Drop firmware_loading_complete

firmware_loading_complete is used to synchronize operations
on rproc while asynchronous firmware loading is in progress.
However, rproc_boot() no longer waits on
firmware_loading_complete. Hence drop this completion
variable altogether and handle the race between rproc_del()
and rproc_boot() using new state RPROC_DELETED.

The request_firmware_nowait() will hold the reference to
rproc device by using a get_device()/put_device(), so the
rproc struct will remain valid even when we return from
rproc_del() before the asynchronous call to
rproc_fw_config_virtio() completes.

CC: Loic Pallardy <loic.pallardy@st.com>
CC: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Sarangdhar Joshi <spjoshi@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/remoteproc.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index e691f64fc7be..81da49564ff4 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -399,7 +399,6 @@ enum rproc_crash_type {
  * @num_traces: number of trace buffers
  * @carveouts: list of physically contiguous memory allocations
  * @mappings: list of iommu mappings we initiated, needed on shutdown
- * @firmware_loading_complete: marks e/o asynchronous firmware loading
  * @bootaddr: address of first instruction to boot rproc with (optional)
  * @rvdevs: list of remote virtio devices
  * @subdevs: list of subdevices, to following the running state
@@ -431,7 +430,6 @@ struct rproc {
 	int num_traces;
 	struct list_head carveouts;
 	struct list_head mappings;
-	struct completion firmware_loading_complete;
 	u32 bootaddr;
 	struct list_head rvdevs;
 	struct list_head subdevs;
-- 
cgit v1.2.3


From 8ff6daa17b6a64e59bbabaa116b9bd854fa4da1f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 27 Jan 2017 23:20:26 -0800
Subject: iomap: constify struct iomap_ops

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/linux/dax.h   |  8 ++++----
 include/linux/iomap.h | 14 +++++++-------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 24ad71173995..2983e52efd07 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -37,9 +37,9 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
 }
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
-		struct iomap_ops *ops);
+		const struct iomap_ops *ops);
 int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-			struct iomap_ops *ops);
+			const struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
@@ -72,7 +72,7 @@ static inline unsigned int dax_radix_order(void *entry)
 	return 0;
 }
 int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
-		pmd_t *pmd, unsigned int flags, struct iomap_ops *ops);
+		pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops);
 #else
 static inline unsigned int dax_radix_order(void *entry)
 {
@@ -80,7 +80,7 @@ static inline unsigned int dax_radix_order(void *entry)
 }
 static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmd, unsigned int flags,
-		struct iomap_ops *ops)
+		const struct iomap_ops *ops)
 {
 	return VM_FAULT_FALLBACK;
 }
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index a4c94b86401e..891459caa278 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -72,17 +72,17 @@ struct iomap_ops {
 };
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
-		struct iomap_ops *ops);
+		const struct iomap_ops *ops);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
-		struct iomap_ops *ops);
+		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-		bool *did_zero, struct iomap_ops *ops);
+		bool *did_zero, const struct iomap_ops *ops);
 int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
-		struct iomap_ops *ops);
+		const struct iomap_ops *ops);
 int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-		struct iomap_ops *ops);
+		const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-		loff_t start, loff_t len, struct iomap_ops *ops);
+		loff_t start, loff_t len, const struct iomap_ops *ops);
 
 /*
  * Flags for direct I/O ->end_io:
@@ -92,6 +92,6 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
 		unsigned flags);
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-		struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
 
 #endif /* LINUX_IOMAP_H */
-- 
cgit v1.2.3


From 77d65d6f3d60cebb2dc24cf05408255a21bb6409 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Fri, 27 Jan 2017 17:42:01 +0100
Subject: dmaengine: Provide a wrapper for memcpy operations

Almost all ->device_prep_dma_xx() methods have a wrapper defined in
dmaengine.h. Add one for  ->device_prep_dma_memcpy().

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index feee6ec6a13b..533680860865 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -894,6 +894,17 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset(
 						    len, flags);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy)
+		return NULL;
+
+	return chan->device->device_prep_dma_memcpy(chan, dest, src,
+						    len, flags);
+}
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg(
 		struct dma_chan *chan,
 		struct scatterlist *dst_sg, unsigned int dst_nents,
-- 
cgit v1.2.3


From bcf23c79c4e46130701370af4383b61a3cba755c Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Tue, 31 Jan 2017 15:38:16 +0900
Subject: PM / devfreq: Fix available_governor sysfs

The devfreq using passive governor is not able to change the governor.
So, the user can not change the governor through 'available_governor' sysfs
entry. Also, the devfreq which don't use the passive governor is not able to
change to 'passive' governor on the fly.

Fixes: 996133119f57 ("PM / devfreq: Add new passive governor")
Cc: stable@vger.kernel.org
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/devfreq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 2de4e2eea180..e0acb0e5243b 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -104,6 +104,8 @@ struct devfreq_dev_profile {
  * struct devfreq_governor - Devfreq policy governor
  * @node:		list node - contains registered devfreq governors
  * @name:		Governor's name
+ * @immutable:		Immutable flag for governor. If the value is 1,
+ *			this govenror is never changeable to other governor.
  * @get_target_freq:	Returns desired operating frequency for the device.
  *			Basically, get_target_freq will run
  *			devfreq_dev_profile.get_dev_status() to get the
@@ -121,6 +123,7 @@ struct devfreq_governor {
 	struct list_head node;
 
 	const char name[DEVFREQ_NAME_LEN];
+	const unsigned int immutable;
 	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
 	int (*event_handler)(struct devfreq *devfreq,
 				unsigned int event, void *data);
-- 
cgit v1.2.3


From 433e19cf33d34bb6751c874a9c00980552fe508c Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Sat, 28 Jan 2017 11:46:02 -0700
Subject: Drivers: hv: vmbus: finally fix hv_need_to_signal_on_read()

Commit a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in
hv_need_to_signal_on_read()")
added the proper mb(), but removed the test "prev_write_sz < pending_sz"
when making the signal decision.

As a result, the guest can signal the host unnecessarily,
and then the host can throttle the guest because the host
thinks the guest is buggy or malicious; finally the user
running stress test can perceive intermittent freeze of
the guest.

This patch brings back the test, and properly handles the
in-place consumption APIs used by NetVSC (see get_next_pkt_raw(),
put_pkt_raw() and commit_rd_index()).

Fixes: a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in
hv_need_to_signal_on_read()")

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reported-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Tested-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 42fe43fb0c80..183efde54269 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -128,6 +128,7 @@ struct hv_ring_buffer_info {
 	u32 ring_data_startoffset;
 	u32 priv_write_index;
 	u32 priv_read_index;
+	u32 cached_read_index;
 };
 
 /*
@@ -180,6 +181,19 @@ static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi)
 	return write;
 }
 
+static inline u32 hv_get_cached_bytes_to_write(
+	const struct hv_ring_buffer_info *rbi)
+{
+	u32 read_loc, write_loc, dsize, write;
+
+	dsize = rbi->ring_datasize;
+	read_loc = rbi->cached_read_index;
+	write_loc = rbi->ring_buffer->write_index;
+
+	write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
+		read_loc - write_loc;
+	return write;
+}
 /*
  * VMBUS version is 32 bit entity broken up into
  * two 16 bit quantities: major_number. minor_number.
@@ -1488,7 +1502,7 @@ hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
 
 static inline  void hv_signal_on_read(struct vmbus_channel *channel)
 {
-	u32 cur_write_sz;
+	u32 cur_write_sz, cached_write_sz;
 	u32 pending_sz;
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
 
@@ -1512,12 +1526,24 @@ static inline  void hv_signal_on_read(struct vmbus_channel *channel)
 
 	cur_write_sz = hv_get_bytes_to_write(rbi);
 
-	if (cur_write_sz >= pending_sz)
+	if (cur_write_sz < pending_sz)
+		return;
+
+	cached_write_sz = hv_get_cached_bytes_to_write(rbi);
+	if (cached_write_sz < pending_sz)
 		vmbus_setevent(channel);
 
 	return;
 }
 
+static inline void
+init_cached_read_index(struct vmbus_channel *channel)
+{
+	struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+	rbi->cached_read_index = rbi->ring_buffer->read_index;
+}
+
 /*
  * An API to support in-place processing of incoming VMBUS packets.
  */
@@ -1569,6 +1595,8 @@ static inline void put_pkt_raw(struct vmbus_channel *channel,
  * This call commits the read index and potentially signals the host.
  * Here is the pattern for using the "in-place" consumption APIs:
  *
+ * init_cached_read_index();
+ *
  * while (get_next_pkt_raw() {
  *	process the packet "in-place";
  *	put_pkt_raw();
-- 
cgit v1.2.3


From a1656454131880980bc3a5313c8bf66ef5990c91 Mon Sep 17 00:00:00 2001
From: Alex Ng <alexng@messages.microsoft.com>
Date: Sat, 28 Jan 2017 12:37:17 -0700
Subject: Drivers: hv: vmbus: Use all supported IC versions to negotiate

Previously, we were assuming that each IC protocol version was tied to a
specific host version. For example, some Windows 10 preview hosts only
support v3 TimeSync even though driver assumes v4 is supported by all
Windows 10 hosts.

The guest will stop trying to negotiate even though older supported
versions may still be offered by the host.

Make IC version negotiation more robust by going through all versions
that are supported by the guest.

Fixes: 3da0401b4d0e ("Drivers: hv: utils: Fix the mapping between host
version and protocol to use")

Reported-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Signed-off-by: Alex Ng <alexng@messages.microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 7ea20bd7cdd1..85b26f06e172 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1445,9 +1445,10 @@ struct hyperv_service_callback {
 };
 
 #define MAX_SRV_VER	0x7ffffff
-extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *,
-					struct icmsg_negotiate *, u8 *, int,
-					int);
+extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
+				const int *fw_version, int fw_vercnt,
+				const int *srv_version, int srv_vercnt,
+				int *nego_fw_version, int *nego_srv_version);
 
 void hv_event_tasklet_disable(struct vmbus_channel *channel);
 void hv_event_tasklet_enable(struct vmbus_channel *channel);
-- 
cgit v1.2.3


From aaa59306b0b7e0ca4ba92cc04c5db101cbb1c096 Mon Sep 17 00:00:00 2001
From: CQ Tang <cq.tang@intel.com>
Date: Mon, 30 Jan 2017 09:39:52 -0800
Subject: iommu/vt-d: Fix some macros that are incorrectly specified in
 intel-iommu

Some of the macros are incorrect with wrong bit-shifts resulting in picking
the incorrect invalidation granularity. Incorrect Source-ID in extended
devtlb invalidation caused device side errors.

To: Joerg Roedel <joro@8bytes.org>
To: David Woodhouse <dwmw2@infradead.org>
Cc: iommu@lists.linux-foundation.org
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org
Cc: CQ Tang <cq.tang@intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>

Fixes: 2f26e0a9 ("iommu/vt-d: Add basic SVM PASID support")
Signed-off-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Tested-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d49e26c6cdc7..23e129ef6726 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -153,8 +153,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
 #define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
 #define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
-#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
-#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 3)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 3)
 #define DMA_TLB_READ_DRAIN (((u64)1) << 49)
 #define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
 #define DMA_TLB_DID(id)	(((u64)((id) & 0xffff)) << 32)
@@ -164,9 +164,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 
 /* INVALID_DESC */
 #define DMA_CCMD_INVL_GRANU_OFFSET  61
-#define DMA_ID_TLB_GLOBAL_FLUSH	(((u64)1) << 3)
-#define DMA_ID_TLB_DSI_FLUSH	(((u64)2) << 3)
-#define DMA_ID_TLB_PSI_FLUSH	(((u64)3) << 3)
+#define DMA_ID_TLB_GLOBAL_FLUSH	(((u64)1) << 4)
+#define DMA_ID_TLB_DSI_FLUSH	(((u64)2) << 4)
+#define DMA_ID_TLB_PSI_FLUSH	(((u64)3) << 4)
 #define DMA_ID_TLB_READ_DRAIN	(((u64)1) << 7)
 #define DMA_ID_TLB_WRITE_DRAIN	(((u64)1) << 6)
 #define DMA_ID_TLB_DID(id)	(((u64)((id & 0xffff) << 16)))
@@ -316,8 +316,8 @@ enum {
 #define QI_DEV_EIOTLB_SIZE	(((u64)1) << 11)
 #define QI_DEV_EIOTLB_GLOB(g)	((u64)g)
 #define QI_DEV_EIOTLB_PASID(p)	(((u64)p) << 32)
-#define QI_DEV_EIOTLB_SID(sid)	((u64)((sid) & 0xffff) << 32)
-#define QI_DEV_EIOTLB_QDEP(qd)	(((qd) & 0x1f) << 16)
+#define QI_DEV_EIOTLB_SID(sid)	((u64)((sid) & 0xffff) << 16)
+#define QI_DEV_EIOTLB_QDEP(qd)	((u64)((qd) & 0x1f) << 4)
 #define QI_DEV_EIOTLB_MAX_INVS	32
 
 #define QI_PGRP_IDX(idx)	(((u64)(idx)) << 55)
-- 
cgit v1.2.3


From ade69e2432b795c76653e1dfa09c684549826a50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <matias@cnexlabs.com>
Date: Tue, 31 Jan 2017 13:17:09 +0100
Subject: lightnvm: merge gennvm with core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For the first iteration of Open-Channel SSDs, it was anticipated that
there could be various media managers on top of an open-channel SSD,
such to allow vendors to plug in their own host-side FTLs, without the
media manager in between.

Now that an Open-Channel SSD is exposed as a traditional block device,
there is no longer a need for this. Therefore lets merge the gennvm code
with core and simplify the stack.

Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 97 ++++--------------------------------------------
 1 file changed, 7 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 7c273bbc5351..84309fe27472 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -80,8 +80,6 @@ struct nvm_dev_ops {
 	unsigned int		max_phys_sect;
 };
 
-
-
 #ifdef CONFIG_NVM
 
 #include <linux/blkdev.h>
@@ -272,15 +270,6 @@ enum {
 	NVM_BLK_ST_BAD =	0x8,	/* Bad block */
 };
 
-/* system block cpu representation */
-struct nvm_sb_info {
-	unsigned long		seqnr;
-	unsigned long		erase_cnt;
-	unsigned int		version;
-	char			mmtype[NVM_MMTYPE_LEN];
-	struct ppa_addr		fs_ppa;
-};
-
 /* Device generic information */
 struct nvm_geo {
 	int nr_chnls;
@@ -308,6 +297,7 @@ struct nvm_geo {
 	int sec_per_lun;
 };
 
+/* sub-device structure */
 struct nvm_tgt_dev {
 	/* Device information */
 	struct nvm_geo geo;
@@ -329,17 +319,10 @@ struct nvm_dev {
 
 	struct list_head devices;
 
-	/* Media manager */
-	struct nvmm_type *mt;
-	void *mp;
-
-	/* System blocks */
-	struct nvm_sb_info sb;
-
 	/* Device information */
 	struct nvm_geo geo;
 
-	/* lower page table */
+	  /* lower page table */
 	int lps_per_blk;
 	int *lptbl;
 
@@ -359,6 +342,10 @@ struct nvm_dev {
 
 	struct mutex mlock;
 	spinlock_t lock;
+
+	/* target management */
+	struct list_head area_list;
+	struct list_head targets;
 };
 
 static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
@@ -452,11 +439,6 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
 					(ppa1.g.blk == ppa2.g.blk));
 }
 
-static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
-{
-	return dev->lptbl[slc_pg];
-}
-
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
 typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *);
@@ -487,49 +469,6 @@ extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
 extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
 extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
 
-typedef int (nvmm_register_fn)(struct nvm_dev *);
-typedef void (nvmm_unregister_fn)(struct nvm_dev *);
-
-typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *);
-typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
-typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *);
-typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int);
-typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
-typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
-typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *,
-					    struct ppa_addr, int);
-typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int);
-
-enum {
-	TRANS_TGT_TO_DEV =	0x0,
-	TRANS_DEV_TO_TGT =	0x1,
-};
-
-struct nvmm_type {
-	const char *name;
-	unsigned int version[3];
-
-	nvmm_register_fn *register_mgr;
-	nvmm_unregister_fn *unregister_mgr;
-
-	nvmm_create_tgt_fn *create_tgt;
-	nvmm_remove_tgt_fn *remove_tgt;
-
-	nvmm_submit_io_fn *submit_io;
-	nvmm_erase_blk_fn *erase_blk;
-
-	nvmm_get_area_fn *get_area;
-	nvmm_put_area_fn *put_area;
-
-	nvmm_trans_ppa_fn *trans_ppa;
-	nvmm_part_to_tgt_fn *part_to_tgt;
-
-	struct list_head list;
-};
-
-extern int nvm_register_mgr(struct nvmm_type *);
-extern void nvm_unregister_mgr(struct nvmm_type *);
-
 extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
 extern void nvm_unregister(struct nvm_dev *);
@@ -559,31 +498,9 @@ extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
 extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *);
 extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
 
-/* sysblk.c */
-#define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */
-
-/* system block on disk representation */
-struct nvm_system_block {
-	__be32			magic;		/* magic signature */
-	__be32			seqnr;		/* sequence number */
-	__be32			erase_cnt;	/* erase count */
-	__be16			version;	/* version number */
-	u8			mmtype[NVM_MMTYPE_LEN]; /* media manager name */
-	__be64			fs_ppa;		/* PPA for media manager
-						 * superblock */
-};
-
-extern int nvm_get_sysblock(struct nvm_dev *, struct nvm_sb_info *);
-extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *);
-extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *);
-
 extern int nvm_dev_factory(struct nvm_dev *, int flags);
 
-#define nvm_for_each_lun_ppa(geo, ppa, chid, lunid)			\
-	for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls;	\
-					(chid)++, (ppa).g.ch = (chid))	\
-		for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl;	\
-					(lunid)++, (ppa).g.lun = (lunid))
+extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int);
 
 #else /* CONFIG_NVM */
 struct nvm_dev_ops;
-- 
cgit v1.2.3


From 10995c3dc9d7f47b92ff3e74b4bd191ddb7991ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <matias@cnexlabs.com>
Date: Tue, 31 Jan 2017 13:17:10 +0100
Subject: lightnvm: collapse nvm_erase_ppa and nvm_erase_blk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After gennvm and core have been merged, there are no more callers to
nvm_erase_ppa. Therefore collapse the device specific and target
specific erase functions.

Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 84309fe27472..f2007b2c4979 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -483,7 +483,6 @@ extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
 extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
 					const struct ppa_addr *, int, int);
 extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
-extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int);
 extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int);
 extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
 			   void *);
-- 
cgit v1.2.3


From 583b7058b2e8071f59646c8fb027f6c3597417ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <matias@cnexlabs.com>
Date: Tue, 31 Jan 2017 13:17:11 +0100
Subject: lightnvm: remove nvm_submit_ppa* functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The nvm_submit_ppa* functions are no longer needed after gennvm and core
have been merged.

Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index f2007b2c4979..abb3d55c107f 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -489,10 +489,6 @@ extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
 extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
 extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
 extern void nvm_end_io(struct nvm_rq *, int);
-extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
-								void *, int);
-extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int,
-							int, void *, int);
 extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
 extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *);
 extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
-- 
cgit v1.2.3


From 8f4fe008fb256649bd0e16c96a6eafa3bd916ac3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <matias@cnexlabs.com>
Date: Tue, 31 Jan 2017 13:17:12 +0100
Subject: lightnvm: remove nvm_get_bb_tbl and nvm_set_bb_tbl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since the merge of gennvm and core, there is no longer a need for the
device specific bad block functions.

Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index abb3d55c107f..cad1e1cb0635 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -473,7 +473,6 @@ extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
 extern void nvm_unregister(struct nvm_dev *);
 
-extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int);
 extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
 			      int, int);
 extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
@@ -490,7 +489,6 @@ extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
 extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
 extern void nvm_end_io(struct nvm_rq *, int);
 extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
-extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *);
 extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
 
 extern int nvm_dev_factory(struct nvm_dev *, int flags);
-- 
cgit v1.2.3


From dab8ee9e8a30620a5b5f22d6c0b3749217093803 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <matias@cnexlabs.com>
Date: Tue, 31 Jan 2017 13:17:14 +0100
Subject: lightnvm: cleanup nvm transformation functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Going from target specific ppa addresses to device was accomplished by
first converting target to generic ppa addresses and generic to device
addresses. The conversion was either open-coded or used the built-in
nvm_trans_* and nvm_map_* functions for conversion. Simplify the
interface and cleanup the calls to provide clean functions that now
either take a list of ppas or a nvm_rq, and is exposed through:

 void nvm_ppa_* - target to/from device with a list of PPAs,
 void nvm_rq_* - target to/from device with a nvm_rq.

Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index cad1e1cb0635..faa0fbfe339a 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -378,10 +378,10 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
 	return l;
 }
 
-static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
-						struct ppa_addr r)
+static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev,
+						  struct ppa_addr r)
 {
-	struct nvm_geo *geo = &dev->geo;
+	struct nvm_geo *geo = &tgt_dev->geo;
 	struct ppa_addr l;
 
 	l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset;
@@ -394,10 +394,10 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
 	return l;
 }
 
-static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
-						struct ppa_addr r)
+static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev,
+						  struct ppa_addr r)
 {
-	struct nvm_geo *geo = &dev->geo;
+	struct nvm_geo *geo = &tgt_dev->geo;
 	struct ppa_addr l;
 
 	l.ppa = 0;
@@ -477,8 +477,6 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
 			      int, int);
 extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
 extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
-extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
-extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
 extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
 					const struct ppa_addr *, int, int);
 extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
-- 
cgit v1.2.3


From 19bd6fe73ca812964963aa30827cff9aae64a715 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <matias@cnexlabs.com>
Date: Tue, 31 Jan 2017 13:17:15 +0100
Subject: lightnvm: reduce number of nvm_id groups to one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The number of configuration groups has been limited to one in current
code, even if there is support for up to four. With the introduction
of the open-channel SSD 1.3 specification, only a single
group is exposed onwards. Reflect this in the nvm_id structure.

Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index faa0fbfe339a..ce0b2dac84ac 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -200,11 +200,10 @@ struct nvm_addr_format {
 struct nvm_id {
 	u8	ver_id;
 	u8	vmnt;
-	u8	cgrps;
 	u32	cap;
 	u32	dom;
 	struct nvm_addr_format ppaf;
-	struct nvm_id_group groups[4];
+	struct nvm_id_group grp;
 } __packed;
 
 struct nvm_target {
-- 
cgit v1.2.3


From 06894efea706b3cd4ce31e341ec51b4c62c34a86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <matias@cnexlabs.com>
Date: Tue, 31 Jan 2017 13:17:17 +0100
Subject: lightnvm: use end_io callback instead of instance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the lightnvm core had the "gennvm" layer between the device and the
target, there was a need for the core to be able to figure out which
target it should send an end_io callback to. Leading to a "double"
end_io, first for the media manager instance, and then for the target
instance. Now that core and gennvm is merged, there is no longer a need
for this, and a single end_io callback will do.

Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index ce0b2dac84ac..17cd454f0d87 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -213,10 +213,6 @@ struct nvm_target {
 	struct gendisk *disk;
 };
 
-struct nvm_tgt_instance {
-	struct nvm_tgt_type *tt;
-};
-
 #define ADDR_EMPTY (~0ULL)
 
 #define NVM_VERSION_MAJOR 1
@@ -227,7 +223,6 @@ struct nvm_rq;
 typedef void (nvm_end_io_fn)(struct nvm_rq *);
 
 struct nvm_rq {
-	struct nvm_tgt_instance *ins;
 	struct nvm_tgt_dev *dev;
 
 	struct bio *bio;
@@ -251,6 +246,8 @@ struct nvm_rq {
 
 	u64 ppa_status; /* ppa media status */
 	int error;
+
+	void *private;
 };
 
 static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
@@ -450,7 +447,6 @@ struct nvm_tgt_type {
 	/* target entry points */
 	nvm_tgt_make_rq_fn *make_rq;
 	nvm_tgt_capacity_fn *capacity;
-	nvm_end_io_fn *end_io;
 
 	/* module-specific init/teardown */
 	nvm_tgt_init_fn *init;
@@ -484,7 +480,7 @@ extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
 			   void *);
 extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
 extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
-extern void nvm_end_io(struct nvm_rq *, int);
+extern void nvm_end_io(struct nvm_rq *);
 extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
 extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
 
-- 
cgit v1.2.3


From 38ea2f7656f815e7330868cbec7bada0fd7933a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= <jg@lightnvm.io>
Date: Tue, 31 Jan 2017 13:17:18 +0100
Subject: lightnvm: Add CRC read error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Let the host differentiate between a read error and a CRC check error on
the device side.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 17cd454f0d87..bc282d26017a 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -107,6 +107,7 @@ enum {
 	NVM_RSP_ERR_FAILWRITE	= 0x40ff,
 	NVM_RSP_ERR_EMPTYPAGE	= 0x42ff,
 	NVM_RSP_ERR_FAILECC	= 0x4281,
+	NVM_RSP_ERR_FAILCRC	= 0x4004,
 	NVM_RSP_WARN_HIGHECC	= 0x4700,
 
 	/* Device opcodes */
-- 
cgit v1.2.3


From 9a69b0ed6257ae5e71c99bf21ce53f98c558476a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= <jg@lightnvm.io>
Date: Tue, 31 Jan 2017 13:17:20 +0100
Subject: lightnvm: allow targets to use sysfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to register through the sysfs interface, a driver needs to know
its kobject. On a disk structure, this happens when the partition
information is added (device_add_disk), which for lightnvm takes place
after the target has been initialized. This means that on target
initialization, the kboject has not been created yet.

This patch adds a target function to let targets initialize their own
kboject as a child of the disk kobject.

Signed-off-by: Javier González <javier@cnexlabs.com>
Added exit typedef and passed gendisk instead of void pointer for exit.
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index bc282d26017a..ca45e4a088a9 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -440,6 +440,8 @@ typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
 typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *);
 typedef void (nvm_tgt_exit_fn)(void *);
+typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
+typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
 
 struct nvm_tgt_type {
 	const char *name;
@@ -453,6 +455,10 @@ struct nvm_tgt_type {
 	nvm_tgt_init_fn *init;
 	nvm_tgt_exit_fn *exit;
 
+	/* sysfs */
+	nvm_tgt_sysfs_init_fn *sysfs_init;
+	nvm_tgt_sysfs_exit_fn *sysfs_exit;
+
 	/* For internal use */
 	struct list_head list;
 };
-- 
cgit v1.2.3


From 2b477c00f3bd87c3286f5940cb4174d8b01ee0d5 Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.com>
Date: Thu, 22 Dec 2016 12:38:06 -0500
Subject: svcrpc: free contexts immediately on PROC_DESTROY

We currently handle a client PROC_DESTROY request by turning it
CACHE_NEGATIVE, setting the expired time to now, and then waiting for
cache_clean to clean it up later.  Since we forgot to set the cache's
nextcheck value, that could take up to 30 minutes.  Also, though there's
probably no real bug in this case, setting CACHE_NEGATIVE directly like
this probably isn't a great idea in general.

So let's just remove the entry from the cache directly, and move this
bit of cache manipulation to a helper function.

Signed-off-by: Neil Brown <neilb@suse.com>
Reported-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 62a60eeacb0a..9dcf2c8fe1c5 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -227,6 +227,7 @@ extern void sunrpc_destroy_cache_detail(struct cache_detail *cd);
 extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
 					umode_t, struct cache_detail *);
 extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
+extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *);
 
 /* Must store cache_detail in seq_file->private if using next three functions */
 extern void *cache_seq_start(struct seq_file *file, loff_t *pos);
-- 
cgit v1.2.3


From e26bfebdfc0d212d366de9990a096665d5c0209a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 31 Jan 2017 09:45:28 +0000
Subject: fscache: Fix dead object requeue

Under some circumstances, an fscache object can become queued such that it
fscache_object_work_func() can be called once the object is in the
OBJECT_DEAD state.  This results in the kernel oopsing when it tries to
invoke the handler for the state (which is hard coded to 0x2).

The way this comes about is something like the following:

 (1) The object dispatcher is processing a work state for an object.  This
     is done in workqueue context.

 (2) An out-of-band event comes in that isn't masked, causing the object to
     be queued, say EV_KILL.

 (3) The object dispatcher finishes processing the current work state on
     that object and then sees there's another event to process, so,
     without returning to the workqueue core, it processes that event too.
     It then follows the chain of events that initiates until we reach
     OBJECT_DEAD without going through a wait state (such as
     WAIT_FOR_CLEARANCE).

     At this point, object->events may be 0, object->event_mask will be 0
     and oob_event_mask will be 0.

 (4) The object dispatcher returns to the workqueue processor, and in due
     course, this sees that the object's work item is still queued and
     invokes it again.

 (5) The current state is a work state (OBJECT_DEAD), so the dispatcher
     jumps to it - resulting in an OOPS.

When I'm seeing this, the work state in (1) appears to have been either
LOOK_UP_OBJECT or CREATE_OBJECT (object->oob_table is
fscache_osm_lookup_oob).

The window for (2) is very small:

 (A) object->event_mask is cleared whilst the event dispatch process is
     underway - though there's no memory barrier to force this to the top
     of the function.

     The window, therefore is from the time the object was selected by the
     workqueue processor and made requeueable to the time the mask was
     cleared.

 (B) fscache_raise_event() will only queue the object if it manages to set
     the event bit and the corresponding event_mask bit was set.

     The enqueuement is then deferred slightly whilst we get a ref on the
     object and get the per-CPU variable for workqueue congestion.  This
     slight deferral slightly increases the probability by allowing extra
     time for the workqueue to make the item requeueable.

Handle this by giving the dead state a processor function and checking the
for the dead state address rather than seeing if the processor function is
address 0x2.  The dead state processor function can then set a flag to
indicate that it's occurred and give a warning if it occurs more than once
per object.

If this race occurs, an oops similar to the following is seen (note the RIP
value):

BUG: unable to handle kernel NULL pointer dereference at 0000000000000002
IP: [<0000000000000002>] 0x1
PGD 0
Oops: 0010 [#1] SMP
Modules linked in: ...
CPU: 17 PID: 16077 Comm: kworker/u48:9 Not tainted 3.10.0-327.18.2.el7.x86_64 #1
Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 12/27/2015
Workqueue: fscache_object fscache_object_work_func [fscache]
task: ffff880302b63980 ti: ffff880717544000 task.ti: ffff880717544000
RIP: 0010:[<0000000000000002>]  [<0000000000000002>] 0x1
RSP: 0018:ffff880717547df8  EFLAGS: 00010202
RAX: ffffffffa0368640 RBX: ffff880edf7a4480 RCX: dead000000200200
RDX: 0000000000000002 RSI: 00000000ffffffff RDI: ffff880edf7a4480
RBP: ffff880717547e18 R08: 0000000000000000 R09: dfc40a25cb3a4510
R10: dfc40a25cb3a4510 R11: 0000000000000400 R12: 0000000000000000
R13: ffff880edf7a4510 R14: ffff8817f6153400 R15: 0000000000000600
FS:  0000000000000000(0000) GS:ffff88181f420000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000002 CR3: 000000000194a000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
 ffffffffa0363695 ffff880edf7a4510 ffff88093f16f900 ffff8817faa4ec00
 ffff880717547e60 ffffffff8109d5db 00000000faa4ec18 0000000000000000
 ffff8817faa4ec18 ffff88093f16f930 ffff880302b63980 ffff88093f16f900
Call Trace:
 [<ffffffffa0363695>] ? fscache_object_work_func+0xa5/0x200 [fscache]
 [<ffffffff8109d5db>] process_one_work+0x17b/0x470
 [<ffffffff8109e4ac>] worker_thread+0x21c/0x400
 [<ffffffff8109e290>] ? rescuer_thread+0x400/0x400
 [<ffffffff810a5acf>] kthread+0xcf/0xe0
 [<ffffffff810a5a00>] ? kthread_create_on_node+0x140/0x140
 [<ffffffff816460d8>] ret_from_fork+0x58/0x90
 [<ffffffff810a5a00>] ? kthread_create_on_node+0x140/0x140

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeremy McNicoll <jeremymc@redhat.com>
Tested-by: Frank Sorenson <sorenson@redhat.com>
Tested-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fscache-cache.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 13ba552e6c09..4c467ef50159 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -360,6 +360,7 @@ struct fscache_object {
 #define FSCACHE_OBJECT_IS_AVAILABLE	5	/* T if object has become active */
 #define FSCACHE_OBJECT_RETIRED		6	/* T if object was retired on relinquishment */
 #define FSCACHE_OBJECT_KILLED_BY_CACHE	7	/* T if object was killed by the cache */
+#define FSCACHE_OBJECT_RUN_AFTER_DEAD	8	/* T if object has been dispatched after death */
 
 	struct list_head	cache_link;	/* link in cache->object_list */
 	struct hlist_node	cookie_link;	/* link in cookie->backing_objects */
-- 
cgit v1.2.3


From 57292b58ddb58689e8c3b4c6eadbef10d9ca44dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 31 Jan 2017 16:57:29 +0100
Subject: block: introduce blk_rq_is_passthrough

This can be used to check for fs vs non-fs requests and basically
removes all knowledge of BLOCK_PC specific from the block layer,
as well as preparing for removing the cmd_type field in struct request.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h       | 16 +++++++++++-----
 include/linux/blktrace_api.h |  4 ++--
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e4c5f284fe2d..7121be081517 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -242,6 +242,11 @@ struct request {
 	struct request *next_rq;
 };
 
+static inline bool blk_rq_is_passthrough(struct request *rq)
+{
+	return rq->cmd_type != REQ_TYPE_FS;
+}
+
 static inline unsigned short req_get_ioprio(struct request *req)
 {
 	return req->ioprio;
@@ -698,9 +703,10 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 			     REQ_FAILFAST_DRIVER))
 
-#define blk_account_rq(rq) \
-	(((rq)->rq_flags & RQF_STARTED) && \
-	 ((rq)->cmd_type == REQ_TYPE_FS))
+static inline bool blk_account_rq(struct request *rq)
+{
+	return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
+}
 
 #define blk_rq_cpu_valid(rq)	((rq)->cpu != -1)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
@@ -775,7 +781,7 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
 
 static inline bool rq_mergeable(struct request *rq)
 {
-	if (rq->cmd_type != REQ_TYPE_FS)
+	if (blk_rq_is_passthrough(rq))
 		return false;
 
 	if (req_op(rq) == REQ_OP_FLUSH)
@@ -1049,7 +1055,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
 {
 	struct request_queue *q = rq->q;
 
-	if (unlikely(rq->cmd_type != REQ_TYPE_FS))
+	if (blk_rq_is_passthrough(rq))
 		return q->limits.max_hw_sectors;
 
 	if (!q->limits.chunk_sectors ||
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index a143a67a6f33..341f9418bd68 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -114,12 +114,12 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
 
 static inline sector_t blk_rq_trace_sector(struct request *rq)
 {
-	return (rq->cmd_type != REQ_TYPE_FS) ? 0 : blk_rq_pos(rq);
+	return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq);
 }
 
 static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq)
 {
-	return (rq->cmd_type != REQ_TYPE_FS) ? 0 : blk_rq_sectors(rq);
+	return blk_rq_is_passthrough(rq) ? 0 : blk_rq_sectors(rq);
 }
 
 #endif
-- 
cgit v1.2.3


From 2f5a8e80f79dc82e00f4cca557dc9ceaf064b450 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 31 Jan 2017 16:57:30 +0100
Subject: ide: don't abuse cmd_type

Currently the legacy ide driver defines several request types of it's own,
which is in the way of removing that field entirely.

Instead add a type field to struct ide_request and use that to distinguish
the different types of IDE-internal requests.

It's a bit of a mess, but so is the surrounding code..

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/ide.h | 56 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 086fbe172817..5cc6caa94cac 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -40,24 +40,58 @@
 
 struct device;
 
-/* IDE-specific values for req->cmd_type */
-enum ata_cmd_type_bits {
-	REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1,
-	REQ_TYPE_ATA_PC,
-	REQ_TYPE_ATA_SENSE,	/* sense request */
-	REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */
-	REQ_TYPE_ATA_PM_RESUME,	/* resume request */
+/* values for ide_request.type */
+enum ata_priv_type {
+	ATA_PRIV_MISC,
+	ATA_PRIV_TASKFILE,
+	ATA_PRIV_PC,
+	ATA_PRIV_SENSE,		/* sense request */
+	ATA_PRIV_PM_SUSPEND,	/* suspend request */
+	ATA_PRIV_PM_RESUME,	/* resume request */
 };
 
-#define ata_pm_request(rq)	\
-	((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \
-	 (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME)
-
 struct ide_request {
 	struct scsi_request sreq;
 	u8 sense[SCSI_SENSE_BUFFERSIZE];
+	u8 type;
 };
 
+static inline struct ide_request *ide_req(struct request *rq)
+{
+	return blk_mq_rq_to_pdu(rq);
+}
+
+static inline bool ata_misc_request(struct request *rq)
+{
+	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
+               ide_req(rq)->type == ATA_PRIV_MISC;
+}
+
+static inline bool ata_taskfile_request(struct request *rq)
+{
+	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
+               ide_req(rq)->type == ATA_PRIV_TASKFILE;
+}
+
+static inline bool ata_pc_request(struct request *rq)
+{
+	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
+               ide_req(rq)->type == ATA_PRIV_PC;
+}
+
+static inline bool ata_sense_request(struct request *rq)
+{
+	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
+               ide_req(rq)->type == ATA_PRIV_SENSE;
+}
+
+static inline bool ata_pm_request(struct request *rq)
+{
+	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
+		(ide_req(rq)->type == ATA_PRIV_PM_SUSPEND ||
+		 ide_req(rq)->type == ATA_PRIV_PM_RESUME);
+}
+
 /* Error codes returned in rq->errors to the higher part of the driver. */
 enum {
 	IDE_DRV_ERROR_GENERAL	= 101,
-- 
cgit v1.2.3


From aebf526b53aea164508730427597d45f3e06b376 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 31 Jan 2017 16:57:31 +0100
Subject: block: fold cmd_type into the REQ_OP_ space

Instead of keeping two levels of indirection for requests types, fold it
all into the operations.  The little caveat here is that previously
cmd_type only applied to struct request, while the request and bio op
fields were set to plain REQ_OP_READ/WRITE even for passthrough
operations.

Instead this patch adds new REQ_OP_* for SCSI passthrough and driver
private requests, althought it has to add two for each so that we
can communicate the data in/out nature of the request.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk_types.h |  7 +++++++
 include/linux/blkdev.h    | 22 +++++++++++-----------
 include/linux/ide.h       | 14 +++++---------
 3 files changed, 23 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 37c9a43c5e78..d703acb55d0f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -162,6 +162,13 @@ enum req_opf {
 	/* write the zero filled sector many times */
 	REQ_OP_WRITE_ZEROES	= 8,
 
+	/* SCSI passthrough using struct scsi_request */
+	REQ_OP_SCSI_IN		= 32,
+	REQ_OP_SCSI_OUT		= 33,
+	/* Driver private requests */
+	REQ_OP_DRV_IN		= 34,
+	REQ_OP_DRV_OUT		= 35,
+
 	REQ_OP_LAST,
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7121be081517..1e947e725528 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -70,15 +70,6 @@ struct request_list {
 	unsigned int		flags;
 };
 
-/*
- * request command types
- */
-enum rq_cmd_type_bits {
-	REQ_TYPE_FS		= 1,	/* fs request */
-	REQ_TYPE_BLOCK_PC,		/* scsi command */
-	REQ_TYPE_DRV_PRIV,		/* driver defined types from here */
-};
-
 /*
  * request flags */
 typedef __u32 __bitwise req_flags_t;
@@ -145,7 +136,6 @@ struct request {
 	struct blk_mq_ctx *mq_ctx;
 
 	int cpu;
-	unsigned cmd_type;
 	unsigned int cmd_flags;		/* op and common flags */
 	req_flags_t rq_flags;
 	unsigned long atomic_flags;
@@ -242,9 +232,19 @@ struct request {
 	struct request *next_rq;
 };
 
+static inline bool blk_rq_is_scsi(struct request *rq)
+{
+	return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
+}
+
+static inline bool blk_rq_is_private(struct request *rq)
+{
+	return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
+}
+
 static inline bool blk_rq_is_passthrough(struct request *rq)
 {
-	return rq->cmd_type != REQ_TYPE_FS;
+	return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
 }
 
 static inline unsigned short req_get_ioprio(struct request *req)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 5cc6caa94cac..2f51c1724b5a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -63,31 +63,27 @@ static inline struct ide_request *ide_req(struct request *rq)
 
 static inline bool ata_misc_request(struct request *rq)
 {
-	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
-               ide_req(rq)->type == ATA_PRIV_MISC;
+	return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_MISC;
 }
 
 static inline bool ata_taskfile_request(struct request *rq)
 {
-	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
-               ide_req(rq)->type == ATA_PRIV_TASKFILE;
+	return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_TASKFILE;
 }
 
 static inline bool ata_pc_request(struct request *rq)
 {
-	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
-               ide_req(rq)->type == ATA_PRIV_PC;
+	return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_PC;
 }
 
 static inline bool ata_sense_request(struct request *rq)
 {
-	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
-               ide_req(rq)->type == ATA_PRIV_SENSE;
+	return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_SENSE;
 }
 
 static inline bool ata_pm_request(struct request *rq)
 {
-	return rq->cmd_type == REQ_TYPE_DRV_PRIV &&
+	return blk_rq_is_private(rq) &&
 		(ide_req(rq)->type == ATA_PRIV_PM_SUSPEND ||
 		 ide_req(rq)->type == ATA_PRIV_PM_RESUME);
 }
-- 
cgit v1.2.3


From d486f1f204382557b5fbcb3ddbb5845cd4ba4e2c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 31 Jan 2017 12:34:41 -0700
Subject: block: move internal_tag to same cache line as tag

Since we removed cmd_type, we now have a hole in the struct. Move
the internal_tag member to the same cacheline as tag, since we
use them at the same time.

This doesn't fix the hole, just moves it elsewhere.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1e947e725528..11f7a8e86a89 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -138,6 +138,9 @@ struct request {
 	int cpu;
 	unsigned int cmd_flags;		/* op and common flags */
 	req_flags_t rq_flags;
+
+	int internal_tag;
+
 	unsigned long atomic_flags;
 
 	/* the following two fields are internal, NEVER access directly */
@@ -209,8 +212,6 @@ struct request {
 
 	unsigned short ioprio;
 
-	int internal_tag;
-
 	void *special;		/* opaque pointer available for LLD use */
 
 	int errors;
-- 
cgit v1.2.3


From dd86e373e09fb16b83e8adf5c48c421a4ca76468 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Jan 2017 23:58:38 +0100
Subject: perf/x86/intel/rapl: Make package handling more robust

The package management code in RAPL relies on package mapping being
available before a CPU is started. This changed with:

  9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")

because the ACPI/BIOS information turned out to be unreliable, but that
left RAPL in broken state. This was not noticed because on a regular boot
all CPUs are online before RAPL is initialized.

A possible fix would be to reintroduce the mess which allocates a package
data structure in CPU prepare and when it turns out to already exist in
starting throw it away later in the CPU online callback. But that's a
horrible hack and not required at all because RAPL becomes functional for
perf only in the CPU online callback. That's correct because user space is
not yet informed about the CPU being onlined, so nothing caan rely on RAPL
being available on that particular CPU.

Move the allocation to the CPU online callback and simplify the hotplug
handling. At this point the package mapping is established and correct.

This also adds a missing check for available package data in the
event_init() function.

Reported-by: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")
Link: http://lkml.kernel.org/r/20170131230141.212593966@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuhotplug.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d936a0021839..8329f3dc592c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -10,7 +10,6 @@ enum cpuhp_state {
 	CPUHP_PERF_X86_PREPARE,
 	CPUHP_PERF_X86_UNCORE_PREP,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
-	CPUHP_PERF_X86_RAPL_PREP,
 	CPUHP_PERF_BFIN,
 	CPUHP_PERF_POWER,
 	CPUHP_PERF_SUPERH,
-- 
cgit v1.2.3


From fff4b87e594ad3d2e4f51e8d3d86a6f9d3d8b654 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Jan 2017 23:58:40 +0100
Subject: perf/x86/intel/uncore: Make package handling more robust

The package management code in uncore relies on package mapping being
available before a CPU is started. This changed with:

  9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")

because the ACPI/BIOS information turned out to be unreliable, but that
left uncore in broken state. This was not noticed because on a regular boot
all CPUs are online before uncore is initialized.

Move the allocation to the CPU online callback and simplify the hotplug
handling. At this point the package mapping is established and correct.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")
Link: http://lkml.kernel.org/r/20170131230141.377156255@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuhotplug.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8329f3dc592c..921acaaa1601 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -8,7 +8,6 @@ enum cpuhp_state {
 	CPUHP_CREATE_THREADS,
 	CPUHP_PERF_PREPARE,
 	CPUHP_PERF_X86_PREPARE,
-	CPUHP_PERF_X86_UNCORE_PREP,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
 	CPUHP_PERF_BFIN,
 	CPUHP_PERF_POWER,
@@ -85,7 +84,6 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_ARMADA_XP_STARTING,
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
-	CPUHP_AP_PERF_X86_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_STARTING,
 	CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
-- 
cgit v1.2.3


From db4545d9a7881db0a7e18599e6cd1adbcb93db33 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Tue, 31 Jan 2017 13:21:34 +0000
Subject: x86/efi: Deduplicate efi_char16_printk()

Eliminate the separate 32-bit and 64x- bit code paths by way of the shiny
new efi_call_proto() macro.

No functional change intended.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1485868902-20401-3-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5b1af30ece55..6642c4d9d11d 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1240,17 +1240,17 @@ struct efivar_entry {
 	bool deleting;
 };
 
-struct efi_simple_text_output_protocol_32 {
+typedef struct {
 	u32 reset;
 	u32 output_string;
 	u32 test_string;
-};
+} efi_simple_text_output_protocol_32_t;
 
-struct efi_simple_text_output_protocol_64 {
+typedef struct {
 	u64 reset;
 	u64 output_string;
 	u64 test_string;
-};
+} efi_simple_text_output_protocol_64_t;
 
 struct efi_simple_text_output_protocol {
 	void *reset;
-- 
cgit v1.2.3


From a19ebf59e20880c87dd49b6336476307559ac5ba Mon Sep 17 00:00:00 2001
From: Sai Praneeth <sai.praneeth.prakhya@intel.com>
Date: Tue, 31 Jan 2017 13:21:36 +0000
Subject: efi: Introduce the EFI_MEM_ATTR bit and set it from the memory
 attributes table

UEFI v2.6 introduces a configuration table called
EFI_MEMORY_ATTRIBUTES_TABLE which provides additional information about
EFI runtime regions. Currently this table describes memory protections
that may be applied to the EFI Runtime code and data regions by the kernel.

Allocate a EFI_XXX bit to keep track of whether this feature is
published by firmware or not.

Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Lee, Chun-Yi <jlee@suse.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: Ricardo Neri <ricardo.neri@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1485868902-20401-5-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6642c4d9d11d..5f632bf9969d 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1065,6 +1065,7 @@ extern int __init efi_setup_pcdp_console(char *);
 #define EFI_ARCH_1		7	/* First arch-specific bit */
 #define EFI_DBG			8	/* Print additional debug info at runtime */
 #define EFI_NX_PE_DATA		9	/* Can runtime data regions be mapped non-executable? */
+#define EFI_MEM_ATTR		10	/* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */
 
 #ifdef CONFIG_EFI
 /*
-- 
cgit v1.2.3


From c4c39c70c5fef43655019236bec8ba5e7273b868 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 31 Jan 2017 13:21:39 +0000
Subject: efi: Use typed function pointers for the runtime services table

Instead of using void pointers, and casting them to correctly typed
function pointers upon use, declare the runtime services pointers
as function pointers using their respective prototypes, for which
typedefs are already available.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1485868902-20401-8-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5f632bf9969d..85e9fdaa8d07 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -509,24 +509,6 @@ typedef struct {
 	u64 query_variable_info;
 } efi_runtime_services_64_t;
 
-typedef struct {
-	efi_table_hdr_t hdr;
-	void *get_time;
-	void *set_time;
-	void *get_wakeup_time;
-	void *set_wakeup_time;
-	void *set_virtual_address_map;
-	void *convert_pointer;
-	void *get_variable;
-	void *get_next_variable;
-	void *set_variable;
-	void *get_next_high_mono_count;
-	void *reset_system;
-	void *update_capsule;
-	void *query_capsule_caps;
-	void *query_variable_info;
-} efi_runtime_services_t;
-
 typedef efi_status_t efi_get_time_t (efi_time_t *tm, efi_time_cap_t *tc);
 typedef efi_status_t efi_set_time_t (efi_time_t *tm);
 typedef efi_status_t efi_get_wakeup_time_t (efi_bool_t *enabled, efi_bool_t *pending,
@@ -561,6 +543,24 @@ typedef efi_status_t efi_query_variable_store_t(u32 attributes,
 						unsigned long size,
 						bool nonblocking);
 
+typedef struct {
+	efi_table_hdr_t			hdr;
+	efi_get_time_t			*get_time;
+	efi_set_time_t			*set_time;
+	efi_get_wakeup_time_t		*get_wakeup_time;
+	efi_set_wakeup_time_t		*set_wakeup_time;
+	efi_set_virtual_address_map_t	*set_virtual_address_map;
+	void				*convert_pointer;
+	efi_get_variable_t		*get_variable;
+	efi_get_next_variable_t		*get_next_variable;
+	efi_set_variable_t		*set_variable;
+	efi_get_next_high_mono_count_t	*get_next_high_mono_count;
+	efi_reset_system_t		*reset_system;
+	efi_update_capsule_t		*update_capsule;
+	efi_query_capsule_caps_t	*query_capsule_caps;
+	efi_query_variable_info_t	*query_variable_info;
+} efi_runtime_services_t;
+
 void efi_native_runtime_setup(void);
 
 /*
-- 
cgit v1.2.3


From 7b0a911478c74ca02581d496f732c10e811e894f Mon Sep 17 00:00:00 2001
From: Dave Young <dyoung@redhat.com>
Date: Tue, 31 Jan 2017 13:21:40 +0000
Subject: efi/x86: Move the EFI BGRT init code to early init code

Before invoking the arch specific handler, efi_mem_reserve() reserves
the given memory region through memblock.

efi_bgrt_init() will call efi_mem_reserve() after mm_init(), at which
time memblock is dead and should not be used anymore.

The EFI BGRT code depends on ACPI initialization to get the BGRT ACPI
table, so move parsing of the BGRT table to ACPI early boot code to
ensure that efi_mem_reserve() in EFI BGRT code still use memblock safely.

Tested-by: Bhupesh Sharma <bhsharma@redhat.com>
Signed-off-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-acpi@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1485868902-20401-9-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi-bgrt.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/efi-bgrt.h b/include/linux/efi-bgrt.h
index 051b21fedf68..2fd3993c370b 100644
--- a/include/linux/efi-bgrt.h
+++ b/include/linux/efi-bgrt.h
@@ -1,20 +1,19 @@
 #ifndef _LINUX_EFI_BGRT_H
 #define _LINUX_EFI_BGRT_H
 
-#ifdef CONFIG_ACPI_BGRT
-
 #include <linux/acpi.h>
 
-void efi_bgrt_init(void);
+#ifdef CONFIG_ACPI_BGRT
+
+void efi_bgrt_init(struct acpi_table_header *table);
 
 /* The BGRT data itself; only valid if bgrt_image != NULL. */
-extern void *bgrt_image;
 extern size_t bgrt_image_size;
-extern struct acpi_table_bgrt *bgrt_tab;
+extern struct acpi_table_bgrt bgrt_tab;
 
 #else /* !CONFIG_ACPI_BGRT */
 
-static inline void efi_bgrt_init(void) {}
+static inline void efi_bgrt_init(struct acpi_table_header *table) {}
 
 #endif /* !CONFIG_ACPI_BGRT */
 
-- 
cgit v1.2.3


From 07e5f5e353aaa61696c8353d87050994a0c4648a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:17 +0100
Subject: time: Introduce jiffies64_to_nsecs()

This will be needed for the cputime_t to nsec conversion.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jiffies.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 589d14e970ad..624215cebee5 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -293,6 +293,8 @@ static inline u64 jiffies_to_nsecs(const unsigned long j)
 	return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC;
 }
 
+extern u64 jiffies64_to_nsecs(u64 j);
+
 extern unsigned long __msecs_to_jiffies(const unsigned int m);
 #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
 /*
-- 
cgit v1.2.3


From ba03ce822db234f8acb559de4a317a5c1f95c029 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:18 +0100
Subject: sched/cputime: Remove the unused INIT_CPUTIME macro

It's a leftover from removed code.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-3-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 69e6852fede1..5f60aed37701 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -620,13 +620,6 @@ struct task_cputime {
 #define prof_exp	stime
 #define sched_exp	sum_exec_runtime
 
-#define INIT_CPUTIME	\
-	(struct task_cputime) {					\
-		.utime = 0,					\
-		.stime = 0,					\
-		.sum_exec_runtime = 0,				\
-	}
-
 /*
  * This is the atomic variant of task_cputime, which can be used for
  * storing and updating task_cputime statistics without locking.
-- 
cgit v1.2.3


From 16a6d9be90373fb0b521850cd0185a4d460dd152 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:21 +0100
Subject: sched/cputime: Convert guest time accounting to nsecs (u64)

cputime_t is being obsolete and replaced by nsecs units in order to make
internal timestamps less opaque and more granular.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-6-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5f60aed37701..252ff25983c8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -774,8 +774,8 @@ struct signal_struct {
 	 */
 	seqlock_t stats_lock;
 	cputime_t utime, stime, cutime, cstime;
-	cputime_t gtime;
-	cputime_t cgtime;
+	u64 gtime;
+	u64 cgtime;
 	struct prev_cputime prev_cputime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
@@ -1658,7 +1658,7 @@ struct task_struct {
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
 	cputime_t utimescaled, stimescaled;
 #endif
-	cputime_t gtime;
+	u64 gtime;
 	struct prev_cputime prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 	seqcount_t vtime_seqcount;
@@ -2254,7 +2254,7 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask);
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void task_cputime(struct task_struct *t,
 			 cputime_t *utime, cputime_t *stime);
-extern cputime_t task_gtime(struct task_struct *t);
+extern u64 task_gtime(struct task_struct *t);
 #else
 static inline void task_cputime(struct task_struct *t,
 				cputime_t *utime, cputime_t *stime)
@@ -2263,7 +2263,7 @@ static inline void task_cputime(struct task_struct *t,
 	*stime = t->stime;
 }
 
-static inline cputime_t task_gtime(struct task_struct *t)
+static inline u64 task_gtime(struct task_struct *t)
 {
 	return t->gtime;
 }
-- 
cgit v1.2.3


From a1cecf2ba78e0a6de00ff99df34b662728535aa5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:22 +0100
Subject: sched/cputime: Introduce special task_cputime_t() API to return
 old-typed cputime

This API returns a task's cputime in cputime_t in order to ease the
conversion of cputime internals to use nsecs units instead. Blindly
converting all cputime readers to use this API now will later let us
convert more smoothly and step by step all these places to use the
new nsec based cputime.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-7-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 252ff25983c8..9cc722f77799 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -615,6 +615,13 @@ struct task_cputime {
 	unsigned long long sum_exec_runtime;
 };
 
+/* Temporary type to ease cputime_t to nsecs conversion */
+struct task_cputime_t {
+	cputime_t utime;
+	cputime_t stime;
+	unsigned long long sum_exec_runtime;
+};
+
 /* Alternate field names when used to cache expirations. */
 #define virt_exp	utime
 #define prof_exp	stime
@@ -748,7 +755,7 @@ struct signal_struct {
 	struct thread_group_cputimer cputimer;
 
 	/* Earliest-expiration cache. */
-	struct task_cputime cputime_expires;
+	struct task_cputime_t cputime_expires;
 
 #ifdef CONFIG_NO_HZ_FULL
 	atomic_t tick_dep_mask;
@@ -1682,7 +1689,7 @@ struct task_struct {
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;
 
-	struct task_cputime cputime_expires;
+	struct task_cputime_t cputime_expires;
 	struct list_head cpu_timers[3];
 
 /* process credentials */
@@ -2286,6 +2293,19 @@ static inline void task_cputime_scaled(struct task_struct *t,
 }
 #endif
 
+static inline void task_cputime_t(struct task_struct *t,
+				  cputime_t *utime, cputime_t *stime)
+{
+	task_cputime(t, utime, stime);
+}
+
+static inline void task_cputime_t_scaled(struct task_struct *t,
+					 cputime_t *utimescaled,
+					 cputime_t *stimescaled)
+{
+	task_cputime_scaled(t, utimescaled, stimescaled);
+}
+
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
@@ -3499,7 +3519,13 @@ static __always_inline bool need_resched(void)
  * Thread group CPU time accounting.
  */
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times);
+
+static inline void thread_group_cputime_t(struct task_struct *tsk,
+					  struct task_cputime_t *times)
+{
+	thread_group_cputime(tsk, (struct task_cputime *)times);
+}
 
 /*
  * Reevaluate whether the task has signals pending delivery.
-- 
cgit v1.2.3


From 5613fda9a503cd6137b120298902a34a1386b2c1 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:23 +0100
Subject: sched/cputime: Convert task/group cputime to nsecs

Now that most cputime readers use the transition API which return the
task cputime in old style cputime_t, we can safely store the cputime in
nsecs. This will eventually make cputime statistics less opaque and more
granular. Back and forth convertions between cputime_t and nsecs in order
to deal with cputime_t random granularity won't be needed anymore.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-8-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 55 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9cc722f77799..b7ccc54b35cc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -585,8 +585,8 @@ struct cpu_itimer {
  */
 struct prev_cputime {
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-	cputime_t utime;
-	cputime_t stime;
+	u64 utime;
+	u64 stime;
 	raw_spinlock_t lock;
 #endif
 };
@@ -601,8 +601,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev)
 
 /**
  * struct task_cputime - collected CPU time counts
- * @utime:		time spent in user mode, in &cputime_t units
- * @stime:		time spent in kernel mode, in &cputime_t units
+ * @utime:		time spent in user mode, in nanoseconds
+ * @stime:		time spent in kernel mode, in nanoseconds
  * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
  *
  * This structure groups together three kinds of CPU time that are tracked for
@@ -610,8 +610,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev)
  * these counts together and treat all three of them in parallel.
  */
 struct task_cputime {
-	cputime_t utime;
-	cputime_t stime;
+	u64 utime;
+	u64 stime;
 	unsigned long long sum_exec_runtime;
 };
 
@@ -780,7 +780,7 @@ struct signal_struct {
 	 * in __exit_signal, except for the group leader.
 	 */
 	seqlock_t stats_lock;
-	cputime_t utime, stime, cutime, cstime;
+	u64 utime, stime, cutime, cstime;
 	u64 gtime;
 	u64 cgtime;
 	struct prev_cputime prev_cputime;
@@ -1661,9 +1661,9 @@ struct task_struct {
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
 	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
 
-	cputime_t utime, stime;
+	u64 utime, stime;
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
-	cputime_t utimescaled, stimescaled;
+	u64 utimescaled, stimescaled;
 #endif
 	u64 gtime;
 	struct prev_cputime prev_cputime;
@@ -2260,11 +2260,11 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask);
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void task_cputime(struct task_struct *t,
-			 cputime_t *utime, cputime_t *stime);
+			 u64 *utime, u64 *stime);
 extern u64 task_gtime(struct task_struct *t);
 #else
 static inline void task_cputime(struct task_struct *t,
-				cputime_t *utime, cputime_t *stime)
+				u64 *utime, u64 *stime)
 {
 	*utime = t->utime;
 	*stime = t->stime;
@@ -2278,16 +2278,16 @@ static inline u64 task_gtime(struct task_struct *t)
 
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
 static inline void task_cputime_scaled(struct task_struct *t,
-				       cputime_t *utimescaled,
-				       cputime_t *stimescaled)
+				       u64 *utimescaled,
+				       u64 *stimescaled)
 {
 	*utimescaled = t->utimescaled;
 	*stimescaled = t->stimescaled;
 }
 #else
 static inline void task_cputime_scaled(struct task_struct *t,
-				       cputime_t *utimescaled,
-				       cputime_t *stimescaled)
+				       u64 *utimescaled,
+				       u64 *stimescaled)
 {
 	task_cputime(t, utimescaled, stimescaled);
 }
@@ -2296,18 +2296,26 @@ static inline void task_cputime_scaled(struct task_struct *t,
 static inline void task_cputime_t(struct task_struct *t,
 				  cputime_t *utime, cputime_t *stime)
 {
-	task_cputime(t, utime, stime);
+	u64 ut, st;
+
+	task_cputime(t, &ut, &st);
+	*utime = nsecs_to_cputime(ut);
+	*stime = nsecs_to_cputime(st);
 }
 
 static inline void task_cputime_t_scaled(struct task_struct *t,
 					 cputime_t *utimescaled,
 					 cputime_t *stimescaled)
 {
-	task_cputime_scaled(t, utimescaled, stimescaled);
+	u64 ut, st;
+
+	task_cputime_scaled(t, &ut, &st);
+	*utimescaled = nsecs_to_cputime(ut);
+	*stimescaled = nsecs_to_cputime(st);
 }
 
-extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
-extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
+extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
 
 /*
  * Per process flags
@@ -3522,9 +3530,14 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times);
 
 static inline void thread_group_cputime_t(struct task_struct *tsk,
-					  struct task_cputime_t *times)
+					  struct task_cputime_t *cputime)
 {
-	thread_group_cputime(tsk, (struct task_cputime *)times);
+	struct task_cputime times;
+
+	thread_group_cputime(tsk, &times);
+	cputime->utime = nsecs_to_cputime(times.utime);
+	cputime->stime = nsecs_to_cputime(times.stime);
+	cputime->sum_exec_runtime = times.sum_exec_runtime;
 }
 
 /*
-- 
cgit v1.2.3


From cd19c364b313c179410fcac8376330964cc9bfd9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:27 +0100
Subject: fs/binfmt: Convert obsolete cputime type to nsecs

Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-12-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compat.h | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 63609398ef9f..9e40be522793 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -731,7 +731,25 @@ asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
 static inline bool in_compat_syscall(void) { return is_compat_task(); }
 #endif
 
-#else
+/**
+ * ns_to_compat_timeval - Compat version of ns_to_timeval
+ * @nsec:	the nanoseconds value to be converted
+ *
+ * Returns the compat_timeval representation of the nsec parameter.
+ */
+static inline struct compat_timeval ns_to_compat_timeval(s64 nsec)
+{
+	struct timeval tv;
+	struct compat_timeval ctv;
+
+	tv = ns_to_timeval(nsec);
+	ctv.tv_sec = tv.tv_sec;
+	ctv.tv_usec = tv.tv_usec;
+
+	return ctv;
+}
+
+#else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
 static inline bool in_compat_syscall(void) { return false; }
-- 
cgit v1.2.3


From d4bc42af73bf297f7182ed978b19850553242195 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:28 +0100
Subject: acct: Convert obsolete cputime type to nsecs

Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-13-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b7ccc54b35cc..75fc773c158d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -563,7 +563,7 @@ struct pacct_struct {
 	int			ac_flag;
 	long			ac_exitcode;
 	unsigned long		ac_mem;
-	cputime_t		ac_utime, ac_stime;
+	u64			ac_utime, ac_stime;
 	unsigned long		ac_minflt, ac_majflt;
 };
 
-- 
cgit v1.2.3


From 605dc2b31a2ab235570107cb650036b41e741165 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:30 +0100
Subject: tsacct: Convert obsolete cputime type to nsecs

Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-15-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75fc773c158d..dc44366128d8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1815,7 +1815,7 @@ struct task_struct {
 #if defined(CONFIG_TASK_XACCT)
 	u64 acct_rss_mem1;	/* accumulated rss usage */
 	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
-	cputime_t acct_timexpd;	/* stime + utime since last update */
+	u64 acct_timexpd;	/* stime + utime since last update */
 #endif
 #ifdef CONFIG_CPUSETS
 	nodemask_t mems_allowed;	/* Protected by alloc_lock */
-- 
cgit v1.2.3


From ebd7e7fc4bc63be5eaf9da903b8060b02dd711ea Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:34 +0100
Subject: timers/posix-timers: Convert internals to use nsecs

Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.

Also convert posix-cpu-timers to use nsec based internal counters to
simplify it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-19-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/posix-timers.h | 12 +-----------
 include/linux/sched.h        |  6 +++---
 2 files changed, 4 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 62d44c176071..890de52362d0 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -8,19 +8,9 @@
 #include <linux/alarmtimer.h>
 
 
-static inline unsigned long long cputime_to_expires(cputime_t expires)
-{
-	return (__force unsigned long long)expires;
-}
-
-static inline cputime_t expires_to_cputime(unsigned long long expires)
-{
-	return (__force cputime_t)expires;
-}
-
 struct cpu_timer_list {
 	struct list_head entry;
-	unsigned long long expires, incr;
+	u64 expires, incr;
 	struct task_struct *task;
 	int firing;
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dc44366128d8..baa6a2834e0f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -755,7 +755,7 @@ struct signal_struct {
 	struct thread_group_cputimer cputimer;
 
 	/* Earliest-expiration cache. */
-	struct task_cputime_t cputime_expires;
+	struct task_cputime cputime_expires;
 
 #ifdef CONFIG_NO_HZ_FULL
 	atomic_t tick_dep_mask;
@@ -1689,7 +1689,7 @@ struct task_struct {
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;
 
-	struct task_cputime_t cputime_expires;
+	struct task_cputime cputime_expires;
 	struct list_head cpu_timers[3];
 
 /* process credentials */
@@ -3527,7 +3527,7 @@ static __always_inline bool need_resched(void)
  * Thread group CPU time accounting.
  */
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times);
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
 static inline void thread_group_cputime_t(struct task_struct *tsk,
 					  struct task_cputime_t *cputime)
-- 
cgit v1.2.3


From 858cf3a8c59968e7c5f7c1a1192459a0d52d1ab4 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:35 +0100
Subject: timers/itimer: Convert internal cputime_t units to nsec

Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.

Also convert itimers to use nsec based internal counters. This simplifies
it and removes the whole game with error/inc_error which served to deal
with cputime_t random granularity.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-20-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/posix-timers.h | 2 +-
 include/linux/sched.h        | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 890de52362d0..64aa189efe21 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -119,7 +119,7 @@ void run_posix_cpu_timers(struct task_struct *task);
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
-			   cputime_t *newval, cputime_t *oldval);
+			   u64 *newval, u64 *oldval);
 
 long clock_nanosleep_restart(struct restart_block *restart_block);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index baa6a2834e0f..268fdd713089 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -568,10 +568,8 @@ struct pacct_struct {
 };
 
 struct cpu_itimer {
-	cputime_t expires;
-	cputime_t incr;
-	u32 error;
-	u32 incr_error;
+	u64 expires;
+	u64 incr;
 };
 
 /**
-- 
cgit v1.2.3


From 71ea47b197de9a53bc3747a8b1c667df9c6a4c68 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:36 +0100
Subject: sched/cputime: Remove temporary cputime_t accessors

Now that the whole cputime conversion to nsec units is complete, we
can remove the compatibility accessors.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-21-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 40 ----------------------------------------
 1 file changed, 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 268fdd713089..d17645402767 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -29,7 +29,6 @@ struct sched_param {
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
-#include <linux/cputime.h>
 
 #include <linux/smp.h>
 #include <linux/sem.h>
@@ -613,13 +612,6 @@ struct task_cputime {
 	unsigned long long sum_exec_runtime;
 };
 
-/* Temporary type to ease cputime_t to nsecs conversion */
-struct task_cputime_t {
-	cputime_t utime;
-	cputime_t stime;
-	unsigned long long sum_exec_runtime;
-};
-
 /* Alternate field names when used to cache expirations. */
 #define virt_exp	utime
 #define prof_exp	stime
@@ -2291,27 +2283,6 @@ static inline void task_cputime_scaled(struct task_struct *t,
 }
 #endif
 
-static inline void task_cputime_t(struct task_struct *t,
-				  cputime_t *utime, cputime_t *stime)
-{
-	u64 ut, st;
-
-	task_cputime(t, &ut, &st);
-	*utime = nsecs_to_cputime(ut);
-	*stime = nsecs_to_cputime(st);
-}
-
-static inline void task_cputime_t_scaled(struct task_struct *t,
-					 cputime_t *utimescaled,
-					 cputime_t *stimescaled)
-{
-	u64 ut, st;
-
-	task_cputime_scaled(t, &ut, &st);
-	*utimescaled = nsecs_to_cputime(ut);
-	*stimescaled = nsecs_to_cputime(st);
-}
-
 extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
 
@@ -3527,17 +3498,6 @@ static __always_inline bool need_resched(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
-static inline void thread_group_cputime_t(struct task_struct *tsk,
-					  struct task_cputime_t *cputime)
-{
-	struct task_cputime times;
-
-	thread_group_cputime(tsk, &times);
-	cputime->utime = nsecs_to_cputime(times.utime);
-	cputime->stime = nsecs_to_cputime(times.stime);
-	cputime->sum_exec_runtime = times.sum_exec_runtime;
-}
-
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
-- 
cgit v1.2.3


From 23244a5c8003d4154161a8289a7d3783b0237c08 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:37 +0100
Subject: sched/cputime: Push time to account_user_time() in nsecs

This is one more step toward converting cputime accounting to pure nsecs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-22-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel_stat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index c3e38ded2d73..b716001ac23e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -78,7 +78,7 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 	return kstat_cpu(cpu).irqs_sum;
 }
 
-extern void account_user_time(struct task_struct *, cputime_t);
+extern void account_user_time(struct task_struct *, u64);
 extern void account_guest_time(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
 extern void account_system_index_time(struct task_struct *, cputime_t,
-- 
cgit v1.2.3


From be9095ed4fb3cf69e9fdf64e28ff6b5bd0ec7215 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:38 +0100
Subject: sched/cputime: Push time to account_steal_time() in nsecs

This is one more step toward converting cputime accounting to pure nsecs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-23-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel_stat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index b716001ac23e..1d55d10abf9d 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -83,7 +83,7 @@ extern void account_guest_time(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
 extern void account_system_index_time(struct task_struct *, cputime_t,
 				      enum cpu_usage_stat);
-extern void account_steal_time(cputime_t);
+extern void account_steal_time(u64);
 extern void account_idle_time(cputime_t);
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-- 
cgit v1.2.3


From 18b43a9bd7ae91185e398dd983fb4fffb9e81b3a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:39 +0100
Subject: sched/cputime: Push time to account_idle_time() in nsecs

This is one more step toward converting cputime accounting to pure nsecs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-24-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel_stat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 1d55d10abf9d..e1cd8970e096 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -84,7 +84,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t);
 extern void account_system_index_time(struct task_struct *, cputime_t,
 				      enum cpu_usage_stat);
 extern void account_steal_time(u64);
-extern void account_idle_time(cputime_t);
+extern void account_idle_time(u64);
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline void account_process_tick(struct task_struct *tsk, int user)
-- 
cgit v1.2.3


From fb8b049c988f1ff460b063b8a41ea9a3c79921c2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:40 +0100
Subject: sched/cputime: Push time to account_system_time() in nsecs

This is one more step toward converting cputime accounting to pure nsecs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-25-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel_stat.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index e1cd8970e096..66be8b6beceb 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -9,7 +9,6 @@
 #include <linux/sched.h>
 #include <linux/vtime.h>
 #include <asm/irq.h>
-#include <linux/cputime.h>
 
 /*
  * 'kernel_stat.h' contains the definitions needed for doing
@@ -79,9 +78,9 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 }
 
 extern void account_user_time(struct task_struct *, u64);
-extern void account_guest_time(struct task_struct *, cputime_t);
-extern void account_system_time(struct task_struct *, int, cputime_t);
-extern void account_system_index_time(struct task_struct *, cputime_t,
+extern void account_guest_time(struct task_struct *, u64);
+extern void account_system_time(struct task_struct *, int, u64);
+extern void account_system_index_time(struct task_struct *, u64,
 				      enum cpu_usage_stat);
 extern void account_steal_time(u64);
 extern void account_idle_time(u64);
-- 
cgit v1.2.3


From 934ad473552a48d303a54279518aa19cf674567d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:50 +0100
Subject: sched/cputime: Remove unused nsec_to_cputime()

It's unused now.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-35-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cputime.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cputime.h b/include/linux/cputime.h
index f2eb2ee535ca..a257d6690621 100644
--- a/include/linux/cputime.h
+++ b/include/linux/cputime.h
@@ -8,9 +8,4 @@
 	(cputime_to_usecs(__ct) * NSEC_PER_USEC)
 #endif
 
-#ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs)	\
-	usecs_to_cputime((__nsecs) / NSEC_PER_USEC)
-#endif
-
 #endif /* __LINUX_CPUTIME_H */
-- 
cgit v1.2.3


From b672592f022152155fde7db99aafbcf04a2c3ba5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 31 Jan 2017 04:09:51 +0100
Subject: sched/cputime: Remove generic asm headers

cputime_t is now only used by two architectures:

	* powerpc (when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y)
	* s390

And since the core doesn't use it anymore, we don't need any arch support
from the others. So we can remove their stub implementations.

A final cleanup would be to provide an efficient pure arch
implementation of cputime_to_nsec() for s390 and powerpc and finally
remove include/linux/cputime.h .

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1485832191-26889-36-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cputime.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cputime.h b/include/linux/cputime.h
index a257d6690621..a691dc4ddc13 100644
--- a/include/linux/cputime.h
+++ b/include/linux/cputime.h
@@ -1,6 +1,7 @@
 #ifndef __LINUX_CPUTIME_H
 #define __LINUX_CPUTIME_H
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #include <asm/cputime.h>
 
 #ifndef cputime_to_nsecs
@@ -8,4 +9,5 @@
 	(cputime_to_usecs(__ct) * NSEC_PER_USEC)
 #endif
 
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #endif /* __LINUX_CPUTIME_H */
-- 
cgit v1.2.3


From 7e1f9467d1e48c64c27d6a32de1bfd1b9cdb1002 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Sun, 29 Jan 2017 07:42:12 -0800
Subject: sched/wait, rcuwait: Fix typo in comment

Forgot to update the comment after renaming the call.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Link: http://lkml.kernel.org/r/1485704532-9290-1-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rcuwait.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 0e93d56c7ab2..a4ede51b3e7c 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -47,7 +47,7 @@ extern void rcuwait_wake_up(struct rcuwait *w);
 	for (;;) {							\
 		/*							\
 		 * Implicit barrier (A) pairs with (B) in		\
-		 * rcuwait_trywake().					\
+		 * rcuwait_wake_up().					\
 		 */							\
 		set_current_state(TASK_UNINTERRUPTIBLE);		\
 		if (condition)						\
-- 
cgit v1.2.3


From 0754445d71c37a7afd4f0790a9be4cf53c1b8cc4 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Sun, 29 Jan 2017 07:15:31 -0800
Subject: sched/wake_q: Clarify queue reinit comment

As of:

  bcc9a76d5ac ("locking/rwsem: Reinit wake_q after use")

the comment regarding the list reinitialization no longer applies,
update it with the new wake_q_init() helper.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Waiman Long <longman@redhat.com>
Cc: peterz@infradead.org
Cc: longman@redhat.com
Cc: akpm@linux-foundation.org
Cc: paulmck@linux.vnet.ibm.com
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/20170129151531.GA2444@linux-80c1.suse
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4e62b378bd65..1cc0dede2122 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1000,8 +1000,8 @@ enum cpu_idle_type {
  *
  * The DEFINE_WAKE_Q macro declares and initializes the list head.
  * wake_up_q() does NOT reinitialize the list; it's expected to be
- * called near the end of a function, where the fact that the queue is
- * not used again will be easy to see by inspection.
+ * called near the end of a function. Otherwise, the list can be
+ * re-initialized for later re-use by wake_q_init().
  *
  * Note that this can cause spurious wakeups. schedule() callers
  * must ensure the call is done inside a loop, confirming that the
-- 
cgit v1.2.3


From 733ce725aa4bfa9063be053bfe7f4597d76f0dd1 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Fri, 27 Jan 2017 12:38:16 -0500
Subject: sched/clock: Add dummy clear_sched_clock_stable() stub function

In commit:

  acb04058de494 ("sched/clock: Fix hotplug crash")

the PARISC code gained a call to this function.

However the prototype for it is within a CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
 #ifdef/#endif.  That, combined with this:

  arch/parisc/Kconfig:    select HAVE_UNSTABLE_SCHED_CLOCK if SMP

means that PARISC can have it either enabled or disabled, resulting
in the following build fail:

  arch/parisc/kernel/setup.c:180:2: error: implicit declaration of
  function 'clear_sched_clock_stable' [-Werror=implicit-function-declaration]

Add a no-op stub for the non-SMP case to prevent this.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: acb04058de49 ("sched/clock: Fix hotplug crash")
Link: http://lkml.kernel.org/r/20170127173816.22733-1-paul.gortmaker@windriver.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d17645402767..e2ed46d3ed71 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2511,6 +2511,10 @@ static inline void sched_clock_tick(void)
 {
 }
 
+static inline void clear_sched_clock_stable(void)
+{
+}
+
 static inline void sched_clock_idle_sleep_event(void)
 {
 }
-- 
cgit v1.2.3


From 975e155ed8732cb81f55c021c441ae662dd040b5 Mon Sep 17 00:00:00 2001
From: Shile Zhang <shile.zhang@nokia.com>
Date: Sat, 28 Jan 2017 22:00:49 +0800
Subject: sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning
 knob in milliseconds

We added the 'sched_rr_timeslice_ms' SCHED_RR tuning knob in this commit:

  ce0dbbbb30ae ("sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice")

... which name suggests to users that it's in milliseconds, while in reality
it's being set in milliseconds but the result is shown in jiffies.

This is obviously confusing when HZ is not 1000, it makes it appear like the
value set failed, such as HZ=100:

  root# echo 100 > /proc/sys/kernel/sched_rr_timeslice_ms
  root# cat /proc/sys/kernel/sched_rr_timeslice_ms
  10

Fix this to be milliseconds all around.

Signed-off-by: Shile Zhang <shile.zhang@nokia.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1485612049-20923-1-git-send-email-shile.zhang@nokia.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/sysctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 441145351301..49308e142aae 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -59,6 +59,7 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 extern unsigned int sysctl_sched_autogroup_enabled;
 #endif
 
+extern int sysctl_sched_rr_timeslice;
 extern int sched_rr_timeslice;
 
 extern int sched_rr_handler(struct ctl_table *table, int write,
-- 
cgit v1.2.3


From 93faccbbfa958a9668d3ab4e30f38dd205cee8d8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 1 Feb 2017 06:06:16 +1300
Subject: fs: Better permission checking for submounts

To support unprivileged users mounting filesystems two permission
checks have to be performed: a test to see if the user allowed to
create a mount in the mount namespace, and a test to see if
the user is allowed to access the specified filesystem.

The automount case is special in that mounting the original filesystem
grants permission to mount the sub-filesystems, to any user who
happens to stumble across the their mountpoint and satisfies the
ordinary filesystem permission checks.

Attempting to handle the automount case by using override_creds
almost works.  It preserves the idea that permission to mount
the original filesystem is permission to mount the sub-filesystem.
Unfortunately using override_creds messes up the filesystems
ordinary permission checks.

Solve this by being explicit that a mount is a submount by introducing
vfs_submount, and using it where appropriate.

vfs_submount uses a new mount internal mount flags MS_SUBMOUNT, to let
sget and friends know that a mount is a submount so they can take appropriate
action.

sget and sget_userns are modified to not perform any permission checks
on submounts.

follow_automount is modified to stop using override_creds as that
has proven problemantic.

do_mount is modified to always remove the new MS_SUBMOUNT flag so
that we know userspace will never by able to specify it.

autofs4 is modified to stop using current_real_cred that was put in
there to handle the previous version of submount permission checking.

cifs is modified to pass the mountpoint all of the way down to vfs_submount.

debugfs is modified to pass the mountpoint all of the way down to
trace_automount by adding a new parameter.  To make this change easier
a new typedef debugfs_automount_t is introduced to capture the type of
the debugfs automount function.

Cc: stable@vger.kernel.org
Fixes: 069d5ac9ae0d ("autofs:  Fix automounts by using current_real_cred()->uid")
Fixes: aeaa4a79ff6a ("fs: Call d_automount with the filesystems creds")
Reviewed-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/debugfs.h | 3 ++-
 include/linux/mount.h   | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 014cc564d1c4..233006be30aa 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -97,9 +97,10 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
 struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
 				      const char *dest);
 
+typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
 struct dentry *debugfs_create_automount(const char *name,
 					struct dentry *parent,
-					struct vfsmount *(*f)(void *),
+					debugfs_automount_t f,
 					void *data);
 
 void debugfs_remove(struct dentry *dentry);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index c6f55158d5e5..8e0352af06b7 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -90,6 +90,9 @@ struct file_system_type;
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				      int flags, const char *name,
 				      void *data);
+extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
+				     struct file_system_type *type,
+				     const char *name, void *data);
 
 extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
 extern void mark_mounts_for_expiry(struct list_head *mounts);
-- 
cgit v1.2.3


From 1a2a14444d32b89b28116daea86f63ced1716668 Mon Sep 17 00:00:00 2001
From: Dimitris Michailidis <dmichail@google.com>
Date: Tue, 31 Jan 2017 16:03:13 -0800
Subject: net: fix ndo_features_check/ndo_fix_features comment ordering

Commit cdba756f5803a2 ("net: move ndo_features_check() close to
ndo_start_xmit()") inadvertently moved the doc comment for
.ndo_fix_features instead of .ndo_features_check. Fix the comment
ordering.

Fixes: cdba756f5803a2 ("net: move ndo_features_check() close to ndo_start_xmit()")
Signed-off-by: Dimitris Michailidis <dmichail@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9bde9558b596..70ad0291d517 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -866,11 +866,15 @@ struct netdev_xdp {
  *	of useless work if you return NETDEV_TX_BUSY.
  *	Required; cannot be NULL.
  *
- * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
- *		netdev_features_t features);
- *	Adjusts the requested feature flags according to device-specific
- *	constraints, and returns the resulting flags. Must not modify
- *	the device state.
+ * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
+ *					   struct net_device *dev
+ *					   netdev_features_t features);
+ *	Called by core transmit path to determine if device is capable of
+ *	performing offload operations on a given packet. This is to give
+ *	the device an opportunity to implement any restrictions that cannot
+ *	be otherwise expressed by feature flags. The check is called with
+ *	the set of features that the stack has calculated and it returns
+ *	those the driver believes to be appropriate.
  *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
@@ -1028,6 +1032,12 @@ struct netdev_xdp {
  *	Called to release previously enslaved netdev.
  *
  *      Feature/offload setting functions.
+ * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
+ *		netdev_features_t features);
+ *	Adjusts the requested feature flags according to device-specific
+ *	constraints, and returns the resulting flags. Must not modify
+ *	the device state.
+ *
  * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
  *	Called to update device configuration to new features. Passed
  *	feature set might be less than what was returned by ndo_fix_features()).
@@ -1100,15 +1110,6 @@ struct netdev_xdp {
  *	Callback to use for xmit over the accelerated station. This
  *	is used in place of ndo_start_xmit on accelerated net
  *	devices.
- * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
- *					   struct net_device *dev
- *					   netdev_features_t features);
- *	Called by core transmit path to determine if device is capable of
- *	performing offload operations on a given packet. This is to give
- *	the device an opportunity to implement any restrictions that cannot
- *	be otherwise expressed by feature flags. The check is called with
- *	the set of features that the stack has calculated and it returns
- *	those the driver believes to be appropriate.
  * int (*ndo_set_tx_maxrate)(struct net_device *dev,
  *			     int queue_index, u32 maxrate);
  *	Called when a user wants to set a max-rate limitation of specific
-- 
cgit v1.2.3


From cb9c68363efb6d1f950ec55fb06e031ee70db5fc Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 23 Jan 2017 18:21:56 +0100
Subject: skbuff: add and use skb_nfct helper

Followup patch renames skb->nfct and changes its type so add a helper to
avoid intrusive rename change later.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/skbuff.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b53c0cfd417e..276431e047af 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3553,6 +3553,15 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
 	skb->csum = csum_add(skb->csum, delta);
 }
 
+static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	return skb->nfct;
+#else
+	return NULL;
+#endif
+}
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
@@ -3652,9 +3661,7 @@ static inline bool skb_irq_freeable(const struct sk_buff *skb)
 #if IS_ENABLED(CONFIG_XFRM)
 		!skb->sp &&
 #endif
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-		!skb->nfct &&
-#endif
+		!skb_nfct(skb) &&
 		!skb->_skb_refdst &&
 		!skb_has_frag_list(skb);
 }
-- 
cgit v1.2.3


From a9e419dc7be6997409dca6d1b9daf3cc7046902f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 23 Jan 2017 18:21:59 +0100
Subject: netfilter: merge ctinfo into nfct pointer storage area

After this change conntrack operations (lookup, creation, matching from
ruleset) only access one instead of two sk_buff cache lines.

This works for normal conntracks because those are allocated from a slab
that guarantees hw cacheline or 8byte alignment (whatever is larger)
so the 3 bits needed for ctinfo won't overlap with nf_conn addresses.

Template allocation now does manual address alignment (see previous change)
on arches that don't have sufficent kmalloc min alignment.

Some spots intentionally use skb->_nfct instead of skb_nfct() helpers,
this is to avoid undoing the skb_nfct() use when we remove untracked
conntrack object in the future.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/skbuff.h | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 276431e047af..ac0bc085b139 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -585,7 +585,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@cloned: Head may be cloned (check refcnt to be sure)
  *	@ip_summed: Driver fed us an IP checksum
  *	@nohdr: Payload reference only, must not modify header
- *	@nfctinfo: Relationship of this skb to the connection
  *	@pkt_type: Packet class
  *	@fclone: skbuff clone status
  *	@ipvs_property: skbuff is owned by ipvs
@@ -594,7 +593,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@nf_trace: netfilter packet trace flag
  *	@protocol: Packet protocol from driver
  *	@destructor: Destruct function
- *	@nfct: Associated connection, if any
+ *	@_nfct: Associated connection, if any (with nfctinfo bits)
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@skb_iif: ifindex of device we arrived on
  *	@tc_index: Traffic control index
@@ -668,7 +667,7 @@ struct sk_buff {
 	struct	sec_path	*sp;
 #endif
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	struct nf_conntrack	*nfct;
+	unsigned long		 _nfct;
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct nf_bridge_info	*nf_bridge;
@@ -721,7 +720,6 @@ struct sk_buff {
 	__u8			pkt_type:3;
 	__u8			pfmemalloc:1;
 	__u8			ignore_df:1;
-	__u8			nfctinfo:3;
 
 	__u8			nf_trace:1;
 	__u8			ip_summed:2;
@@ -836,6 +834,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb)
 #define SKB_DST_NOREF	1UL
 #define SKB_DST_PTRMASK	~(SKB_DST_NOREF)
 
+#define SKB_NFCT_PTRMASK	~(7UL)
 /**
  * skb_dst - returns skb dst_entry
  * @skb: buffer
@@ -3556,7 +3555,7 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
 static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-	return skb->nfct;
+	return (void *)(skb->_nfct & SKB_NFCT_PTRMASK);
 #else
 	return NULL;
 #endif
@@ -3590,8 +3589,8 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
 static inline void nf_reset(struct sk_buff *skb)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	nf_conntrack_put(skb->nfct);
-	skb->nfct = NULL;
+	nf_conntrack_put(skb_nfct(skb));
+	skb->_nfct = 0;
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	nf_bridge_put(skb->nf_bridge);
@@ -3611,10 +3610,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
 			     bool copy)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	dst->nfct = src->nfct;
-	nf_conntrack_get(src->nfct);
-	if (copy)
-		dst->nfctinfo = src->nfctinfo;
+	dst->_nfct = src->_nfct;
+	nf_conntrack_get(skb_nfct(src));
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	dst->nf_bridge  = src->nf_bridge;
@@ -3629,7 +3626,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
 static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	nf_conntrack_put(dst->nfct);
+	nf_conntrack_put(skb_nfct(dst));
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	nf_bridge_put(dst->nf_bridge);
-- 
cgit v1.2.3


From f44f1ab5a2dcd4e16eab850fd08e40ff2d0c28d4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 2 Feb 2017 15:56:49 +0100
Subject: block: Unhash block device inodes on gendisk destruction

Currently, block device inodes stay around after corresponding gendisk
hash died until memory reclaim finds them and frees them. Since we will
make block device inode pin the bdi, we want to free the block device
inode as soon as the device goes away so that bdi does not stay around
unnecessarily. Furthermore we need to avoid issues when new device with
the same major,minor pair gets created since reusing the bdi structure
would be rather difficult in this case.

Unhashing block device inode on gendisk destruction nicely deals with
these problems. Once last block device inode reference is dropped (which
may be directly in del_gendisk()), the inode gets evicted. Furthermore if
the major,minor pair gets reallocated, we are guaranteed to get new
block device inode even if old block device inode is not yet evicted and
thus we avoid issues with possible reuse of bdi.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2ba074328894..702cb6c50194 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2342,6 +2342,7 @@ extern struct kmem_cache *names_cachep;
 #ifdef CONFIG_BLOCK
 extern int register_blkdev(unsigned int, const char *);
 extern void unregister_blkdev(unsigned int, const char *);
+extern void bdev_unhash_inode(dev_t dev);
 extern struct block_device *bdget(dev_t);
 extern struct block_device *bdgrab(struct block_device *bdev);
 extern void bd_set_size(struct block_device *, loff_t size);
-- 
cgit v1.2.3


From dc3b17cc8bf21307c7e076e7c778d5db756f7871 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 2 Feb 2017 15:56:50 +0100
Subject: block: Use pointer to backing_dev_info from request_queue

We will want to have struct backing_dev_info allocated separately from
struct request_queue. As the first step add pointer to backing_dev_info
to request_queue and convert all users touching it. No functional
changes in this patch.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 11f7a8e86a89..a75e42de34ab 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -432,7 +432,8 @@ struct request_queue {
 	 */
 	struct delayed_work	delay_work;
 
-	struct backing_dev_info	backing_dev_info;
+	struct backing_dev_info	*backing_dev_info;
+	struct backing_dev_info	_backing_dev_info;
 
 	/*
 	 * The queue owner gets to use this for whatever they like.
-- 
cgit v1.2.3


From d03f6cdc1fc422accb734c7c07a661a0018d8631 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 2 Feb 2017 15:56:51 +0100
Subject: block: Dynamically allocate and refcount backing_dev_info

Instead of storing backing_dev_info inside struct request_queue,
allocate it dynamically, reference count it, and free it when the last
reference is dropped. Currently only request_queue holds the reference
but in the following patch we add other users referencing
backing_dev_info.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev-defs.h |  2 ++
 include/linux/backing-dev.h      | 10 +++++++++-
 include/linux/blkdev.h           |  1 -
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index e850e76acaaf..ad955817916d 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -10,6 +10,7 @@
 #include <linux/flex_proportions.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
+#include <linux/kref.h>
 
 struct page;
 struct device;
@@ -144,6 +145,7 @@ struct backing_dev_info {
 
 	char *name;
 
+	struct kref refcnt;	/* Reference counter for the structure */
 	unsigned int capabilities; /* Device capabilities */
 	unsigned int min_ratio;
 	unsigned int max_ratio, max_prop_frac;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 43b93a947e61..efb6ca992d05 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,7 +18,14 @@
 #include <linux/slab.h>
 
 int __must_check bdi_init(struct backing_dev_info *bdi);
-void bdi_exit(struct backing_dev_info *bdi);
+
+static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
+{
+	kref_get(&bdi->refcnt);
+	return bdi;
+}
+
+void bdi_put(struct backing_dev_info *bdi);
 
 __printf(3, 4)
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -29,6 +36,7 @@ void bdi_unregister(struct backing_dev_info *bdi);
 
 int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
 void bdi_destroy(struct backing_dev_info *bdi);
+struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id);
 
 void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
 			bool range_cyclic, enum wb_reason reason);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a75e42de34ab..e77c1039fd0e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -433,7 +433,6 @@ struct request_queue {
 	struct delayed_work	delay_work;
 
 	struct backing_dev_info	*backing_dev_info;
-	struct backing_dev_info	_backing_dev_info;
 
 	/*
 	 * The queue owner gets to use this for whatever they like.
-- 
cgit v1.2.3


From b1d2dc5659b41741f5a29b2ade76ffb4e5bb13d8 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 2 Feb 2017 15:56:52 +0100
Subject: block: Make blk_get_backing_dev_info() safe without open bdev

Currenly blk_get_backing_dev_info() is not safe to be called when the
block device is not open as bdev->bd_disk is NULL in that case. However
inode_to_bdi() uses this function and may be call called from flusher
worker or other writeback related functions without bdev being open
which leads to crashes such as:

[113031.075540] Unable to handle kernel paging request for data at address 0x00000000
[113031.075614] Faulting instruction address: 0xc0000000003692e0
0:mon> t
[c0000000fb65f900] c00000000036cb6c writeback_sb_inodes+0x30c/0x590
[c0000000fb65fa10] c00000000036ced4 __writeback_inodes_wb+0xe4/0x150
[c0000000fb65fa70] c00000000036d33c wb_writeback+0x30c/0x450
[c0000000fb65fb40] c00000000036e198 wb_workfn+0x268/0x580
[c0000000fb65fc50] c0000000000f3470 process_one_work+0x1e0/0x590
[c0000000fb65fce0] c0000000000f38c8 worker_thread+0xa8/0x660
[c0000000fb65fd80] c0000000000fc4b0 kthread+0x110/0x130
[c0000000fb65fe30] c0000000000098f0 ret_from_kernel_thread+0x5c/0x6c

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 702cb6c50194..c930cbc19342 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -423,6 +423,7 @@ struct block_device {
 	int			bd_invalidated;
 	struct gendisk *	bd_disk;
 	struct request_queue *  bd_queue;
+	struct backing_dev_info *bd_bdi;
 	struct list_head	bd_list;
 	/*
 	 * Private data.  You must have bd_claim'ed the block_device
-- 
cgit v1.2.3


From efa7c9f97e3ef624e9a398bf69c15f58eea9f0e8 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 2 Feb 2017 15:56:53 +0100
Subject: block: Get rid of blk_get_backing_dev_info()

blk_get_backing_dev_info() is now a simple dereference. Remove that
function and simplify some code around that.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/backing-dev.h | 2 +-
 include/linux/blkdev.h      | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index efb6ca992d05..c52a48cb9a66 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -191,7 +191,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
 	sb = inode->i_sb;
 #ifdef CONFIG_BLOCK
 	if (sb_is_blkdev_sb(sb))
-		return blk_get_backing_dev_info(I_BDEV(inode));
+		return I_BDEV(inode)->bd_bdi;
 #endif
 	return sb->s_bdi;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e77c1039fd0e..b31137e2afd0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1187,7 +1187,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
-extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
 {
-- 
cgit v1.2.3


From 0dba1314d4f81115dce711292ec7981d17231064 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 1 Feb 2017 14:05:23 -0800
Subject: scsi, block: fix duplicate bdi name registration crashes

Warnings of the following form occur because scsi reuses a devt number
while the block layer still has it referenced as the name of the bdi
[1]:

 WARNING: CPU: 1 PID: 93 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x62/0x80
 sysfs: cannot create duplicate filename '/devices/virtual/bdi/8:192'
 [..]
 Call Trace:
  dump_stack+0x86/0xc3
  __warn+0xcb/0xf0
  warn_slowpath_fmt+0x5f/0x80
  ? kernfs_path_from_node+0x4f/0x60
  sysfs_warn_dup+0x62/0x80
  sysfs_create_dir_ns+0x77/0x90
  kobject_add_internal+0xb2/0x350
  kobject_add+0x75/0xd0
  device_add+0x15a/0x650
  device_create_groups_vargs+0xe0/0xf0
  device_create_vargs+0x1c/0x20
  bdi_register+0x90/0x240
  ? lockdep_init_map+0x57/0x200
  bdi_register_owner+0x36/0x60
  device_add_disk+0x1bb/0x4e0
  ? __pm_runtime_use_autosuspend+0x5c/0x70
  sd_probe_async+0x10d/0x1c0
  async_run_entry_fn+0x39/0x170

This is a brute-force fix to pass the devt release information from
sd_probe() to the locations where we register the bdi,
device_add_disk(), and unregister the bdi, blk_cleanup_queue().

Thanks to Omar for the quick reproducer script [2]. This patch survives
where an unmodified kernel fails in a few seconds.

[1]: https://marc.info/?l=linux-scsi&m=147116857810716&w=4
[2]: http://marc.info/?l=linux-block&m=148554717109098&w=2

Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Jan Kara <jack@suse.cz>
Reported-by: Omar Sandoval <osandov@osandov.com>
Tested-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 1 +
 include/linux/genhd.h  | 8 ++++++++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b31137e2afd0..f84fbe55d3b3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -433,6 +433,7 @@ struct request_queue {
 	struct delayed_work	delay_work;
 
 	struct backing_dev_info	*backing_dev_info;
+	struct disk_devt	*disk_devt;
 
 	/*
 	 * The queue owner gets to use this for whatever they like.
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 76f39754e7b0..a999d281a2f1 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -167,6 +167,13 @@ struct blk_integrity {
 };
 
 #endif	/* CONFIG_BLK_DEV_INTEGRITY */
+struct disk_devt {
+	atomic_t count;
+	void (*release)(struct disk_devt *disk_devt);
+};
+
+void put_disk_devt(struct disk_devt *disk_devt);
+void get_disk_devt(struct disk_devt *disk_devt);
 
 struct gendisk {
 	/* major, first_minor and minors are input parameters only,
@@ -176,6 +183,7 @@ struct gendisk {
 	int first_minor;
 	int minors;                     /* maximum number of minors, =1 for
                                          * disks that can't be partitioned. */
+	struct disk_devt *disk_devt;
 
 	char disk_name[DISK_NAME_LEN];	/* name of major driver */
 	char *(*devnode)(struct gendisk *gd, umode_t *mode);
-- 
cgit v1.2.3


From 1c83a9aab807f7452c4957b2401e1cbf43941820 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 2 Feb 2017 11:52:14 -0500
Subject: ext4: move halfmd4 into hash.c directly

The "half md4" transform should not be used by any new code. And
fortunately, it's only used now by ext4. Since ext4 supports several
hashing methods, at some point it might be desirable to move to
something like SipHash. As an intermediate step, remove half md4 from
cryptohash.h and lib, and make it just a local function in ext4's
hash.c. There's precedent for doing this; the other function ext can use
for its hashes -- TEA -- is also implemented in the same place. Also, by
being a local function, this might allow gcc to perform some additional
optimizations.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/cryptohash.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
index f4754282c9c2..3252799832cf 100644
--- a/include/linux/cryptohash.h
+++ b/include/linux/cryptohash.h
@@ -15,6 +15,4 @@ void sha_transform(__u32 *digest, const char *data, __u32 *W);
 
 void md5_transform(__u32 *hash, __u32 const *in);
 
-__u32 half_md4_transform(__u32 buf[4], __u32 const in[8]);
-
 #endif
-- 
cgit v1.2.3


From a7c5437b0bbec5165df6eb1efc5e37dc40b9e05d Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 31 Jan 2017 14:53:17 -0800
Subject: debugfs: add debugfs_lookup()

We don't always have easy access to the dentry of a file or directory we
created in debugfs. Add a helper which allows us to get a dentry we
previously created.

The motivation for this change is a problem with blktrace and the blk-mq
debugfs entries introduced in 07e4fead45e6 ("blk-mq: create debugfs
directory tree"). Namely, in some cases, the directory that blktrace
needs to create may already exist, but in other cases, it may not. We
_could_ rely on a bunch of implied knowledge to decide whether to create
the directory or not, but it's much cleaner on our end to just look it
up.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/debugfs.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 014cc564d1c4..c0befcf41b58 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -80,6 +80,8 @@ static const struct file_operations __fops = {				\
 
 #if defined(CONFIG_DEBUG_FS)
 
+struct dentry *debugfs_lookup(const char *name, struct dentry *parent);
+
 struct dentry *debugfs_create_file(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
 				   const struct file_operations *fops);
@@ -181,6 +183,12 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
  * want to duplicate the design decision mistakes of procfs and devfs again.
  */
 
+static inline struct dentry *debugfs_lookup(const char *name,
+					    struct dentry *parent)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline struct dentry *debugfs_create_file(const char *name, umode_t mode,
 					struct dentry *parent, void *data,
 					const struct file_operations *fops)
-- 
cgit v1.2.3


From 03796c149a99e14506db9de3adba710c26f83ba9 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 31 Jan 2017 14:53:18 -0800
Subject: block: fix debugfs config conditional in struct request_queue

The debugfs dentries are only used for CONFIG_BLK_DEBUG_FS, so make them
conditional on that instead of CONFIG_DEBUG_FS.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f84fbe55d3b3..e0bac14347e6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -570,7 +570,7 @@ struct request_queue {
 	struct list_head	tag_set_list;
 	struct bio_set		*bio_split;
 
-#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_BLK_DEBUG_FS
 	struct dentry		*debugfs_dir;
 	struct dentry		*mq_debugfs_dir;
 #endif
-- 
cgit v1.2.3


From a428d314ebcf65842fd64ad850c02c280586e74d Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 31 Jan 2017 14:53:19 -0800
Subject: blktrace: make do_blk_trace_setup() static

This isn't used outside of blktrace.c anymore.

Fixes: 62c2a7d969f3 ("block: push BKL into blktrace ioctls")
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blktrace_api.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 341f9418bd68..d2e908586e3d 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -30,9 +30,6 @@ struct blk_trace {
 
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
-extern int do_blk_trace_setup(struct request_queue *q, char *name,
-			      dev_t dev, struct block_device *bdev,
-			      struct blk_user_trace_setup *buts);
 extern __printf(2, 3)
 void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 
@@ -80,7 +77,6 @@ extern struct attribute_group blk_trace_attr_group;
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
 # define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
 # define blk_trace_shutdown(q)				do { } while (0)
-# define do_blk_trace_setup(q, name, dev, bdev, buts)	(-ENOTTY)
 # define blk_add_driver_data(q, rq, data, len)		do {} while (0)
 # define blk_trace_setup(q, name, dev, bdev, arg)	(-ENOTTY)
 # define blk_trace_startstop(q, start)			(-ENOTTY)
-- 
cgit v1.2.3


From b343d77b94b6702082f4f402e1492e71ef87095b Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 30 Jan 2017 13:38:22 +0100
Subject: soc: samsung: pmu: Add register defines for pad retention control

Add PMU defines related to pad retention control. Will be later used
by the Exynos pin controller driver.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 include/linux/soc/samsung/exynos-regs-pmu.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index 9786c62d7159..49df0a01a2cc 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -631,4 +631,20 @@
 					 | EXYNOS5420_KFC_USE_STANDBY_WFI2  \
 					 | EXYNOS5420_KFC_USE_STANDBY_WFI3)
 
+/* For EXYNOS5433 */
+#define EXYNOS5433_PAD_RETENTION_AUD_OPTION			(0x3028)
+#define EXYNOS5433_PAD_RETENTION_MMC2_OPTION			(0x30C8)
+#define EXYNOS5433_PAD_RETENTION_TOP_OPTION			(0x3108)
+#define EXYNOS5433_PAD_RETENTION_UART_OPTION			(0x3128)
+#define EXYNOS5433_PAD_RETENTION_MMC0_OPTION			(0x3148)
+#define EXYNOS5433_PAD_RETENTION_MMC1_OPTION			(0x3168)
+#define EXYNOS5433_PAD_RETENTION_EBIA_OPTION			(0x3188)
+#define EXYNOS5433_PAD_RETENTION_EBIB_OPTION			(0x31A8)
+#define EXYNOS5433_PAD_RETENTION_SPI_OPTION			(0x31C8)
+#define EXYNOS5433_PAD_RETENTION_MIF_OPTION			(0x31E8)
+#define EXYNOS5433_PAD_RETENTION_USBXTI_OPTION			(0x3228)
+#define EXYNOS5433_PAD_RETENTION_BOOTLDO_OPTION			(0x3248)
+#define EXYNOS5433_PAD_RETENTION_UFS_OPTION			(0x3268)
+#define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION		(0x32A8)
+
 #endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */
-- 
cgit v1.2.3


From a45463cbf3f9dcdae683033c256f50bded513d6a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 1 Feb 2017 18:01:17 +0100
Subject: workqueue: avoid clang warning

Building with clang shows lots of warning like:

drivers/amba/bus.c:447:8: warning: implicit conversion from 'long long' to 'int' changes value from 4294967248 to -48
      [-Wconstant-conversion]
static DECLARE_DELAYED_WORK(deferred_retry_work, amba_deferred_retry_func);
       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/workqueue.h:187:26: note: expanded from macro 'DECLARE_DELAYED_WORK'
        struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
                                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/workqueue.h:177:10: note: expanded from macro '__DELAYED_WORK_INITIALIZER'
        .work = __WORK_INITIALIZER((n).work, (f)),                      \
                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/workqueue.h:170:10: note: expanded from macro '__WORK_INITIALIZER'
        .data = WORK_DATA_STATIC_INIT(),                                \
                ^~~~~~~~~~~~~~~~~~~~~~~
include/linux/workqueue.h:111:39: note: expanded from macro 'WORK_DATA_STATIC_INIT'
        ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~
include/asm-generic/atomic-long.h:32:41: note: expanded from macro 'ATOMIC_LONG_INIT'
 #define ATOMIC_LONG_INIT(i)     ATOMIC_INIT(i)
                                ~~~~~~~~~~~~^~
arch/arm/include/asm/atomic.h:21:27: note: expanded from macro 'ATOMIC_INIT'
 #define ATOMIC_INIT(i)  { (i) }
                        ~  ^

This makes the type cast explicit, which shuts up the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a26cc437293c..bde063cefd04 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -106,9 +106,9 @@ struct work_struct {
 #endif
 };
 
-#define WORK_DATA_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL)
+#define WORK_DATA_INIT()	ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
 #define WORK_DATA_STATIC_INIT()	\
-	ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)
+	ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC))
 
 struct delayed_work {
 	struct work_struct work;
-- 
cgit v1.2.3


From e4cf8a38fc0d257b4070066e46a678f4777fecaa Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 1 Feb 2017 03:40:06 +0100
Subject: net: phy: Marvell: Add mv88e6390 internal PHY

The mv88e6390 Ethernet switch has internal PHYs. These PHYs don't have
an model ID in the ID2 register. So the MDIO driver in the switch
intercepts reads to this register, and returns the switch family ID.
Extend the Marvell PHY driver by including this ID, and treat the PHY
as a 88E1540.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/marvell_phy.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index a57f0dfb6db7..3d616d7f65bf 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -19,6 +19,12 @@
 #define MARVELL_PHY_ID_88E1540		0x01410eb0
 #define MARVELL_PHY_ID_88E3016		0x01410e60
 
+/* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do
+ * not have a model ID. So the switch driver traps reads to the ID2
+ * register and returns the switch family ID
+ */
+#define MARVELL_PHY_ID_88E6390		0x01410f90
+
 /* struct phy_device dev_flags definitions */
 #define MARVELL_PHY_M1145_FLAGS_RESISTANCE	0x00000001
 #define MARVELL_PHY_M1118_DNS323_LEDS		0x00000002
-- 
cgit v1.2.3


From 60f06fde4cff6f6134decbf09199b5e047bc3355 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 2 Feb 2017 00:35:03 +0100
Subject: net: phy: marvell: Add support for 88e1545 PHY

The 88e1545 PHYs are discrete Marvell PHYs, found in a quad package on
the zii-devel-b board. Add support for it to the Marvell PHY driver.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/marvell_phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 3d616d7f65bf..4055cf8cc978 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -17,6 +17,7 @@
 #define MARVELL_PHY_ID_88E1116R		0x01410e40
 #define MARVELL_PHY_ID_88E1510		0x01410dd0
 #define MARVELL_PHY_ID_88E1540		0x01410eb0
+#define MARVELL_PHY_ID_88E1545		0x01410ea0
 #define MARVELL_PHY_ID_88E3016		0x01410e60
 
 /* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do
-- 
cgit v1.2.3


From 0f1b92cbdd0309afae0af1963e8cccddb3d2eaff Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 30 Jan 2017 18:06:11 +0300
Subject: introduce the walk_process_tree() helper

Add the new helper to walk the process tree, the next patch adds a user.
Note that it visits the group leaders only, proc_visitor can do
for_each_thread itself or we can trivially extend walk_process_tree() to
do this.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sched.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2334229167f..6261bfc12853 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3053,6 +3053,9 @@ extern bool current_is_single_threaded(void);
 #define for_each_process_thread(p, t)	\
 	for_each_process(p) for_each_thread(p, t)
 
+typedef int (*proc_visitor)(struct task_struct *p, void *data);
+void walk_process_tree(struct task_struct *top, proc_visitor, void *);
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
-- 
cgit v1.2.3


From c3485ee0d560b182e1e0f67d67246718739f0782 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 2 Feb 2017 13:48:05 -0600
Subject: tty_port: Add port client functions

Introduce a client (upward direction) operations struct for tty_port
clients. Initially supported operations are for receiving data and write
wake-up. This will allow for having clients other than an ldisc.

Convert the calls to the ldisc to use the client ops as the default
operations.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-By: Sebastian Reichel <sre@kernel.org>
Tested-By: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 21c0fabfed60..1017e904c0a3 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -217,12 +217,18 @@ struct tty_port_operations {
 	/* Called on the final put of a port */
 	void (*destruct)(struct tty_port *port);
 };
-	
+
+struct tty_port_client_operations {
+	int (*receive_buf)(struct tty_port *port, const unsigned char *, const unsigned char *, size_t);
+	void (*write_wakeup)(struct tty_port *port);
+};
+
 struct tty_port {
 	struct tty_bufhead	buf;		/* Locked internally */
 	struct tty_struct	*tty;		/* Back pointer */
 	struct tty_struct	*itty;		/* internal back ptr */
 	const struct tty_port_operations *ops;	/* Port operations */
+	const struct tty_port_client_operations *client_ops; /* Port client operations */
 	spinlock_t		lock;		/* Lock protecting tty field */
 	int			blocked_open;	/* Waiting to open */
 	int			count;		/* Usage count */
@@ -241,6 +247,7 @@ struct tty_port {
 						   based drain is needed else
 						   set to size of fifo */
 	struct kref		kref;		/* Ref counter */
+	void 			*client_data;
 };
 
 /* tty_port::iflags bits -- use atomic bit ops */
-- 
cgit v1.2.3


From cd6484e1830be260abfba80a9c7d8f65531126d6 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 2 Feb 2017 13:48:07 -0600
Subject: serdev: Introduce new bus for serial attached devices

The serdev bus is designed for devices such as Bluetooth, WiFi, GPS
and NFC connected to UARTs on host processors. Tradionally these have
been handled with tty line disciplines, rfkill, and userspace glue such
as hciattach. This approach has many drawbacks since it doesn't fit
into the Linux driver model. Handling of sideband signals, power control
and firmware loading are the main issues.

This creates a serdev bus with controllers (i.e. host serial ports) and
attached devices. Typically, these are point to point connections, but
some devices have muxing protocols or a h/w mux is conceivable. Any
muxing is not yet supported with the serdev bus.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-By: Sebastian Reichel <sre@kernel.org>
Tested-By: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serdev.h | 241 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 241 insertions(+)
 create mode 100644 include/linux/serdev.h

(limited to 'include/linux')

diff --git a/include/linux/serdev.h b/include/linux/serdev.h
new file mode 100644
index 000000000000..3ac26c1a8aab
--- /dev/null
+++ b/include/linux/serdev.h
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2016-2017 Linaro Ltd., Rob Herring <robh@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef _LINUX_SERDEV_H
+#define _LINUX_SERDEV_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+
+struct serdev_controller;
+struct serdev_device;
+
+/*
+ * serdev device structures
+ */
+
+/**
+ * struct serdev_device_ops - Callback operations for a serdev device
+ * @receive_buf:	Function called with data received from device.
+ * @write_wakeup:	Function called when ready to transmit more data.
+ */
+struct serdev_device_ops {
+	int (*receive_buf)(struct serdev_device *, const unsigned char *, size_t);
+	void (*write_wakeup)(struct serdev_device *);
+};
+
+/**
+ * struct serdev_device - Basic representation of an serdev device
+ * @dev:	Driver model representation of the device.
+ * @nr:		Device number on serdev bus.
+ * @ctrl:	serdev controller managing this device.
+ * @ops:	Device operations.
+ */
+struct serdev_device {
+	struct device dev;
+	int nr;
+	struct serdev_controller *ctrl;
+	const struct serdev_device_ops *ops;
+};
+
+static inline struct serdev_device *to_serdev_device(struct device *d)
+{
+	return container_of(d, struct serdev_device, dev);
+}
+
+/**
+ * struct serdev_device_driver - serdev slave device driver
+ * @driver:	serdev device drivers should initialize name field of this
+ *		structure.
+ * @probe:	binds this driver to a serdev device.
+ * @remove:	unbinds this driver from the serdev device.
+ */
+struct serdev_device_driver {
+	struct device_driver driver;
+	int	(*probe)(struct serdev_device *);
+	void	(*remove)(struct serdev_device *);
+};
+
+static inline struct serdev_device_driver *to_serdev_device_driver(struct device_driver *d)
+{
+	return container_of(d, struct serdev_device_driver, driver);
+}
+
+/*
+ * serdev controller structures
+ */
+struct serdev_controller_ops {
+	int (*write_buf)(struct serdev_controller *, const unsigned char *, size_t);
+	void (*write_flush)(struct serdev_controller *);
+	int (*write_room)(struct serdev_controller *);
+	int (*open)(struct serdev_controller *);
+	void (*close)(struct serdev_controller *);
+	void (*set_flow_control)(struct serdev_controller *, bool);
+	unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int);
+};
+
+/**
+ * struct serdev_controller - interface to the serdev controller
+ * @dev:	Driver model representation of the device.
+ * @nr:		number identifier for this controller/bus.
+ * @serdev:	Pointer to slave device for this controller.
+ * @ops:	Controller operations.
+ */
+struct serdev_controller {
+	struct device		dev;
+	unsigned int		nr;
+	struct serdev_device	*serdev;
+	const struct serdev_controller_ops *ops;
+};
+
+static inline struct serdev_controller *to_serdev_controller(struct device *d)
+{
+	return container_of(d, struct serdev_controller, dev);
+}
+
+static inline void *serdev_device_get_drvdata(const struct serdev_device *serdev)
+{
+	return dev_get_drvdata(&serdev->dev);
+}
+
+static inline void serdev_device_set_drvdata(struct serdev_device *serdev, void *data)
+{
+	dev_set_drvdata(&serdev->dev, data);
+}
+
+/**
+ * serdev_device_put() - decrement serdev device refcount
+ * @serdev	serdev device.
+ */
+static inline void serdev_device_put(struct serdev_device *serdev)
+{
+	if (serdev)
+		put_device(&serdev->dev);
+}
+
+static inline void serdev_device_set_client_ops(struct serdev_device *serdev,
+					      const struct serdev_device_ops *ops)
+{
+	serdev->ops = ops;
+}
+
+static inline
+void *serdev_controller_get_drvdata(const struct serdev_controller *ctrl)
+{
+	return ctrl ? dev_get_drvdata(&ctrl->dev) : NULL;
+}
+
+static inline void serdev_controller_set_drvdata(struct serdev_controller *ctrl,
+					       void *data)
+{
+	dev_set_drvdata(&ctrl->dev, data);
+}
+
+/**
+ * serdev_controller_put() - decrement controller refcount
+ * @ctrl	serdev controller.
+ */
+static inline void serdev_controller_put(struct serdev_controller *ctrl)
+{
+	if (ctrl)
+		put_device(&ctrl->dev);
+}
+
+struct serdev_device *serdev_device_alloc(struct serdev_controller *);
+int serdev_device_add(struct serdev_device *);
+void serdev_device_remove(struct serdev_device *);
+
+struct serdev_controller *serdev_controller_alloc(struct device *, size_t);
+int serdev_controller_add(struct serdev_controller *);
+void serdev_controller_remove(struct serdev_controller *);
+
+static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl)
+{
+	struct serdev_device *serdev = ctrl->serdev;
+
+	if (!serdev || !serdev->ops->write_wakeup)
+		return;
+
+	serdev->ops->write_wakeup(ctrl->serdev);
+}
+
+static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl,
+					      const unsigned char *data,
+					      size_t count)
+{
+	struct serdev_device *serdev = ctrl->serdev;
+
+	if (!serdev || !serdev->ops->receive_buf)
+		return -EINVAL;
+
+	return serdev->ops->receive_buf(ctrl->serdev, data, count);
+}
+
+#if IS_ENABLED(CONFIG_SERIAL_DEV_BUS)
+
+int serdev_device_open(struct serdev_device *);
+void serdev_device_close(struct serdev_device *);
+unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int);
+void serdev_device_set_flow_control(struct serdev_device *, bool);
+int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t);
+void serdev_device_write_flush(struct serdev_device *);
+int serdev_device_write_room(struct serdev_device *);
+
+/*
+ * serdev device driver functions
+ */
+int __serdev_device_driver_register(struct serdev_device_driver *, struct module *);
+#define serdev_device_driver_register(sdrv) \
+	__serdev_device_driver_register(sdrv, THIS_MODULE)
+
+/**
+ * serdev_device_driver_unregister() - unregister an serdev client driver
+ * @sdrv:	the driver to unregister
+ */
+static inline void serdev_device_driver_unregister(struct serdev_device_driver *sdrv)
+{
+	if (sdrv)
+		driver_unregister(&sdrv->driver);
+}
+
+#define module_serdev_device_driver(__serdev_device_driver) \
+	module_driver(__serdev_device_driver, serdev_device_driver_register, \
+			serdev_device_driver_unregister)
+
+#else
+
+static inline int serdev_device_open(struct serdev_device *sdev)
+{
+	return -ENODEV;
+}
+static inline void serdev_device_close(struct serdev_device *sdev) {}
+static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev, unsigned int baudrate)
+{
+	return 0;
+}
+static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {}
+static inline int serdev_device_write_buf(struct serdev_device *sdev, const unsigned char *buf, size_t count)
+{
+	return -ENODEV;
+}
+static inline void serdev_device_write_flush(struct serdev_device *sdev) {}
+static inline int serdev_device_write_room(struct serdev_device *sdev)
+{
+	return 0;
+}
+
+#define serdev_device_driver_register(x)
+#define serdev_device_driver_unregister(x)
+
+#endif /* CONFIG_SERIAL_DEV_BUS */
+
+#endif /*_LINUX_SERDEV_H */
-- 
cgit v1.2.3


From bed35c6dfa6a36233c3e1238a40dc1ae67955898 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 2 Feb 2017 13:48:08 -0600
Subject: serdev: add a tty port controller driver

Add a serdev controller driver for tty ports.

The controller is registered with serdev when tty ports are registered
with the TTY core. As the TTY core is built-in only, this has the side
effect of making serdev built-in as well.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-By: Sebastian Reichel <sre@kernel.org>
Tested-By: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serdev.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index 3ac26c1a8aab..9519da6253a8 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -238,4 +238,25 @@ static inline int serdev_device_write_room(struct serdev_device *sdev)
 
 #endif /* CONFIG_SERIAL_DEV_BUS */
 
+/*
+ * serdev hooks into TTY core
+ */
+struct tty_port;
+struct tty_driver;
+
+#ifdef CONFIG_SERIAL_DEV_CTRL_TTYPORT
+struct device *serdev_tty_port_register(struct tty_port *port,
+					struct device *parent,
+					struct tty_driver *drv, int idx);
+void serdev_tty_port_unregister(struct tty_port *port);
+#else
+static inline struct device *serdev_tty_port_register(struct tty_port *port,
+					   struct device *parent,
+					   struct tty_driver *drv, int idx)
+{
+	return ERR_PTR(-ENODEV);
+}
+static inline void serdev_tty_port_unregister(struct tty_port *port) {}
+#endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */
+
 #endif /*_LINUX_SERDEV_H */
-- 
cgit v1.2.3


From 3bb434cdcc6af3d4e70ba041e6f596e465d11e14 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 1 Feb 2017 15:42:57 +0100
Subject: vmw_vmci: switch to pci_irq_alloc_vectors

Cleans up the IRQ management code a lot, including removing a lot of
state from the per-device structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/vmw_vmci_defs.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 1bd31a38c51e..b724ef7005de 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -54,13 +54,6 @@
 #define VMCI_IMR_DATAGRAM      0x1
 #define VMCI_IMR_NOTIFICATION  0x2
 
-/* Interrupt type. */
-enum {
-	VMCI_INTR_TYPE_INTX = 0,
-	VMCI_INTR_TYPE_MSI = 1,
-	VMCI_INTR_TYPE_MSIX = 2,
-};
-
 /* Maximum MSI/MSI-X interrupt vectors in the device. */
 #define VMCI_MAX_INTRS 2
 
-- 
cgit v1.2.3


From d44fa3d46079dc095c1346fa6e5bc96dca1ead41 Mon Sep 17 00:00:00 2001
From: Agustin Vega-Frias <agustinv@codeaurora.org>
Date: Thu, 2 Feb 2017 18:23:58 -0500
Subject: ACPI: Add support for ResourceSource/IRQ domain mapping

ACPI extended IRQ resources may contain a ResourceSource to specify
an alternate interrupt controller. Introduce acpi_irq_get and use it
to implement ResourceSource/IRQ domain mapping.

The new API is similar to of_irq_get and allows re-initialization
of a platform resource from the ACPI extended IRQ resource, and
provides proper behavior for probe deferral when the domain is not
yet present when called.

Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Tested-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Agustin Vega-Frias <agustinv@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/acpi.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 5b36974ed60a..8e577c2cb0ce 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1153,4 +1153,14 @@ int parse_spcr(bool earlycon);
 static inline int parse_spcr(bool earlycon) { return 0; }
 #endif
 
+#if IS_ENABLED(CONFIG_ACPI_GENERIC_GSI)
+int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res);
+#else
+static inline
+int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res)
+{
+	return -EINVAL;
+}
+#endif
+
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v1.2.3


From 3541f9e8bdebce02458882b66b638d7302c1f616 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Feb 2017 08:04:56 -0800
Subject: tcp: add tcp_mss_clamp() helper

Small cleanup factorizing code doing the TCP_MAXSEG clamping.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f88f4649ba6f..cfc2d9506ce8 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -445,4 +445,13 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
 
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk);
 
+static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
+{
+	/* We use READ_ONCE() here because socket might not be locked.
+	 * This happens for listeners.
+	 */
+	u16 user_mss = READ_ONCE(tp->rx_opt.user_mss);
+
+	return (user_mss && user_mss < mss) ? user_mss : mss;
+}
 #endif	/* _LINUX_TCP_H */
-- 
cgit v1.2.3


From 71810db27c1c853b335675bee335d893bc3d324b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 3 Feb 2017 09:54:06 +0000
Subject: modversions: treat symbol CRCs as 32 bit quantities

The modversion symbol CRCs are emitted as ELF symbols, which allows us
to easily populate the kcrctab sections by relying on the linker to
associate each kcrctab slot with the correct value.

This has a couple of downsides:

 - Given that the CRCs are treated as memory addresses, we waste 4 bytes
   for each CRC on 64 bit architectures,

 - On architectures that support runtime relocation, a R_<arch>_RELATIVE
   relocation entry is emitted for each CRC value, which identifies it
   as a quantity that requires fixing up based on the actual runtime
   load offset of the kernel. This results in corrupted CRCs unless we
   explicitly undo the fixup (and this is currently being handled in the
   core module code)

 - Such runtime relocation entries take up 24 bytes of __init space
   each, resulting in a x8 overhead in [uncompressed] kernel size for
   CRCs.

Switching to explicit 32 bit values on 64 bit architectures fixes most
of these issues, given that 32 bit values are not treated as quantities
that require fixing up based on the actual runtime load offset.  Note
that on some ELF64 architectures [such as PPC64], these 32-bit values
are still emitted as [absolute] runtime relocatable quantities, even if
the value resolves to a build time constant.  Since relative relocations
are always resolved at build time, this patch enables MODULE_REL_CRCS on
powerpc when CONFIG_RELOCATABLE=y, which turns the absolute CRC
references into relative references into .rodata where the actual CRC
value is stored.

So redefine all CRC fields and variables as u32, and redefine the
__CRC_SYMBOL() macro for 64 bit builds to emit the CRC reference using
inline assembler (which is necessary since 64-bit C code cannot use
32-bit types to hold memory addresses, even if they are ultimately
resolved using values that do not exceed 0xffffffff).  To avoid
potential problems with legacy 32-bit architectures using legacy
toolchains, the equivalent C definition of the kcrctab entry is retained
for 32-bit architectures.

Note that this mostly reverts commit d4703aefdbc8 ("module: handle ppc64
relocating kcrctabs when CONFIG_RELOCATABLE=y")

Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/export.h | 14 ++++++++++++++
 include/linux/module.h | 14 +++++++-------
 2 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/export.h b/include/linux/export.h
index 2a0f61fbc731..7473fba6a60c 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -43,6 +43,13 @@ extern struct module __this_module;
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
+#if defined(CONFIG_MODULE_REL_CRCS)
+#define __CRC_SYMBOL(sym, sec)						\
+	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) " - .	\n"	\
+	    "	.previous					\n");
+#elif !defined(CONFIG_64BIT)
 #define __CRC_SYMBOL(sym, sec)						\
 	extern __visible void *__crc_##sym __attribute__((weak));	\
 	static const unsigned long __kcrctab_##sym			\
@@ -50,6 +57,13 @@ extern struct module __this_module;
 	__attribute__((section("___kcrctab" sec "+" #sym), used))	\
 	= (unsigned long) &__crc_##sym;
 #else
+#define __CRC_SYMBOL(sym, sec)						\
+	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.previous					\n");
+#endif
+#else
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
diff --git a/include/linux/module.h b/include/linux/module.h
index 7c84273d60b9..cc7cba219b20 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -346,7 +346,7 @@ struct module {
 
 	/* Exported symbols */
 	const struct kernel_symbol *syms;
-	const unsigned long *crcs;
+	const s32 *crcs;
 	unsigned int num_syms;
 
 	/* Kernel parameters. */
@@ -359,18 +359,18 @@ struct module {
 	/* GPL-only exported symbols. */
 	unsigned int num_gpl_syms;
 	const struct kernel_symbol *gpl_syms;
-	const unsigned long *gpl_crcs;
+	const s32 *gpl_crcs;
 
 #ifdef CONFIG_UNUSED_SYMBOLS
 	/* unused exported symbols. */
 	const struct kernel_symbol *unused_syms;
-	const unsigned long *unused_crcs;
+	const s32 *unused_crcs;
 	unsigned int num_unused_syms;
 
 	/* GPL-only, unused exported symbols. */
 	unsigned int num_unused_gpl_syms;
 	const struct kernel_symbol *unused_gpl_syms;
-	const unsigned long *unused_gpl_crcs;
+	const s32 *unused_gpl_crcs;
 #endif
 
 #ifdef CONFIG_MODULE_SIG
@@ -382,7 +382,7 @@ struct module {
 
 	/* symbols that will be GPL-only in the near future. */
 	const struct kernel_symbol *gpl_future_syms;
-	const unsigned long *gpl_future_crcs;
+	const s32 *gpl_future_crcs;
 	unsigned int num_gpl_future_syms;
 
 	/* Exception table */
@@ -523,7 +523,7 @@ struct module *find_module(const char *name);
 
 struct symsearch {
 	const struct kernel_symbol *start, *stop;
-	const unsigned long *crcs;
+	const s32 *crcs;
 	enum {
 		NOT_GPL_ONLY,
 		GPL_ONLY,
@@ -539,7 +539,7 @@ struct symsearch {
  */
 const struct kernel_symbol *find_symbol(const char *name,
 					struct module **owner,
-					const unsigned long **crc,
+					const s32 **crc,
 					bool gplok,
 					bool warn);
 
-- 
cgit v1.2.3


From 4b9eee96fcb361a5e16a8d2619825e8a048f81f7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 3 Feb 2017 09:54:07 +0000
Subject: module: unify absolute krctab definitions for 32-bit and 64-bit

The previous patch introduced a separate inline asm version of the
krcrctab declaration template for use with 64-bit architectures, which
cannot refer to ELF symbols using 32-bit quantities.

This declaration should be equivalent to the C one for 32-bit
architectures, but just in case - unify them in a separate patch, which
can simply be dropped if it turns out to break anything.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/export.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/export.h b/include/linux/export.h
index 7473fba6a60c..1a1dfdb2a5c6 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -49,13 +49,6 @@ extern struct module __this_module;
 	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
 	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) " - .	\n"	\
 	    "	.previous					\n");
-#elif !defined(CONFIG_64BIT)
-#define __CRC_SYMBOL(sym, sec)						\
-	extern __visible void *__crc_##sym __attribute__((weak));	\
-	static const unsigned long __kcrctab_##sym			\
-	__used								\
-	__attribute__((section("___kcrctab" sec "+" #sym), used))	\
-	= (unsigned long) &__crc_##sym;
 #else
 #define __CRC_SYMBOL(sym, sec)						\
 	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
-- 
cgit v1.2.3


From 29905b52fad0854351f57bab867647e4982285bf Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 2 Feb 2017 18:05:26 +0000
Subject: log2: make order_base_2() behave correctly on const input value zero

The function order_base_2() is defined (according to the comment block)
as returning zero on input zero, but subsequently passes the input into
roundup_pow_of_two(), which is explicitly undefined for input zero.

This has gone unnoticed until now, but optimization passes in GCC 7 may
produce constant folded function instances where a constant value of
zero is passed into order_base_2(), resulting in link errors against the
deliberately undefined '____ilog2_NaN'.

So update order_base_2() to adhere to its own documented interface.

[ See

     http://marc.info/?l=linux-kernel&m=147672952517795&w=2

  and follow-up discussion for more background. The gcc "optimization
  pass" is really just broken, but now the GCC trunk problem seems to
  have escaped out of just specially built daily images, so we need to
  work around it in mainline.    - Linus ]

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/log2.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/log2.h b/include/linux/log2.h
index fd7ff3d91e6a..ef3d4f67118c 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -203,6 +203,17 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
  *  ... and so on.
  */
 
-#define order_base_2(n) ilog2(roundup_pow_of_two(n))
+static inline __attribute_const__
+int __order_base_2(unsigned long n)
+{
+	return n > 1 ? ilog2(n - 1) + 1 : 0;
+}
 
+#define order_base_2(n)				\
+(						\
+	__builtin_constant_p(n) ? (		\
+		((n) == 0 || (n) == 1) ? 0 :	\
+		ilog2((n) - 1) + 1) :		\
+	__order_base_2(n)			\
+)
 #endif /* _LINUX_LOG2_H */
-- 
cgit v1.2.3


From 680a0873e193bae666439f4b5e32c758e68f114c Mon Sep 17 00:00:00 2001
From: Andy Gross <andy.gross@linaro.org>
Date: Wed, 1 Feb 2017 11:28:27 -0600
Subject: arm: kernel: Add SMC structure parameter

This patch adds a quirk parameter to the arm_smccc_(smc/hvc) calls.
The quirk structure allows for specialized SMC operations due to SoC
specific requirements.  The current arm_smccc_(smc/hvc) is renamed and
macros are used instead to specify the standard arm_smccc_(smc/hvc) or
the arm_smccc_(smc/hvc)_quirk function.

This patch and partial implementation was suggested by Will Deacon.

Signed-off-by: Andy Gross <andy.gross@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/arm-smccc.h | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index b5abfda80465..c66f8ae94b5a 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -72,33 +72,57 @@ struct arm_smccc_res {
 };
 
 /**
- * arm_smccc_smc() - make SMC calls
+ * struct arm_smccc_quirk - Contains quirk information
+ * @id: quirk identification
+ * @state: quirk specific information
+ * @a6: Qualcomm quirk entry for returning post-smc call contents of a6
+ */
+struct arm_smccc_quirk {
+	int	id;
+	union {
+		unsigned long a6;
+	} state;
+};
+
+/**
+ * __arm_smccc_smc() - make SMC calls
  * @a0-a7: arguments passed in registers 0 to 7
  * @res: result values from registers 0 to 3
+ * @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required.
  *
  * This function is used to make SMC calls following SMC Calling Convention.
  * The content of the supplied param are copied to registers 0 to 7 prior
  * to the SMC instruction. The return values are updated with the content
- * from register 0 to 3 on return from the SMC instruction.
+ * from register 0 to 3 on return from the SMC instruction.  An optional
+ * quirk structure provides vendor specific behavior.
  */
-asmlinkage void arm_smccc_smc(unsigned long a0, unsigned long a1,
+asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1,
 			unsigned long a2, unsigned long a3, unsigned long a4,
 			unsigned long a5, unsigned long a6, unsigned long a7,
-			struct arm_smccc_res *res);
+			struct arm_smccc_res *res, struct arm_smccc_quirk *quirk);
 
 /**
- * arm_smccc_hvc() - make HVC calls
+ * __arm_smccc_hvc() - make HVC calls
  * @a0-a7: arguments passed in registers 0 to 7
  * @res: result values from registers 0 to 3
  *
  * This function is used to make HVC calls following SMC Calling
  * Convention.  The content of the supplied param are copied to registers 0
  * to 7 prior to the HVC instruction. The return values are updated with
- * the content from register 0 to 3 on return from the HVC instruction.
+ * the content from register 0 to 3 on return from the HVC instruction.  An
+ * optional quirk structure provides vendor specific behavior.
  */
-asmlinkage void arm_smccc_hvc(unsigned long a0, unsigned long a1,
+asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 			unsigned long a2, unsigned long a3, unsigned long a4,
 			unsigned long a5, unsigned long a6, unsigned long a7,
-			struct arm_smccc_res *res);
+			struct arm_smccc_res *res, struct arm_smccc_quirk *quirk);
+
+#define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL)
+
+#define arm_smccc_smc_quirk(...) __arm_smccc_smc(__VA_ARGS__)
+
+#define arm_smccc_hvc(...) __arm_smccc_hvc(__VA_ARGS__, NULL)
+
+#define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__)
 
 #endif /*__LINUX_ARM_SMCCC_H*/
-- 
cgit v1.2.3


From 82bcd087029f6056506ea929f11af02622230901 Mon Sep 17 00:00:00 2001
From: Andy Gross <andy.gross@linaro.org>
Date: Wed, 1 Feb 2017 11:28:28 -0600
Subject: firmware: qcom: scm: Fix interrupted SCM calls

This patch adds a Qualcomm specific quirk to the arm_smccc_smc call.

On Qualcomm ARM64 platforms, the SMC call can return before it has
completed.  If this occurs, the call can be restarted, but it requires
using the returned session ID value from the interrupted SMC call.

The quirk stores off the session ID from the interrupted call in the
quirk structure so that it can be used by the caller.

This patch folds in a fix given by Sricharan R:
https://lkml.org/lkml/2016/9/28/272

Signed-off-by: Andy Gross <andy.gross@linaro.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/arm-smccc.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index c66f8ae94b5a..b67934164401 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -14,9 +14,6 @@
 #ifndef __LINUX_ARM_SMCCC_H
 #define __LINUX_ARM_SMCCC_H
 
-#include <linux/linkage.h>
-#include <linux/types.h>
-
 /*
  * This file provides common defines for ARM SMC Calling Convention as
  * specified in
@@ -60,6 +57,13 @@
 #define ARM_SMCCC_OWNER_TRUSTED_OS	50
 #define ARM_SMCCC_OWNER_TRUSTED_OS_END	63
 
+#define ARM_SMCCC_QUIRK_NONE		0
+#define ARM_SMCCC_QUIRK_QCOM_A6		1 /* Save/restore register a6 */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/types.h>
 /**
  * struct arm_smccc_res - Result from SMC/HVC call
  * @a0-a3 result values from registers 0 to 3
@@ -125,4 +129,5 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 
 #define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__)
 
+#endif /*__ASSEMBLY__*/
 #endif /*__LINUX_ARM_SMCCC_H*/
-- 
cgit v1.2.3


From b3c7ef0adadc5768e0baa786213c6bd1ce521a77 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 31 Jan 2017 22:59:53 -0800
Subject: bridge: uapi: add per vlan tunnel info

New nested netlink attribute to associate tunnel info per vlan.
This is used by bridge driver to send tunnel metadata to
bridge ports in vlan tunnel mode. This patch also adds new per
port flag IFLA_BRPORT_VLAN_TUNNEL to enable vlan tunnel mode.
off by default.

One example use for this is a vxlan bridging gateway or vtep
which maps vlans to vn-segments (or vnis). User can configure
per-vlan tunnel information which the bridge driver can use
to bridge vlan into the corresponding vn-segment.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index debc9d5904e5..c5847dc75a93 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -47,6 +47,7 @@ struct br_ip_list {
 #define BR_PROXYARP_WIFI	BIT(10)
 #define BR_MCAST_FLOOD		BIT(11)
 #define BR_MULTICAST_TO_UNICAST	BIT(12)
+#define BR_VLAN_TUNNEL		BIT(13)
 
 #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
 
-- 
cgit v1.2.3


From 3898fac1f488c76e0eef5b5267b4ba8112a82ac4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 2 Feb 2017 17:09:54 +0100
Subject: trace: rename trace_print_hex_seq arg and add kdoc

Steven suggested to improve trace_print_hex_seq() a bit after commit
2acae0d5b0f7 ("trace: add variant without spacing in trace_print_hex_seq")
in two ways: i) by adding a kdoc comment for the helper function
itself and ii) by renaming 'spacing' argument into 'concatenate'
to better denote that we don't add spaces between each hex bytes.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/trace_events.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index cfa475a0e9ca..0f165507495c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -34,7 +34,7 @@ const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
 
 const char *trace_print_hex_seq(struct trace_seq *p,
 				const unsigned char *buf, int len,
-				bool spacing);
+				bool concatenate);
 
 const char *trace_print_array_seq(struct trace_seq *p,
 				   const void *buf, int count,
-- 
cgit v1.2.3


From b862815c3ee7b49ec20a9ab25da55a5f0bcbb95e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 3 Feb 2017 10:29:05 +0100
Subject: list: introduce list_for_each_entry_from_reverse helper

Similar to list_for_each_entry_continue and its reverse variant
list_for_each_entry_continue_reverse, introduce reverse helper for
list_for_each_entry_from.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/list.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index d1039ecaf94f..ae537fa46216 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -526,6 +526,19 @@ static inline void list_splice_tail_init(struct list_head *list,
 	for (; &pos->member != (head);					\
 	     pos = list_next_entry(pos, member))
 
+/**
+ * list_for_each_entry_from_reverse - iterate backwards over list of given type
+ *                                    from the current point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_head within the struct.
+ *
+ * Iterate backwards over list of given type, continuing from current position.
+ */
+#define list_for_each_entry_from_reverse(pos, head, member)		\
+	for (; &pos->member != (head);					\
+	     pos = list_prev_entry(pos, member))
+
 /**
  * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
  * @pos:	the type * to use as a loop cursor.
-- 
cgit v1.2.3


From 44091d29f2075972aede47ef17e1e70db3d51190 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 3 Feb 2017 10:29:06 +0100
Subject: lib: Introduce priority array area manager

This introduces a infrastructure for management of linear priority
areas. Priority order in an array matters, however order of items inside
a priority group does not matter.

As an initial implementation, L-sort algorithm is used. It is quite
trivial. More advanced algorithm called P-sort will be introduced as a
follow-up. The infrastructure is prepared for other algos.

Alongside this, a testing module is introduced as well.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/parman.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 include/linux/parman.h

(limited to 'include/linux')

diff --git a/include/linux/parman.h b/include/linux/parman.h
new file mode 100644
index 000000000000..3c8cccc7d4da
--- /dev/null
+++ b/include/linux/parman.h
@@ -0,0 +1,76 @@
+/*
+ * include/linux/parman.h - Manager for linear priority array areas
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PARMAN_H
+#define _PARMAN_H
+
+#include <linux/list.h>
+
+enum parman_algo_type {
+	PARMAN_ALGO_TYPE_LSORT,
+};
+
+struct parman_item {
+	struct list_head list;
+	unsigned long index;
+};
+
+struct parman_prio {
+	struct list_head list;
+	struct list_head item_list;
+	unsigned long priority;
+};
+
+struct parman_ops {
+	unsigned long base_count;
+	unsigned long resize_step;
+	int (*resize)(void *priv, unsigned long new_count);
+	void (*move)(void *priv, unsigned long from_index,
+		     unsigned long to_index, unsigned long count);
+	enum parman_algo_type algo;
+};
+
+struct parman;
+
+struct parman *parman_create(const struct parman_ops *ops, void *priv);
+void parman_destroy(struct parman *parman);
+void parman_prio_init(struct parman *parman, struct parman_prio *prio,
+		      unsigned long priority);
+void parman_prio_fini(struct parman_prio *prio);
+int parman_item_add(struct parman *parman, struct parman_prio *prio,
+		    struct parman_item *item);
+void parman_item_remove(struct parman *parman, struct parman_prio *prio,
+			struct parman_item *item);
+
+#endif
-- 
cgit v1.2.3


From a96dfddbcc04336bbed50dc2b24823e45e09e80c Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Fri, 3 Feb 2017 13:13:23 -0800
Subject: base/memory, hotplug: fix a kernel oops in show_valid_zones()

Reading a sysfs "memoryN/valid_zones" file leads to the following oops
when the first page of a range is not backed by struct page.
show_valid_zones() assumes that 'start_pfn' is always valid for
page_zone().

 BUG: unable to handle kernel paging request at ffffea017a000000
 IP: show_valid_zones+0x6f/0x160

This issue may happen on x86-64 systems with 64GiB or more memory since
their memory block size is bumped up to 2GiB.  [1] An example of such
systems is desribed below.  0x3240000000 is only aligned by 1GiB and
this memory block starts from 0x3200000000, which is not backed by
struct page.

 BIOS-e820: [mem 0x0000003240000000-0x000000603fffffff] usable

Since test_pages_in_a_zone() already checks holes, fix this issue by
extending this function to return 'valid_start' and 'valid_end' for a
given range.  show_valid_zones() then proceeds with the valid range.

[1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on
    large-memory x86-64 systems")'

Link: http://lkml.kernel.org/r/20170127222149.30893-3-toshi.kani@hpe.com
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Zhang Zhen <zhenzhang.zhang@huawei.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>	[4.4+]

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index c1784c0b4f35..134a2f69c21a 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -85,7 +85,8 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
 /* VM interface that may be used by firmware interface */
 extern int online_pages(unsigned long, unsigned long, int);
-extern int test_pages_in_a_zone(unsigned long, unsigned long);
+extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
+	unsigned long *valid_start, unsigned long *valid_end);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
 
 typedef void (*online_page_callback_t)(struct page *page);
-- 
cgit v1.2.3


From 79e7fff47b7bb4124ef970a13eac4fdeddd1fc25 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Feb 2017 18:43:28 -0800
Subject: net: remove support for per driver ndo_busy_poll()

We added generic support for busy polling in NAPI layer in linux-4.5

No network driver uses ndo_busy_poll() anymore, we can get rid
of the pointer in struct net_device_ops, and its use in sk_busy_loop()

Saves NETIF_F_BUSY_POLL features bit.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 2 --
 include/linux/netdevice.h       | 3 ---
 2 files changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 9c6c8ef2e9e7..9a0419594e84 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -71,7 +71,6 @@ enum {
 	NETIF_F_HW_VLAN_STAG_RX_BIT,	/* Receive VLAN STAG HW acceleration */
 	NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */
 	NETIF_F_HW_L2FW_DOFFLOAD_BIT,	/* Allow L2 Forwarding in Hardware */
-	NETIF_F_BUSY_POLL_BIT,		/* Busy poll */
 
 	NETIF_F_HW_TC_BIT,		/* Offload TC infrastructure */
 
@@ -134,7 +133,6 @@ enum {
 #define NETIF_F_HW_VLAN_STAG_RX	__NETIF_F(HW_VLAN_STAG_RX)
 #define NETIF_F_HW_VLAN_STAG_TX	__NETIF_F(HW_VLAN_STAG_TX)
 #define NETIF_F_HW_L2FW_DOFFLOAD	__NETIF_F(HW_L2FW_DOFFLOAD)
-#define NETIF_F_BUSY_POLL	__NETIF_F(BUSY_POLL)
 #define NETIF_F_HW_TC		__NETIF_F(HW_TC)
 
 #define for_each_netdev_feature(mask_addr, bit)	\
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f3878fbe7786..6f18b509fb2f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1184,9 +1184,6 @@ struct net_device_ops {
 	int			(*ndo_netpoll_setup)(struct net_device *dev,
 						     struct netpoll_info *info);
 	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
-#endif
-#ifdef CONFIG_NET_RX_BUSY_POLL
-	int			(*ndo_busy_poll)(struct napi_struct *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
 						  int queue, u8 *mac);
-- 
cgit v1.2.3


From f9f41e3ef99ac9d4e91b07634362e393fb929aad Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 5 Jan 2017 10:17:27 +0530
Subject: cpufreq: Remove policy create/remove notifiers

Those were added by:

commit fcd7af917abb ("cpufreq: stats: handle cpufreq_unregister_driver()
and suspend/resume properly")

but aren't used anymore since:

commit 1aefc75b2449 ("cpufreq: stats: Make the stats code non-modular").

Remove them. Also remove the redundant parameter to the respective
routines.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 7e05c5e4e45c..a597bb825ae1 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -416,8 +416,6 @@ static inline void cpufreq_resume(void) {}
 #define CPUFREQ_ADJUST			(0)
 #define CPUFREQ_NOTIFY			(1)
 #define CPUFREQ_START			(2)
-#define CPUFREQ_CREATE_POLICY		(3)
-#define CPUFREQ_REMOVE_POLICY		(4)
 
 #ifdef CONFIG_CPU_FREQ
 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list);
-- 
cgit v1.2.3


From 052f573f5cca0ce0a16de409012660565bd792df Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 5 Jan 2017 11:34:31 +0530
Subject: cpufreq: Remove CPUFREQ_START notifier event

Its not used anymore, remove it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index a597bb825ae1..b07838b1fc60 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -415,7 +415,6 @@ static inline void cpufreq_resume(void) {}
 /* Policy Notifiers  */
 #define CPUFREQ_ADJUST			(0)
 #define CPUFREQ_NOTIFY			(1)
-#define CPUFREQ_START			(2)
 
 #ifdef CONFIG_CPU_FREQ
 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list);
-- 
cgit v1.2.3


From 565ebe8073f84ced436a18e76a5ba8e6bb73dfb3 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 3 Feb 2017 15:26:25 +0530
Subject: cpufreq: Fix typos in comments

- s/freqnency/frequency/
- s/accomodating/accommodating/

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index b07838b1fc60..87165f06a307 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -31,7 +31,7 @@
 
 #define CPUFREQ_ETERNAL			(-1)
 #define CPUFREQ_NAME_LEN		16
-/* Print length for names. Extra 1 space for accomodating '\n' in prints */
+/* Print length for names. Extra 1 space for accommodating '\n' in prints */
 #define CPUFREQ_NAME_PLEN		(CPUFREQ_NAME_LEN + 1)
 
 struct cpufreq_governor;
@@ -115,7 +115,7 @@ struct cpufreq_policy {
 	 *   guarantee that frequency can be changed on any CPU sharing the
 	 *   policy and that the change will affect all of the policy CPUs then.
 	 * - fast_switch_enabled is to be set by governors that support fast
-	 *   freqnency switching with the help of cpufreq_enable_fast_switch().
+	 *   frequency switching with the help of cpufreq_enable_fast_switch().
 	 */
 	bool			fast_switch_possible;
 	bool			fast_switch_enabled;
-- 
cgit v1.2.3


From 668802c25729a8e3423015c33c05f1c3be3858e9 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 30 Jan 2017 12:57:43 -0500
Subject: tick/broadcast: Reduce lock cacheline contention

It was observed that on an Intel x86 system without the ARAT (Always
running APIC timer) feature and with fairly large number of CPUs as
well as CPUs coming in and out of intel_idle frequently, the lock
contention on the tick_broadcast_lock can become significant.

To reduce contention, the lock is put into its own cacheline and all
the cpumask_var_t variables are put into the __read_mostly section.

Running the SP benchmark of the NAS Parallel Benchmarks on a 4-socket
16-core 32-thread Nehalam system, the performance number improved
from 3353.94 Mop/s to 3469.31 Mop/s when this patch was applied on
a 4.9.6 kernel.  This is a 3.4% improvement.

Signed-off-by: Waiman Long <longman@redhat.com>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1485799063-20857-1-git-send-email-longman@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpumask.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c717f5ea88cb..23c1a6d09ec5 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -649,11 +649,15 @@ static inline size_t cpumask_size(void)
  * used. Please use this_cpu_cpumask_var_t in those cases. The direct use
  * of this_cpu_ptr() or this_cpu_read() will lead to failures when the
  * other type of cpumask_var_t implementation is configured.
+ *
+ * Please also note that __cpumask_var_read_mostly can be used to declare
+ * a cpumask_var_t variable itself (not its content) as read mostly.
  */
 #ifdef CONFIG_CPUMASK_OFFSTACK
 typedef struct cpumask *cpumask_var_t;
 
-#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x)
+#define this_cpu_cpumask_var_ptr(x)	this_cpu_read(x)
+#define __cpumask_var_read_mostly	__read_mostly
 
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
 bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
@@ -667,6 +671,7 @@ void free_bootmem_cpumask_var(cpumask_var_t mask);
 typedef struct cpumask cpumask_var_t[1];
 
 #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
+#define __cpumask_var_read_mostly
 
 static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 {
-- 
cgit v1.2.3


From 4b0947974e593d52aace18ca5c7e2746fdebae60 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 2 Feb 2017 14:53:10 +0100
Subject: gpio: Rename devm_get_gpiod_from_child()

Rename devm_get_gpiod_from_child() into
devm_fwnode_get_gpiod_from_child() to reflect the fact that this
function is operating on a fwnode object.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 80bad7ebde04..f32f49e96c0b 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -138,11 +138,11 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
 					 const char *propname,
 					 enum gpiod_flags dflags,
 					 const char *label);
-struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-					    const char *con_id,
-					    struct fwnode_handle *child,
-					    enum gpiod_flags flags,
-					    const char *label);
+struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev,
+						   const char *con_id,
+						   struct fwnode_handle *child,
+						   enum gpiod_flags flags,
+						   const char *label);
 #else /* CONFIG_GPIOLIB */
 
 static inline int gpiod_count(struct device *dev, const char *con_id)
@@ -425,11 +425,11 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
 }
 
 static inline
-struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-					    const char *con_id,
-					    struct fwnode_handle *child,
-					    enum gpiod_flags flags,
-					    const char *label)
+struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev,
+						   const char *con_id,
+						   struct fwnode_handle *child,
+						   enum gpiod_flags flags,
+						   const char *label)
 {
 	return ERR_PTR(-ENOSYS);
 }
-- 
cgit v1.2.3


From 537b94dafce29af6a3e923d216472cfc2f3659af Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 2 Feb 2017 14:53:11 +0100
Subject: gpio: Add the devm_fwnode_get_index_gpiod_from_child() helper

devm_fwnode_get_gpiod_from_child() currently allows GPIO users to
request a GPIO that is defined in a child fwnode instead of directly in
the device fwnode.
Extend this API by adding the devm_fwnode_get_index_gpiod_from_child()
helper which does the same except you can also specify an index in case
the 'xx-gpios' property describe several GPIOs.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index f32f49e96c0b..ea9b01d40017 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -135,14 +135,14 @@ int desc_to_gpio(const struct gpio_desc *desc);
 struct fwnode_handle;
 
 struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
-					 const char *propname,
+					 const char *propname, int index,
 					 enum gpiod_flags dflags,
 					 const char *label);
-struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev,
-						   const char *con_id,
-						   struct fwnode_handle *child,
-						   enum gpiod_flags flags,
-						   const char *label);
+struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
+						const char *con_id, int index,
+						struct fwnode_handle *child,
+						enum gpiod_flags flags,
+						const char *label);
 #else /* CONFIG_GPIOLIB */
 
 static inline int gpiod_count(struct device *dev, const char *con_id)
@@ -417,13 +417,25 @@ struct fwnode_handle;
 
 static inline
 struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
-					 const char *propname,
+					 const char *propname, int index,
 					 enum gpiod_flags dflags,
 					 const char *label)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
+static inline
+struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
+						const char *con_id, int index,
+						struct fwnode_handle *child,
+						enum gpiod_flags flags,
+						const char *label)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+#endif /* CONFIG_GPIOLIB */
+
 static inline
 struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev,
 						   const char *con_id,
@@ -431,11 +443,10 @@ struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev,
 						   enum gpiod_flags flags,
 						   const char *label)
 {
-	return ERR_PTR(-ENOSYS);
+	return devm_fwnode_get_index_gpiod_from_child(dev, con_id, 0, child,
+						      flags, label);
 }
 
-#endif /* CONFIG_GPIOLIB */
-
 #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS)
 
 int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
-- 
cgit v1.2.3


From 02c1602ee7b3e3d062c3eacd374d6a6e3a2ebb73 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Feb 2017 15:25:02 -0800
Subject: net: remove __napi_complete()

All __napi_complete() callers have been converted to
use the more standard napi_complete_done(),
we can now remove this NAPI method for good.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6f18b509fb2f..014fbe256d55 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -463,7 +463,6 @@ static inline bool napi_reschedule(struct napi_struct *napi)
 	return false;
 }
 
-bool __napi_complete(struct napi_struct *n);
 bool napi_complete_done(struct napi_struct *n, int work_done);
 /**
  *	napi_complete - NAPI processing complete
-- 
cgit v1.2.3


From 656441478ed55d960df5f3ccdf5a0f8c61dfd0b3 Mon Sep 17 00:00:00 2001
From: Mark Marshall <mark.marshall@omicronenergy.com>
Date: Thu, 26 Jan 2017 16:18:27 +0100
Subject: mtd: nand: ifc: Fix location of eccstat registers for IFC V1.0

The commit 7a654172161c ("mtd/ifc: Add support for IFC controller
version 2.0") added support for version 2.0 of the IFC controller.
The version 2.0 controller has the ECC status registers at a different
location to the previous versions.

Correct the fsl_ifc_nand structure so that the ECC status can be read
from the correct location for both version 1.0 and 2.0 of the controller.

Cc: stable@vger.kernel.org
Fixes: 7a654172161c ("mtd/ifc: Add support for IFC controller version 2.0")
Signed-off-by: Mark Marshall <mark.marshall@omicronenergy.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 include/linux/fsl_ifc.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h
index 3f9778cbc79d..c332f0a45607 100644
--- a/include/linux/fsl_ifc.h
+++ b/include/linux/fsl_ifc.h
@@ -733,8 +733,12 @@ struct fsl_ifc_nand {
 	__be32 nand_erattr1;
 	u32 res19[0x10];
 	__be32 nand_fsr;
-	u32 res20[0x3];
-	__be32 nand_eccstat[6];
+	u32 res20;
+	/* The V1 nand_eccstat is actually 4 words that overlaps the
+	 * V2 nand_eccstat.
+	 */
+	__be32 v1_nand_eccstat[2];
+	__be32 v2_nand_eccstat[6];
 	u32 res21[0x1c];
 	__be32 nanndcr;
 	u32 res22[0x2];
-- 
cgit v1.2.3


From a61d5ce9cc56e2e41bbb1ad62ca7a16d7e7567bd Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 8 Dec 2016 12:58:45 +0200
Subject: net/mlx5: Fix static checker warnings

For some reason, sparse doesn't like using an expression of type (!x)
with a bitwise | and &.  In order to mitigate that, we use a local variable.

This removes the following sparse complaints on the core driver
(and similar ones on the IB driver too):

drivers/net/ethernet/mellanox/mlx5/core/srq.c:83:9: warning: dubious: !x & y
drivers/net/ethernet/mellanox/mlx5/core/srq.c:96:9: warning: dubious: !x & y
drivers/net/ethernet/mellanox/mlx5/core/port.c:59:9: warning: dubious: !x & y
drivers/net/ethernet/mellanox/mlx5/core/vport.c:561:9: warning: dubious: !x & y

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Reported-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 7b6cd67a263f..dd9a263ed368 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -67,10 +67,11 @@
 
 /* insert a value to a struct */
 #define MLX5_SET(typ, p, fld, v) do { \
+	u32 _v = v; \
 	BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32);             \
 	*((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
 	cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \
-		     (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \
+		     (~__mlx5_dw_mask(typ, fld))) | (((_v) & __mlx5_mask(typ, fld)) \
 		     << __mlx5_dw_bit_off(typ, fld))); \
 } while (0)
 
-- 
cgit v1.2.3


From a4077ce5871304f8a78f80b74b18b6052a410f1a Mon Sep 17 00:00:00 2001
From: "Andrey Jr. Melnikov" <temnota.am@gmail.com>
Date: Thu, 8 Dec 2016 19:57:08 +0300
Subject: mtd: nand: Add Winbond manufacturer id

Add WINBOND manufacturer id.

Signed-off-by: Andrey Jr. Melnikov <temnota.am@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 include/linux/mtd/nand.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index f67915c7726f..f6017a1a35d6 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -959,6 +959,7 @@ static inline void nand_set_controller_data(struct nand_chip *chip, void *priv)
 #define NAND_MFR_SANDISK	0x45
 #define NAND_MFR_INTEL		0x89
 #define NAND_MFR_ATO		0x9b
+#define NAND_MFR_WINBOND	0xef
 
 /* The maximum expected count of bytes in the NAND ID sequence */
 #define NAND_MAX_ID_LEN 8
-- 
cgit v1.2.3


From a1831bb9403720db6d4c033fe2d6bd0116dd28fe Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 1 Feb 2017 17:53:04 +0000
Subject: iommu/dma: Remove bogus dma_supported() implementation

Back when this was first written, dma_supported() was somewhat of a
murky mess, with subtly different interpretations being relied upon in
various places. The "does device X support DMA to address range Y?"
uses assuming Y to be physical addresses, which motivated the current
iommu_dma_supported() implementation and are alluded to in the comment
therein, have since been cleaned up, leaving only the far less ambiguous
"can device X drive address bits Y" usage internal to DMA API mask
setting. As such, there is no reason to keep a slightly misleading
callback which does nothing but duplicate the current default behaviour;
we already constrain IOVA allocations to the iommu_domain aperture where
necessary, so let's leave DMA mask business to architecture-specific
code where it belongs.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/dma-iommu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 3a846f9ec0fd..5725c94b1f12 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -67,7 +67,6 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
 		size_t size, enum dma_data_direction dir, unsigned long attrs);
 void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir, unsigned long attrs);
-int iommu_dma_supported(struct device *dev, u64 mask);
 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
 /* The DMA API isn't _quite_ the whole story, though... */
-- 
cgit v1.2.3


From d254586c34538c0014280806c5d4795697cf21e5 Mon Sep 17 00:00:00 2001
From: David Jander <david@protonic.nl>
Date: Fri, 10 Oct 2014 17:30:10 +0200
Subject: can: rx-offload: Add support for HW fifo based irq offloading

Some CAN controllers have a usable FIFO already but can still benefit
from off-loading the CAN controller FIFO. The CAN frames of the FIFO are
read and put into a skb queue during interrupt and then transmitted in a
NAPI context.

Signed-off-by: David Jander <david@protonic.nl>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/rx-offload.h | 51 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 include/linux/can/rx-offload.h

(limited to 'include/linux')

diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
new file mode 100644
index 000000000000..cadfe9869600
--- /dev/null
+++ b/include/linux/can/rx-offload.h
@@ -0,0 +1,51 @@
+/*
+ * linux/can/rx-offload.h
+ *
+ * Copyright (c) 2014 David Jander, Protonic Holland
+ * Copyright (c) 2014-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CAN_RX_OFFLOAD_H
+#define _CAN_RX_OFFLOAD_H
+
+#include <linux/netdevice.h>
+#include <linux/can.h>
+
+struct can_rx_offload {
+	struct net_device *dev;
+
+	unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf, unsigned int mb);
+
+	struct sk_buff_head skb_queue;
+	u32 skb_queue_len_max;
+
+	struct napi_struct napi;
+};
+
+int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight);
+int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload);
+int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb);
+void can_rx_offload_reset(struct can_rx_offload *offload);
+void can_rx_offload_del(struct can_rx_offload *offload);
+void can_rx_offload_enable(struct can_rx_offload *offload);
+
+static inline void can_rx_offload_schedule(struct can_rx_offload *offload)
+{
+	napi_schedule(&offload->napi);
+}
+
+static inline void can_rx_offload_disable(struct can_rx_offload *offload)
+{
+	napi_disable(&offload->napi);
+}
+
+#endif /* !_CAN_RX_OFFLOAD_H */
-- 
cgit v1.2.3


From 3abbac0b5dd3e3b3dfb30a4885cdde5c20e1cb83 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 23 Sep 2014 15:28:21 +0200
Subject: can: rx-offload: Add support for timestamp based irq offloading

Some CAN controllers don't implement a FIFO in hardware, but fill their
mailboxes in a particular order (from lowest to highest or highest to lowest).
This makes problems to read the frames in the correct order from the hardware,
as new frames might be filled into just read (low) mailboxes. This gets worse,
when following new frames are received into not read (higher) mailboxes.

On the bright side some these CAN controllers put a timestamp on each received
CAN frame. This patch adds support to offload CAN frames in interrupt context,
order them by timestamp and then transmitted in a NAPI context.

Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/rx-offload.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index cadfe9869600..cb31683bbe15 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -23,15 +23,23 @@
 struct can_rx_offload {
 	struct net_device *dev;
 
-	unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf, unsigned int mb);
+	unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf,
+				     u32 *timestamp, unsigned int mb);
 
 	struct sk_buff_head skb_queue;
 	u32 skb_queue_len_max;
 
+	unsigned int mb_first;
+	unsigned int mb_last;
+
 	struct napi_struct napi;
+
+	bool inc;
 };
 
+int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *offload);
 int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight);
+int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg);
 int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload);
 int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb);
 void can_rx_offload_reset(struct can_rx_offload *offload);
-- 
cgit v1.2.3


From f32f5bd2eb7e91a5090c06bbe1ed14bffbbda5d3 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Thu, 19 Nov 2015 17:12:26 +0200
Subject: net/mlx5: Configure cache line size for start and end padding

There is a hardware feature that will pad the start or end of a DMA to
be cache line aligned to avoid RMWs on the last cache line. The default
cache line size setting for this feature is 64B. This change configures
the hardware to use 128B alignment on systems with 128B cache lines.

In addition we lower bound MPWRQ stride by HCA cacheline in mlx5e,
MPWRQ stride should be at least the HCA cacheline, the current default
is 64B and in case HCA_CAP.cach_line_128byte capability is set, MPWRQ RX
stride will automatically be aligned to 128B.

Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a919dfb920ae..cc8ae860cd45 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -804,10 +804,12 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_150[0xa];
 	u8         log_max_ra_res_qp[0x6];
 
-	u8         pad_cap[0x1];
+	u8         end_pad[0x1];
 	u8         cc_query_allowed[0x1];
 	u8         cc_modify_allowed[0x1];
-	u8         reserved_at_163[0xd];
+	u8         start_pad[0x1];
+	u8         cache_line_128byte[0x1];
+	u8         reserved_at_163[0xb];
 	u8         gid_table_size[0x10];
 
 	u8         out_of_seq_cnt[0x1];
-- 
cgit v1.2.3


From 2b31f7ae5f645edd852addfca445895b5806f3f9 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Mon, 28 Nov 2016 18:04:50 +0200
Subject: net/mlx5: TX WQE update

Add new TX WQE fields for Connect-X5 vlan insertion support,
type and vlan_tci, when type = MLX5_ETH_WQE_INSERT_VLAN the
HW will insert the vlan and prio fields (vlan_tci) to the packet.

Those bits and the inline header fields are mutually exclusive, and
valid only when:
MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_NOT_REQUIRED
and MLX5_CAP_ETH(mdev, wqe_vlan_insert),
who will be set in ConnectX-5 and later HW generations.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h |  3 ++-
 include/linux/mlx5/qp.h       | 16 ++++++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index cc8ae860cd45..afcd4736d8df 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -577,7 +577,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         lro_cap[0x1];
 	u8         lro_psh_flag[0x1];
 	u8         lro_time_stamp[0x1];
-	u8         reserved_at_5[0x3];
+	u8         reserved_at_5[0x2];
+	u8         wqe_vlan_insert[0x1];
 	u8         self_lb_en_modifiable[0x1];
 	u8         reserved_at_9[0x2];
 	u8         max_lso_cap[0x5];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 219c699c17b7..3096370fe831 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -221,14 +221,26 @@ enum {
 	MLX5_ETH_WQE_L4_CSUM            = 1 << 7,
 };
 
+enum {
+	MLX5_ETH_WQE_INSERT_VLAN        = 1 << 15,
+};
+
 struct mlx5_wqe_eth_seg {
 	u8              rsvd0[4];
 	u8              cs_flags;
 	u8              rsvd1;
 	__be16          mss;
 	__be32          rsvd2;
-	__be16          inline_hdr_sz;
-	u8              inline_hdr_start[2];
+	union {
+		struct {
+			__be16 sz;
+			u8     start[2];
+		} inline_hdr;
+		struct {
+			__be16 type;
+			__be16 vlan_tci;
+		} insert;
+	};
 };
 
 struct mlx5_wqe_xrc_seg {
-- 
cgit v1.2.3


From a8eca326151ee1beac82a4fd86d9edad3a37aaed Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 6 Feb 2017 16:20:14 +0100
Subject: net: remove ndo_neigh_{construct, destroy} from stacked devices

In commit 18bfb924f000 ("net: introduce default neigh_construct/destroy
ndo calls for L2 upper devices") we added these ndos to stacked devices
such as team and bond, so that calls will be propagated to mlxsw.

However, previous commit removed the reliance on these ndos and no new
users of these ndos have appeared since above mentioned commit. We can
therefore safely remove this dead code.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 014fbe256d55..58afbd1cc659 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3888,10 +3888,6 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
 				   struct net_device *lower_dev);
 void netdev_lower_state_changed(struct net_device *lower_dev,
 				void *lower_state_info);
-int netdev_default_l2upper_neigh_construct(struct net_device *dev,
-					   struct neighbour *n);
-void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
-					  struct neighbour *n);
 
 /* RSS keys are 40 or 52 bytes long */
 #define NETDEV_RSS_KEY_LEN 52
-- 
cgit v1.2.3


From 455a7b238cd6bc68c4a550cbbd37c1e22b64f71c Mon Sep 17 00:00:00 2001
From: Scott Bauer <scott.bauer@intel.com>
Date: Fri, 3 Feb 2017 12:50:31 -0700
Subject: block: Add Sed-opal library

This patch implements the necessary logic to bring an Opal
enabled drive out of a factory-enabled into a working
Opal state.

This patch set also enables logic to save a password to
be replayed during a resume from suspend.

Signed-off-by: Scott Bauer <scott.bauer@intel.com>
Signed-off-by: Rafael Antognolli <Rafael.Antognolli@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/sed-opal.h | 178 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)
 create mode 100644 include/linux/sed-opal.h

(limited to 'include/linux')

diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
new file mode 100644
index 000000000000..af1a85eae193
--- /dev/null
+++ b/include/linux/sed-opal.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Authors:
+ *    Rafael Antognolli <rafael.antognolli@intel.com>
+ *    Scott  Bauer      <scott.bauer@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef LINUX_OPAL_H
+#define LINUX_OPAL_H
+
+#include <uapi/linux/sed-opal.h>
+#include <linux/kernel.h>
+
+/*
+ * These constant values come from:
+ * SPC-4 section
+ * 6.30 SECURITY PROTOCOL IN command / table 265.
+ */
+enum {
+	TCG_SECP_00 = 0,
+	TCG_SECP_01,
+};
+struct opal_dev;
+
+#define IO_BUFFER_LENGTH 2048
+#define MAX_TOKS 64
+
+typedef int (*opal_step)(struct opal_dev *dev);
+typedef int (sec_send_recv)(struct opal_dev *ctx, u16 spsp, u8 secp,
+			    void *buffer, size_t len, bool send);
+
+
+enum opal_atom_width {
+	OPAL_WIDTH_TINY,
+	OPAL_WIDTH_SHORT,
+	OPAL_WIDTH_MEDIUM,
+	OPAL_WIDTH_LONG,
+	OPAL_WIDTH_TOKEN
+};
+
+/*
+ * Token defs derived from:
+ * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
+ * 3.2.2 Data Stream Encoding
+ */
+enum opal_response_token {
+	OPAL_DTA_TOKENID_BYTESTRING = 0xe0,
+	OPAL_DTA_TOKENID_SINT = 0xe1,
+	OPAL_DTA_TOKENID_UINT = 0xe2,
+	OPAL_DTA_TOKENID_TOKEN = 0xe3, /* actual token is returned */
+	OPAL_DTA_TOKENID_INVALID = 0X0
+};
+
+/*
+ * On the parsed response, we don't store again the toks that are already
+ * stored in the response buffer. Instead, for each token, we just store a
+ * pointer to the position in the buffer where the token starts, and the size
+ * of the token in bytes.
+ */
+struct opal_resp_tok {
+	const u8 *pos;
+	size_t len;
+	enum opal_response_token type;
+	enum opal_atom_width width;
+	union {
+		u64 u;
+		s64 s;
+	} stored;
+};
+
+/*
+ * From the response header it's not possible to know how many tokens there are
+ * on the payload. So we hardcode that the maximum will be MAX_TOKS, and later
+ * if we start dealing with messages that have more than that, we can increase
+ * this number. This is done to avoid having to make two passes through the
+ * response, the first one counting how many tokens we have and the second one
+ * actually storing the positions.
+ */
+struct parsed_resp {
+	int num;
+	struct opal_resp_tok toks[MAX_TOKS];
+};
+
+/**
+ * struct opal_dev - The structure representing a OPAL enabled SED.
+ * @supported: Whether or not OPAL is supported on this controller.
+ * @send_recv: The combined sec_send/sec_recv function pointer.
+ * @opal_step: A series of opal methods that are necessary to complete a command.
+ * @func_data: An array of parameters for the opal methods above.
+ * @state: Describes the current opal_step we're working on.
+ * @dev_lock: Locks the entire opal_dev structure.
+ * @parsed: Parsed response from controller.
+ * @prev_data: Data returned from a method to the controller.
+ * @unlk_lst: A list of Locking ranges to unlock on this device during a resume.
+ */
+struct opal_dev {
+	bool initialized;
+	bool supported;
+	sec_send_recv *send_recv;
+
+	const opal_step *funcs;
+	void **func_data;
+	int state;
+	struct mutex dev_lock;
+	u16 comid;
+	u32 hsn;
+	u32 tsn;
+	u64 align;
+	u64 lowest_lba;
+
+	size_t pos;
+	u8 cmd[IO_BUFFER_LENGTH];
+	u8 resp[IO_BUFFER_LENGTH];
+
+	struct parsed_resp parsed;
+	size_t prev_d_len;
+	void *prev_data;
+
+	struct list_head unlk_lst;
+};
+
+#ifdef CONFIG_BLK_SED_OPAL
+bool opal_unlock_from_suspend(struct opal_dev *dev);
+void init_opal_dev(struct opal_dev *opal_dev, sec_send_recv *send_recv);
+int sed_ioctl(struct opal_dev *dev, unsigned int cmd, unsigned long ptr);
+
+static inline bool is_sed_ioctl(unsigned int cmd)
+{
+	switch (cmd) {
+	case IOC_OPAL_SAVE:
+	case IOC_OPAL_LOCK_UNLOCK:
+	case IOC_OPAL_TAKE_OWNERSHIP:
+	case IOC_OPAL_ACTIVATE_LSP:
+	case IOC_OPAL_SET_PW:
+	case IOC_OPAL_ACTIVATE_USR:
+	case IOC_OPAL_REVERT_TPR:
+	case IOC_OPAL_LR_SETUP:
+	case IOC_OPAL_ADD_USR_TO_LR:
+	case IOC_OPAL_ENABLE_DISABLE_MBR:
+	case IOC_OPAL_ERASE_LR:
+	case IOC_OPAL_SECURE_ERASE_LR:
+		return true;
+	}
+	return false;
+}
+#else
+static inline bool is_sed_ioctl(unsigned int cmd)
+{
+	return false;
+}
+
+static inline int sed_ioctl(struct opal_dev *dev, unsigned int cmd,
+			    unsigned long ptr)
+{
+	return 0;
+}
+static inline bool opal_unlock_from_suspend(struct opal_dev *dev)
+{
+	return false;
+}
+static inline void init_opal_dev(struct opal_dev *opal_dev,
+				 sec_send_recv *send_recv)
+{
+	opal_dev->supported = false;
+	opal_dev->initialized = true;
+}
+#endif /* CONFIG_BLK_SED_OPAL */
+#endif /* LINUX_OPAL_H */
-- 
cgit v1.2.3


From 2aad40d911eeb7dcac91c669f2762a28134f0eb1 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Fri, 27 Jan 2017 03:12:57 -0800
Subject: remoteproc: Move qcom_mdt_loader into drivers/soc/qcom

With the remoteproc parts cleaned out of the MDT loader we can move it
to drivers/soc/qcom.

Acked-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/soc/qcom/mdt_loader.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 include/linux/soc/qcom/mdt_loader.h

(limited to 'include/linux')

diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h
new file mode 100644
index 000000000000..f423001db3a9
--- /dev/null
+++ b/include/linux/soc/qcom/mdt_loader.h
@@ -0,0 +1,18 @@
+#ifndef __QCOM_MDT_LOADER_H__
+#define __QCOM_MDT_LOADER_H__
+
+#include <linux/types.h>
+
+#define QCOM_MDT_TYPE_MASK	(7 << 24)
+#define QCOM_MDT_TYPE_HASH	(2 << 24)
+#define QCOM_MDT_RELOCATABLE	BIT(27)
+
+struct device;
+struct firmware;
+
+ssize_t qcom_mdt_get_size(const struct firmware *fw);
+int qcom_mdt_load(struct device *dev, const struct firmware *fw,
+		  const char *fw_name, int pas_id, void *mem_region,
+		  phys_addr_t mem_phys, size_t mem_size);
+
+#endif
-- 
cgit v1.2.3


From 4d29b2e5ad37a50b186f828d73086cdbf36580bf Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 2 Feb 2017 01:30:49 +0100
Subject: PM / core: Update kerneldoc comments in pm.h

Refresh the struct dev_pm_ops kerneldoc comment, so that it looks
better and is more readable after processing by Sphinx, and drop
the kerneldoc marker from a few other comments ("PM_EVENT_ messages"
and a couple of enum types declarations) which are not proper
kerneldoc and generally confuse Sphinx.

Also change the comment describing struct dev_pm_domain into
a kerneldoc one.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/pm.h | 113 +++++++++++++++++++++++++++--------------------------
 1 file changed, 57 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index f926af41e122..10867b11d503 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -64,24 +64,7 @@ typedef struct pm_message {
 } pm_message_t;
 
 /**
- * struct dev_pm_ops - device PM callbacks
- *
- * Several device power state transitions are externally visible, affecting
- * the state of pending I/O queues and (for drivers that touch hardware)
- * interrupts, wakeups, DMA, and other hardware state.  There may also be
- * internal transitions to various low-power modes which are transparent
- * to the rest of the driver stack (such as a driver that's ON gating off
- * clocks which are not in active use).
- *
- * The externally visible transitions are handled with the help of callbacks
- * included in this structure in such a way that two levels of callbacks are
- * involved.  First, the PM core executes callbacks provided by PM domains,
- * device types, classes and bus types.  They are the subsystem-level callbacks
- * supposed to execute callbacks provided by device drivers, although they may
- * choose not to do that.  If the driver callbacks are executed, they have to
- * collaborate with the subsystem-level callbacks to achieve the goals
- * appropriate for the given system transition, given transition phase and the
- * subsystem the device belongs to.
+ * struct dev_pm_ops - device PM callbacks.
  *
  * @prepare: The principal role of this callback is to prevent new children of
  *	the device from being registered after it has returned (the driver's
@@ -240,34 +223,6 @@ typedef struct pm_message {
  *	driver's interrupt handler, which is guaranteed not to run while
  *	@restore_noirq() is being executed.  Analogous to @resume_noirq().
  *
- * All of the above callbacks, except for @complete(), return error codes.
- * However, the error codes returned by the resume operations, @resume(),
- * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do
- * not cause the PM core to abort the resume transition during which they are
- * returned.  The error codes returned in those cases are only printed by the PM
- * core to the system logs for debugging purposes.  Still, it is recommended
- * that drivers only return error codes from their resume methods in case of an
- * unrecoverable failure (i.e. when the device being handled refuses to resume
- * and becomes unusable) to allow us to modify the PM core in the future, so
- * that it can avoid attempting to handle devices that failed to resume and
- * their children.
- *
- * It is allowed to unregister devices while the above callbacks are being
- * executed.  However, a callback routine must NOT try to unregister the device
- * it was called for, although it may unregister children of that device (for
- * example, if it detects that a child was unplugged while the system was
- * asleep).
- *
- * Refer to Documentation/power/admin-guide/devices.rst for more information about the role
- * of the above callbacks in the system suspend process.
- *
- * There also are callbacks related to runtime power management of devices.
- * Again, these callbacks are executed by the PM core only for subsystems
- * (PM domains, device types, classes and bus types) and the subsystem-level
- * callbacks are supposed to invoke the driver callbacks.  Moreover, the exact
- * actions to be performed by a device driver's callbacks generally depend on
- * the platform and subsystem the device belongs to.
- *
  * @runtime_suspend: Prepare the device for a condition in which it won't be
  *	able to communicate with the CPU(s) and RAM due to power management.
  *	This need not mean that the device should be put into a low-power state.
@@ -287,11 +242,54 @@ typedef struct pm_message {
  *	Check these conditions, and return 0 if it's appropriate to let the PM
  *	core queue a suspend request for the device.
  *
- * Refer to Documentation/power/runtime_pm.txt for more information about the
- * role of the above callbacks in device runtime power management.
+ * Several device power state transitions are externally visible, affecting
+ * the state of pending I/O queues and (for drivers that touch hardware)
+ * interrupts, wakeups, DMA, and other hardware state.  There may also be
+ * internal transitions to various low-power modes which are transparent
+ * to the rest of the driver stack (such as a driver that's ON gating off
+ * clocks which are not in active use).
  *
+ * The externally visible transitions are handled with the help of callbacks
+ * included in this structure in such a way that, typically, two levels of
+ * callbacks are involved.  First, the PM core executes callbacks provided by PM
+ * domains, device types, classes and bus types.  They are the subsystem-level
+ * callbacks expected to execute callbacks provided by device drivers, although
+ * they may choose not to do that.  If the driver callbacks are executed, they
+ * have to collaborate with the subsystem-level callbacks to achieve the goals
+ * appropriate for the given system transition, given transition phase and the
+ * subsystem the device belongs to.
+ *
+ * All of the above callbacks, except for @complete(), return error codes.
+ * However, the error codes returned by @resume(), @thaw(), @restore(),
+ * @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do not cause the PM
+ * core to abort the resume transition during which they are returned.  The
+ * error codes returned in those cases are only printed to the system logs for
+ * debugging purposes.  Still, it is recommended that drivers only return error
+ * codes from their resume methods in case of an unrecoverable failure (i.e.
+ * when the device being handled refuses to resume and becomes unusable) to
+ * allow the PM core to be modified in the future, so that it can avoid
+ * attempting to handle devices that failed to resume and their children.
+ *
+ * It is allowed to unregister devices while the above callbacks are being
+ * executed.  However, a callback routine MUST NOT try to unregister the device
+ * it was called for, although it may unregister children of that device (for
+ * example, if it detects that a child was unplugged while the system was
+ * asleep).
+ *
+ * Refer to Documentation/power/devices.txt for more information about the role
+ * of the above callbacks in the system suspend process.
+ *
+ * There also are callbacks related to runtime power management of devices.
+ * Again, as a rule these callbacks are executed by the PM core for subsystems
+ * (PM domains, device types, classes and bus types) and the subsystem-level
+ * callbacks are expected to invoke the driver callbacks.  Moreover, the exact
+ * actions to be performed by a device driver's callbacks generally depend on
+ * the platform and subsystem the device belongs to.
+ *
+ * Refer to Documentation/power/runtime_pm.txt for more information about the
+ * role of the @runtime_suspend(), @runtime_resume() and @runtime_idle()
+ * callbacks in device runtime power management.
  */
-
 struct dev_pm_ops {
 	int (*prepare)(struct device *dev);
 	void (*complete)(struct device *dev);
@@ -391,7 +389,7 @@ const struct dev_pm_ops name = { \
 	SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
 }
 
-/**
+/*
  * PM_EVENT_ messages
  *
  * The following PM_EVENT_ messages are defined for the internal use of the PM
@@ -487,7 +485,7 @@ const struct dev_pm_ops name = { \
 
 #define PMSG_IS_AUTO(msg)	(((msg).event & PM_EVENT_AUTO) != 0)
 
-/**
+/*
  * Device run-time power management status.
  *
  * These status labels are used internally by the PM core to indicate the
@@ -517,7 +515,7 @@ enum rpm_status {
 	RPM_SUSPENDING,
 };
 
-/**
+/*
  * Device run-time power management request types.
  *
  * RPM_REQ_NONE		Do nothing.
@@ -616,15 +614,18 @@ extern void update_pm_runtime_accounting(struct device *dev);
 extern int dev_pm_get_subsys_data(struct device *dev);
 extern void dev_pm_put_subsys_data(struct device *dev);
 
-/*
- * Power domains provide callbacks that are executed during system suspend,
- * hibernation, system resume and during runtime PM transitions along with
- * subsystem-level and driver-level callbacks.
+/**
+ * struct dev_pm_domain - power management domain representation.
  *
+ * @ops: Power management operations associated with this domain.
  * @detach: Called when removing a device from the domain.
  * @activate: Called before executing probe routines for bus types and drivers.
  * @sync: Called after successful driver probe.
  * @dismiss: Called after unsuccessful driver probe and after driver removal.
+ *
+ * Power domains provide callbacks that are executed during system suspend,
+ * hibernation, system resume and during runtime PM transitions instead of
+ * subsystem-level and driver-level callbacks.
  */
 struct dev_pm_domain {
 	struct dev_pm_ops	ops;
-- 
cgit v1.2.3


From 2728b2d2e5be4b828a523a06089cd605419fc65c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 2 Feb 2017 01:32:13 +0100
Subject: PM / core / docs: Convert sleep states API document to reST

Move the document describing the system sleep state transitions API
for devices to Documentation/driver-api/pm/, convert it to reST and
update it to use current terminology.  Also remove the remaining
reference to the old version of it from pm.h.

The new document still contains references to some documents in the
.txt format that will be converted later.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/pm.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 10867b11d503..a0894bc52bb4 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -276,9 +276,6 @@ typedef struct pm_message {
  * example, if it detects that a child was unplugged while the system was
  * asleep).
  *
- * Refer to Documentation/power/devices.txt for more information about the role
- * of the above callbacks in the system suspend process.
- *
  * There also are callbacks related to runtime power management of devices.
  * Again, as a rule these callbacks are executed by the PM core for subsystems
  * (PM domains, device types, classes and bus types) and the subsystem-level
-- 
cgit v1.2.3


From 88e30752dd47f0a6398cd014af82332f1b9873ea Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Mon, 30 Jan 2017 09:00:07 -0800
Subject: rpmsg: qcom: smd: Return positively when not enabled

Remoteproc treats the error codes returned from the stubbed SMD API as
errors, but the fact that SMD is not enabled should not affect
remoteproc's ability to start the remote processors.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/rpmsg/qcom_smd.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rpmsg/qcom_smd.h b/include/linux/rpmsg/qcom_smd.h
index e674b2e3074b..8ec8b6439b25 100644
--- a/include/linux/rpmsg/qcom_smd.h
+++ b/include/linux/rpmsg/qcom_smd.h
@@ -18,14 +18,12 @@ static inline struct qcom_smd_edge *
 qcom_smd_register_edge(struct device *parent,
 		       struct device_node *node)
 {
-	return ERR_PTR(-ENXIO);
+	return NULL;
 }
 
 static inline int qcom_smd_unregister_edge(struct qcom_smd_edge *edge)
 {
-	/* This shouldn't be possible */
-	WARN_ON(1);
-	return -ENXIO;
+	return 0;
 }
 
 #endif
-- 
cgit v1.2.3


From 1f318a8bafcfba9f0d623f4870c4e890fd22e659 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 1 Feb 2017 18:00:14 +0100
Subject: modules: mark __inittest/__exittest as __maybe_unused

clang warns about unused inline functions by default:

arch/arm/crypto/aes-cipher-glue.c:68:1: warning: unused function '__inittest' [-Wunused-function]
arch/arm/crypto/aes-cipher-glue.c:69:1: warning: unused function '__exittest' [-Wunused-function]

As these appear in every single module, let's just disable the warnings by marking the
two functions as __maybe_unused.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jessica Yu <jeyu@redhat.com>
---
 include/linux/module.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index ef599379f9a4..4e09f469bd7d 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -127,13 +127,13 @@ extern void cleanup_module(void);
 
 /* Each module must use one module_init(). */
 #define module_init(initfn)					\
-	static inline initcall_t __inittest(void)		\
+	static inline initcall_t __maybe_unused __inittest(void)		\
 	{ return initfn; }					\
 	int init_module(void) __attribute__((alias(#initfn)));
 
 /* This is only required if you want to be unloadable. */
 #define module_exit(exitfn)					\
-	static inline exitcall_t __exittest(void)		\
+	static inline exitcall_t __maybe_unused __exittest(void)		\
 	{ return exitfn; }					\
 	void cleanup_module(void) __attribute__((alias(#exitfn)));
 
-- 
cgit v1.2.3


From b6a05c823fc573a65efc4466f174abf05f922e0f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jan 2017 13:18:58 +0100
Subject: scsi: remove eh_timed_out methods in the transport template

Instead define the timeout behavior purely based on the host_template
eh_timed_out method and wire up the existing transport implementations
in the host templates.  This also clears up the confusion that the
transport template method overrides the host template one, so some
drivers have to re-override the transport template one.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/libata.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index c170be548b7f..46e18c0619c6 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1130,6 +1130,7 @@ extern int ata_sas_port_start(struct ata_port *ap);
 extern void ata_sas_port_stop(struct ata_port *ap);
 extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
 extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
+extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
 extern int sata_scr_valid(struct ata_link *link);
 extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
 extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
@@ -1355,6 +1356,7 @@ extern struct device_attribute *ata_common_sdev_attrs[];
 	.proc_name		= drv_name,			\
 	.slave_configure	= ata_scsi_slave_config,	\
 	.slave_destroy		= ata_scsi_slave_destroy,	\
+	.eh_timed_out		= ata_scsi_timed_out,		\
 	.bios_param		= ata_std_bios_param,		\
 	.unlock_native_capacity	= ata_scsi_unlock_native_capacity, \
 	.sdev_attrs		= ata_common_sdev_attrs
-- 
cgit v1.2.3


From 46f47e48008b63f5fd3a3bad8b79ba1a89fb625f Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 24 Jan 2017 10:58:06 -0800
Subject: fscrypt: split supp and notsupp declarations into their own headers

Previously, each filesystem configured without encryption support would
define all the public fscrypt functions to their notsupp_* stubs.  This
list of #defines had to be updated in every filesystem whenever a change
was made to the public fscrypt functions.  To make things more
maintainable now that we have three filesystems using fscrypt, split the
old header fscrypto.h into several new headers.  fscrypt_supp.h contains
the real declarations and is included by filesystems when configured
with encryption support, whereas fscrypt_notsupp.h contains the inline
stubs and is included by filesystems when configured without encryption
support.  fscrypt_common.h contains common declarations needed by both.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/fscrypt_common.h  | 146 +++++++++++++++++
 include/linux/fscrypt_notsupp.h | 168 +++++++++++++++++++
 include/linux/fscrypt_supp.h    |  66 ++++++++
 include/linux/fscrypto.h        | 347 ----------------------------------------
 4 files changed, 380 insertions(+), 347 deletions(-)
 create mode 100644 include/linux/fscrypt_common.h
 create mode 100644 include/linux/fscrypt_notsupp.h
 create mode 100644 include/linux/fscrypt_supp.h
 delete mode 100644 include/linux/fscrypto.h

(limited to 'include/linux')

diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h
new file mode 100644
index 000000000000..547f81592ba1
--- /dev/null
+++ b/include/linux/fscrypt_common.h
@@ -0,0 +1,146 @@
+/*
+ * fscrypt_common.h: common declarations for per-file encryption
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * Written by Michael Halcrow, 2015.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+
+#ifndef _LINUX_FSCRYPT_COMMON_H
+#define _LINUX_FSCRYPT_COMMON_H
+
+#include <linux/key.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
+#include <linux/dcache.h>
+#include <crypto/skcipher.h>
+#include <uapi/linux/fs.h>
+
+#define FS_CRYPTO_BLOCK_SIZE		16
+
+struct fscrypt_info;
+
+struct fscrypt_ctx {
+	union {
+		struct {
+			struct page *bounce_page;	/* Ciphertext page */
+			struct page *control_page;	/* Original page  */
+		} w;
+		struct {
+			struct bio *bio;
+			struct work_struct work;
+		} r;
+		struct list_head free_list;	/* Free list */
+	};
+	u8 flags;				/* Flags */
+};
+
+/**
+ * For encrypted symlinks, the ciphertext length is stored at the beginning
+ * of the string in little-endian format.
+ */
+struct fscrypt_symlink_data {
+	__le16 len;
+	char encrypted_path[1];
+} __packed;
+
+/**
+ * This function is used to calculate the disk space required to
+ * store a filename of length l in encrypted symlink format.
+ */
+static inline u32 fscrypt_symlink_data_len(u32 l)
+{
+	if (l < FS_CRYPTO_BLOCK_SIZE)
+		l = FS_CRYPTO_BLOCK_SIZE;
+	return (l + sizeof(struct fscrypt_symlink_data) - 1);
+}
+
+struct fscrypt_str {
+	unsigned char *name;
+	u32 len;
+};
+
+struct fscrypt_name {
+	const struct qstr *usr_fname;
+	struct fscrypt_str disk_name;
+	u32 hash;
+	u32 minor_hash;
+	struct fscrypt_str crypto_buf;
+};
+
+#define FSTR_INIT(n, l)		{ .name = n, .len = l }
+#define FSTR_TO_QSTR(f)		QSTR_INIT((f)->name, (f)->len)
+#define fname_name(p)		((p)->disk_name.name)
+#define fname_len(p)		((p)->disk_name.len)
+
+/*
+ * fscrypt superblock flags
+ */
+#define FS_CFLG_OWN_PAGES (1U << 1)
+
+/*
+ * crypto opertions for filesystems
+ */
+struct fscrypt_operations {
+	unsigned int flags;
+	const char *key_prefix;
+	int (*get_context)(struct inode *, void *, size_t);
+	int (*prepare_context)(struct inode *);
+	int (*set_context)(struct inode *, const void *, size_t, void *);
+	int (*dummy_context)(struct inode *);
+	bool (*is_encrypted)(struct inode *);
+	bool (*empty_dir)(struct inode *);
+	unsigned (*max_namelen)(struct inode *);
+};
+
+static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
+{
+	if (inode->i_sb->s_cop->dummy_context &&
+				inode->i_sb->s_cop->dummy_context(inode))
+		return true;
+	return false;
+}
+
+static inline bool fscrypt_valid_contents_enc_mode(u32 mode)
+{
+	return (mode == FS_ENCRYPTION_MODE_AES_256_XTS);
+}
+
+static inline bool fscrypt_valid_filenames_enc_mode(u32 mode)
+{
+	return (mode == FS_ENCRYPTION_MODE_AES_256_CTS);
+}
+
+static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
+{
+	if (str->len == 1 && str->name[0] == '.')
+		return true;
+
+	if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
+		return true;
+
+	return false;
+}
+
+static inline struct page *fscrypt_control_page(struct page *page)
+{
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+	return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
+#else
+	WARN_ON_ONCE(1);
+	return ERR_PTR(-EINVAL);
+#endif
+}
+
+static inline int fscrypt_has_encryption_key(const struct inode *inode)
+{
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+	return (inode->i_crypt_info != NULL);
+#else
+	return 0;
+#endif
+}
+
+#endif	/* _LINUX_FSCRYPT_COMMON_H */
diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h
new file mode 100644
index 000000000000..3511ca798804
--- /dev/null
+++ b/include/linux/fscrypt_notsupp.h
@@ -0,0 +1,168 @@
+/*
+ * fscrypt_notsupp.h
+ *
+ * This stubs out the fscrypt functions for filesystems configured without
+ * encryption support.
+ */
+
+#ifndef _LINUX_FSCRYPT_NOTSUPP_H
+#define _LINUX_FSCRYPT_NOTSUPP_H
+
+#include <linux/fscrypt_common.h>
+
+/* crypto.c */
+static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode,
+						  gfp_t gfp_flags)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
+{
+	return;
+}
+
+static inline struct page *fscrypt_encrypt_page(const struct inode *inode,
+						struct page *page,
+						unsigned int len,
+						unsigned int offs,
+						u64 lblk_num, gfp_t gfp_flags)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int fscrypt_decrypt_page(const struct inode *inode,
+				       struct page *page,
+				       unsigned int len, unsigned int offs,
+				       u64 lblk_num)
+{
+	return -EOPNOTSUPP;
+}
+
+
+static inline void fscrypt_restore_control_page(struct page *page)
+{
+	return;
+}
+
+static inline void fscrypt_set_d_op(struct dentry *dentry)
+{
+	return;
+}
+
+static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
+{
+	return;
+}
+
+/* policy.c */
+static inline int fscrypt_ioctl_set_policy(struct file *filp,
+					   const void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_has_permitted_context(struct inode *parent,
+						struct inode *child)
+{
+	return 0;
+}
+
+static inline int fscrypt_inherit_context(struct inode *parent,
+					  struct inode *child,
+					  void *fs_data, bool preload)
+{
+	return -EOPNOTSUPP;
+}
+
+/* keyinfo.c */
+static inline int fscrypt_get_encryption_info(struct inode *inode)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void fscrypt_put_encryption_info(struct inode *inode,
+					       struct fscrypt_info *ci)
+{
+	return;
+}
+
+ /* fname.c */
+static inline int fscrypt_setup_filename(struct inode *dir,
+					 const struct qstr *iname,
+					 int lookup, struct fscrypt_name *fname)
+{
+	if (dir->i_sb->s_cop->is_encrypted(dir))
+		return -EOPNOTSUPP;
+
+	memset(fname, 0, sizeof(struct fscrypt_name));
+	fname->usr_fname = iname;
+	fname->disk_name.name = (unsigned char *)iname->name;
+	fname->disk_name.len = iname->len;
+	return 0;
+}
+
+static inline void fscrypt_free_filename(struct fscrypt_name *fname)
+{
+	return;
+}
+
+static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode,
+					       u32 ilen)
+{
+	/* never happens */
+	WARN_ON(1);
+	return 0;
+}
+
+static inline int fscrypt_fname_alloc_buffer(const struct inode *inode,
+					     u32 ilen,
+					     struct fscrypt_str *crypto_str)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
+{
+	return;
+}
+
+static inline int fscrypt_fname_disk_to_usr(struct inode *inode,
+					    u32 hash, u32 minor_hash,
+					    const struct fscrypt_str *iname,
+					    struct fscrypt_str *oname)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_fname_usr_to_disk(struct inode *inode,
+					    const struct qstr *iname,
+					    struct fscrypt_str *oname)
+{
+	return -EOPNOTSUPP;
+}
+
+/* bio.c */
+static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx,
+					     struct bio *bio)
+{
+	return;
+}
+
+static inline void fscrypt_pullback_bio_page(struct page **page, bool restore)
+{
+	return;
+}
+
+static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
+					sector_t pblk, unsigned int len)
+{
+	return -EOPNOTSUPP;
+}
+
+#endif	/* _LINUX_FSCRYPT_NOTSUPP_H */
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h
new file mode 100644
index 000000000000..a140f47e9b27
--- /dev/null
+++ b/include/linux/fscrypt_supp.h
@@ -0,0 +1,66 @@
+/*
+ * fscrypt_supp.h
+ *
+ * This is included by filesystems configured with encryption support.
+ */
+
+#ifndef _LINUX_FSCRYPT_SUPP_H
+#define _LINUX_FSCRYPT_SUPP_H
+
+#include <linux/fscrypt_common.h>
+
+/* crypto.c */
+extern struct kmem_cache *fscrypt_info_cachep;
+extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
+extern void fscrypt_release_ctx(struct fscrypt_ctx *);
+extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
+						unsigned int, unsigned int,
+						u64, gfp_t);
+extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
+				unsigned int, u64);
+extern void fscrypt_restore_control_page(struct page *);
+
+extern const struct dentry_operations fscrypt_d_ops;
+
+static inline void fscrypt_set_d_op(struct dentry *dentry)
+{
+	d_set_d_op(dentry, &fscrypt_d_ops);
+}
+
+static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
+	spin_unlock(&dentry->d_lock);
+}
+
+/* policy.c */
+extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
+extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
+extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
+extern int fscrypt_inherit_context(struct inode *, struct inode *,
+					void *, bool);
+/* keyinfo.c */
+extern int fscrypt_get_encryption_info(struct inode *);
+extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
+
+/* fname.c */
+extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
+				int lookup, struct fscrypt_name *);
+extern void fscrypt_free_filename(struct fscrypt_name *);
+extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32);
+extern int fscrypt_fname_alloc_buffer(const struct inode *, u32,
+				struct fscrypt_str *);
+extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
+extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
+			const struct fscrypt_str *, struct fscrypt_str *);
+extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
+			struct fscrypt_str *);
+
+/* bio.c */
+extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
+extern void fscrypt_pullback_bio_page(struct page **, bool);
+extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
+				 unsigned int);
+
+#endif	/* _LINUX_FSCRYPT_SUPP_H */
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
deleted file mode 100644
index 715f17b3c6d7..000000000000
--- a/include/linux/fscrypto.h
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * General per-file encryption definition
- *
- * Copyright (C) 2015, Google, Inc.
- *
- * Written by Michael Halcrow, 2015.
- * Modified by Jaegeuk Kim, 2015.
- */
-
-#ifndef _LINUX_FSCRYPTO_H
-#define _LINUX_FSCRYPTO_H
-
-#include <linux/key.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/bio.h>
-#include <linux/dcache.h>
-#include <crypto/skcipher.h>
-#include <uapi/linux/fs.h>
-
-#define FS_CRYPTO_BLOCK_SIZE		16
-
-struct fscrypt_info;
-
-struct fscrypt_ctx {
-	union {
-		struct {
-			struct page *bounce_page;	/* Ciphertext page */
-			struct page *control_page;	/* Original page  */
-		} w;
-		struct {
-			struct bio *bio;
-			struct work_struct work;
-		} r;
-		struct list_head free_list;	/* Free list */
-	};
-	u8 flags;				/* Flags */
-};
-
-/**
- * For encrypted symlinks, the ciphertext length is stored at the beginning
- * of the string in little-endian format.
- */
-struct fscrypt_symlink_data {
-	__le16 len;
-	char encrypted_path[1];
-} __packed;
-
-/**
- * This function is used to calculate the disk space required to
- * store a filename of length l in encrypted symlink format.
- */
-static inline u32 fscrypt_symlink_data_len(u32 l)
-{
-	if (l < FS_CRYPTO_BLOCK_SIZE)
-		l = FS_CRYPTO_BLOCK_SIZE;
-	return (l + sizeof(struct fscrypt_symlink_data) - 1);
-}
-
-struct fscrypt_str {
-	unsigned char *name;
-	u32 len;
-};
-
-struct fscrypt_name {
-	const struct qstr *usr_fname;
-	struct fscrypt_str disk_name;
-	u32 hash;
-	u32 minor_hash;
-	struct fscrypt_str crypto_buf;
-};
-
-#define FSTR_INIT(n, l)		{ .name = n, .len = l }
-#define FSTR_TO_QSTR(f)		QSTR_INIT((f)->name, (f)->len)
-#define fname_name(p)		((p)->disk_name.name)
-#define fname_len(p)		((p)->disk_name.len)
-
-/*
- * fscrypt superblock flags
- */
-#define FS_CFLG_OWN_PAGES (1U << 1)
-
-/*
- * crypto opertions for filesystems
- */
-struct fscrypt_operations {
-	unsigned int flags;
-	const char *key_prefix;
-	int (*get_context)(struct inode *, void *, size_t);
-	int (*prepare_context)(struct inode *);
-	int (*set_context)(struct inode *, const void *, size_t, void *);
-	int (*dummy_context)(struct inode *);
-	bool (*is_encrypted)(struct inode *);
-	bool (*empty_dir)(struct inode *);
-	unsigned (*max_namelen)(struct inode *);
-};
-
-static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
-{
-	if (inode->i_sb->s_cop->dummy_context &&
-				inode->i_sb->s_cop->dummy_context(inode))
-		return true;
-	return false;
-}
-
-static inline bool fscrypt_valid_contents_enc_mode(u32 mode)
-{
-	return (mode == FS_ENCRYPTION_MODE_AES_256_XTS);
-}
-
-static inline bool fscrypt_valid_filenames_enc_mode(u32 mode)
-{
-	return (mode == FS_ENCRYPTION_MODE_AES_256_CTS);
-}
-
-static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
-{
-	if (str->len == 1 && str->name[0] == '.')
-		return true;
-
-	if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
-		return true;
-
-	return false;
-}
-
-static inline struct page *fscrypt_control_page(struct page *page)
-{
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
-	return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
-#else
-	WARN_ON_ONCE(1);
-	return ERR_PTR(-EINVAL);
-#endif
-}
-
-static inline int fscrypt_has_encryption_key(const struct inode *inode)
-{
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
-	return (inode->i_crypt_info != NULL);
-#else
-	return 0;
-#endif
-}
-
-static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
-{
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
-	spin_lock(&dentry->d_lock);
-	dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
-	spin_unlock(&dentry->d_lock);
-#endif
-}
-
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
-extern const struct dentry_operations fscrypt_d_ops;
-#endif
-
-static inline void fscrypt_set_d_op(struct dentry *dentry)
-{
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
-	d_set_d_op(dentry, &fscrypt_d_ops);
-#endif
-}
-
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
-/* crypto.c */
-extern struct kmem_cache *fscrypt_info_cachep;
-extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
-extern void fscrypt_release_ctx(struct fscrypt_ctx *);
-extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
-						unsigned int, unsigned int,
-						u64, gfp_t);
-extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
-				unsigned int, u64);
-extern void fscrypt_restore_control_page(struct page *);
-
-/* policy.c */
-extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
-extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
-extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
-extern int fscrypt_inherit_context(struct inode *, struct inode *,
-					void *, bool);
-/* keyinfo.c */
-extern int fscrypt_get_encryption_info(struct inode *);
-extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
-
-/* fname.c */
-extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
-				int lookup, struct fscrypt_name *);
-extern void fscrypt_free_filename(struct fscrypt_name *);
-extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32);
-extern int fscrypt_fname_alloc_buffer(const struct inode *, u32,
-				struct fscrypt_str *);
-extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
-extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
-			const struct fscrypt_str *, struct fscrypt_str *);
-extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
-			struct fscrypt_str *);
-
-/* bio.c */
-extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
-extern void fscrypt_pullback_bio_page(struct page **, bool);
-extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
-				 unsigned int);
-#endif
-
-/* crypto.c */
-static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(const struct inode *i,
-							gfp_t f)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c)
-{
-	return;
-}
-
-static inline struct page *fscrypt_notsupp_encrypt_page(const struct inode *i,
-						struct page *p,
-						unsigned int len,
-						unsigned int offs,
-						u64 lblk_num, gfp_t f)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline int fscrypt_notsupp_decrypt_page(const struct inode *i, struct page *p,
-						unsigned int len, unsigned int offs,
-						u64 lblk_num)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void fscrypt_notsupp_decrypt_bio_pages(struct fscrypt_ctx *c,
-						struct bio *b)
-{
-	return;
-}
-
-static inline void fscrypt_notsupp_pullback_bio_page(struct page **p, bool b)
-{
-	return;
-}
-
-static inline void fscrypt_notsupp_restore_control_page(struct page *p)
-{
-	return;
-}
-
-static inline int fscrypt_notsupp_zeroout_range(const struct inode *i, pgoff_t p,
-					sector_t s, unsigned int f)
-{
-	return -EOPNOTSUPP;
-}
-
-/* policy.c */
-static inline int fscrypt_notsupp_ioctl_set_policy(struct file *f,
-				const void __user *arg)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int fscrypt_notsupp_ioctl_get_policy(struct file *f,
-				void __user *arg)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int fscrypt_notsupp_has_permitted_context(struct inode *p,
-				struct inode *i)
-{
-	return 0;
-}
-
-static inline int fscrypt_notsupp_inherit_context(struct inode *p,
-				struct inode *i, void *v, bool b)
-{
-	return -EOPNOTSUPP;
-}
-
-/* keyinfo.c */
-static inline int fscrypt_notsupp_get_encryption_info(struct inode *i)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void fscrypt_notsupp_put_encryption_info(struct inode *i,
-					struct fscrypt_info *f)
-{
-	return;
-}
-
- /* fname.c */
-static inline int fscrypt_notsupp_setup_filename(struct inode *dir,
-			const struct qstr *iname,
-			int lookup, struct fscrypt_name *fname)
-{
-	if (dir->i_sb->s_cop->is_encrypted(dir))
-		return -EOPNOTSUPP;
-
-	memset(fname, 0, sizeof(struct fscrypt_name));
-	fname->usr_fname = iname;
-	fname->disk_name.name = (unsigned char *)iname->name;
-	fname->disk_name.len = iname->len;
-	return 0;
-}
-
-static inline void fscrypt_notsupp_free_filename(struct fscrypt_name *fname)
-{
-	return;
-}
-
-static inline u32 fscrypt_notsupp_fname_encrypted_size(struct inode *i, u32 s)
-{
-	/* never happens */
-	WARN_ON(1);
-	return 0;
-}
-
-static inline int fscrypt_notsupp_fname_alloc_buffer(struct inode *inode,
-				u32 ilen, struct fscrypt_str *crypto_str)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void fscrypt_notsupp_fname_free_buffer(struct fscrypt_str *c)
-{
-	return;
-}
-
-static inline int fscrypt_notsupp_fname_disk_to_usr(struct inode *inode,
-			u32 hash, u32 minor_hash,
-			const struct fscrypt_str *iname,
-			struct fscrypt_str *oname)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int fscrypt_notsupp_fname_usr_to_disk(struct inode *inode,
-			const struct qstr *iname,
-			struct fscrypt_str *oname)
-{
-	return -EOPNOTSUPP;
-}
-#endif	/* _LINUX_FSCRYPTO_H */
-- 
cgit v1.2.3


From e58910cdc9f43cda2e52fcdf2fddbdc74e80b2f7 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@fedoraproject.org>
Date: Mon, 6 Feb 2017 11:22:42 +0000
Subject: efi: Add SHIM and image security database GUID definitions

Add the definitions for shim and image security database, both of which
are used widely in various Linux distros.

Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1486380166-31868-4-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 85e9fdaa8d07..d00538a65899 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -611,6 +611,9 @@ void efi_native_runtime_setup(void);
 #define EFI_CONSOLE_OUT_DEVICE_GUID		EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4,  0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
 #define APPLE_PROPERTIES_PROTOCOL_GUID		EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb,  0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0)
 
+#define EFI_IMAGE_SECURITY_DATABASE_GUID	EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596,  0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
+#define EFI_SHIM_LOCK_GUID			EFI_GUID(0x605dab50, 0xe046, 0x4300,  0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
+
 /*
  * This GUID is used to pass to the kernel proper the struct screen_info
  * structure that was populated by the stub based on the GOP protocol instance
-- 
cgit v1.2.3


From de8cb458625c164bb3f93c4e415e479afce8fa9d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 6 Feb 2017 11:22:43 +0000
Subject: efi: Get and store the secure boot status

Get the firmware's secure-boot status in the kernel boot wrapper and stash
it somewhere that the main kernel image can find.

The efi_get_secureboot() function is extracted from the ARM stub and (a)
generalised so that it can be called from x86 and (b) made to use
efi_call_runtime() so that it can be run in mixed-mode.

For x86, it is stored in boot_params and can be overridden by the boot
loader or kexec.  This allows secure-boot mode to be passed on to a new
kernel.

Suggested-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1486380166-31868-5-git-send-email-ard.biesheuvel@linaro.org
[ Small readability edits. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index d00538a65899..94d34e0be24f 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1480,6 +1480,14 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
 bool efi_runtime_disabled(void);
 extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
 
+enum efi_secureboot_mode {
+	efi_secureboot_mode_unset,
+	efi_secureboot_mode_unknown,
+	efi_secureboot_mode_disabled,
+	efi_secureboot_mode_enabled,
+};
+enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table);
+
 /*
  * Arch code can implement the following three template macros, avoiding
  * reptition for the void/non-void return cases of {__,}efi_call_virt():
-- 
cgit v1.2.3


From 4025819d328cd0efc53ee22e01b800631944b7f3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 11:58:42 +0100
Subject: delayacct: Include <uapi/linux/taskstats.h>

include/linux/delayacct.h relies on 'struct taskstats' but does
not include the header that defines it.

This worked so far because files that included <linux/taskstats.h>
also happened to include other headers that included
uapi/linux/taskstats.h.

Fix it.

Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/delayacct.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 6cee17c22313..00e60f79a9cc 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -17,6 +17,7 @@
 #ifndef _LINUX_DELAYACCT_H
 #define _LINUX_DELAYACCT_H
 
+#include <uapi/linux/taskstats.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 
-- 
cgit v1.2.3


From bec84da8d1da6677c458e6eedd8e814eea91b9fc Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 2 Feb 2017 17:41:25 -0800
Subject: device property: allow to constify properties

There is no reason why statically defined properties should be modifiable,
so let's make device_add_properties() and the rest of pset_*() functions to
take const pointers to properties.

This will allow us to mark properties as const/__initconst at definition
sites.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/property.h b/include/linux/property.h
index 856e50b2140c..d37a4498b3ac 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -242,7 +242,7 @@ struct property_entry {
 }
 
 int device_add_properties(struct device *dev,
-			  struct property_entry *properties);
+			  const struct property_entry *properties);
 void device_remove_properties(struct device *dev);
 
 bool device_dma_supported(struct device *dev);
-- 
cgit v1.2.3


From 9426998ce6f8616c48c2834cafbe5616da3f5abd Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 2 Feb 2017 17:41:26 -0800
Subject: device property: constify property arrays values

Data that is fed into property arrays should not be modified, so let's mark
relevant pointers as const. This will allow us making source arrays as
const/__initconst.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/property.h b/include/linux/property.h
index d37a4498b3ac..7a0a1cce5165 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -160,12 +160,12 @@ struct property_entry {
 	bool is_string;
 	union {
 		union {
-			void *raw_data;
-			u8 *u8_data;
-			u16 *u16_data;
-			u32 *u32_data;
-			u64 *u64_data;
-			const char **str;
+			const void *raw_data;
+			const u8 *u8_data;
+			const u16 *u16_data;
+			const u32 *u32_data;
+			const u64 *u64_data;
+			const char * const *str;
 		} pointer;
 		union {
 			unsigned long long raw_data;
-- 
cgit v1.2.3


From 2d479e1fa2d09c5a9518a75a5d21ef2713117946 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 2 Feb 2017 17:41:27 -0800
Subject: device property: export code duplicating array of property entries

When augmenting ACPI-enumerated devices with additional property data based
on DMI info, a module has often several potential property sets, with only
one being active on a given box. In order to save memory it should be
possible to mark everything and __initdata or __initconst, execute DMI
match early, and duplicate relevant properties. Then kernel will discard
the rest of them.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/property.h b/include/linux/property.h
index 7a0a1cce5165..64e3a9c6d95f 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -241,6 +241,11 @@ struct property_entry {
 	.name = _name_,				\
 }
 
+struct property_entry *
+property_entries_dup(const struct property_entry *properties);
+
+void property_entries_free(const struct property_entry *properties);
+
 int device_add_properties(struct device *dev,
 			  const struct property_entry *properties);
 void device_remove_properties(struct device *dev);
-- 
cgit v1.2.3


From febf2407418a2d6c042fcd77b206040449cb9a70 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 6 Feb 2017 18:01:51 +0100
Subject: x86/ACPI: keep x86_cpu_to_acpiid mapping valid on CPU hotplug

We may or may not have all possible CPUs in MADT on boot but in any
case we're overwriting x86_cpu_to_acpiid mapping with U32_MAX when
acpi_register_lapic() is called again on the CPU hotplug path:

acpi_processor_hotadd_init()
  -> acpi_map_cpu()
    -> acpi_register_lapic()

As we have the required acpi_id information in acpi_processor_hotadd_init()
propagate it to acpi_map_cpu() to always keep x86_cpu_to_acpiid
mapping valid.

Reported-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 5b36974ed60a..6ab47e92c65a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -291,7 +291,8 @@ bool acpi_processor_validate_proc_id(int proc_id);
 
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
-int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu);
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
+		 int *pcpu);
 int acpi_unmap_cpu(int cpu);
 int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
-- 
cgit v1.2.3


From af7bd4dc13093bf1477f370722bbab24cf457b91 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 17 Jan 2017 06:34:52 +0200
Subject: vfs: create vfs helper vfs_tmpfile()

Factor out some common vfs bits from do_tmpfile()
to be used by overlayfs for concurrent copy up.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2ba074328894..4a7f3cc9edab 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1561,6 +1561,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
 extern int vfs_whiteout(struct inode *, struct dentry *);
 
+extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
+				  int open_flag);
+
 /*
  * VFS file helper functions.
  */
-- 
cgit v1.2.3


From bfe219d373cadab761373aeea4c70406bc27ea2c Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 31 Jan 2017 10:34:57 +0200
Subject: vfs: wrap write f_ops with file_{start,end}_write()

Before calling write f_ops, call file_start_write() instead
of sb_start_write().

Replace {sb,file}_start_write() for {copy,clone}_file_range() and
for fallocate().

Beyond correct semantics, this avoids freeze protection to sb when
operating on special inodes, such as fallocate() on a blockdev.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4a7f3cc9edab..78c81e6f5d76 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1741,19 +1741,6 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
 
-static inline int do_clone_file_range(struct file *file_in, loff_t pos_in,
-				      struct file *file_out, loff_t pos_out,
-				      u64 len)
-{
-	int ret;
-
-	sb_start_write(file_inode(file_out)->i_sb);
-	ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len);
-	sb_end_write(file_inode(file_out)->i_sb);
-
-	return ret;
-}
-
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
@@ -2564,6 +2551,19 @@ static inline void file_end_write(struct file *file)
 	__sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
 }
 
+static inline int do_clone_file_range(struct file *file_in, loff_t pos_in,
+				      struct file *file_out, loff_t pos_out,
+				      u64 len)
+{
+	int ret;
+
+	file_start_write(file_out);
+	ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len);
+	file_end_write(file_out);
+
+	return ret;
+}
+
 /*
  * get_write_access() gets write permission for a file.
  * put_write_access() releases this write permission.
-- 
cgit v1.2.3


From 9fe7bfce8b3e112e8e08c40deb72ee7e24c6f072 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 2 Feb 2017 19:16:01 -0800
Subject: virtio_net: refactor freeze/restore logic into virtnet reset logic

For XDP we will need to reset the queues to allow for buffer headroom
to be configured. In order to do this we need to essentially run the
freeze()/restore() code path. Unfortunately the locking requirements
between the freeze/restore and reset paths are different however so
we can not simply reuse the code.

This patch refactors the code path and adds a reset helper routine.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d5eb5479a425..04b0d3f95043 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -132,12 +132,16 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev)
 	return container_of(_dev, struct virtio_device, dev);
 }
 
+void virtio_add_status(struct virtio_device *dev, unsigned int status);
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
 void virtio_break_device(struct virtio_device *dev);
 
 void virtio_config_changed(struct virtio_device *dev);
+void virtio_config_disable(struct virtio_device *dev);
+void virtio_config_enable(struct virtio_device *dev);
+int virtio_finalize_features(struct virtio_device *dev);
 #ifdef CONFIG_PM_SLEEP
 int virtio_device_freeze(struct virtio_device *dev);
 int virtio_device_restore(struct virtio_device *dev);
-- 
cgit v1.2.3


From 55601a880690cdeccdb5923c2493f0e3736f8f6b Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 4 Feb 2017 20:02:49 +0100
Subject: net: phy: Add 2000base-x, 2500base-x and rxaui modes

The mv88e6390 ports 9 and 10 supports some additional PHY modes. Add
these modes to the PHY core so they can be used in the binding.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 43474f39ef65..28ae9eafec19 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -81,6 +81,9 @@ typedef enum {
 	PHY_INTERFACE_MODE_MOCA,
 	PHY_INTERFACE_MODE_QSGMII,
 	PHY_INTERFACE_MODE_TRGMII,
+	PHY_INTERFACE_MODE_1000BASEX,
+	PHY_INTERFACE_MODE_2500BASEX,
+	PHY_INTERFACE_MODE_RXAUI,
 	PHY_INTERFACE_MODE_MAX,
 } phy_interface_t;
 
@@ -141,6 +144,12 @@ static inline const char *phy_modes(phy_interface_t interface)
 		return "qsgmii";
 	case PHY_INTERFACE_MODE_TRGMII:
 		return "trgmii";
+	case PHY_INTERFACE_MODE_1000BASEX:
+		return "1000base-x";
+	case PHY_INTERFACE_MODE_2500BASEX:
+		return "2500base-x";
+	case PHY_INTERFACE_MODE_RXAUI:
+		return "rxaui";
 	default:
 		return "unknown";
 	}
-- 
cgit v1.2.3


From 648ea0134069cda7d4940f397bcc6901fb88752a Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 4 Feb 2017 13:02:44 -0800
Subject: net: phy: Allow pre-declaration of MDIO devices

Allow board support code to collect pre-declarations for MDIO devices by
registering them with mdiobus_register_board_info(). SPI and I2C buses
have a similar feature, we were missing this for MDIO devices, but this
is particularly useful for e.g: MDIO-connected switches which need to
provide their port layout (often board-specific) to a MDIO Ethernet
switch driver.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h            |  3 +++
 include/linux/mod_devicetable.h |  1 +
 include/linux/phy.h             | 19 +++++++++++++++++++
 3 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 55a80d73cfc1..ca08ab16ecdc 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -10,6 +10,7 @@
 #define __LINUX_MDIO_H__
 
 #include <uapi/linux/mdio.h>
+#include <linux/mod_devicetable.h>
 
 struct mii_bus;
 
@@ -29,6 +30,7 @@ struct mdio_device {
 
 	const struct dev_pm_ops *pm_ops;
 	struct mii_bus *bus;
+	char modalias[MDIO_NAME_SIZE];
 
 	int (*bus_match)(struct device *dev, struct device_driver *drv);
 	void (*device_free)(struct mdio_device *mdiodev);
@@ -71,6 +73,7 @@ int mdio_device_register(struct mdio_device *mdiodev);
 void mdio_device_remove(struct mdio_device *mdiodev);
 int mdio_driver_register(struct mdio_driver *drv);
 void mdio_driver_unregister(struct mdio_driver *drv);
+int mdio_device_bus_match(struct device *dev, struct device_driver *drv);
 
 static inline bool mdio_phy_id_is_c45(int phy_id)
 {
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 8a57f0b1242d..8850fcaf50db 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -501,6 +501,7 @@ struct platform_device_id {
 	kernel_ulong_t driver_data;
 };
 
+#define MDIO_NAME_SIZE		32
 #define MDIO_MODULE_PREFIX	"mdio:"
 
 #define MDIO_ID_FMT "%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d"
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 28ae9eafec19..d9bdf53e0514 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -886,6 +886,25 @@ void mdio_bus_exit(void);
 
 extern struct bus_type mdio_bus_type;
 
+struct mdio_board_info {
+	const char	*bus_id;
+	char		modalias[MDIO_NAME_SIZE];
+	int		mdio_addr;
+	const void	*platform_data;
+};
+
+#if IS_ENABLED(CONFIG_PHYLIB)
+int mdiobus_register_board_info(const struct mdio_board_info *info,
+				unsigned int n);
+#else
+static inline int mdiobus_register_board_info(const struct mdio_board_info *i,
+					      unsigned int n)
+{
+	return 0;
+}
+#endif
+
+
 /**
  * module_phy_driver() - Helper macro for registering PHY drivers
  * @__phy_drivers: array of PHY drivers to register
-- 
cgit v1.2.3


From b08d46b01e995dd7b653b22d35bd1d958d6ee9b4 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 6 Feb 2017 13:01:16 -0800
Subject: net: phy: bcm7xxx: Add BCM74371 PHY ID

Add the BCM74371 PHY ID to the list of supported chips. This is a 28nm
technology Gigabit PHY SoC.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 5881d1fdc1fb..55e517130311 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -33,6 +33,7 @@
 #define PHY_ID_BCM7425			0x600d86b0
 #define PHY_ID_BCM7429			0x600d8730
 #define PHY_ID_BCM7435			0x600d8750
+#define PHY_ID_BCM74371			0xae0252e0
 #define PHY_ID_BCM7439			0x600d8480
 #define PHY_ID_BCM7439_2		0xae025080
 #define PHY_ID_BCM7445			0x600d8510
-- 
cgit v1.2.3


From 4ff0620354f2b39b9fe2a91c22c4de9d1fba0c8e Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Mon, 6 Feb 2017 23:14:12 +0200
Subject: net: add dst_pending_confirm flag to skbuff

Add new skbuff flag to allow protocols to confirm neighbour.
When same struct dst_entry can be used for many different
neighbours we can not use it for pending confirmations.

Add sock_confirm_neigh() helper to confirm the neighbour and
use it for IPv4, IPv6 and VRF before dst_neigh_output.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c6a78e1892b6..f1adddc1c5ac 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -612,6 +612,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *	@wifi_acked_valid: wifi_acked was set
  *	@wifi_acked: whether frame was acked on wifi or not
  *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
+ *	@dst_pending_confirm: need to confirm neighbour
   *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
  *	@mark: Generic packet mark
@@ -741,6 +742,7 @@ struct sk_buff {
 	__u8			csum_level:2;
 	__u8			csum_bad:1;
 
+	__u8			dst_pending_confirm:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
@@ -3698,6 +3700,16 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 	return skb->queue_mapping != 0;
 }
 
+static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val)
+{
+	skb->dst_pending_confirm = val;
+}
+
+static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb)
+{
+	return skb->dst_pending_confirm != 0;
+}
+
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 {
 #ifdef CONFIG_XFRM
-- 
cgit v1.2.3


From 85c727b5948344c8d559e2fda8925e9ddd41c29a Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Tue, 7 Feb 2017 11:37:56 -0200
Subject: sctp: drop __packed from almost all SCTP structures

__packed is considered harmful as it potentially generates code that
doesn't perform well and its usage should be avoided as much as
possible.

This patch drops __packed from all SCTP structures except one, which is
sctp_signed_cookie. In there it's required, as per changelog on
commit 9834a2bb4970 ("[SCTP]: Fix sctp_cookie alignment in the packet.").

After this patch, no alignment changes neither in x86 or x86_64 and
no exceptions were noticed during testing on both archs.

Code size for SCTP module also didn't change with this patch.

Cc: David Miller <davem@davemloft.net>
Cc: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 80 ++++++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index a9e790685af3..2408c6877ca0 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -62,7 +62,7 @@ typedef struct sctphdr {
 	__be16 dest;
 	__be32 vtag;
 	__le32 checksum;
-} __packed sctp_sctphdr_t;
+} sctp_sctphdr_t;
 
 static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
 {
@@ -74,7 +74,7 @@ typedef struct sctp_chunkhdr {
 	__u8 type;
 	__u8 flags;
 	__be16 length;
-} __packed sctp_chunkhdr_t;
+} sctp_chunkhdr_t;
 
 
 /* Section 3.2.  Chunk Type Values.
@@ -165,7 +165,7 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 };
 typedef struct sctp_paramhdr {
 	__be16 type;
 	__be16 length;
-} __packed sctp_paramhdr_t;
+} sctp_paramhdr_t;
 
 typedef enum {
 
@@ -233,12 +233,12 @@ typedef struct sctp_datahdr {
 	__be16 ssn;
 	__be32 ppid;
 	__u8  payload[0];
-} __packed sctp_datahdr_t;
+} sctp_datahdr_t;
 
 typedef struct sctp_data_chunk {
         sctp_chunkhdr_t chunk_hdr;
         sctp_datahdr_t  data_hdr;
-} __packed sctp_data_chunk_t;
+} sctp_data_chunk_t;
 
 /* DATA Chuck Specific Flags */
 enum {
@@ -264,78 +264,78 @@ typedef struct sctp_inithdr {
 	__be16 num_inbound_streams;
 	__be32 initial_tsn;
 	__u8  params[0];
-} __packed sctp_inithdr_t;
+} sctp_inithdr_t;
 
 typedef struct sctp_init_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_inithdr_t init_hdr;
-} __packed sctp_init_chunk_t;
+} sctp_init_chunk_t;
 
 
 /* Section 3.3.2.1. IPv4 Address Parameter (5) */
 typedef struct sctp_ipv4addr_param {
 	sctp_paramhdr_t param_hdr;
 	struct in_addr  addr;
-} __packed sctp_ipv4addr_param_t;
+} sctp_ipv4addr_param_t;
 
 /* Section 3.3.2.1. IPv6 Address Parameter (6) */
 typedef struct sctp_ipv6addr_param {
 	sctp_paramhdr_t param_hdr;
 	struct in6_addr addr;
-} __packed sctp_ipv6addr_param_t;
+} sctp_ipv6addr_param_t;
 
 /* Section 3.3.2.1 Cookie Preservative (9) */
 typedef struct sctp_cookie_preserve_param {
 	sctp_paramhdr_t param_hdr;
 	__be32          lifespan_increment;
-} __packed sctp_cookie_preserve_param_t;
+} sctp_cookie_preserve_param_t;
 
 /* Section 3.3.2.1 Host Name Address (11) */
 typedef struct sctp_hostname_param {
 	sctp_paramhdr_t param_hdr;
 	uint8_t hostname[0];
-} __packed sctp_hostname_param_t;
+} sctp_hostname_param_t;
 
 /* Section 3.3.2.1 Supported Address Types (12) */
 typedef struct sctp_supported_addrs_param {
 	sctp_paramhdr_t param_hdr;
 	__be16 types[0];
-} __packed sctp_supported_addrs_param_t;
+} sctp_supported_addrs_param_t;
 
 /* Appendix A. ECN Capable (32768) */
 typedef struct sctp_ecn_capable_param {
 	sctp_paramhdr_t param_hdr;
-} __packed sctp_ecn_capable_param_t;
+} sctp_ecn_capable_param_t;
 
 /* ADDIP Section 3.2.6 Adaptation Layer Indication */
 typedef struct sctp_adaptation_ind_param {
 	struct sctp_paramhdr param_hdr;
 	__be32 adaptation_ind;
-} __packed sctp_adaptation_ind_param_t;
+} sctp_adaptation_ind_param_t;
 
 /* ADDIP Section 4.2.7 Supported Extensions Parameter */
 typedef struct sctp_supported_ext_param {
 	struct sctp_paramhdr param_hdr;
 	__u8 chunks[0];
-} __packed sctp_supported_ext_param_t;
+} sctp_supported_ext_param_t;
 
 /* AUTH Section 3.1 Random */
 typedef struct sctp_random_param {
 	sctp_paramhdr_t param_hdr;
 	__u8 random_val[0];
-} __packed sctp_random_param_t;
+} sctp_random_param_t;
 
 /* AUTH Section 3.2 Chunk List */
 typedef struct sctp_chunks_param {
 	sctp_paramhdr_t param_hdr;
 	__u8 chunks[0];
-} __packed sctp_chunks_param_t;
+} sctp_chunks_param_t;
 
 /* AUTH Section 3.3 HMAC Algorithm */
 typedef struct sctp_hmac_algo_param {
 	sctp_paramhdr_t param_hdr;
 	__be16 hmac_ids[0];
-} __packed sctp_hmac_algo_param_t;
+} sctp_hmac_algo_param_t;
 
 /* RFC 2960.  Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2):
  *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
@@ -347,13 +347,13 @@ typedef sctp_init_chunk_t sctp_initack_chunk_t;
 typedef struct sctp_cookie_param {
 	sctp_paramhdr_t p;
 	__u8 body[0];
-} __packed sctp_cookie_param_t;
+} sctp_cookie_param_t;
 
 /* Section 3.3.3.1 Unrecognized Parameters (8) */
 typedef struct sctp_unrecognized_param {
 	sctp_paramhdr_t param_hdr;
 	sctp_paramhdr_t unrecognized;
-} __packed sctp_unrecognized_param_t;
+} sctp_unrecognized_param_t;
 
 
@@ -368,7 +368,7 @@ typedef struct sctp_unrecognized_param {
 typedef struct sctp_gap_ack_block {
 	__be16 start;
 	__be16 end;
-} __packed sctp_gap_ack_block_t;
+} sctp_gap_ack_block_t;
 
 typedef __be32 sctp_dup_tsn_t;
 
@@ -383,12 +383,12 @@ typedef struct sctp_sackhdr {
 	__be16 num_gap_ack_blocks;
 	__be16 num_dup_tsns;
 	sctp_sack_variable_t variable[0];
-} __packed sctp_sackhdr_t;
+} sctp_sackhdr_t;
 
 typedef struct sctp_sack_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_sackhdr_t sack_hdr;
-} __packed sctp_sack_chunk_t;
+} sctp_sack_chunk_t;
 
 
 /* RFC 2960.  Section 3.3.5 Heartbeat Request (HEARTBEAT) (4):
@@ -400,12 +400,12 @@ typedef struct sctp_sack_chunk {
 
 typedef struct sctp_heartbeathdr {
 	sctp_paramhdr_t info;
-} __packed sctp_heartbeathdr_t;
+} sctp_heartbeathdr_t;
 
 typedef struct sctp_heartbeat_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_heartbeathdr_t hb_hdr;
-} __packed sctp_heartbeat_chunk_t;
+} sctp_heartbeat_chunk_t;
 
 
 /* For the abort and shutdown ACK we must carry the init tag in the
@@ -414,7 +414,7 @@ typedef struct sctp_heartbeat_chunk {
  */
 typedef struct sctp_abort_chunk {
         sctp_chunkhdr_t uh;
-} __packed sctp_abort_chunk_t;
+} sctp_abort_chunk_t;
 
 
 /* For the graceful shutdown we must carry the tag (in common header)
@@ -422,12 +422,12 @@ typedef struct sctp_abort_chunk {
  */
 typedef struct sctp_shutdownhdr {
 	__be32 cum_tsn_ack;
-} __packed sctp_shutdownhdr_t;
+} sctp_shutdownhdr_t;
 
 struct sctp_shutdown_chunk_t {
         sctp_chunkhdr_t    chunk_hdr;
         sctp_shutdownhdr_t shutdown_hdr;
-} __packed;
+};
 
 /* RFC 2960.  Section 3.3.10 Operation Error (ERROR) (9) */
 
@@ -435,12 +435,12 @@ typedef struct sctp_errhdr {
 	__be16 cause;
 	__be16 length;
 	__u8  variable[0];
-} __packed sctp_errhdr_t;
+} sctp_errhdr_t;
 
 typedef struct sctp_operr_chunk {
         sctp_chunkhdr_t chunk_hdr;
 	sctp_errhdr_t   err_hdr;
-} __packed sctp_operr_chunk_t;
+} sctp_operr_chunk_t;
 
 /* RFC 2960 3.3.10 - Operation Error
  *
@@ -530,7 +530,7 @@ typedef struct sctp_ecnehdr {
 typedef struct sctp_ecne_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_ecnehdr_t ence_hdr;
-} __packed sctp_ecne_chunk_t;
+} sctp_ecne_chunk_t;
 
 /* RFC 2960.  Appendix A.  Explicit Congestion Notification.
  *   Congestion Window Reduced (CWR) (13)
@@ -542,7 +542,7 @@ typedef struct sctp_cwrhdr {
 typedef struct sctp_cwr_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_cwrhdr_t cwr_hdr;
-} __packed sctp_cwr_chunk_t;
+} sctp_cwr_chunk_t;
 
 /* PR-SCTP
  * 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN)
@@ -593,17 +593,17 @@ typedef struct sctp_cwr_chunk {
 struct sctp_fwdtsn_skip {
 	__be16 stream;
 	__be16 ssn;
-} __packed;
+};
 
 struct sctp_fwdtsn_hdr {
 	__be32 new_cum_tsn;
 	struct sctp_fwdtsn_skip skip[0];
-} __packed;
+};
 
 struct sctp_fwdtsn_chunk {
 	struct sctp_chunkhdr chunk_hdr;
 	struct sctp_fwdtsn_hdr fwdtsn_hdr;
-} __packed;
+};
 
 
 /* ADDIP
@@ -641,17 +641,17 @@ struct sctp_fwdtsn_chunk {
 typedef struct sctp_addip_param {
 	sctp_paramhdr_t	param_hdr;
 	__be32		crr_id;
-} __packed sctp_addip_param_t;
+} sctp_addip_param_t;
 
 typedef struct sctp_addiphdr {
 	__be32	serial;
 	__u8	params[0];
-} __packed sctp_addiphdr_t;
+} sctp_addiphdr_t;
 
 typedef struct sctp_addip_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_addiphdr_t addip_hdr;
-} __packed sctp_addip_chunk_t;
+} sctp_addip_chunk_t;
 
 /* AUTH
  * Section 4.1  Authentication Chunk (AUTH)
@@ -706,12 +706,12 @@ typedef struct sctp_authhdr {
 	__be16 shkey_id;
 	__be16 hmac_id;
 	__u8   hmac[0];
-} __packed sctp_authhdr_t;
+} sctp_authhdr_t;
 
 typedef struct sctp_auth_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_authhdr_t auth_hdr;
-} __packed sctp_auth_chunk_t;
+} sctp_auth_chunk_t;
 
 struct sctp_infox {
 	struct sctp_info *sctpinfo;
-- 
cgit v1.2.3


From 376bc27150f180d9f5eddec6a14117780177589d Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Tue, 19 Apr 2016 08:56:27 +0200
Subject: clockevents: Add a clkevt-of mechanism like clksrc-of

The current code uses the CLOCKSOURCE_OF_DECLARE macro to fill the clksrc
table with a t-uple (name, init_function).

Unfortunately it ends up to the clockevent and the clocksource being
both initialized with this macro. It is not a problem by itself but there
is not a clear distinction between a clockevent and a clocksource in the
code initialization path. Somebody can argue there are the same IP block
and the same DT node. But conceptually from the software side, there are
two distincts entities and as is they should be initialized separetely.
Some drivers which do not have a clocksource end up by using the
CLOCKSOURCE_OF_DECLARE macro to declare a clockevent.

Another result is the fuzzy organization in the clocksource directory,
where the clockevents are implemented in the same file than the
clocksources or file labelled timer-something implementing a clocksource.

This patch provides another macro to specifically declare a clockevent in
the same way than the clocksource and gives the opportunity to write two
separate drivers, one for the clocksource and another for the clockevents.

Hopefully, that can help to do some housework in the directory, perhaps
split the drivers in to entities, for example:
	- clksrc-rockchip.c
	- clkevt-rockchip.c

Also, it gives the possibility to declare clocksources separately in the
DT and then use a clocksource from IP block while while clockevents are
used from another IP block.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/linux/clockchips.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 0d442e34c349..5d3053c34fb3 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -224,4 +224,13 @@ static inline void tick_setup_hrtimer_broadcast(void) { }
 
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
+#define CLOCKEVENT_OF_DECLARE(name, compat, fn) \
+	OF_DECLARE_1_RET(clkevt, name, compat, fn)
+
+#ifdef CONFIG_CLKEVT_PROBE
+extern int clockevent_probe(void);
+#els
+static inline int clockevent_probe(void) { return 0; }
+#endif
+
 #endif /* _LINUX_CLOCKCHIPS_H */
-- 
cgit v1.2.3


From 0f5bf6d0afe4be6e1391908ff2d6dc9730e91550 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Mon, 6 Feb 2017 16:31:58 -0800
Subject: arch: Rename CONFIG_DEBUG_RODATA and CONFIG_DEBUG_MODULE_RONX

Both of these options are poorly named. The features they provide are
necessary for system security and should not be considered debug only.
Change the names to CONFIG_STRICT_KERNEL_RWX and
CONFIG_STRICT_MODULE_RWX to better describe what these options do.

Signed-off-by: Laura Abbott <labbott@redhat.com>
Acked-by: Jessica Yu <jeyu@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/filter.h | 4 ++--
 include/linux/init.h   | 4 ++--
 include/linux/module.h | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index a0934e6c9bab..c6dd53e88711 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -543,7 +543,7 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
-#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+#ifdef CONFIG_STRICT_MODULE_RWX
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
 	set_memory_ro((unsigned long)fp, fp->pages);
@@ -561,7 +561,7 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
 }
-#endif /* CONFIG_DEBUG_SET_MODULE_RONX */
+#endif /* CONFIG_STRICT_MODULE_RWX */
 
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
diff --git a/include/linux/init.h b/include/linux/init.h
index 885c3e6d0f9d..79af0962fd52 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -126,10 +126,10 @@ void prepare_namespace(void);
 void __init load_default_modules(void);
 int __init init_rootfs(void);
 
-#if defined(CONFIG_DEBUG_RODATA) || defined(CONFIG_DEBUG_SET_MODULE_RONX)
+#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX)
 extern bool rodata_enabled;
 #endif
-#ifdef CONFIG_DEBUG_RODATA
+#ifdef CONFIG_STRICT_KERNEL_RWX
 void mark_rodata_ro(void);
 #endif
 
diff --git a/include/linux/module.h b/include/linux/module.h
index 7c84273d60b9..d5afd142818f 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -764,7 +764,7 @@ extern int module_sysfs_initialized;
 
 #define __MODULE_STRING(x) __stringify(x)
 
-#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+#ifdef CONFIG_STRICT_MODULE_RWX
 extern void set_all_modules_text_rw(void);
 extern void set_all_modules_text_ro(void);
 extern void module_enable_ro(const struct module *mod, bool after_init);
-- 
cgit v1.2.3


From 66cd794e3c30b8af3b6befe42a378557efb3114a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 7 Feb 2017 22:40:44 +0200
Subject: nl80211: add HT/VHT capabilities to AP parameters

For the benefit of drivers that rebuild IEs in firmware, parse the
IEs for HT/VHT capabilities and the respective membership selector
in the (extended) supported rates. This avoids duplicating the same
code into all drivers that need this information.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 02768de209d6..0dd9498c694f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1043,8 +1043,9 @@ struct ieee80211_mgmt {
 	} u;
 } __packed __aligned(2);
 
-/* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */
+/* Supported rates membership selectors */
 #define BSS_MEMBERSHIP_SELECTOR_HT_PHY	127
+#define BSS_MEMBERSHIP_SELECTOR_VHT_PHY	126
 
 /* mgmt header + 1 byte category code */
 #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u)
-- 
cgit v1.2.3


From f92bac3b141b8233e34ddf32d227e12bfba07b48 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Tue, 27 Dec 2016 23:16:05 +0900
Subject: printk: rename nmi.c and exported api

A preparation patch for printk_safe work. No functional change.
- rename nmi.c to print_safe.c
- add `printk_safe' prefix to some (which used both by printk-safe
  and printk-nmi) of the exported functions.

Link: http://lkml.kernel.org/r/20161227141611.940-3-sergey.senozhatsky@gmail.com
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Calvin Owens <calvinowens@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/printk.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 3472cc6b7a60..37e933eeffb2 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -147,17 +147,17 @@ void early_printk(const char *s, ...) { }
 #endif
 
 #ifdef CONFIG_PRINTK_NMI
-extern void printk_nmi_init(void);
+extern void printk_safe_init(void);
 extern void printk_nmi_enter(void);
 extern void printk_nmi_exit(void);
-extern void printk_nmi_flush(void);
-extern void printk_nmi_flush_on_panic(void);
+extern void printk_safe_flush(void);
+extern void printk_safe_flush_on_panic(void);
 #else
-static inline void printk_nmi_init(void) { }
+static inline void printk_safe_init(void) { }
 static inline void printk_nmi_enter(void) { }
 static inline void printk_nmi_exit(void) { }
-static inline void printk_nmi_flush(void) { }
-static inline void printk_nmi_flush_on_panic(void) { }
+static inline void printk_safe_flush(void) { }
+static inline void printk_safe_flush_on_panic(void) { }
 #endif /* PRINTK_NMI */
 
 #ifdef CONFIG_PRINTK
-- 
cgit v1.2.3


From 099f1c84c0052ec1b27f1c3942eed5830d86bdbb Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Tue, 27 Dec 2016 23:16:06 +0900
Subject: printk: introduce per-cpu safe_print seq buffer

This patch extends the idea of NMI per-cpu buffers to regions
that may cause recursive printk() calls and possible deadlocks.
Namely, printk() can't handle printk calls from schedule code
or printk() calls from lock debugging code (spin_dump() for instance);
because those may be called with `sem->lock' already taken or any
other `critical' locks (p->pi_lock, etc.). An example of deadlock
can be

 vprintk_emit()
  console_unlock()
   up()                        << raw_spin_lock_irqsave(&sem->lock, flags);
    wake_up_process()
     try_to_wake_up()
      ttwu_queue()
       ttwu_activate()
        activate_task()
         enqueue_task()
          enqueue_task_fair()
           cfs_rq_of()
            task_of()
             WARN_ON_ONCE(!entity_is_task(se))
              vprintk_emit()
               console_trylock()
                down_trylock()
                 raw_spin_lock_irqsave(&sem->lock, flags)
                 ^^^^ deadlock

and some other cases.

Just like in NMI implementation, the solution uses a per-cpu
`printk_func' pointer to 'redirect' printk() calls to a 'safe'
callback, that store messages in a per-cpu buffer and flushes
them back to logbuf buffer later.

Usage example:

 printk()
  printk_safe_enter_irqsave(flags)
  //
  //  any printk() call from here will endup in vprintk_safe(),
  //  that stores messages in a special per-CPU buffer.
  //
  printk_safe_exit_irqrestore(flags)

The 'redirection' mechanism, though, has been reworked, as suggested
by Petr Mladek. Instead of using a per-cpu @print_func callback we now
keep a per-cpu printk-context variable and call either default or nmi
vprintk function depending on its value. printk_nmi_entrer/exit and
printk_safe_enter/exit, thus, just set/celar corresponding bits in
printk-context functions.

The patch only adds printk_safe support, we don't use it yet.

Link: http://lkml.kernel.org/r/20161227141611.940-4-sergey.senozhatsky@gmail.com
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Calvin Owens <calvinowens@fb.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/printk.h | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 37e933eeffb2..571257e0f53d 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -147,17 +147,11 @@ void early_printk(const char *s, ...) { }
 #endif
 
 #ifdef CONFIG_PRINTK_NMI
-extern void printk_safe_init(void);
 extern void printk_nmi_enter(void);
 extern void printk_nmi_exit(void);
-extern void printk_safe_flush(void);
-extern void printk_safe_flush_on_panic(void);
 #else
-static inline void printk_safe_init(void) { }
 static inline void printk_nmi_enter(void) { }
 static inline void printk_nmi_exit(void) { }
-static inline void printk_safe_flush(void) { }
-static inline void printk_safe_flush_on_panic(void) { }
 #endif /* PRINTK_NMI */
 
 #ifdef CONFIG_PRINTK
@@ -209,6 +203,9 @@ void __init setup_log_buf(int early);
 __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...);
 void dump_stack_print_info(const char *log_lvl);
 void show_regs_print_info(const char *log_lvl);
+extern void printk_safe_init(void);
+extern void printk_safe_flush(void);
+extern void printk_safe_flush_on_panic(void);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -268,6 +265,18 @@ static inline void dump_stack_print_info(const char *log_lvl)
 static inline void show_regs_print_info(const char *log_lvl)
 {
 }
+
+static inline void printk_safe_init(void)
+{
+}
+
+static inline void printk_safe_flush(void)
+{
+}
+
+static inline void printk_safe_flush_on_panic(void)
+{
+}
 #endif
 
 extern asmlinkage void dump_stack(void) __cold;
-- 
cgit v1.2.3


From d3e1b617ae20c459627f501b4bc55b1ea91f662b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 2 Feb 2017 17:41:28 -0800
Subject: i2c: allow specify device properties in i2c_board_info

With many drivers converting to using generic device properties, it is
useful to provide array of device properties when instantiating new i2c
client via i2c_board_info and have them automatically added to the device
in question.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/i2c.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 4b45ec46161f..7b23a3316dcb 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -51,6 +51,7 @@ enum i2c_slave_event;
 typedef int (*i2c_slave_cb_t)(struct i2c_client *, enum i2c_slave_event, u8 *);
 
 struct module;
+struct property_entry;
 
 #if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
 /*
@@ -299,6 +300,7 @@ static inline int i2c_slave_event(struct i2c_client *client,
  * @archdata: copied into i2c_client.dev.archdata
  * @of_node: pointer to OpenFirmware device node
  * @fwnode: device node supplied by the platform firmware
+ * @properties: additional device properties for the device
  * @irq: stored in i2c_client.irq
  *
  * I2C doesn't actually support hardware probing, although controllers and
@@ -320,6 +322,7 @@ struct i2c_board_info {
 	struct dev_archdata	*archdata;
 	struct device_node *of_node;
 	struct fwnode_handle *fwnode;
+	const struct property_entry *properties;
 	int		irq;
 };
 
-- 
cgit v1.2.3


From 4f46de9d2eda184b0e49e32bb2a92c6a06b336f5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 20 Dec 2016 15:50:14 +0000
Subject: irqchip/gic-v3-its: Drop deprecated GITS_BASER_TYPE_CPU

During the development of the GICv3/v4 architecture, it was
envisaged to have a CPU table, though the use for it was
never completely clear (the collection table serves that role
pretty well). It ended being dropped before the specification
was published, though it lived on in the driver.

In order to avoid people scratching their head too much, let's do
the same in the kernel.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index e808f8ae6f14..b6e7629f8bb0 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -306,7 +306,7 @@
 #define GITS_BASER_TYPE_NONE		0
 #define GITS_BASER_TYPE_DEVICE		1
 #define GITS_BASER_TYPE_VCPU		2
-#define GITS_BASER_TYPE_CPU		3
+#define GITS_BASER_TYPE_RESERVED3	3
 #define GITS_BASER_TYPE_COLLECTION	4
 #define GITS_BASER_TYPE_RESERVED5	5
 #define GITS_BASER_TYPE_RESERVED6	6
-- 
cgit v1.2.3


From 6a25ad3a9f9832f24df7987227122b8359f05c8e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 20 Dec 2016 15:52:26 +0000
Subject: irqchip/gic-v3-its: Rename MAPVI to MAPTI

Back in the days when the GICv3/v4 architecture was drafted,
the command to an event to an LPI number was called MAPVI.
Later on, and to avoid confusion with the GICv4 command VMAPI,
it was renamed MAPTI. We've carried the old name for a long
time, but it gets in the way of people reading the code in
the light of the public architecture specification.

Just repaint all the references and kill the old definition.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index b6e7629f8bb0..400ed7ad2bff 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -320,8 +320,6 @@
 #define GITS_CMD_MAPD			0x08
 #define GITS_CMD_MAPC			0x09
 #define GITS_CMD_MAPTI			0x0a
-/* older GIC documentation used MAPVI for this command */
-#define GITS_CMD_MAPVI			GITS_CMD_MAPTI
 #define GITS_CMD_MAPI			0x0b
 #define GITS_CMD_MOVI			0x01
 #define GITS_CMD_DISCARD		0x0f
-- 
cgit v1.2.3


From e3c484b183d3bbef0f0df527e10359163f0bb1db Mon Sep 17 00:00:00 2001
From: Alim Akhtar <alim.akhtar@samsung.com>
Date: Wed, 4 Jan 2017 13:54:20 +0530
Subject: irqchip/gic-v3: Remove duplicate definition of GICD_TYPER_LPIS

GICD_TYPER_LPIS macro is defined twice in this file. This patch removes the
duplicate entry.

Fixes: f5c1434c217f ("irqchip: GICv3: rework redistributor structure")
Signed-off-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 400ed7ad2bff..725e86b506f3 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -73,7 +73,6 @@
 
 #define GICD_TYPER_ID_BITS(typer)	((((typer) >> 19) & 0x1f) + 1)
 #define GICD_TYPER_IRQS(typer)		((((typer) & 0x1f) + 1) * 32)
-#define GICD_TYPER_LPIS			(1U << 17)
 
 #define GICD_IROUTER_SPI_MODE_ONE	(0U << 31)
 #define GICD_IROUTER_SPI_MODE_ANY	(1U << 31)
-- 
cgit v1.2.3


From 3046ec674d441562c6bb3e4284cd866743042ef3 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 8 Feb 2017 14:54:12 +0000
Subject: ARM: smccc: Update HVC comment to describe new quirk parameter

Commit 680a0873e193 ("arm: kernel: Add SMC structure parameter") added
a new "quirk" parameter to the SMC and HVC SMCCC backends, but only
updated the comment for the SMC version. This patch adds the new
paramater to the comment describing the HVC version too.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/arm-smccc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index b67934164401..4c5bca38c653 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -109,6 +109,7 @@ asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1,
  * __arm_smccc_hvc() - make HVC calls
  * @a0-a7: arguments passed in registers 0 to 7
  * @res: result values from registers 0 to 3
+ * @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required.
  *
  * This function is used to make HVC calls following SMC Calling
  * Convention.  The content of the supplied param are copied to registers 0
-- 
cgit v1.2.3


From 217e6fa24ce28ec87fca8da93c9016cb78028612 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 7 Feb 2017 15:57:20 -0500
Subject: net: introduce device min_header_len

The stack must not pass packets to device drivers that are shorter
than the minimum link layer header length.

Previously, packet sockets would drop packets smaller than or equal
to dev->hard_header_len, but this has false positives. Zero length
payload is used over Ethernet. Other link layer protocols support
variable length headers. Support for validation of these protocols
removed the min length check for all protocols.

Introduce an explicit dev->min_header_len parameter and drop all
packets below this value. Initially, set it to non-zero only for
Ethernet and loopback. Other protocols can follow in a patch to
net-next.

Fixes: 9ed988cd5915 ("packet: validate variable length ll headers")
Reported-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 70ad0291d517..27914672602d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1511,6 +1511,7 @@ enum netdev_priv_flags {
  *	@max_mtu:	Interface Maximum MTU value
  *	@type:		Interface hardware type
  *	@hard_header_len: Maximum hardware header length.
+ *	@min_header_len:  Minimum hardware header length
  *
  *	@needed_headroom: Extra headroom the hardware may need, but not in all
  *			  cases can this be guaranteed
@@ -1728,6 +1729,7 @@ struct net_device {
 	unsigned int		max_mtu;
 	unsigned short		type;
 	unsigned short		hard_header_len;
+	unsigned short		min_header_len;
 
 	unsigned short		needed_headroom;
 	unsigned short		needed_tailroom;
@@ -2694,6 +2696,8 @@ static inline bool dev_validate_header(const struct net_device *dev,
 {
 	if (likely(len >= dev->hard_header_len))
 		return true;
+	if (len < dev->min_header_len)
+		return false;
 
 	if (capable(CAP_SYS_RAWIO)) {
 		memset(ll_header + len, 0, dev->hard_header_len - len);
-- 
cgit v1.2.3


From be5e5099183301fb7920f8f6b66bd3ac1f820a97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Mon, 16 Jan 2017 17:28:18 +0100
Subject: mtd: bcm47xxsflash: use platform_(set|get)_drvdata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have generic place & helpers for storing platform driver data so
there is no reason for using custom priv pointer.

This allows cleaning up struct bcma_sflash from unneeded fields.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Kalle Valo <kvalo@codeaurora.org>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/bcma/bcma_driver_chipcommon.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index b20e3d56253f..2f1c690a3e66 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -593,9 +593,6 @@ struct bcma_sflash {
 	u32 blocksize;
 	u16 numblocks;
 	u32 size;
-
-	struct mtd_info *mtd;
-	void *priv;
 };
 #endif
 
-- 
cgit v1.2.3


From 21bdbb7102edeaebb5ec4ef530c8f442f7562c96 Mon Sep 17 00:00:00 2001
From: Neil Leeder <nleeder@codeaurora.org>
Date: Tue, 7 Feb 2017 13:14:04 -0500
Subject: perf: add qcom l2 cache perf events driver

Adds perf events support for L2 cache PMU.

The L2 cache PMU driver is named 'l2cache_0' and can be used
with perf events to profile L2 events such as cache hits
and misses on Qualcomm Technologies processors.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Neil Leeder <nleeder@codeaurora.org>
[will: minimise nesting in l2_cache_associate_cpu_with_cluster]
[will: use kstrtoul for unsigned long, remove redunant .owner setting]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 20bfefbe7594..1b7b2075b9cd 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -138,6 +138,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
 	CPUHP_AP_PERF_ARM_CCN_ONLINE,
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
+	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
-- 
cgit v1.2.3


From cbaf58032efca401834518b905f528ac912449e4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 7 Feb 2017 11:58:15 -0500
Subject: svcrdma: Another sendto chunk list parsing update

Commit 5fdca6531434 ("svcrdma: Renovate sendto chunk list parsing")
missed a spot. svc_rdma_xdr_get_reply_hdr_len() also assumes the
Write list has only one Write chunk. There's no harm in making this
code more general.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/rpc_rdma.h | 9 +++++++++
 include/linux/sunrpc/svc_rdma.h | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
index cfda6adcf33c..245fc59b7324 100644
--- a/include/linux/sunrpc/rpc_rdma.h
+++ b/include/linux/sunrpc/rpc_rdma.h
@@ -109,6 +109,15 @@ struct rpcrdma_msg {
 	} rm_body;
 };
 
+/*
+ * XDR sizes, in quads
+ */
+enum {
+	rpcrdma_fixed_maxsz	= 4,
+	rpcrdma_segment_maxsz	= 4,
+	rpcrdma_readchunk_maxsz	= 2 + rpcrdma_segment_maxsz,
+};
+
 /*
  * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks
  */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 757fb963696c..551c51816352 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -218,7 +218,7 @@ extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
 					     struct rpcrdma_msg *,
 					     struct rpcrdma_msg *,
 					     enum rpcrdma_proc);
-extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
+extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp);
 
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
-- 
cgit v1.2.3


From 98fc21d3bfd55a36ce9eb7b32d1ce146f0d1696d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 7 Feb 2017 11:58:23 -0500
Subject: svcrdma: Clean up RPC-over-RDMA Reply header encoder

Replace C structure-based XDR decoding with pointer arithmetic.
Pointer arithmetic is considered more portable, and is used
throughout the kernel's existing XDR encoders. The gcc optimizer
generates similar assembler code either way.

Byte-swapping before a memory store on x86 typically results in an
instruction pipeline stall. Avoid byte-swapping when encoding a new
header.

svcrdma currently doesn't alter a connection's credit grant value
after the connection has been accepted, so it is effectively a
constant. Cache the byte-swapped value in a separate field.

Christoph suggested pulling the header encoding logic into the only
function that uses it.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 551c51816352..25ecf92cae96 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -141,7 +141,8 @@ struct svcxprt_rdma {
 	atomic_t             sc_sq_avail;	/* SQEs ready to be consumed */
 	unsigned int	     sc_sq_depth;	/* Depth of SQ */
 	unsigned int	     sc_rq_depth;	/* Depth of RQ */
-	u32		     sc_max_requests;	/* Forward credits */
+	__be32		     sc_fc_credits;	/* Forward credits */
+	u32		     sc_max_requests;	/* Max requests */
 	u32		     sc_max_bc_requests;/* Backward credits */
 	int                  sc_max_req_size;	/* Size of each RQ WR buf */
 
@@ -214,10 +215,6 @@ extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
 extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
 extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
 					    __be32, __be64, u32);
-extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
-					     struct rpcrdma_msg *,
-					     struct rpcrdma_msg *,
-					     enum rpcrdma_proc);
 extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp);
 
 /* svc_rdma_recvfrom.c */
-- 
cgit v1.2.3


From aba7d14ba18c93a2ab37d50b057a885964ef285c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 7 Feb 2017 11:58:48 -0500
Subject: svcrdma: Remove unused sc_dto_q field

Clean up. Commit be99bb11400c ("svcrdma: Use new CQ API for
RPC-over-RDMA server send CQs") removed code that used the sc_dto_q
field, but neglected to remove sc_dto_q at the same time.

Fixes: be99bb11400c ("svcrdma: Use new CQ API for RPC-over- ...")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 25ecf92cae96..f77a7bc1612c 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -172,7 +172,6 @@ struct svcxprt_rdma {
 
 	wait_queue_head_t    sc_send_wait;	/* SQ exhaustion waitlist */
 	unsigned long	     sc_flags;
-	struct list_head     sc_dto_q;		/* DTO tasklet I/O pending Q */
 	struct list_head     sc_read_complete_q;
 	struct work_struct   sc_work;
 };
-- 
cgit v1.2.3


From a3ab867fa64f9aedb3b01d570db5b43d2fc355fc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 7 Feb 2017 11:58:56 -0500
Subject: svcrdma: Combine list fields in struct svc_rdma_op_ctxt

Clean up: The free list and the dto_q list fields are never used at
the same time. Reduce the size of struct svc_rdma_op_ctxt by
combining these fields.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f77a7bc1612c..b105f73e3ca2 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -70,7 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
-	struct list_head free;
+	struct list_head list;
 	struct svc_rdma_op_ctxt *read_hdr;
 	struct svc_rdma_fastreg_mr *frmr;
 	int hdr_count;
@@ -78,7 +78,6 @@ struct svc_rdma_op_ctxt {
 	struct ib_cqe cqe;
 	struct ib_cqe reg_cqe;
 	struct ib_cqe inv_cqe;
-	struct list_head dto_q;
 	u32 byte_len;
 	u32 position;
 	struct svcxprt_rdma *xprt;
-- 
cgit v1.2.3


From 6a2cac549b368960c9cd6a993f2f2cc6d720e935 Mon Sep 17 00:00:00 2001
From: LABBE Corentin <clabbe.montjoie@gmail.com>
Date: Wed, 8 Feb 2017 09:31:07 +0100
Subject: net: stmmac: Remove the bus_setup function pointer

The bus_setup function pointer is not used at all, this patch remove it.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index d76033d6726d..fc273e9d5f67 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -134,7 +134,6 @@ struct plat_stmmacenet_data {
 	int tx_fifo_size;
 	int rx_fifo_size;
 	void (*fix_mac_speed)(void *priv, unsigned int speed);
-	void (*bus_setup)(void __iomem *ioaddr);
 	int (*init)(struct platform_device *pdev, void *priv);
 	void (*exit)(struct platform_device *pdev, void *priv);
 	void *bsp_priv;
-- 
cgit v1.2.3


From 34fe7c05400663e01e23cddd1fea68bb7a2b3d29 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 8 Feb 2017 14:46:48 +0100
Subject: block: enumify ELEVATOR_*_MERGE

Switch these constants to an enum, and make let the compiler ensure that
all callers of blk_try_merge and elv_merge handle all potential values.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/elevator.h | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index b5825c4f06f7..b38b4e651ea6 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -9,12 +9,21 @@
 struct io_cq;
 struct elevator_type;
 
-typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
+/*
+ * Return values from elevator merger
+ */
+enum elv_merge {
+	ELEVATOR_NO_MERGE	= 0,
+	ELEVATOR_FRONT_MERGE	= 1,
+	ELEVATOR_BACK_MERGE	= 2,
+};
+
+typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **,
 				 struct bio *);
 
 typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *);
 
-typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int);
+typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge);
 
 typedef int (elevator_allow_bio_merge_fn) (struct request_queue *,
 					   struct request *, struct bio *);
@@ -87,7 +96,7 @@ struct elevator_mq_ops {
 	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
 	bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
 	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
-	void (*request_merged)(struct request_queue *, struct request *, int);
+	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
 	struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
 	void (*put_request)(struct request *);
@@ -166,10 +175,12 @@ extern void elv_dispatch_sort(struct request_queue *, struct request *);
 extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
 extern void elv_add_request(struct request_queue *, struct request *, int);
 extern void __elv_add_request(struct request_queue *, struct request *, int);
-extern int elv_merge(struct request_queue *, struct request **, struct bio *);
+extern enum elv_merge elv_merge(struct request_queue *, struct request **,
+		struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
-extern void elv_merged_request(struct request_queue *, struct request *, int);
+extern void elv_merged_request(struct request_queue *, struct request *,
+		enum elv_merge);
 extern void elv_bio_merged(struct request_queue *q, struct request *,
 				struct bio *);
 extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
@@ -218,13 +229,6 @@ extern void elv_rb_add(struct rb_root *, struct request *);
 extern void elv_rb_del(struct rb_root *, struct request *);
 extern struct request *elv_rb_find(struct rb_root *, sector_t);
 
-/*
- * Return values from elevator merger
- */
-#define ELEVATOR_NO_MERGE	0
-#define ELEVATOR_FRONT_MERGE	1
-#define ELEVATOR_BACK_MERGE	2
-
 /*
  * Insertion selection
  */
-- 
cgit v1.2.3


From 1e739730c5b9ea80a2f25e9cf6e1025d47e3d8ed Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 8 Feb 2017 14:46:49 +0100
Subject: block: optionally merge discontiguous discard bios into a single
 request

Add a new merge strategy that merges discard bios into a request until the
maximum number of discard ranges (or the maximum discard size) is reached
from the plug merging code.  I/O scheduler merging is not wired up yet
but might also be useful, although not for fast devices like NVMe which
are the only user for now.

Note that for now we don't support limiting the size of each discard range,
but if needed that can be added later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h   | 26 ++++++++++++++++++++++++++
 include/linux/elevator.h |  1 +
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e0bac14347e6..aecca0e7d9ca 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -331,6 +331,7 @@ struct queue_limits {
 	unsigned short		logical_block_size;
 	unsigned short		max_segments;
 	unsigned short		max_integrity_segments;
+	unsigned short		max_discard_segments;
 
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
@@ -1146,6 +1147,8 @@ extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
+extern void blk_queue_max_discard_segments(struct request_queue *,
+		unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
 		unsigned int max_discard_sectors);
@@ -1189,6 +1192,15 @@ extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 
+/*
+ * Number of physical segments as sent to the device.
+ *
+ * Normally this is the number of discontiguous data segments sent by the
+ * submitter.  But for data-less command like discard we might have no
+ * actual data segments submitted, but the driver might have to add it's
+ * own special payload.  In that case we still return 1 here so that this
+ * special payload will be mapped.
+ */
 static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
 {
 	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
@@ -1196,6 +1208,15 @@ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
 	return rq->nr_phys_segments;
 }
 
+/*
+ * Number of discard segments (or ranges) the driver needs to fill in.
+ * Each discard bio merged into a request is counted as one segment.
+ */
+static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
+{
+	return max_t(unsigned short, rq->nr_phys_segments, 1);
+}
+
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);
@@ -1384,6 +1405,11 @@ static inline unsigned short queue_max_segments(struct request_queue *q)
 	return q->limits.max_segments;
 }
 
+static inline unsigned short queue_max_discard_segments(struct request_queue *q)
+{
+	return q->limits.max_discard_segments;
+}
+
 static inline unsigned int queue_max_segment_size(struct request_queue *q)
 {
 	return q->limits.max_segment_size;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index b38b4e651ea6..8265b6330cc2 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -16,6 +16,7 @@ enum elv_merge {
 	ELEVATOR_NO_MERGE	= 0,
 	ELEVATOR_FRONT_MERGE	= 1,
 	ELEVATOR_BACK_MERGE	= 2,
+	ELEVATOR_DISCARD_MERGE	= 3,
 };
 
 typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **,
-- 
cgit v1.2.3


From b35ba01ea6979125e9c23fb322517748278f15e6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 8 Feb 2017 14:46:50 +0100
Subject: nvme: support ranged discard requests

NVMe supports up to 256 ranges per DSM command, so wire up support
for ranged discards up to that limit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 3d1c6f1b15c9..3e2ed49c3ad8 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -553,6 +553,8 @@ enum {
 	NVME_DSMGMT_AD		= 1 << 2,
 };
 
+#define NVME_DSM_MAX_RANGES	256
+
 struct nvme_dsm_range {
 	__le32			cattr;
 	__le32			nlb;
-- 
cgit v1.2.3


From d6fc8821c2d2aba4cc18447a467f543e46e7367d Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Wed, 8 Feb 2017 09:54:42 +0800
Subject: SUNRPC/Cache: Always treat the invalid cache as unexpired

When the first time pynfs runs after rpc/nfsd startup, always get the warning,

"Got error: Connection closed"

I found the problem is caused by,
1. A new startup of nfsd, rpc.mountd, etc,
2. A rpc request from client (pynfs test, or normal mounting),
3. An ip_map cache is created but invalid, so upcall to rpc.mountd,
4. rpc.mountd process the ip_map upcall, before write the valid data to nfsd,
   do auth_reload(), and check_useipaddr(),
5. For the first time, old_use_ipaddr = -1, it causes rpc.mountd do write_flush    that doing cache_clean,
6. The ip_map cache will be treat as expired and clean,
7. When rpc.mountd write the valid data to nfsd, a new ip_map is created
   and updated, the cache_check of old ip_map(doing the upcall) will
   return -ETIMEDOUT.
8. RPC layer return SVC_CLOSE and close the xprt after commit 4d712ef1db05
   "svcauth_gss: Close connection when dropping an incoming message"

NeilBrown suggest in another email,

"If CACHE_VALID is not set, then there is no data in the cache item,
 so there is nothing to expire. So it would be nice if cache items that
 don't have CACHE_VALID are never treated as expired."

v3, change the order of the two patches
v2, change the checking of CACHE_PENDING to CACHE_VALID

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 9dcf2c8fe1c5..70738504d647 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -204,8 +204,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
 	kref_put(&h->ref, cd->cache_put);
 }
 
-static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
+static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h)
 {
+	if (!test_bit(CACHE_VALID, &h->flags))
+		return false;
+
 	return  (h->expiry_time < seconds_since_boot()) ||
 		(detail->flush_time >= h->last_refresh);
 }
-- 
cgit v1.2.3


From 6080ef6e7c0a0592cbcca11200d879faf65e27d4 Mon Sep 17 00:00:00 2001
From: Jeff Westfahl <jeff.westfahl@ni.com>
Date: Tue, 10 Jan 2017 13:30:17 -0600
Subject: mtd: introduce function max_bad_blocks

If implemented, 'max_bad_blocks' returns the maximum number of bad
blocks to reserve for a MTD. An implementation for NAND is coming soon.

Signed-off-by: Jeff Westfahl <jeff.westfahl@ni.com>
Signed-off-by: Zach Brown <zach.brown@ni.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electron.com>
Acked-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/mtd.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 13f8052b9ff9..5bb42c6dacdc 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -322,6 +322,7 @@ struct mtd_info {
 	int (*_block_isreserved) (struct mtd_info *mtd, loff_t ofs);
 	int (*_block_isbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*_block_markbad) (struct mtd_info *mtd, loff_t ofs);
+	int (*_max_bad_blocks) (struct mtd_info *mtd, loff_t ofs, size_t len);
 	int (*_suspend) (struct mtd_info *mtd);
 	void (*_resume) (struct mtd_info *mtd);
 	void (*_reboot) (struct mtd_info *mtd);
@@ -397,6 +398,18 @@ static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops)
 	return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize;
 }
 
+static inline int mtd_max_bad_blocks(struct mtd_info *mtd,
+				     loff_t ofs, size_t len)
+{
+	if (!mtd->_max_bad_blocks)
+		return -ENOTSUPP;
+
+	if (mtd->size < (len + ofs) || ofs < 0)
+		return -EINVAL;
+
+	return mtd->_max_bad_blocks(mtd, ofs, len);
+}
+
 int mtd_wunit_to_pairing_info(struct mtd_info *mtd, int wunit,
 			      struct mtd_pairing_info *info);
 int mtd_pairing_info_to_wunit(struct mtd_info *mtd,
-- 
cgit v1.2.3


From 863d7d9c2e0cbbde6cd15f87c4431dd806f2d917 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Tue, 7 Feb 2017 21:47:16 +0800
Subject: sunrpc/nfs: cleanup procfs/pipefs entry in cache_detail

Record flush/channel/content entries is useless, remove them.

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/cache.h | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 62a60eeacb0a..bb5c9c80f12e 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -63,15 +63,6 @@ struct cache_head {
 
 #define	CACHE_NEW_EXPIRY 120	/* keep new things pending confirmation for 120 seconds */
 
-struct cache_detail_procfs {
-	struct proc_dir_entry	*proc_ent;
-	struct proc_dir_entry   *flush_ent, *channel_ent, *content_ent;
-};
-
-struct cache_detail_pipefs {
-	struct dentry *dir;
-};
-
 struct cache_detail {
 	struct module *		owner;
 	int			hash_size;
@@ -123,9 +114,9 @@ struct cache_detail {
 	time_t			last_warn;		/* when we last warned about no readers */
 
 	union {
-		struct cache_detail_procfs procfs;
-		struct cache_detail_pipefs pipefs;
-	} u;
+		struct proc_dir_entry	*procfs;
+		struct dentry		*pipefs;
+	};
 	struct net		*net;
 };
 
-- 
cgit v1.2.3


From 5786461bd8ea81433d81297215ee814a9a33ce7a Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Tue, 7 Feb 2017 21:48:11 +0800
Subject: sunrpc: rename NFS_NGROUPS to UNX_NGROUPS for auth unix

NFS_NGROUPS has been move to sunrpc, rename to UNX_NGROUPS.

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/auth.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index b1bc62ba20a2..39c85fb1f0ed 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -32,6 +32,7 @@
  */
 #define UNX_MAXNODENAME	__NEW_UTS_LEN
 #define UNX_CALLSLACK	(21 + XDR_QUADLEN(UNX_MAXNODENAME))
+#define UNX_NGROUPS	16
 
 struct rpcsec_gss_info;
 
-- 
cgit v1.2.3


From af4926e5619f8a33463f7202f27ba091893ed2ad Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Tue, 7 Feb 2017 21:48:49 +0800
Subject: sunrpc: remove dead codes of cr_magic in rpc_cred

Don't found any place using the cr_magic.

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/auth.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 39c85fb1f0ed..8fd3504946ad 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -64,9 +64,6 @@ struct rpc_cred {
 	struct rcu_head		cr_rcu;
 	struct rpc_auth *	cr_auth;
 	const struct rpc_credops *cr_ops;
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	unsigned long		cr_magic;	/* 0x0f4aa4f0 */
-#endif
 	unsigned long		cr_expire;	/* when to gc */
 	unsigned long		cr_flags;	/* various flags */
 	atomic_t		cr_count;	/* ref count */
@@ -80,8 +77,6 @@ struct rpc_cred {
 #define RPCAUTH_CRED_HASHED	2
 #define RPCAUTH_CRED_NEGATIVE	3
 
-#define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
-
 /* rpc_auth au_flags */
 #define RPCAUTH_AUTH_NO_CRKEY_TIMEOUT	0x0001 /* underlying cred has no key timeout */
 
-- 
cgit v1.2.3


From ceb374eb06848212e47fb884964a3ae5e79615b5 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@ni.com>
Date: Tue, 10 Jan 2017 13:30:19 -0600
Subject: mtd: nand: Add max_bb_per_die and blocks_per_die fields to nand_chip

The fields max_bb_per_die and blocks_per_die are useful determining the
number of bad blocks a MTD needs to allocate. How they are set will
depend on if the chip is ONFI, JEDEC or a full-id entry in the nand_ids
table.

Signed-off-by: Zach Brown <zach.brown@ni.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electron.com>
Acked-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/nand.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index c5f3a012ae62..4e1b441c3a7e 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -801,6 +801,9 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  *			supported, 0 otherwise.
  * @jedec_params:	[INTERN] holds the JEDEC parameter page when JEDEC is
  *			supported, 0 otherwise.
+ * @max_bb_per_die:	[INTERN] the max number of bad blocks each die of a
+ *			this nand device will encounter their life times.
+ * @blocks_per_die:	[INTERN] The number of PEBs in a die
  * @read_retries:	[INTERN] the number of read retry modes supported
  * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
  * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
@@ -883,6 +886,8 @@ struct nand_chip {
 		struct nand_onfi_params	onfi_params;
 		struct nand_jedec_params jedec_params;
 	};
+	u16 max_bb_per_die;
+	u32 blocks_per_die;
 
 	struct nand_data_interface *data_interface;
 
-- 
cgit v1.2.3


From 0911d0041c22922228ca52a977d7b0b0159fee4b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 8 Feb 2017 14:30:53 -0800
Subject: mm: avoid returning VM_FAULT_RETRY from ->page_mkwrite handlers

Some ->page_mkwrite handlers may return VM_FAULT_RETRY as its return
code (GFS2 or Lustre can definitely do this).  However VM_FAULT_RETRY
from ->page_mkwrite is completely unhandled by the mm code and results
in locking and writeably mapping the page which definitely is not what
the caller wanted.

Fix Lustre and block_page_mkwrite_ret() used by other filesystems
(notably GFS2) to return VM_FAULT_NOPAGE instead which results in
bailing out from the fault code, the CPU then retries the access, and we
fault again effectively doing what the handler wanted.

Link: http://lkml.kernel.org/r/20170203150729.15863-1-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/buffer_head.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index d67ab83823ad..79591c3660cc 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -243,12 +243,10 @@ static inline int block_page_mkwrite_return(int err)
 {
 	if (err == 0)
 		return VM_FAULT_LOCKED;
-	if (err == -EFAULT)
+	if (err == -EFAULT || err == -EAGAIN)
 		return VM_FAULT_NOPAGE;
 	if (err == -ENOMEM)
 		return VM_FAULT_OOM;
-	if (err == -EAGAIN)
-		return VM_FAULT_RETRY;
 	/* -ENOSPC, -EDQUOT, -EIO ... */
 	return VM_FAULT_SIGBUS;
 }
-- 
cgit v1.2.3


From 4d59b6ccf000862beed6fc0765d3209f98a8d8a2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 8 Feb 2017 14:30:56 -0800
Subject: cpumask: use nr_cpumask_bits for parsing functions

Commit 513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and
parsing functions") converted both cpumask printing and parsing
functions to use nr_cpu_ids instead of nr_cpumask_bits.  While this was
okay for the printing functions as it just picked one of the two output
formats that we were alternating between depending on a kernel config,
doing the same for parsing wasn't okay.

nr_cpumask_bits can be either nr_cpu_ids or NR_CPUS.  We can always use
nr_cpu_ids but that is a variable while NR_CPUS is a constant, so it can
be more efficient to use NR_CPUS when we can get away with it.
Converting the printing functions to nr_cpu_ids makes sense because it
affects how the masks get presented to userspace and doesn't break
anything; however, using nr_cpu_ids for parsing functions can
incorrectly leave the higher bits uninitialized while reading in these
masks from userland.  As all testing and comparison functions use
nr_cpumask_bits which can be larger than nr_cpu_ids, the parsed cpumasks
can erroneously yield false negative results.

This made the taskstats interface incorrectly return -EINVAL even when
the inputs were correct.

Fix it by restoring the parse functions to use nr_cpumask_bits instead
of nr_cpu_ids.

Link: http://lkml.kernel.org/r/20170206182442.GB31078@htj.duckdns.org
Fixes: 513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and parsing functions")
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Martin Steigerwald <martin.steigerwald@teamix.de>
Debugged-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: <stable@vger.kernel.org>	[4.0+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpumask.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c717f5ea88cb..b3d2c1a89ac4 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -560,7 +560,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
 static inline int cpumask_parse_user(const char __user *buf, int len,
 				     struct cpumask *dstp)
 {
-	return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpu_ids);
+	return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -575,7 +575,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len,
 				     struct cpumask *dstp)
 {
 	return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
-				     nr_cpu_ids);
+				     nr_cpumask_bits);
 }
 
 /**
@@ -590,7 +590,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
 	char *nl = strchr(buf, '\n');
 	unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf);
 
-	return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpu_ids);
+	return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -602,7 +602,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
  */
 static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
 {
-	return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpu_ids);
+	return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
-- 
cgit v1.2.3


From 16d0bf1882109da84b4b90d7ba4fc7ba18db0d67 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 8 Feb 2017 19:37:55 +0100
Subject: PM / Domains: Provide dummy governors if CONFIG_PM_GENERIC_DOMAINS=n

This allows to compile-test drivers that refer to governors (always by
reference) when CONFIG_PM_GENERIC_DOMAINS=n.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_domain.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 81ece61075df..5339ed5bd6f9 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -182,6 +182,9 @@ static inline int pm_genpd_remove(struct generic_pm_domain *genpd)
 {
 	return -ENOTSUPP;
 }
+
+#define simple_qos_governor		(*(struct dev_power_governor *)(NULL))
+#define pm_domain_always_on_gov		(*(struct dev_power_governor *)(NULL))
 #endif
 
 static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
-- 
cgit v1.2.3


From 72f2ff0deb870145a5a2d24cd75b4f9936159a62 Mon Sep 17 00:00:00 2001
From: Dongdong Liu <liudongdong3@huawei.com>
Date: Fri, 3 Feb 2017 15:02:07 -0600
Subject: PCI: Disable MSI for HiSilicon Hip06/Hip07 Root Ports

The PCIe Root Port in Hip06/Hip07 SoCs advertises an MSI capability, but it
cannot generate MSIs.  It can transfer MSI/MSI-X from downstream devices,
but does not support MSI/MSI-X itself.

Add a quirk to prevent use of MSI/MSI-X by the Root Port.

[bhelgaas: changelog, sort vendor ID #define, drop device ID #define]
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>
Reviewed-by: Zhou Wang <wangzhou1@hisilicon.com>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 73dda0edcb97..a4f77feecbb0 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2516,6 +2516,8 @@
 #define PCI_DEVICE_ID_KORENIX_JETCARDF2	0x1700
 #define PCI_DEVICE_ID_KORENIX_JETCARDF3	0x17ff
 
+#define PCI_VENDOR_ID_HUAWEI         	0x19e5
+
 #define PCI_VENDOR_ID_NETRONOME		0x19ee
 #define PCI_DEVICE_ID_NETRONOME_NFP3200	0x3200
 #define PCI_DEVICE_ID_NETRONOME_NFP3240	0x3240
-- 
cgit v1.2.3


From e553f539f2af39db5e3b2c273cc1a22d34be49ad Mon Sep 17 00:00:00 2001
From: Frank Rowand <frank.rowand@am.sony.com>
Date: Mon, 17 Oct 2016 12:17:43 -0700
Subject: of: make of_device_make_bus_id() static

of_device_make_bus_id() was changed to non-static by commit c66012253800
("of/device: Make of_device_make_bus_id() usable by other code") more than
6 years ago, but there are no users of it outside of platform.c.  Make the
function static again.

Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of_device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index cc7dd687a89d..5c2aeed17dd4 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -13,7 +13,6 @@ struct device;
 #ifdef CONFIG_OF
 extern const struct of_device_id *of_match_device(
 	const struct of_device_id *matches, const struct device *dev);
-extern void of_device_make_bus_id(struct device *dev);
 
 /**
  * of_driver_match_device - Tell if a driver's of_match_table matches a device.
-- 
cgit v1.2.3


From d23bb113952db88b03a71c9533e5a40e444e18d3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 8 Feb 2017 11:17:52 -0500
Subject: SUNRPC: Remove unused function rpc_get_timeout()

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/clnt.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 333ad11b3dd9..33f216edb434 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -182,7 +182,6 @@ int		rpc_protocol(struct rpc_clnt *);
 struct net *	rpc_net_ns(struct rpc_clnt *);
 size_t		rpc_max_payload(struct rpc_clnt *);
 size_t		rpc_max_bc_payload(struct rpc_clnt *);
-unsigned long	rpc_get_timeout(struct rpc_clnt *clnt);
 void		rpc_force_rebind(struct rpc_clnt *);
 size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
 const char	*rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
-- 
cgit v1.2.3


From 7196dbb02ea05835b9ee56910ee82cb55422c7f1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 8 Feb 2017 11:17:54 -0500
Subject: SUNRPC: Allow changing of the TCP timeout parameters on the fly

When the NFSv4 server tells us the lease period, we usually want
to adjust down the timeout parameters on the TCP connection to
ensure that we don't miss lease renewals due to a faulty connection.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xprt.h     | 4 ++++
 include/linux/sunrpc/xprtsock.h | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index a5da60b24d83..eab1c749e192 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -137,6 +137,9 @@ struct rpc_xprt_ops {
 	void		(*release_request)(struct rpc_task *task);
 	void		(*close)(struct rpc_xprt *xprt);
 	void		(*destroy)(struct rpc_xprt *xprt);
+	void		(*set_connect_timeout)(struct rpc_xprt *xprt,
+					unsigned long connect_timeout,
+					unsigned long reconnect_timeout);
 	void		(*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
 	int		(*enable_swap)(struct rpc_xprt *xprt);
 	void		(*disable_swap)(struct rpc_xprt *xprt);
@@ -221,6 +224,7 @@ struct rpc_xprt {
 	struct timer_list	timer;
 	unsigned long		last_used,
 				idle_timeout,
+				connect_timeout,
 				max_reconnect_timeout;
 
 	/*
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index bef3fb0abb8f..c9959d7e3579 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -55,6 +55,8 @@ struct sock_xprt {
 	size_t			rcvsize,
 				sndsize;
 
+	struct rpc_timeout	tcp_timeout;
+
 	/*
 	 * Saved socket callback addresses
 	 */
@@ -81,6 +83,7 @@ struct sock_xprt {
 
 #define XPRT_SOCK_CONNECTING	1U
 #define XPRT_SOCK_DATA_READY	(2)
+#define XPRT_SOCK_UPD_TIMEOUT	(3)
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3


From 26ae102f2cfd0215daa57dc790aa3bfe534403a9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Wed, 8 Feb 2017 11:17:55 -0500
Subject: NFSv4: Set the connection timeout to match the lease period

Set the timeout for TCP connections to be 1 lease period to ensure
that we don't lose our lease due to a faulty TCP connection.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/clnt.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 33f216edb434..6095ecba0dde 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -201,8 +201,9 @@ int		rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *,
 				struct rpc_xprt *,
 				void *),
 			void *data);
-void		rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
-			unsigned long timeo);
+void		rpc_set_connect_timeout(struct rpc_clnt *clnt,
+			unsigned long connect_timeout,
+			unsigned long reconnect_timeout);
 
 int		rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *,
 			struct rpc_xprt_switch *,
-- 
cgit v1.2.3


From 90858794c96028ec1294fda12488a19f3a2b1d4a Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sun, 24 Jul 2016 20:51:44 -0400
Subject: module.h: remove extable.h include now users have migrated

With hopefully most/all users of module.h that were looking for
exception table functions moved over to the new extable.h header,
we can remove the back-compat include that let us transition
without introducing build regressions.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Jessica Yu <jeyu@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/module.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 7c84273d60b9..2e6df4c41c86 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -18,7 +18,6 @@
 #include <linux/moduleparam.h>
 #include <linux/jump_label.h>
 #include <linux/export.h>
-#include <linux/extable.h>	/* only as arch move module.h -> extable.h */
 #include <linux/rbtree_latch.h>
 
 #include <linux/percpu.h>
-- 
cgit v1.2.3


From 0764c604c8128f17fd740ff8b1701d0a1301eb7e Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Fri, 3 Feb 2017 11:29:26 -0600
Subject: PM / OPP: Expose _of_get_opp_desc_node as dev_pm_opp API

Rename _of_get_opp_desc_node to dev_pm_opp_of_get_opp_desc_node and add it
to include/linux/pm_opp.h to allow other drivers, such as platform OPP
and cpufreq drivers, to make use of it.

Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_opp.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 731d548657aa..a6685b3dde26 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -288,6 +288,7 @@ void dev_pm_opp_of_remove_table(struct device *dev);
 int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask);
 void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask);
 int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
+struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev);
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
@@ -311,6 +312,11 @@ static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct
 {
 	return -ENOTSUPP;
 }
+
+static inline struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev)
+{
+	return NULL;
+}
 #endif
 
 #endif		/* __LINUX_OPP_H__ */
-- 
cgit v1.2.3


From 9faf1c0fd5a943e498dbc0c86ff42e965b347d08 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 9 Feb 2017 01:18:15 +0800
Subject: sctp: drop unnecessary __packed from some stream reconf structures

commit 85c727b59483 ("sctp: drop __packed from almost all SCTP structures")
has removed __packed from almost all SCTP structures. But there still are
three structures where it should be dropped.

This patch is to remove it from some stream reconf structures.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 2408c6877ca0..d74fca3f3141 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -721,7 +721,7 @@ struct sctp_infox {
 struct sctp_reconf_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	__u8 params[0];
-} __packed;
+};
 
 struct sctp_strreset_outreq {
 	sctp_paramhdr_t param_hdr;
@@ -729,12 +729,12 @@ struct sctp_strreset_outreq {
 	__u32 response_seq;
 	__u32 send_reset_at_tsn;
 	__u16 list_of_streams[0];
-} __packed;
+};
 
 struct sctp_strreset_inreq {
 	sctp_paramhdr_t param_hdr;
 	__u32 request_seq;
 	__u16 list_of_streams[0];
-} __packed;
+};
 
 #endif /* __LINUX_SCTP_H__ */
-- 
cgit v1.2.3


From c56480a1e90261842f54f3a5a9ebc12d827f0c3e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 9 Feb 2017 01:18:17 +0800
Subject: sctp: add support for generating stream reconf ssn/tsn reset request
 chunk

This patch is to define SSN/TSN Reset Request Parameter described
in rfc6525 section 4.3.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index d74fca3f3141..71c0d41d9a59 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -737,4 +737,9 @@ struct sctp_strreset_inreq {
 	__u16 list_of_streams[0];
 };
 
+struct sctp_strreset_tsnreq {
+	sctp_paramhdr_t param_hdr;
+	__u32 request_seq;
+};
+
 #endif /* __LINUX_SCTP_H__ */
-- 
cgit v1.2.3


From 78098117f8bfad4f2104c3f7b6b69071af95a246 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 9 Feb 2017 01:18:19 +0800
Subject: sctp: add support for generating stream reconf add incoming/outgoing
 streams request chunk

This patch is to define Add Incoming/Outgoing Streams Request
Parameter described in rfc6525 section 4.5 and 4.6. They can
be in one same chunk trunk as rfc6525 section 3.1-7 describes,
so make them in one function.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 71c0d41d9a59..b055788de0cf 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -742,4 +742,11 @@ struct sctp_strreset_tsnreq {
 	__u32 request_seq;
 };
 
+struct sctp_strreset_addstrm {
+	sctp_paramhdr_t param_hdr;
+	__u32 request_seq;
+	__u16 number_of_streams;
+	__u16 reserved;
+};
+
 #endif /* __LINUX_SCTP_H__ */
-- 
cgit v1.2.3


From 2fd260f03b6a365bad48522f3948463928f91c2c Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 24 Jan 2017 19:35:56 +0200
Subject: PCI/AER: Remove unused .link_reset() callback

No hardware seems to actually call .link_reset(), and no driver implements
it as more than a nop stub.

Drop mentions of the callback from everywhere.  It's dropped from the
documentation as well, but the doc really needs to be updated to reflect
reality better (e.g., on PCIe, slot reset is the link reset).  This will be
done in a later patch.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index e2d1a124216a..2c0158b4dbed 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -678,9 +678,6 @@ struct pci_error_handlers {
 	/* MMIO has been re-enabled, but not DMA */
 	pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev);
 
-	/* PCI Express link has been reset */
-	pci_ers_result_t (*link_reset)(struct pci_dev *dev);
-
 	/* PCI slot has been reset */
 	pci_ers_result_t (*slot_reset)(struct pci_dev *dev);
 
-- 
cgit v1.2.3


From 42e9401bd1467d22c4dc4d2c637347b874e6a80b Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 9 Feb 2017 11:50:24 +0100
Subject: mtd: Add partition device node to mtd partition devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The user visible change here is that mtd partitions get an of_node link
in sysfs.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/partitions.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 70736e1e6c8f..06df1e06b6e0 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -41,6 +41,7 @@ struct mtd_partition {
 	uint64_t size;			/* partition size */
 	uint64_t offset;		/* offset within the master MTD space */
 	uint32_t mask_flags;		/* master MTD flags to mask out for this partition */
+	struct device_node *of_node;
 };
 
 #define MTDPART_OFS_RETAIN	(-3)
-- 
cgit v1.2.3


From f405df5de3170c00e5c54f8b7cf4766044a032ba Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 14 Nov 2016 18:06:19 +0100
Subject: refcount_t: Introduce a special purpose refcount type

Provide refcount_t, an atomic_t like primitive built just for
refcounting.

It provides saturation semantics such that overflow becomes impossible
and thereby 'spurious' use-after-free is avoided.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/refcount.h | 294 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 294 insertions(+)
 create mode 100644 include/linux/refcount.h

(limited to 'include/linux')

diff --git a/include/linux/refcount.h b/include/linux/refcount.h
new file mode 100644
index 000000000000..600aadf9cca4
--- /dev/null
+++ b/include/linux/refcount.h
@@ -0,0 +1,294 @@
+#ifndef _LINUX_REFCOUNT_H
+#define _LINUX_REFCOUNT_H
+
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * It differs in that the counter saturates at UINT_MAX and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free issues.
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * its the lock acquire, for RCU/lockless data structures its the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the inc, this ensures we'll never modify the object
+ * if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before, it also provides a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_DEBUG_REFCOUNT
+#define REFCOUNT_WARN(cond, str) WARN_ON(cond)
+#define __refcount_check	__must_check
+#else
+#define REFCOUNT_WARN(cond, str) (void)(cond)
+#define __refcount_check
+#endif
+
+typedef struct refcount_struct {
+	atomic_t refs;
+} refcount_t;
+
+#define REFCOUNT_INIT(n)	{ .refs = ATOMIC_INIT(n), }
+
+static inline void refcount_set(refcount_t *r, unsigned int n)
+{
+	atomic_set(&r->refs, n);
+}
+
+static inline unsigned int refcount_read(const refcount_t *r)
+{
+	return atomic_read(&r->refs);
+}
+
+static inline __refcount_check
+bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		if (!val)
+			return false;
+
+		if (unlikely(val == UINT_MAX))
+			return true;
+
+		new = val + i;
+		if (new < val)
+			new = UINT_MAX;
+		old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+	return true;
+}
+
+static inline void refcount_add(unsigned int i, refcount_t *r)
+{
+	REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
+}
+
+/*
+ * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_inc_not_zero(refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		new = val + 1;
+
+		if (!val)
+			return false;
+
+		if (unlikely(!new))
+			return true;
+
+		old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+	return true;
+}
+
+/*
+ * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller already has a
+ * reference on the object, will WARN when this is not so.
+ */
+static inline void refcount_inc(refcount_t *r)
+{
+	REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+}
+
+/*
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		if (unlikely(val == UINT_MAX))
+			return false;
+
+		new = val - i;
+		if (new > val) {
+			REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+			return false;
+		}
+
+		old = atomic_cmpxchg_release(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	return !new;
+}
+
+static inline __refcount_check
+bool refcount_dec_and_test(refcount_t *r)
+{
+	return refcount_sub_and_test(1, r);
+}
+
+/*
+ * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
+ * when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before.
+ */
+static inline
+void refcount_dec(refcount_t *r)
+{
+	REFCOUNT_WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
+}
+
+/*
+ * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the
+ * success thereof.
+ *
+ * Like all decrement operations, it provides release memory order and provides
+ * a control dependency.
+ *
+ * It can be used like a try-delete operator; this explicit case is provided
+ * and not cmpxchg in generic, because that would allow implementing unsafe
+ * operations.
+ */
+static inline __refcount_check
+bool refcount_dec_if_one(refcount_t *r)
+{
+	return atomic_cmpxchg_release(&r->refs, 1, 0) == 1;
+}
+
+/*
+ * No atomic_t counterpart, it decrements unless the value is 1, in which case
+ * it will return false.
+ *
+ * Was often done like: atomic_add_unless(&var, -1, 1)
+ */
+static inline __refcount_check
+bool refcount_dec_not_one(refcount_t *r)
+{
+	unsigned int old, new, val = atomic_read(&r->refs);
+
+	for (;;) {
+		if (unlikely(val == UINT_MAX))
+			return true;
+
+		if (val == 1)
+			return false;
+
+		new = val - 1;
+		if (new > val) {
+			REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+			return true;
+		}
+
+		old = atomic_cmpxchg_release(&r->refs, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	return true;
+}
+
+/*
+ * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail
+ * to decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock)
+{
+	if (refcount_dec_not_one(r))
+		return false;
+
+	mutex_lock(lock);
+	if (!refcount_dec_and_test(r)) {
+		mutex_unlock(lock);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
+{
+	if (refcount_dec_not_one(r))
+		return false;
+
+	spin_lock(lock);
+	if (!refcount_dec_and_test(r)) {
+		spin_unlock(lock);
+		return false;
+	}
+
+	return true;
+}
+
+#endif /* _LINUX_REFCOUNT_H */
-- 
cgit v1.2.3


From 10383aea2f445bce9b2a2b308def08134b438c8e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 14 Nov 2016 18:06:19 +0100
Subject: kref: Implement 'struct kref' using refcount_t

Use the refcount_t 'atomic' type to implement 'struct kref', this makes kref
more robust by bringing saturation semantics.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kref.h | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 9db9685fed84..f4156f88f557 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -15,17 +15,14 @@
 #ifndef _KREF_H_
 #define _KREF_H_
 
-#include <linux/bug.h>
-#include <linux/atomic.h>
-#include <linux/kernel.h>
-#include <linux/mutex.h>
 #include <linux/spinlock.h>
+#include <linux/refcount.h>
 
 struct kref {
-	atomic_t refcount;
+	refcount_t refcount;
 };
 
-#define KREF_INIT(n)	{ .refcount = ATOMIC_INIT(n), }
+#define KREF_INIT(n)	{ .refcount = REFCOUNT_INIT(n), }
 
 /**
  * kref_init - initialize object.
@@ -33,12 +30,12 @@ struct kref {
  */
 static inline void kref_init(struct kref *kref)
 {
-	atomic_set(&kref->refcount, 1);
+	refcount_set(&kref->refcount, 1);
 }
 
-static inline int kref_read(const struct kref *kref)
+static inline unsigned int kref_read(const struct kref *kref)
 {
-	return atomic_read(&kref->refcount);
+	return refcount_read(&kref->refcount);
 }
 
 /**
@@ -47,11 +44,7 @@ static inline int kref_read(const struct kref *kref)
  */
 static inline void kref_get(struct kref *kref)
 {
-	/* If refcount was 0 before incrementing then we have a race
-	 * condition when this kref is freeing by some other thread right now.
-	 * In this case one should use kref_get_unless_zero()
-	 */
-	WARN_ON_ONCE(atomic_inc_return(&kref->refcount) < 2);
+	refcount_inc(&kref->refcount);
 }
 
 /**
@@ -75,7 +68,7 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)
 {
 	WARN_ON(release == NULL);
 
-	if (atomic_dec_and_test(&kref->refcount)) {
+	if (refcount_dec_and_test(&kref->refcount)) {
 		release(kref);
 		return 1;
 	}
@@ -88,7 +81,7 @@ static inline int kref_put_mutex(struct kref *kref,
 {
 	WARN_ON(release == NULL);
 
-	if (atomic_dec_and_mutex_lock(&kref->refcount, lock)) {
+	if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) {
 		release(kref);
 		return 1;
 	}
@@ -101,7 +94,7 @@ static inline int kref_put_lock(struct kref *kref,
 {
 	WARN_ON(release == NULL);
 
-	if (atomic_dec_and_lock(&kref->refcount, lock)) {
+	if (refcount_dec_and_lock(&kref->refcount, lock)) {
 		release(kref);
 		return 1;
 	}
@@ -126,6 +119,6 @@ static inline int kref_put_lock(struct kref *kref,
  */
 static inline int __must_check kref_get_unless_zero(struct kref *kref)
 {
-	return atomic_add_unless(&kref->refcount, 1, 0);
+	return refcount_inc_not_zero(&kref->refcount);
 }
 #endif /* _KREF_H_ */
-- 
cgit v1.2.3


From 6ce77bfd6cedbff61eabf8837dc0901bb671cc86 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Thu, 26 Jan 2017 11:40:57 +0200
Subject: perf/core: Allow kernel filters on CPU events

While supporting file-based address filters for CPU events requires some
extra context switch handling, kernel address filters are easy, since the
kernel mapping is preserved across address spaces. It is also useful as
it permits tracing scheduling paths of the kernel.

This patch allows setting up kernel filters for CPU events.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Will Deacon <will.deacon@arm.com>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/20170126094057.13805-4-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5c58e93c130c..000fdb211c7d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -482,6 +482,7 @@ struct perf_addr_filter {
  * @list:	list of filters for this event
  * @lock:	spinlock that serializes accesses to the @list and event's
  *		(and its children's) filter generations.
+ * @nr_file_filters:	number of file-based filters
  *
  * A child event will use parent's @list (and therefore @lock), so they are
  * bundled together; see perf_event_addr_filters().
@@ -489,6 +490,7 @@ struct perf_addr_filter {
 struct perf_addr_filters_head {
 	struct list_head	list;
 	raw_spinlock_t		lock;
+	unsigned int		nr_file_filters;
 };
 
 /**
-- 
cgit v1.2.3


From dfb4357da6ddbdf57d583ba64361c9d792b0e0b1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 8 Feb 2017 11:26:59 -0800
Subject: time: Remove CONFIG_TIMER_STATS

Currently CONFIG_TIMER_STATS exposes process information across namespaces:

kernel/time/timer_list.c print_timer():

        SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);

/proc/timer_list:

 #11: <0000000000000000>, hrtimer_wakeup, S:01, do_nanosleep, cron/2570

Given that the tracer can give the same information, this patch entirely
removes CONFIG_TIMER_STATS.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: linux-doc@vger.kernel.org
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Xing Gao <xgao01@email.wm.edu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jessica Frazelle <me@jessfraz.com>
Cc: kernel-hardening@lists.openwall.com
Cc: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Olof Johansson <olof@lixom.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-api@vger.kernel.org
Cc: Arjan van de Ven <arjan@linux.intel.com>
Link: http://lkml.kernel.org/r/20170208192659.GA32582@beast
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 11 -----------
 include/linux/timer.h   | 45 ---------------------------------------------
 2 files changed, 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index cdab81ba29f8..e52b427223ba 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -88,12 +88,6 @@ enum hrtimer_restart {
  * @base:	pointer to the timer base (per cpu and per clock)
  * @state:	state information (See bit values above)
  * @is_rel:	Set if the timer was armed relative
- * @start_pid:  timer statistics field to store the pid of the task which
- *		started the timer
- * @start_site:	timer statistics field to store the site where the timer
- *		was started
- * @start_comm: timer statistics field to store the name of the process which
- *		started the timer
  *
  * The hrtimer structure must be initialized by hrtimer_init()
  */
@@ -104,11 +98,6 @@ struct hrtimer {
 	struct hrtimer_clock_base	*base;
 	u8				state;
 	u8				is_rel;
-#ifdef CONFIG_TIMER_STATS
-	int				start_pid;
-	void				*start_site;
-	char				start_comm[16];
-#endif
 };
 
 /**
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 51d601f192d4..5a209b84fd9e 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -20,11 +20,6 @@ struct timer_list {
 	unsigned long		data;
 	u32			flags;
 
-#ifdef CONFIG_TIMER_STATS
-	int			start_pid;
-	void			*start_site;
-	char			start_comm[16];
-#endif
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map	lockdep_map;
 #endif
@@ -197,46 +192,6 @@ extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
  */
 #define NEXT_TIMER_MAX_DELTA	((1UL << 30) - 1)
 
-/*
- * Timer-statistics info:
- */
-#ifdef CONFIG_TIMER_STATS
-
-extern int timer_stats_active;
-
-extern void init_timer_stats(void);
-
-extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
-				     void *timerf, char *comm, u32 flags);
-
-extern void __timer_stats_timer_set_start_info(struct timer_list *timer,
-					       void *addr);
-
-static inline void timer_stats_timer_set_start_info(struct timer_list *timer)
-{
-	if (likely(!timer_stats_active))
-		return;
-	__timer_stats_timer_set_start_info(timer, __builtin_return_address(0));
-}
-
-static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
-{
-	timer->start_site = NULL;
-}
-#else
-static inline void init_timer_stats(void)
-{
-}
-
-static inline void timer_stats_timer_set_start_info(struct timer_list *timer)
-{
-}
-
-static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
-{
-}
-#endif
-
 extern void add_timer(struct timer_list *timer);
 
 extern int try_to_del_timer_sync(struct timer_list *timer);
-- 
cgit v1.2.3


From 534766dfef999f7e7349bbd91cd19c1673792af3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 31 Jan 2017 16:58:42 +0100
Subject: iommu: Rename iommu_get_instance()

Rename the function to iommu_ops_from_fwnode(), because that
is what the function actually does. The new name is much
more descriptive about what the function does.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h    | 4 ++--
 include/linux/of_iommu.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0ff5111f6959..085e1f0e6c07 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -354,7 +354,7 @@ void iommu_fwspec_free(struct device *dev);
 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
 void iommu_register_instance(struct fwnode_handle *fwnode,
 			     const struct iommu_ops *ops);
-const struct iommu_ops *iommu_get_instance(struct fwnode_handle *fwnode);
+const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
 
 #else /* CONFIG_IOMMU_API */
 
@@ -590,7 +590,7 @@ static inline void iommu_register_instance(struct fwnode_handle *fwnode,
 }
 
 static inline
-const struct iommu_ops *iommu_get_instance(struct fwnode_handle *fwnode)
+const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 {
 	return NULL;
 }
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 6a7fc5051099..66fcbc949899 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -39,7 +39,7 @@ static inline void of_iommu_set_ops(struct device_node *np,
 
 static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
 {
-	return iommu_get_instance(&np->fwnode);
+	return iommu_ops_from_fwnode(&np->fwnode);
 }
 
 extern struct of_device_id __iommu_of_table;
-- 
cgit v1.2.3


From b0119e870837dcd15a207b4701542ebac5d19b45 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 1 Feb 2017 13:23:08 +0100
Subject: iommu: Introduce new 'struct iommu_device'

This struct represents one hardware iommu in the iommu core
code. For now it only has the iommu-ops associated with it,
but that will be extended soon.

The register/unregister interface is also added, as well as
making use of it in the Intel and AMD IOMMU drivers.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h |  2 ++
 include/linux/iommu.h       | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d49e26c6cdc7..99a65a397861 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -29,6 +29,7 @@
 #include <linux/dma_remapping.h>
 #include <linux/mmu_notifier.h>
 #include <linux/list.h>
+#include <linux/iommu.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 
@@ -440,6 +441,7 @@ struct intel_iommu {
 	struct irq_domain *ir_msi_domain;
 #endif
 	struct device	*iommu_dev; /* IOMMU-sysfs device */
+	struct iommu_device iommu;  /* IOMMU core code handle */
 	int		node;
 	u32		flags;      /* Software defined flags */
 };
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 085e1f0e6c07..900ddd212364 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -204,6 +204,26 @@ struct iommu_ops {
 	unsigned long pgsize_bitmap;
 };
 
+/**
+ * struct iommu_device - IOMMU core representation of one IOMMU hardware
+ *			 instance
+ * @list: Used by the iommu-core to keep a list of registered iommus
+ * @ops: iommu-ops for talking to this iommu
+ */
+struct iommu_device {
+	struct list_head list;
+	const struct iommu_ops *ops;
+};
+
+int  iommu_device_register(struct iommu_device *iommu);
+void iommu_device_unregister(struct iommu_device *iommu);
+
+static inline void iommu_device_set_ops(struct iommu_device *iommu,
+					const struct iommu_ops *ops)
+{
+	iommu->ops = ops;
+}
+
 #define IOMMU_GROUP_NOTIFY_ADD_DEVICE		1 /* Device added */
 #define IOMMU_GROUP_NOTIFY_DEL_DEVICE		2 /* Pre Device removed */
 #define IOMMU_GROUP_NOTIFY_BIND_DRIVER		3 /* Pre Driver bind */
@@ -361,6 +381,7 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
 struct iommu_ops {};
 struct iommu_group {};
 struct iommu_fwspec {};
+struct iommu_device {};
 
 static inline bool iommu_present(struct bus_type *bus)
 {
@@ -558,6 +579,20 @@ static inline void iommu_device_destroy(struct device *dev)
 {
 }
 
+static inline int  iommu_device_register(struct iommu_device *iommu)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_device_set_ops(struct iommu_device *iommu,
+					const struct iommu_ops *ops)
+{
+}
+
+static inline void iommu_device_unregister(struct iommu_device *iommu)
+{
+}
+
 static inline int iommu_device_link(struct device *dev, struct device *link)
 {
 	return -EINVAL;
-- 
cgit v1.2.3


From 39ab9555c24110671f8dc671311a26e5c985b592 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 1 Feb 2017 16:56:46 +0100
Subject: iommu: Add sysfs bindings for struct iommu_device

There is currently support for iommu sysfs bindings, but
those need to be implemented in the IOMMU drivers. Add a
more generic version of this by adding a struct device to
struct iommu_device and use that for the sysfs bindings.

Also convert the AMD and Intel IOMMU driver to make use of
it.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h |  1 -
 include/linux/iommu.h       | 33 ++++++++++++++++++---------------
 2 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 99a65a397861..3ba9b536387b 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -440,7 +440,6 @@ struct intel_iommu {
 	struct irq_domain *ir_domain;
 	struct irq_domain *ir_msi_domain;
 #endif
-	struct device	*iommu_dev; /* IOMMU-sysfs device */
 	struct iommu_device iommu;  /* IOMMU core code handle */
 	int		node;
 	u32		flags;      /* Software defined flags */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 900ddd212364..c578ca135bed 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -209,14 +209,21 @@ struct iommu_ops {
  *			 instance
  * @list: Used by the iommu-core to keep a list of registered iommus
  * @ops: iommu-ops for talking to this iommu
+ * @dev: struct device for sysfs handling
  */
 struct iommu_device {
 	struct list_head list;
 	const struct iommu_ops *ops;
+	struct device dev;
 };
 
 int  iommu_device_register(struct iommu_device *iommu);
 void iommu_device_unregister(struct iommu_device *iommu);
+int  iommu_device_sysfs_add(struct iommu_device *iommu,
+			    struct device *parent,
+			    const struct attribute_group **groups,
+			    const char *fmt, ...) __printf(4, 5);
+void iommu_device_sysfs_remove(struct iommu_device *iommu);
 
 static inline void iommu_device_set_ops(struct iommu_device *iommu,
 					const struct iommu_ops *ops)
@@ -287,10 +294,6 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
 extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
-struct device *iommu_device_create(struct device *parent, void *drvdata,
-				   const struct attribute_group **groups,
-				   const char *fmt, ...) __printf(4, 5);
-void iommu_device_destroy(struct device *dev);
 int iommu_device_link(struct device *dev, struct device *link);
 void iommu_device_unlink(struct device *dev, struct device *link);
 
@@ -567,29 +570,29 @@ static inline int iommu_domain_set_attr(struct iommu_domain *domain,
 	return -EINVAL;
 }
 
-static inline struct device *iommu_device_create(struct device *parent,
-					void *drvdata,
-					const struct attribute_group **groups,
-					const char *fmt, ...)
+static inline int  iommu_device_register(struct iommu_device *iommu)
 {
-	return ERR_PTR(-ENODEV);
+	return -ENODEV;
 }
 
-static inline void iommu_device_destroy(struct device *dev)
+static inline void iommu_device_set_ops(struct iommu_device *iommu,
+					const struct iommu_ops *ops)
 {
 }
 
-static inline int  iommu_device_register(struct iommu_device *iommu)
+static inline void iommu_device_unregister(struct iommu_device *iommu)
 {
-	return -ENODEV;
 }
 
-static inline void iommu_device_set_ops(struct iommu_device *iommu,
-					const struct iommu_ops *ops)
+static inline int  iommu_device_sysfs_add(struct iommu_device *iommu,
+					  struct device *parent,
+					  const struct attribute_group **groups,
+					  const char *fmt, ...)
 {
+	return -ENODEV;
 }
 
-static inline void iommu_device_unregister(struct iommu_device *iommu)
+static inline void iommu_device_sysfs_remove(struct iommu_device *iommu)
 {
 }
 
-- 
cgit v1.2.3


From e3d10af1128b6bc394f21656ff13753130f3c107 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 1 Feb 2017 17:23:22 +0100
Subject: iommu: Make iommu_device_link/unlink take a struct iommu_device

This makes the interface more consistent with
iommu_device_sysfs_add/remove.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c578ca135bed..bae3cfc8b4a3 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -224,6 +224,8 @@ int  iommu_device_sysfs_add(struct iommu_device *iommu,
 			    const struct attribute_group **groups,
 			    const char *fmt, ...) __printf(4, 5);
 void iommu_device_sysfs_remove(struct iommu_device *iommu);
+int  iommu_device_link(struct iommu_device   *iommu, struct device *link);
+void iommu_device_unlink(struct iommu_device *iommu, struct device *link);
 
 static inline void iommu_device_set_ops(struct iommu_device *iommu,
 					const struct iommu_ops *ops)
@@ -294,8 +296,6 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
 extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
-int iommu_device_link(struct device *dev, struct device *link);
-void iommu_device_unlink(struct device *dev, struct device *link);
 
 /* Window handling function prototypes */
 extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
-- 
cgit v1.2.3


From c73e1ac8b2bc6ab18d9f9a96b17ee7388b49a0c0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 7 Feb 2017 18:18:46 +0100
Subject: iommu: Add iommu_device_set_fwnode() interface

Allow to store a fwnode in 'struct iommu_device';

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bae3cfc8b4a3..626c935edee1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -214,6 +214,7 @@ struct iommu_ops {
 struct iommu_device {
 	struct list_head list;
 	const struct iommu_ops *ops;
+	struct fwnode_handle *fwnode;
 	struct device dev;
 };
 
@@ -233,6 +234,12 @@ static inline void iommu_device_set_ops(struct iommu_device *iommu,
 	iommu->ops = ops;
 }
 
+static inline void iommu_device_set_fwnode(struct iommu_device *iommu,
+					   struct fwnode_handle *fwnode)
+{
+	iommu->fwnode = fwnode;
+}
+
 #define IOMMU_GROUP_NOTIFY_ADD_DEVICE		1 /* Device added */
 #define IOMMU_GROUP_NOTIFY_DEL_DEVICE		2 /* Pre Device removed */
 #define IOMMU_GROUP_NOTIFY_BIND_DRIVER		3 /* Pre Driver bind */
@@ -580,6 +587,11 @@ static inline void iommu_device_set_ops(struct iommu_device *iommu,
 {
 }
 
+static inline void iommu_device_set_fwnode(struct iommu_device *iommu,
+					   struct fwnode_handle *fwnode)
+{
+}
+
 static inline void iommu_device_unregister(struct iommu_device *iommu)
 {
 }
-- 
cgit v1.2.3


From e99ca98f1d7190c16601b00d0c96212d7c00577d Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
Date: Fri, 2 Dec 2016 12:31:44 +0100
Subject: mtd: spi-nor: Add support for S3AN spi-nor devices

Xilinx Spartan-3AN FPGAs contain an In-System Flash where they keep
their configuration data and (optionally) some user data.

The protocol of this flash follows most of the spi-nor standard. With
the following differences:

- Page size might not be a power of two.
- The address calculation (default addressing mode).
- The spi nor commands used.

Protocol is described on Xilinx User Guide UG333

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 include/linux/mtd/spi-nor.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index c425c7b4c2a0..4950b2ef08c0 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -68,6 +68,15 @@
 #define SPINOR_OP_WRDI		0x04	/* Write disable */
 #define SPINOR_OP_AAI_WP	0xad	/* Auto address increment word program */
 
+/* Used for S3AN flashes only */
+#define SPINOR_OP_XSE		0x50	/* Sector erase */
+#define SPINOR_OP_XPP		0x82	/* Page program */
+#define SPINOR_OP_XRDSR		0xd7	/* Read status register */
+
+#define XSR_PAGESIZE		BIT(0)	/* Page size in Po2 or Linear */
+#define XSR_RDY			BIT(7)	/* Ready */
+
+
 /* Used for Macronix and Winbond flashes. */
 #define SPINOR_OP_EN4B		0xb7	/* Enter 4-byte mode */
 #define SPINOR_OP_EX4B		0xe9	/* Exit 4-byte mode */
@@ -119,6 +128,9 @@ enum spi_nor_ops {
 enum spi_nor_option_flags {
 	SNOR_F_USE_FSR		= BIT(0),
 	SNOR_F_HAS_SR_TB	= BIT(1),
+	SNOR_F_NO_OP_CHIP_ERASE	= BIT(2),
+	SNOR_F_S3AN_ADDR_DEFAULT = BIT(3),
+	SNOR_F_READY_XSR_RDY	= BIT(4),
 };
 
 /**
-- 
cgit v1.2.3


From 902cc69a0820252c84c6f7caed350882cea166ba Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Thu, 27 Oct 2016 11:55:39 +0200
Subject: mtd: spi-nor: rename SPINOR_OP_* macros of the 4-byte address op
 codes

This patch renames the SPINOR_OP_* macros of the 4-byte address
instruction set so the new names all share a common pattern: the 4-byte
address name is built from the 3-byte address name appending the "_4B"
suffix.

The patch also introduces new op codes to support other SPI protocols such
as SPI 1-4-4 and SPI 1-2-2.

This is a transitional patch and will help a later patch of spi-nor.c
to automate the translation from the 3-byte address op codes into their
4-byte address version.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Marek Vasut <marek.vasut@gmail.com>
---
 include/linux/mtd/spi-nor.h | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 4950b2ef08c0..f2a718030476 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -43,9 +43,13 @@
 #define SPINOR_OP_WRSR		0x01	/* Write status register 1 byte */
 #define SPINOR_OP_READ		0x03	/* Read data bytes (low frequency) */
 #define SPINOR_OP_READ_FAST	0x0b	/* Read data bytes (high frequency) */
-#define SPINOR_OP_READ_1_1_2	0x3b	/* Read data bytes (Dual SPI) */
-#define SPINOR_OP_READ_1_1_4	0x6b	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_READ_1_1_2	0x3b	/* Read data bytes (Dual Output SPI) */
+#define SPINOR_OP_READ_1_2_2	0xbb	/* Read data bytes (Dual I/O SPI) */
+#define SPINOR_OP_READ_1_1_4	0x6b	/* Read data bytes (Quad Output SPI) */
+#define SPINOR_OP_READ_1_4_4	0xeb	/* Read data bytes (Quad I/O SPI) */
 #define SPINOR_OP_PP		0x02	/* Page program (up to 256 bytes) */
+#define SPINOR_OP_PP_1_1_4	0x32	/* Quad page program */
+#define SPINOR_OP_PP_1_4_4	0x38	/* Quad page program */
 #define SPINOR_OP_BE_4K		0x20	/* Erase 4KiB block */
 #define SPINOR_OP_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
 #define SPINOR_OP_BE_32K	0x52	/* Erase 32KiB block */
@@ -56,11 +60,17 @@
 #define SPINOR_OP_RDFSR		0x70	/* Read flag status register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
-#define SPINOR_OP_READ4		0x13	/* Read data bytes (low frequency) */
-#define SPINOR_OP_READ4_FAST	0x0c	/* Read data bytes (high frequency) */
-#define SPINOR_OP_READ4_1_1_2	0x3c	/* Read data bytes (Dual SPI) */
-#define SPINOR_OP_READ4_1_1_4	0x6c	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_READ_4B	0x13	/* Read data bytes (low frequency) */
+#define SPINOR_OP_READ_FAST_4B	0x0c	/* Read data bytes (high frequency) */
+#define SPINOR_OP_READ_1_1_2_4B	0x3c	/* Read data bytes (Dual Output SPI) */
+#define SPINOR_OP_READ_1_2_2_4B	0xbc	/* Read data bytes (Dual I/O SPI) */
+#define SPINOR_OP_READ_1_1_4_4B	0x6c	/* Read data bytes (Quad Output SPI) */
+#define SPINOR_OP_READ_1_4_4_4B	0xec	/* Read data bytes (Quad I/O SPI) */
 #define SPINOR_OP_PP_4B		0x12	/* Page program (up to 256 bytes) */
+#define SPINOR_OP_PP_1_1_4_4B	0x34	/* Quad page program */
+#define SPINOR_OP_PP_1_4_4_4B	0x3e	/* Quad page program */
+#define SPINOR_OP_BE_4K_4B	0x21	/* Erase 4KiB block */
+#define SPINOR_OP_BE_32K_4B	0x5c	/* Erase 32KiB block */
 #define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
 /* Used for SST flashes only. */
-- 
cgit v1.2.3


From 2b5e77308f3356faad640b24af6dc5aa7233eb2d Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 10 Feb 2017 13:23:23 +0100
Subject: irqdesc: Add a resource managed version of irq_alloc_descs()

Add a devres flavor of __devm_irq_alloc_descs() and corresponding
helper macros.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-doc@vger.kernel.org
Cc: Jonathan Corbet <corbet@lwn.net>
Link: http://lkml.kernel.org/r/1486729403-21132-1-git-send-email-bgolaszewski@baylibre.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index e79875574b39..d915caecafa1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -715,6 +715,10 @@ unsigned int arch_dynirq_lower_bound(unsigned int from);
 int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
 		      struct module *owner, const struct cpumask *affinity);
 
+int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from,
+			   unsigned int cnt, int node, struct module *owner,
+			   const struct cpumask *affinity);
+
 /* use macros to avoid needing export.h for THIS_MODULE */
 #define irq_alloc_descs(irq, from, cnt, node)	\
 	__irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL)
@@ -731,6 +735,21 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
 #define irq_alloc_descs_from(from, cnt, node)	\
 	irq_alloc_descs(-1, from, cnt, node)
 
+#define devm_irq_alloc_descs(dev, irq, from, cnt, node)		\
+	__devm_irq_alloc_descs(dev, irq, from, cnt, node, THIS_MODULE, NULL)
+
+#define devm_irq_alloc_desc(dev, node)				\
+	devm_irq_alloc_descs(dev, -1, 0, 1, node)
+
+#define devm_irq_alloc_desc_at(dev, at, node)			\
+	devm_irq_alloc_descs(dev, at, at, 1, node)
+
+#define devm_irq_alloc_desc_from(dev, from, node)		\
+	devm_irq_alloc_descs(dev, -1, from, 1, node)
+
+#define devm_irq_alloc_descs_from(dev, from, cnt, node)		\
+	devm_irq_alloc_descs(dev, -1, from, cnt, node)
+
 void irq_free_descs(unsigned int irq, unsigned int cnt);
 static inline void irq_free_desc(unsigned int irq)
 {
-- 
cgit v1.2.3


From d0f6f5832603931b0a8da044fb9abe8289e201ee Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 2 Feb 2017 12:19:12 +0100
Subject: iommu: Remove iommu_register_instance interface

And also move its remaining functionality to
iommu_device_register() and 'struct iommu_device'.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: devicetree@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h    | 7 -------
 include/linux/of_iommu.h | 6 ------
 2 files changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 626c935edee1..9e82fc83765e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -382,8 +382,6 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
 		      const struct iommu_ops *ops);
 void iommu_fwspec_free(struct device *dev);
 int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
-void iommu_register_instance(struct fwnode_handle *fwnode,
-			     const struct iommu_ops *ops);
 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
 
 #else /* CONFIG_IOMMU_API */
@@ -634,11 +632,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids,
 	return -ENODEV;
 }
 
-static inline void iommu_register_instance(struct fwnode_handle *fwnode,
-					   const struct iommu_ops *ops)
-{
-}
-
 static inline
 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 {
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 66fcbc949899..fc4add39361a 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -31,12 +31,6 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 #endif	/* CONFIG_OF_IOMMU */
 
-static inline void of_iommu_set_ops(struct device_node *np,
-				    const struct iommu_ops *ops)
-{
-	iommu_register_instance(&np->fwnode, ops);
-}
-
 static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
 {
 	return iommu_ops_from_fwnode(&np->fwnode);
-- 
cgit v1.2.3


From 0508ad1fff11a8b0acdf0333b5fe108d7bd5fce4 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 1 Feb 2017 10:53:41 -0600
Subject: drivers/fsi: Add empty fsi bus definitions

This change adds the initial (empty) fsi bus definition, and introduces
drivers/fsi/.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Chris Bostic <cbostic@us.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fsi.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 include/linux/fsi.h

(limited to 'include/linux')

diff --git a/include/linux/fsi.h b/include/linux/fsi.h
new file mode 100644
index 000000000000..47aa181b6404
--- /dev/null
+++ b/include/linux/fsi.h
@@ -0,0 +1,22 @@
+/* FSI device & driver interfaces
+ *
+ * Copyright (C) IBM Corporation 2016
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef LINUX_FSI_H
+#define LINUX_FSI_H
+
+#include <linux/device.h>
+
+extern struct bus_type fsi_bus_type;
+
+#endif /* LINUX_FSI_H */
-- 
cgit v1.2.3


From fda07a6c94ac5c9bd73d7b0134c6cc6861375341 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 1 Feb 2017 10:53:42 -0600
Subject: drivers/fsi: Add device & driver definitions

Add structs for fsi devices & drivers, and struct device conversion
functions.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Chris Bostic <cbostic@us.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fsi.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fsi.h b/include/linux/fsi.h
index 47aa181b6404..f73886a5af01 100644
--- a/include/linux/fsi.h
+++ b/include/linux/fsi.h
@@ -17,6 +17,17 @@
 
 #include <linux/device.h>
 
+struct fsi_device {
+	struct device dev;
+};
+
+struct fsi_driver {
+	struct device_driver drv;
+};
+
+#define to_fsi_dev(devp) container_of(devp, struct fsi_device, dev)
+#define to_fsi_drv(drvp) container_of(drvp, struct fsi_driver, drv)
+
 extern struct bus_type fsi_bus_type;
 
 #endif /* LINUX_FSI_H */
-- 
cgit v1.2.3


From dd37eed7db0f7607fa41887c11c1e428faa15d0c Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Wed, 1 Feb 2017 10:53:43 -0600
Subject: drivers/fsi: add driver to device matches

Driver bind to devices based on the engine types & (optional) versions.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Chris Bostic <cbostic@us.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fsi.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsi.h b/include/linux/fsi.h
index f73886a5af01..273cbf6400ea 100644
--- a/include/linux/fsi.h
+++ b/include/linux/fsi.h
@@ -18,11 +18,28 @@
 #include <linux/device.h>
 
 struct fsi_device {
-	struct device dev;
+	struct device		dev;
+	u8			engine_type;
+	u8			version;
 };
 
+struct fsi_device_id {
+	u8	engine_type;
+	u8	version;
+};
+
+#define FSI_VERSION_ANY		0
+
+#define FSI_DEVICE(t) \
+	.engine_type = (t), .version = FSI_VERSION_ANY,
+
+#define FSI_DEVICE_VERSIONED(t, v) \
+	.engine_type = (t), .version = (v),
+
+
 struct fsi_driver {
-	struct device_driver drv;
+	struct device_driver		drv;
+	const struct fsi_device_id	*id_table;
 };
 
 #define to_fsi_dev(devp) container_of(devp, struct fsi_device, dev)
-- 
cgit v1.2.3


From baa6d396635129d8a67793e884f3b2182c7354b3 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Wed, 1 Feb 2017 12:48:44 -0700
Subject: fpga: Add scatterlist based programming

Requiring contiguous kernel memory is not a good idea, this is a limited
resource and allocation can fail under normal work loads.

This introduces a .write_sg op that supporting drivers can provide
to DMA directly from dis-contiguous memory and a new entry point
fpga_mgr_buf_load_sg that users can call to directly provide page
lists.

The full matrix of compatibility is provided, either the linear or sg
interface can be used by the user with a driver supporting either
interface.

A notable change for drivers is that the .write op can now be called
multiple times.

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Alan Tull <atull@opensource.altera.com>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fpga/fpga-mgr.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 16551d5eac36..57beb5d09bfc 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -22,6 +22,7 @@
 #define _LINUX_FPGA_MGR_H
 
 struct fpga_manager;
+struct sg_table;
 
 /**
  * enum fpga_mgr_states - fpga framework states
@@ -88,6 +89,7 @@ struct fpga_image_info {
  * @state: returns an enum value of the FPGA's state
  * @write_init: prepare the FPGA to receive confuration data
  * @write: write count bytes of configuration data to the FPGA
+ * @write_sg: write the scatter list of configuration data to the FPGA
  * @write_complete: set FPGA to operating state after writing is done
  * @fpga_remove: optional: Set FPGA into a specific state during driver remove
  *
@@ -102,6 +104,7 @@ struct fpga_manager_ops {
 			  struct fpga_image_info *info,
 			  const char *buf, size_t count);
 	int (*write)(struct fpga_manager *mgr, const char *buf, size_t count);
+	int (*write_sg)(struct fpga_manager *mgr, struct sg_table *sgt);
 	int (*write_complete)(struct fpga_manager *mgr,
 			      struct fpga_image_info *info);
 	void (*fpga_remove)(struct fpga_manager *mgr);
@@ -129,6 +132,8 @@ struct fpga_manager {
 
 int fpga_mgr_buf_load(struct fpga_manager *mgr, struct fpga_image_info *info,
 		      const char *buf, size_t count);
+int fpga_mgr_buf_load_sg(struct fpga_manager *mgr, struct fpga_image_info *info,
+			 struct sg_table *sgt);
 
 int fpga_mgr_firmware_load(struct fpga_manager *mgr,
 			   struct fpga_image_info *info,
-- 
cgit v1.2.3


From f6c4391553573d592212e6624cfba5e2752cd5c8 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sun, 5 Feb 2017 17:20:33 -0700
Subject: vmbus: remove no longer used signal_policy

The explicit signal policy is no longer used. A different mechanism
will be added later when xmit_more is supported.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index c37a4a145036..7795966af0f9 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -684,11 +684,6 @@ struct hv_input_signal_event_buffer {
 	struct hv_input_signal_event event;
 };
 
-enum hv_signal_policy {
-	HV_SIGNAL_POLICY_DEFAULT = 0,
-	HV_SIGNAL_POLICY_EXPLICIT,
-};
-
 enum hv_numa_policy {
 	HV_BALANCED = 0,
 	HV_LOCALIZED,
@@ -850,13 +845,6 @@ struct vmbus_channel {
 	 * link up channels based on their CPU affinity.
 	 */
 	struct list_head percpu_list;
-	/*
-	 * Host signaling policy: The default policy will be
-	 * based on the ring buffer state. We will also support
-	 * a policy where the client driver can have explicit
-	 * signaling control.
-	 */
-	enum hv_signal_policy  signal_policy;
 	/*
 	 * On the channel send side, many of the VMBUS
 	 * device drivers explicity serialize access to the
@@ -918,12 +906,6 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
 		  VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
 }
 
-static inline void set_channel_signal_state(struct vmbus_channel *c,
-					    enum hv_signal_policy policy)
-{
-	c->signal_policy = policy;
-}
-
 static inline void set_channel_affinity_state(struct vmbus_channel *c,
 					      enum hv_numa_policy policy)
 {
-- 
cgit v1.2.3


From 3454323c954775098871559b5c23d877c3e23f77 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sun, 5 Feb 2017 17:20:34 -0700
Subject: vmbus: remove unused kickq argument to sendpacket

Since sendpacket no longer uses kickq argument remove it.
Remove it no longer used xmit_more in sendpacket in netvsc as well.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 7795966af0f9..e208e6437f5b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1037,8 +1037,7 @@ extern int vmbus_sendpacket_ctl(struct vmbus_channel *channel,
 				  u32 bufferLen,
 				  u64 requestid,
 				  enum vmbus_packet_type type,
-				  u32 flags,
-				  bool kick_q);
+				  u32 flags);
 
 extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
 					    struct hv_page_buffer pagebuffers[],
@@ -1053,8 +1052,7 @@ extern int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
 					   void *buffer,
 					   u32 bufferlen,
 					   u64 requestid,
-					   u32 flags,
-					   bool kick_q);
+					   u32 flags);
 
 extern int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
 					struct hv_multipage_buffer *mpb,
-- 
cgit v1.2.3


From f1ba82616c3368e1ae9e64ef29cf3edc1be0860d Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Tue, 7 Feb 2017 18:24:43 +0100
Subject: blk-mq: pass bio to blk_mq_sched_get_rq_priv

bio is used in bfq-mq's get_rq_priv, to get the request group. We could
pass directly the group here, but I thought that passing the bio was
more general, giving the possibility to get other pieces of information
if needed.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/elevator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 8265b6330cc2..aebecc4ed088 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -109,7 +109,7 @@ struct elevator_mq_ops {
 	void (*requeue_request)(struct request *);
 	struct request *(*former_request)(struct request_queue *, struct request *);
 	struct request *(*next_request)(struct request_queue *, struct request *);
-	int (*get_rq_priv)(struct request_queue *, struct request *);
+	int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
 	void (*put_rq_priv)(struct request_queue *, struct request *);
 	void (*init_icq)(struct io_cq *);
 	void (*exit_icq)(struct io_cq *);
-- 
cgit v1.2.3


From ff548773106ec7f8031bc6172e0234bd2a02c19c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 10 Feb 2017 16:34:07 +0000
Subject: afs: Move UUID struct to linux/uuid.h

Move the afs_uuid struct to linux/uuid.h, rename it to uuid_v1 and change
the u16/u32 fields to __be16/__be32 instead so that the structure can be
cast to a 16-octet network-order buffer.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de
---
 include/linux/uuid.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 2d095fc60204..4dff73a89758 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -18,6 +18,30 @@
 
 #include <uapi/linux/uuid.h>
 
+/*
+ * V1 (time-based) UUID definition [RFC 4122].
+ * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
+ *   increments since midnight 15th October 1582
+ *   - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
+ *     time
+ * - the clock sequence is a 14-bit counter to avoid duplicate times
+ */
+struct uuid_v1 {
+	__be32		time_low;			/* low part of timestamp */
+	__be16		time_mid;			/* mid part of timestamp */
+	__be16		time_hi_and_version;		/* high part of timestamp and version  */
+#define UUID_TO_UNIX_TIME	0x01b21dd213814000ULL
+#define UUID_TIMEHI_MASK	0x0fff
+#define UUID_VERSION_TIME	0x1000	/* time-based UUID */
+#define UUID_VERSION_NAME	0x3000	/* name-based UUID */
+#define UUID_VERSION_RANDOM	0x4000	/* (pseudo-)random generated UUID */
+	u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
+#define UUID_CLOCKHI_MASK	0x3f
+#define UUID_VARIANT_STD	0x80
+	u8		clock_seq_low;			/* clock seq low */
+	u8		node[6];			/* spatially unique node ID (MAC addr) */
+};
+
 /*
  * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
  * not including trailing NUL.
-- 
cgit v1.2.3


From 28309572aac4c632666053dc8bf9906a3594b8d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Thu, 9 Feb 2017 10:21:07 +0100
Subject: mtd: name the mtd device with an optional label property
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This can be used to easily identify a specific chip on a system with
multiple chips.

Suggested-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/mtd.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 5bb42c6dacdc..eebdc63cf6af 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -24,6 +24,7 @@
 #include <linux/uio.h>
 #include <linux/notifier.h>
 #include <linux/device.h>
+#include <linux/of.h>
 
 #include <mtd/mtd-abi.h>
 
@@ -386,6 +387,8 @@ static inline void mtd_set_of_node(struct mtd_info *mtd,
 				   struct device_node *np)
 {
 	mtd->dev.of_node = np;
+	if (!mtd->name)
+		of_property_read_string(np, "label", &mtd->name);
 }
 
 static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd)
-- 
cgit v1.2.3


From ea6da4fd388a1eeab30d64da3eab3c5338714c74 Mon Sep 17 00:00:00 2001
From: Amir Vadai <amir@vadai.me>
Date: Tue, 7 Feb 2017 09:56:06 +0200
Subject: net/skbuff: Introduce skb_mac_offset()

Introduce skb_mac_offset() that could be used to get mac header offset.

Signed-off-by: Amir Vadai <amir@vadai.me>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f1adddc1c5ac..69ccd2636911 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2184,6 +2184,11 @@ static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
 	return skb->head + skb->mac_header;
 }
 
+static inline int skb_mac_offset(const struct sk_buff *skb)
+{
+	return skb_mac_header(skb) - skb->data;
+}
+
 static inline int skb_mac_header_was_set(const struct sk_buff *skb)
 {
 	return skb->mac_header != (typeof(skb->mac_header))~0U;
-- 
cgit v1.2.3


From 47feb41888bc82ba5c9268c344775adc483d6de7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 8 Feb 2017 18:17:43 +0100
Subject: PCI/MSI: Remove unused pci_msi_create_default_irq_domain()

pci_msi_create_default_irq_domain() is never called in the whole tree, so
remove it as well as all the supporting code for a default PCI MSI domain.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/msi.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 0db320b7bb15..18b8566b3ce3 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -319,9 +319,6 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
 int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev,
 			      int nvec, int type);
 void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev);
-struct irq_domain *pci_msi_create_default_irq_domain(struct fwnode_handle *fwnode,
-		 struct msi_domain_info *info, struct irq_domain *parent);
-
 irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev,
 					  struct msi_desc *desc);
 int pci_msi_domain_check_cap(struct irq_domain *domain,
-- 
cgit v1.2.3


From 699c4cec238731a4c466f73fe6e9e45ab6f49a41 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 8 Feb 2017 18:17:44 +0100
Subject: PCI/MSI: Remove pci_msi_domain_{alloc,free}_irqs()

Just call the msi_* version directly instead of having trivial wrappers for
one or two callsites.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/msi.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 18b8566b3ce3..1b6f3ebbe876 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -316,9 +316,6 @@ void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg);
 struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
 					     struct msi_domain_info *info,
 					     struct irq_domain *parent);
-int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev,
-			      int nvec, int type);
-void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev);
 irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev,
 					  struct msi_desc *desc);
 int pci_msi_domain_check_cap(struct irq_domain *domain,
-- 
cgit v1.2.3


From 1697599ee301a52cded6499a09bd609f7f63fd06 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 9 Feb 2017 09:17:27 -0800
Subject: bitfield.h: add FIELD_FIT() helper

Add a helper for checking at runtime that a value will fit inside
a specified field/mask.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bitfield.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index f6505d83069d..8b9d6fff002d 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -62,6 +62,19 @@
 					      (1ULL << __bf_shf(_mask))); \
 	})
 
+/**
+ * FIELD_FIT() - check if value fits in the field
+ * @_mask: shifted mask defining the field's length and position
+ * @_val:  value to test against the field
+ *
+ * Return: true if @_val can fit inside @_mask, false if @_val is too big.
+ */
+#define FIELD_FIT(_mask, _val)						\
+	({								\
+		__BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_FIT: ");	\
+		!((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \
+	})
+
 /**
  * FIELD_PREP() - prepare a bitfield element
  * @_mask: shifted mask defining the field's length and position
-- 
cgit v1.2.3


From a8e04698732736f59fefe72c675791a006b76e1d Mon Sep 17 00:00:00 2001
From: Sainath Grandhi <sainath.grandhi@intel.com>
Date: Fri, 10 Feb 2017 16:03:46 -0800
Subject: tap: Refactoring macvtap.c

macvtap module has code for tap/queue management and link management. This patch splits
the code into macvtap_main.c for link management and tap.c for tap/queue management.
Functionality in tap.c can be re-used for implementing tap on other virtual interfaces.

Signed-off-by: Sainath Grandhi <sainath.grandhi@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_macvtap.h | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 include/linux/if_macvtap.h

(limited to 'include/linux')

diff --git a/include/linux/if_macvtap.h b/include/linux/if_macvtap.h
new file mode 100644
index 000000000000..c9bf84b75b27
--- /dev/null
+++ b/include/linux/if_macvtap.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_IF_MACVTAP_H_
+#define _LINUX_IF_MACVTAP_H_
+
+rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb);
+void macvtap_del_queues(struct net_device *dev);
+int macvtap_get_minor(struct macvlan_dev *vlan);
+void macvtap_free_minor(struct macvlan_dev *vlan);
+int macvtap_queue_resize(struct macvlan_dev *vlan);
+
+#endif /*_LINUX_IF_MACVTAP_H_*/
-- 
cgit v1.2.3


From 635b8c8ecdd27142d7fdab0df334b2e9201481cf Mon Sep 17 00:00:00 2001
From: Sainath Grandhi <sainath.grandhi@intel.com>
Date: Fri, 10 Feb 2017 16:03:47 -0800
Subject: tap: Renaming tap related APIs, data structures, macros

Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.*

Signed-off-by: Sainath Grandhi <sainath.grandhi@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_macvlan.h | 17 ++---------------
 include/linux/if_macvtap.h | 10 ----------
 include/linux/if_tap.h     | 23 +++++++++++++++++++++++
 3 files changed, 25 insertions(+), 25 deletions(-)
 delete mode 100644 include/linux/if_macvtap.h
 create mode 100644 include/linux/if_tap.h

(limited to 'include/linux')

diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index a4ccc3122f93..c9ec1343d187 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -9,19 +9,6 @@
 #include <net/netlink.h>
 #include <linux/u64_stats_sync.h>
 
-#if IS_ENABLED(CONFIG_MACVTAP)
-struct socket *macvtap_get_socket(struct file *);
-#else
-#include <linux/err.h>
-#include <linux/errno.h>
-struct file;
-struct socket;
-static inline struct socket *macvtap_get_socket(struct file *f)
-{
-	return ERR_PTR(-EINVAL);
-}
-#endif /* CONFIG_MACVTAP */
-
 struct macvlan_port;
 struct macvtap_queue;
 
@@ -29,7 +16,7 @@ struct macvtap_queue;
  * Maximum times a macvtap device can be opened. This can be used to
  * configure the number of receive queue, e.g. for multiqueue virtio.
  */
-#define MAX_MACVTAP_QUEUES	256
+#define MAX_TAP_QUEUES	256
 
 #define MACVLAN_MC_FILTER_BITS	8
 #define MACVLAN_MC_FILTER_SZ	(1 << MACVLAN_MC_FILTER_BITS)
@@ -49,7 +36,7 @@ struct macvlan_dev {
 	enum macvlan_mode	mode;
 	u16			flags;
 	/* This array tracks active taps. */
-	struct macvtap_queue	__rcu *taps[MAX_MACVTAP_QUEUES];
+	struct tap_queue	__rcu *taps[MAX_TAP_QUEUES];
 	/* This list tracks all taps (both enabled and disabled) */
 	struct list_head	queue_list;
 	int			numvtaps;
diff --git a/include/linux/if_macvtap.h b/include/linux/if_macvtap.h
deleted file mode 100644
index c9bf84b75b27..000000000000
--- a/include/linux/if_macvtap.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _LINUX_IF_MACVTAP_H_
-#define _LINUX_IF_MACVTAP_H_
-
-rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb);
-void macvtap_del_queues(struct net_device *dev);
-int macvtap_get_minor(struct macvlan_dev *vlan);
-void macvtap_free_minor(struct macvlan_dev *vlan);
-int macvtap_queue_resize(struct macvlan_dev *vlan);
-
-#endif /*_LINUX_IF_MACVTAP_H_*/
diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
new file mode 100644
index 000000000000..97d27b8ebd55
--- /dev/null
+++ b/include/linux/if_tap.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_IF_TAP_H_
+#define _LINUX_IF_TAP_H_
+
+#if IS_ENABLED(CONFIG_MACVTAP)
+struct socket *tap_get_socket(struct file *);
+#else
+#include <linux/err.h>
+#include <linux/errno.h>
+struct file;
+struct socket;
+static inline struct socket *tap_get_socket(struct file *f)
+{
+	return ERR_PTR(-EINVAL);
+}
+#endif /* CONFIG_MACVTAP */
+
+rx_handler_result_t tap_handle_frame(struct sk_buff **pskb);
+void tap_del_queues(struct net_device *dev);
+int tap_get_minor(struct macvlan_dev *vlan);
+void tap_free_minor(struct macvlan_dev *vlan);
+int tap_queue_resize(struct macvlan_dev *vlan);
+
+#endif /*_LINUX_IF_TAP_H_*/
-- 
cgit v1.2.3


From ebc05ba7e8600b52a2a0c87a43105143368aca2a Mon Sep 17 00:00:00 2001
From: Sainath Grandhi <sainath.grandhi@intel.com>
Date: Fri, 10 Feb 2017 16:03:48 -0800
Subject: tap: Tap character device creation/destroy API

This patch provides tap device create/destroy APIs in tap.c.

Signed-off-by: Sainath Grandhi <sainath.grandhi@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tap.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 97d27b8ebd55..a2dfd9063a6c 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -19,5 +19,8 @@ void tap_del_queues(struct net_device *dev);
 int tap_get_minor(struct macvlan_dev *vlan);
 void tap_free_minor(struct macvlan_dev *vlan);
 int tap_queue_resize(struct macvlan_dev *vlan);
+int tap_create_cdev(struct cdev *tap_cdev,
+		    dev_t *tap_major, const char *device_name);
+void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev);
 
 #endif /*_LINUX_IF_TAP_H_*/
-- 
cgit v1.2.3


From 6fe3faf86757eb7f078ff06b23b206f17dc4fb36 Mon Sep 17 00:00:00 2001
From: Sainath Grandhi <sainath.grandhi@intel.com>
Date: Fri, 10 Feb 2017 16:03:49 -0800
Subject: tap: Abstract type of virtual interface from tap implementation

macvlan object is re-structured to hold tap related elements in a separate
entity, tap_dev. Upon NETDEV_REGISTER device_event, tap_dev is registered with
idr and fetched again on tap_open. Few of the tap functions are modified to
accepted tap_dev as argument. tap_dev object includes callbacks to be used by
underlying virtual interface to take care of tx and rx accounting.

Signed-off-by: Sainath Grandhi <sainath.grandhi@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tap.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index a2dfd9063a6c..75031e5d0a65 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -14,11 +14,60 @@ static inline struct socket *tap_get_socket(struct file *f)
 }
 #endif /* CONFIG_MACVTAP */
 
+#include <net/sock.h>
+#include <linux/skb_array.h>
+
+#define MAX_TAP_QUEUES 256
+
+struct tap_queue;
+
+struct tap_dev {
+	struct net_device	*dev;
+	u16			flags;
+	/* This array tracks active taps. */
+	struct tap_queue    __rcu *taps[MAX_TAP_QUEUES];
+	/* This list tracks all taps (both enabled and disabled) */
+	struct list_head	queue_list;
+	int			numvtaps;
+	int			numqueues;
+	netdev_features_t	tap_features;
+	int			minor;
+
+	void (*update_features)(struct tap_dev *tap, netdev_features_t features);
+	void (*count_tx_dropped)(struct tap_dev *tap);
+	void (*count_rx_dropped)(struct tap_dev *tap);
+};
+
+/*
+ * A tap queue is the central object of tap module, it connects
+ * an open character device to virtual interface. There can be
+ * multiple queues on one interface, which map back to queues
+ * implemented in hardware on the underlying device.
+ *
+ * tap_proto is used to allocate queues through the sock allocation
+ * mechanism.
+ *
+ */
+
+struct tap_queue {
+	struct sock sk;
+	struct socket sock;
+	struct socket_wq wq;
+	int vnet_hdr_sz;
+	struct tap_dev __rcu *tap;
+	struct file *file;
+	unsigned int flags;
+	u16 queue_index;
+	bool enabled;
+	struct list_head next;
+	struct skb_array skb_array;
+};
+
 rx_handler_result_t tap_handle_frame(struct sk_buff **pskb);
-void tap_del_queues(struct net_device *dev);
-int tap_get_minor(struct macvlan_dev *vlan);
-void tap_free_minor(struct macvlan_dev *vlan);
-int tap_queue_resize(struct macvlan_dev *vlan);
+void tap_del_queues(struct tap_dev *tap);
+int tap_get_minor(struct tap_dev *tap);
+void tap_free_minor(struct tap_dev *tap);
+int tap_queue_resize(struct tap_dev *tap);
 int tap_create_cdev(struct cdev *tap_cdev,
 		    dev_t *tap_major, const char *device_name);
 void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev);
-- 
cgit v1.2.3


From d9f1f61c0801a73ff36d416a7ede54229b231e1d Mon Sep 17 00:00:00 2001
From: Sainath Grandhi <sainath.grandhi@intel.com>
Date: Fri, 10 Feb 2017 16:03:50 -0800
Subject: tap: Extending tap device create/destroy APIs

Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one
type of virtual interface.

Signed-off-by: Sainath Grandhi <sainath.grandhi@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 75031e5d0a65..362e71c16efb 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -65,8 +65,8 @@ struct tap_queue {
 
 rx_handler_result_t tap_handle_frame(struct sk_buff **pskb);
 void tap_del_queues(struct tap_dev *tap);
-int tap_get_minor(struct tap_dev *tap);
-void tap_free_minor(struct tap_dev *tap);
+int tap_get_minor(dev_t major, struct tap_dev *tap);
+void tap_free_minor(dev_t major, struct tap_dev *tap);
 int tap_queue_resize(struct tap_dev *tap);
 int tap_create_cdev(struct cdev *tap_cdev,
 		    dev_t *tap_major, const char *device_name);
-- 
cgit v1.2.3


From 9a393b5d5988ea4eaa3e0da138321abe0dc03a68 Mon Sep 17 00:00:00 2001
From: Sainath Grandhi <sainath.grandhi@intel.com>
Date: Fri, 10 Feb 2017 16:03:51 -0800
Subject: tap: tap as an independent module

This patch makes tap a separate module for other types of virtual interfaces, for example,
ipvlan to use.

Signed-off-by: Sainath Grandhi <sainath.grandhi@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 362e71c16efb..3482c3c2037d 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_IF_TAP_H_
 #define _LINUX_IF_TAP_H_
 
-#if IS_ENABLED(CONFIG_MACVTAP)
+#if IS_ENABLED(CONFIG_TAP)
 struct socket *tap_get_socket(struct file *);
 #else
 #include <linux/err.h>
@@ -12,7 +12,7 @@ static inline struct socket *tap_get_socket(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
-#endif /* CONFIG_MACVTAP */
+#endif /* CONFIG_TAP */
 
 #include <net/sock.h>
 #include <linux/skb_array.h>
-- 
cgit v1.2.3


From 8c4d4e8b5626fec965fd5034e5bd5e57790f243f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 10 Feb 2017 12:08:17 +0100
Subject: netfilter: nfnetlink: allow to check for generation ID

This patch allows userspace to specify the generation ID that has been
used to build an incremental batch update.

If userspace specifies the generation ID in the batch message as
attribute, then nfnetlink compares it to the current generation ID so
you make sure that you work against the right baseline. Otherwise, bail
out with ERESTART so userspace knows that its changeset is stale and
needs to respin. Userspace can do this transparently at the cost of
taking slightly more time to refresh caches and rework the changeset.

This check is optional, if there is no NFNL_BATCH_GENID attribute in the
batch begin message, then no check is performed.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 1d82dd5e9a08..1b49209dd5c7 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -28,6 +28,7 @@ struct nfnetlink_subsystem {
 	const struct nfnl_callback *cb;	/* callback for individual types */
 	int (*commit)(struct net *net, struct sk_buff *skb);
 	int (*abort)(struct net *net, struct sk_buff *skb);
+	bool (*valid_genid)(struct net *net, u32 genid);
 };
 
 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
-- 
cgit v1.2.3


From 7f677633379b4abb3281cdbe7e7006f049305c03 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Fri, 10 Feb 2017 20:28:24 -0800
Subject: bpf: introduce BPF_F_ALLOW_OVERRIDE flag

If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
to the given cgroup the descendent cgroup will be able to override
effective bpf program that was inherited from this cgroup.
By default it's not passed, therefore override is disallowed.

Examples:
1.
prog X attached to /A with default
prog Y fails to attach to /A/B and /A/B/C
Everything under /A runs prog X

2.
prog X attached to /A with allow_override.
prog Y fails to attach to /A/B with default (non-override)
prog M attached to /A/B with allow_override.
Everything under /A/B runs prog M only.

3.
prog X attached to /A with allow_override.
prog Y fails to attach to /A with default.
The user has to detach first to switch the mode.

In the future this behavior may be extended with a chain of
non-overridable programs.

Also fix the bug where detach from cgroup where nothing is attached
was not throwing error. Return ENOENT in such case.

Add several testcases and adjust libbpf.

Fixes: 3007098494be ("cgroup: add support for eBPF programs")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 92bc89ae7e20..c970a25d2a49 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -21,20 +21,19 @@ struct cgroup_bpf {
 	 */
 	struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
 	struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE];
+	bool disallow_override[MAX_BPF_ATTACH_TYPE];
 };
 
 void cgroup_bpf_put(struct cgroup *cgrp);
 void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
 
-void __cgroup_bpf_update(struct cgroup *cgrp,
-			 struct cgroup *parent,
-			 struct bpf_prog *prog,
-			 enum bpf_attach_type type);
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool overridable);
 
 /* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
-void cgroup_bpf_update(struct cgroup *cgrp,
-		       struct bpf_prog *prog,
-		       enum bpf_attach_type type);
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable);
 
 int __cgroup_bpf_run_filter_skb(struct sock *sk,
 				struct sk_buff *skb,
-- 
cgit v1.2.3


From d6cffbbe9a7e51eb705182965a189457c17ba8a3 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Fri, 10 Feb 2017 10:35:02 +0300
Subject: proc/sysctl: prune stale dentries during unregistering

Currently unregistering sysctl table does not prune its dentries.
Stale dentries could slowdown sysctl operations significantly.

For example, command:

 # for i in {1..100000} ; do unshare -n -- sysctl -a &> /dev/null ; done
 creates a millions of stale denties around sysctls of loopback interface:

 # sysctl fs.dentry-state
 fs.dentry-state = 25812579  24724135        45      0       0       0

 All of them have matching names thus lookup have to scan though whole
 hash chain and call d_compare (proc_sys_compare) which checks them
 under system-wide spinlock (sysctl_lock).

 # time sysctl -a > /dev/null
 real    1m12.806s
 user    0m0.016s
 sys     1m12.400s

Currently only memory reclaimer could remove this garbage.
But without significant memory pressure this never happens.

This patch collects sysctl inodes into list on sysctl table header and
prunes all their dentries once that table unregisters.

Konstantin Khlebnikov <khlebnikov@yandex-team.ru> writes:
> On 10.02.2017 10:47, Al Viro wrote:
>> how about >> the matching stats *after* that patch?
>
> dcache size doesn't grow endlessly, so stats are fine
>
> # sysctl fs.dentry-state
> fs.dentry-state = 92712	58376	45	0	0	0
>
> # time sysctl -a &>/dev/null
>
> real	0m0.013s
> user	0m0.004s
> sys	0m0.008s

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sysctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index adf4e51cf597..b7e82049fec7 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -143,6 +143,7 @@ struct ctl_table_header
 	struct ctl_table_set *set;
 	struct ctl_dir *parent;
 	struct ctl_node *node;
+	struct list_head inodes; /* head for proc_inode->sysctl_inodes */
 };
 
 struct ctl_dir {
-- 
cgit v1.2.3


From 34a23327697d558a582e356573d7d36773996fbb Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe.montjoie@gmail.com>
Date: Mon, 5 Dec 2016 13:15:28 +0100
Subject: mfd: axp20x: Correct a typo in axp20x_device_remove documentation

The documentation of axp20x_device_remove() have a typo and use
axp20x_device_probe() as name. This patch fix this typo.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/axp20x.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index 812806d6319b..39f1da18c917 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -582,7 +582,7 @@ int axp20x_match_device(struct axp20x_dev *axp20x);
 int axp20x_device_probe(struct axp20x_dev *axp20x);
 
 /**
- * axp20x_device_probe(): Remove a axp20x device
+ * axp20x_device_remove(): Remove a axp20x device
  *
  * @axp20x: axp20x device to remove
  *
-- 
cgit v1.2.3


From 0a5454c901aea0fef99f5ef7910c69c501817ae1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 14 Dec 2016 14:52:05 +0100
Subject: mfd: axp20x: Use IRQF_TRIGGER_LOW on the axp288

The interrupt line of the entire family of axp2xx pmics is active-low,
for devicetree enumerated irqs, this is dealt with in the devicetree.

ACPI irq resources have a flag field for this too, I tried using this
on my CUBE iwork8 Air tablet, but it does not contain the right data.

The dstd shows the irq listed as either ActiveLow or ActiveHigh,
depending on the OSID variable, which seems to be set by the
"OS IMAGE ID" in the BIOS/EFI setup screen.

Since the acpi-resource info is no good, simply pass in IRQF_TRIGGER_LOW
on the axp288.

Together with the other axp288 fixes in this series, this fixes the axp288
irq contineously triggering.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/axp20x.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index 39f1da18c917..6d5dd3f96d33 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -523,6 +523,7 @@ enum axp809_irqs {
 struct axp20x_dev {
 	struct device			*dev;
 	int				irq;
+	unsigned long			irq_flags;
 	struct regmap			*regmap;
 	struct regmap_irq_chip_data	*regmap_irqc;
 	long				variant;
-- 
cgit v1.2.3


From 59f10f7e62a28b9b664a9b2efbc5bd10d8d751f0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 14 Dec 2016 14:52:08 +0100
Subject: mfd: axp20x: Drop wrong AXP288_PMIC_ADC_EN define

The adc-enable register for the axp288 is 0x82, not 0x84.
0x82 is already defined as AXP20X_ADC_EN1 and that is what the
axp288_adc driver is actually using, so simply drop the wrong
AXP288_PMIC_ADC_EN define.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/axp20x.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index 6d5dd3f96d33..35418ccb3bb7 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -238,7 +238,6 @@ enum {
 #define AXP288_PMIC_ADC_H               0x56
 #define AXP288_PMIC_ADC_L               0x57
 #define AXP288_ADC_TS_PIN_CTRL          0x84
-#define AXP288_PMIC_ADC_EN              0x84
 
 /* Fuel Gauge */
 #define AXP288_FG_RDC1_REG          0xba
-- 
cgit v1.2.3


From 178e8351cef3607821ec9a4e2fe3f414145fdf34 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Dec 2016 21:09:05 +0100
Subject: mfd: axp20x: Add a few missing defines for AXP288 specific registers

Add defines for the AXP288_POWER_REASON and AXP288_RT_BATT_V_H and
AXP288_RT_BATT_V_L and AXP288_BC_* registers. While at it also move the
AXP288_TS_ADC_H-AXP288_GP_ADC_L defines, which for some reason where
in a different place, together with the rest of the AXP288 specific
defines.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/axp20x.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
index 35418ccb3bb7..0aa4ef7157b8 100644
--- a/include/linux/mfd/axp20x.h
+++ b/include/linux/mfd/axp20x.h
@@ -235,9 +235,20 @@ enum {
 #define AXP22X_BATLOW_THRES1		0xe6
 
 /* AXP288 specific registers */
+#define AXP288_POWER_REASON		0x02
+#define AXP288_BC_GLOBAL		0x2c
+#define AXP288_BC_VBUS_CNTL		0x2d
+#define AXP288_BC_USB_STAT		0x2e
+#define AXP288_BC_DET_STAT		0x2f
 #define AXP288_PMIC_ADC_H               0x56
 #define AXP288_PMIC_ADC_L               0x57
+#define AXP288_TS_ADC_H			0x58
+#define AXP288_TS_ADC_L			0x59
+#define AXP288_GP_ADC_H			0x5a
+#define AXP288_GP_ADC_L			0x5b
 #define AXP288_ADC_TS_PIN_CTRL          0x84
+#define AXP288_RT_BATT_V_H		0xa0
+#define AXP288_RT_BATT_V_L		0xa1
 
 /* Fuel Gauge */
 #define AXP288_FG_RDC1_REG          0xba
@@ -514,11 +525,6 @@ enum axp809_irqs {
 	AXP809_IRQ_GPIO0_INPUT,
 };
 
-#define AXP288_TS_ADC_H		0x58
-#define AXP288_TS_ADC_L		0x59
-#define AXP288_GP_ADC_H		0x5a
-#define AXP288_GP_ADC_L		0x5b
-
 struct axp20x_dev {
 	struct device			*dev;
 	int				irq;
-- 
cgit v1.2.3


From a042a7a4e1ce31380d1370a4d45a3f48d0972655 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 28 Dec 2016 21:55:47 +0000
Subject: mfd: abx500: Fix spelling mistake: "Celcius" -> "Celsius"

Trivial fix to spelling mistake in MFD headers.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/abx500.h           | 2 +-
 include/linux/mfd/abx500/ab8500-bm.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 552cc1d61cc7..44412c9d26e1 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -45,7 +45,7 @@ enum abx500_adc_therm {
  * struct abx500_res_to_temp - defines one point in a temp to res curve. To
  * be used in battery packs that combines the identification resistor with a
  * NTC resistor.
- * @temp:			battery pack temperature in Celcius
+ * @temp:			battery pack temperature in Celsius
  * @resist:			NTC resistor net total resistance
  */
 struct abx500_res_to_temp {
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index 12a5b396921e..e63681eb6c62 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -279,7 +279,7 @@ enum bup_vch_sel {
  * struct res_to_temp - defines one point in a temp to res curve. To
  * be used in battery packs that combines the identification resistor with a
  * NTC resistor.
- * @temp:			battery pack temperature in Celcius
+ * @temp:			battery pack temperature in Celsius
  * @resist:			NTC resistor net total resistance
  */
 struct res_to_temp {
@@ -290,7 +290,7 @@ struct res_to_temp {
 /**
  * struct batres_vs_temp - defines one point in a temp vs battery internal
  * resistance curve.
- * @temp:			battery pack temperature in Celcius
+ * @temp:			battery pack temperature in Celsius
  * @resist:			battery internal reistance in mOhm
  */
 struct batres_vs_temp {
-- 
cgit v1.2.3


From a9eb186e13144782232cc6fa731441be54baf505 Mon Sep 17 00:00:00 2001
From: Joseph Lo <josephl@nvidia.com>
Date: Fri, 16 Dec 2016 18:57:36 +0100
Subject: mfd: cros_ec: Prevent data transfer while device is suspended

The cros_ec driver is still active while the device is suspended.
Besides that, it also tries to transfer data even after the I2C host had
been suspended. This patch uses a simple flag to prevent this.

Signed-off-by: Joseph Lo <josephl@nvidia.com>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index f62043a75f43..7a01c94496f1 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -103,6 +103,7 @@ struct cros_ec_command {
  * @din_size: size of din buffer to allocate (zero to use static din)
  * @dout_size: size of dout buffer to allocate (zero to use static dout)
  * @wake_enabled: true if this device can wake the system from sleep
+ * @suspended: true if this device had been suspended
  * @cmd_xfer: send command to EC and get response
  *     Returns the number of bytes received if the communication succeeded, but
  *     that doesn't mean the EC was happy with the command. The caller
@@ -136,6 +137,7 @@ struct cros_ec_device {
 	int din_size;
 	int dout_size;
 	bool wake_enabled;
+	bool suspended;
 	int (*cmd_xfer)(struct cros_ec_device *ec,
 			struct cros_ec_command *msg);
 	int (*pkt_xfer)(struct cros_ec_device *ec,
-- 
cgit v1.2.3


From f00c06fd98576face871e62bb3aa045c5f647661 Mon Sep 17 00:00:00 2001
From: Shawn Nematbakhsh <shawnn@chromium.org>
Date: Fri, 16 Dec 2016 18:57:37 +0100
Subject: mfd: cros_ec: Send suspend state notification to EC

Notify EC when going to or returning from suspend so that proper actions
related to wake events can be taken.

Signed-off-by: Shawn Nematbakhsh <shawnn@chromium.org>
Signed-off-by: Thierry Escande <thierry.escande@collabora.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec_commands.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index da1c188562bc..34ecae4d612c 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -2547,6 +2547,20 @@ struct ec_params_ext_power_current_limit {
 	uint32_t limit; /* in mA */
 } __packed;
 
+/* Inform the EC when entering a sleep state */
+#define EC_CMD_HOST_SLEEP_EVENT 0xa9
+
+enum host_sleep_event {
+	HOST_SLEEP_EVENT_S3_SUSPEND   = 1,
+	HOST_SLEEP_EVENT_S3_RESUME    = 2,
+	HOST_SLEEP_EVENT_S0IX_SUSPEND = 3,
+	HOST_SLEEP_EVENT_S0IX_RESUME  = 4
+};
+
+struct ec_params_host_sleep_event {
+	uint8_t sleep_event;
+} __packed;
+
 /*****************************************************************************/
 /* Smart battery pass-through */
 
-- 
cgit v1.2.3


From 56e1d40d3beab2f247d48574bf51fc5daeebc285 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 5 Jan 2017 16:44:39 -0800
Subject: mfd: cpcap: Add minimal support

Many Motorola phones like droid 4 are using a custom PMIC called CPCAP
or 6556002. We can support it's core features quite easily with regmap_spi
and regmap_irq.

The children of cpcap, such as regulators, ADC and USB, can be just regular
device drivers and defined in the dts file. They get probed as we call
of_platform_populate() at the end of our probe, and then the children
can just call dev_get_regmap(dev.parent, NULL) to get the regmap.

Cc: devicetree@vger.kernel.org
Cc: Marcel Partap <mpartap@gmx.net>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Scott <michael.scott@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/motorola-cpcap.h | 292 +++++++++++++++++++++++++++++++++++++
 1 file changed, 292 insertions(+)
 create mode 100644 include/linux/mfd/motorola-cpcap.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/motorola-cpcap.h b/include/linux/mfd/motorola-cpcap.h
new file mode 100644
index 000000000000..b4031c2b2214
--- /dev/null
+++ b/include/linux/mfd/motorola-cpcap.h
@@ -0,0 +1,292 @@
+/*
+ * The register defines are based on earlier cpcap.h in Motorola Linux kernel
+ * tree.
+ *
+ * Copyright (C) 2007-2009 Motorola, Inc.
+ *
+ * Rewritten for the real register offsets instead of enumeration
+ * to make the defines usable with Linux kernel regmap support
+ *
+ * Copyright (C) 2016 Tony Lindgren <tony@atomide.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define CPCAP_VENDOR_ST		0
+#define CPCAP_VENDOR_TI		1
+
+#define CPCAP_REVISION_MAJOR(r)	(((r) >> 4) + 1)
+#define CPCAP_REVISION_MINOR(r)	((r) & 0xf)
+
+#define CPCAP_REVISION_1_0	0x08
+#define CPCAP_REVISION_1_1	0x09
+#define CPCAP_REVISION_2_0	0x10
+#define CPCAP_REVISION_2_1	0x11
+
+/* CPCAP registers */
+#define CPCAP_REG_INT1		0x0000	/* Interrupt 1 */
+#define CPCAP_REG_INT2		0x0004	/* Interrupt 2 */
+#define CPCAP_REG_INT3		0x0008	/* Interrupt 3 */
+#define CPCAP_REG_INT4		0x000c	/* Interrupt 4 */
+#define CPCAP_REG_INTM1		0x0010	/* Interrupt Mask 1 */
+#define CPCAP_REG_INTM2		0x0014	/* Interrupt Mask 2 */
+#define CPCAP_REG_INTM3		0x0018	/* Interrupt Mask 3 */
+#define CPCAP_REG_INTM4		0x001c	/* Interrupt Mask 4 */
+#define CPCAP_REG_INTS1		0x0020	/* Interrupt Sense 1 */
+#define CPCAP_REG_INTS2		0x0024	/* Interrupt Sense 2 */
+#define CPCAP_REG_INTS3		0x0028	/* Interrupt Sense 3 */
+#define CPCAP_REG_INTS4		0x002c	/* Interrupt Sense 4 */
+#define CPCAP_REG_ASSIGN1	0x0030	/* Resource Assignment 1 */
+#define CPCAP_REG_ASSIGN2	0x0034	/* Resource Assignment 2 */
+#define CPCAP_REG_ASSIGN3	0x0038	/* Resource Assignment 3 */
+#define CPCAP_REG_ASSIGN4	0x003c	/* Resource Assignment 4 */
+#define CPCAP_REG_ASSIGN5	0x0040	/* Resource Assignment 5 */
+#define CPCAP_REG_ASSIGN6	0x0044	/* Resource Assignment 6 */
+#define CPCAP_REG_VERSC1	0x0048	/* Version Control 1 */
+#define CPCAP_REG_VERSC2	0x004c	/* Version Control 2 */
+
+#define CPCAP_REG_MI1		0x0200	/* Macro Interrupt 1 */
+#define CPCAP_REG_MIM1		0x0204	/* Macro Interrupt Mask 1 */
+#define CPCAP_REG_MI2		0x0208	/* Macro Interrupt 2 */
+#define CPCAP_REG_MIM2		0x020c	/* Macro Interrupt Mask 2 */
+#define CPCAP_REG_UCC1		0x0210	/* UC Control 1 */
+#define CPCAP_REG_UCC2		0x0214	/* UC Control 2 */
+
+#define CPCAP_REG_PC1		0x021c	/* Power Cut 1 */
+#define CPCAP_REG_PC2		0x0220	/* Power Cut 2 */
+#define CPCAP_REG_BPEOL		0x0224	/* BP and EOL */
+#define CPCAP_REG_PGC		0x0228	/* Power Gate and Control */
+#define CPCAP_REG_MT1		0x022c	/* Memory Transfer 1 */
+#define CPCAP_REG_MT2		0x0230	/* Memory Transfer 2 */
+#define CPCAP_REG_MT3		0x0234	/* Memory Transfer 3 */
+#define CPCAP_REG_PF		0x0238	/* Print Format */
+
+#define CPCAP_REG_SCC		0x0400	/* System Clock Control */
+#define CPCAP_REG_SW1		0x0404	/* Stop Watch 1 */
+#define CPCAP_REG_SW2		0x0408	/* Stop Watch 2 */
+#define CPCAP_REG_UCTM		0x040c	/* UC Turbo Mode */
+#define CPCAP_REG_TOD1		0x0410	/* Time of Day 1 */
+#define CPCAP_REG_TOD2		0x0414	/* Time of Day 2 */
+#define CPCAP_REG_TODA1		0x0418	/* Time of Day Alarm 1 */
+#define CPCAP_REG_TODA2		0x041c	/* Time of Day Alarm 2 */
+#define CPCAP_REG_DAY		0x0420	/* Day */
+#define CPCAP_REG_DAYA		0x0424	/* Day Alarm */
+#define CPCAP_REG_VAL1		0x0428	/* Validity 1 */
+#define CPCAP_REG_VAL2		0x042c	/* Validity 2 */
+
+#define CPCAP_REG_SDVSPLL	0x0600	/* Switcher DVS and PLL */
+#define CPCAP_REG_SI2CC1	0x0604	/* Switcher I2C Control 1 */
+#define CPCAP_REG_Si2CC2	0x0608	/* Switcher I2C Control 2 */
+#define CPCAP_REG_S1C1		0x060c	/* Switcher 1 Control 1 */
+#define CPCAP_REG_S1C2		0x0610	/* Switcher 1 Control 2 */
+#define CPCAP_REG_S2C1		0x0614	/* Switcher 2 Control 1 */
+#define CPCAP_REG_S2C2		0x0618	/* Switcher 2 Control 2 */
+#define CPCAP_REG_S3C		0x061c	/* Switcher 3 Control */
+#define CPCAP_REG_S4C1		0x0620	/* Switcher 4 Control 1 */
+#define CPCAP_REG_S4C2		0x0624	/* Switcher 4 Control 2 */
+#define CPCAP_REG_S5C		0x0628	/* Switcher 5 Control */
+#define CPCAP_REG_S6C		0x062c	/* Switcher 6 Control */
+#define CPCAP_REG_VCAMC		0x0630	/* VCAM Control */
+#define CPCAP_REG_VCSIC		0x0634	/* VCSI Control */
+#define CPCAP_REG_VDACC		0x0638	/* VDAC Control */
+#define CPCAP_REG_VDIGC		0x063c	/* VDIG Control */
+#define CPCAP_REG_VFUSEC	0x0640	/* VFUSE Control */
+#define CPCAP_REG_VHVIOC	0x0644	/* VHVIO Control */
+#define CPCAP_REG_VSDIOC	0x0648	/* VSDIO Control */
+#define CPCAP_REG_VPLLC		0x064c	/* VPLL Control */
+#define CPCAP_REG_VRF1C		0x0650	/* VRF1 Control */
+#define CPCAP_REG_VRF2C		0x0654	/* VRF2 Control */
+#define CPCAP_REG_VRFREFC	0x0658	/* VRFREF Control */
+#define CPCAP_REG_VWLAN1C	0x065c	/* VWLAN1 Control */
+#define CPCAP_REG_VWLAN2C	0x0660	/* VWLAN2 Control */
+#define CPCAP_REG_VSIMC		0x0664	/* VSIM Control */
+#define CPCAP_REG_VVIBC		0x0668	/* VVIB Control */
+#define CPCAP_REG_VUSBC		0x066c	/* VUSB Control */
+#define CPCAP_REG_VUSBINT1C	0x0670	/* VUSBINT1 Control */
+#define CPCAP_REG_VUSBINT2C	0x0674	/* VUSBINT2 Control */
+#define CPCAP_REG_URT		0x0678	/* Useroff Regulator Trigger */
+#define CPCAP_REG_URM1		0x067c	/* Useroff Regulator Mask 1 */
+#define CPCAP_REG_URM2		0x0680	/* Useroff Regulator Mask 2 */
+
+#define CPCAP_REG_VAUDIOC	0x0800	/* VAUDIO Control */
+#define CPCAP_REG_CC		0x0804	/* Codec Control */
+#define CPCAP_REG_CDI		0x0808	/* Codec Digital Interface */
+#define CPCAP_REG_SDAC		0x080c	/* Stereo DAC */
+#define CPCAP_REG_SDACDI	0x0810	/* Stereo DAC Digital Interface */
+#define CPCAP_REG_TXI		0x0814	/* TX Inputs */
+#define CPCAP_REG_TXMP		0x0818	/* TX MIC PGA's */
+#define CPCAP_REG_RXOA		0x081c	/* RX Output Amplifiers */
+#define CPCAP_REG_RXVC		0x0820	/* RX Volume Control */
+#define CPCAP_REG_RXCOA		0x0824	/* RX Codec to Output Amps */
+#define CPCAP_REG_RXSDOA	0x0828	/* RX Stereo DAC to Output Amps */
+#define CPCAP_REG_RXEPOA	0x082c	/* RX External PGA to Output Amps */
+#define CPCAP_REG_RXLL		0x0830	/* RX Low Latency */
+#define CPCAP_REG_A2LA		0x0834	/* A2 Loudspeaker Amplifier */
+#define CPCAP_REG_MIPIS1	0x0838	/* MIPI Slimbus 1 */
+#define CPCAP_REG_MIPIS2	0x083c	/* MIPI Slimbus 2 */
+#define CPCAP_REG_MIPIS3	0x0840	/* MIPI Slimbus 3. */
+#define CPCAP_REG_LVAB		0x0844	/* LMR Volume and A4 Balanced. */
+
+#define CPCAP_REG_CCC1		0x0a00	/* Coulomb Counter Control 1 */
+#define CPCAP_REG_CRM		0x0a04	/* Charger and Reverse Mode */
+#define CPCAP_REG_CCCC2		0x0a08	/* Coincell and Coulomb Ctr Ctrl 2 */
+#define CPCAP_REG_CCS1		0x0a0c	/* Coulomb Counter Sample 1 */
+#define CPCAP_REG_CCS2		0x0a10	/* Coulomb Counter Sample 2 */
+#define CPCAP_REG_CCA1		0x0a14	/* Coulomb Counter Accumulator 1 */
+#define CPCAP_REG_CCA2		0x0a18	/* Coulomb Counter Accumulator 2 */
+#define CPCAP_REG_CCM		0x0a1c	/* Coulomb Counter Mode */
+#define CPCAP_REG_CCO		0x0a20	/* Coulomb Counter Offset */
+#define CPCAP_REG_CCI		0x0a24	/* Coulomb Counter Integrator */
+
+#define CPCAP_REG_ADCC1		0x0c00	/* A/D Converter Configuration 1 */
+#define CPCAP_REG_ADCC2		0x0c04	/* A/D Converter Configuration 2 */
+#define CPCAP_REG_ADCD0		0x0c08	/* A/D Converter Data 0 */
+#define CPCAP_REG_ADCD1		0x0c0c	/* A/D Converter Data 1 */
+#define CPCAP_REG_ADCD2		0x0c10	/* A/D Converter Data 2 */
+#define CPCAP_REG_ADCD3		0x0c14	/* A/D Converter Data 3 */
+#define CPCAP_REG_ADCD4		0x0c18	/* A/D Converter Data 4 */
+#define CPCAP_REG_ADCD5		0x0c1c	/* A/D Converter Data 5 */
+#define CPCAP_REG_ADCD6		0x0c20	/* A/D Converter Data 6 */
+#define CPCAP_REG_ADCD7		0x0c24	/* A/D Converter Data 7 */
+#define CPCAP_REG_ADCAL1	0x0c28	/* A/D Converter Calibration 1 */
+#define CPCAP_REG_ADCAL2	0x0c2c	/* A/D Converter Calibration 2 */
+
+#define CPCAP_REG_USBC1		0x0e00	/* USB Control 1 */
+#define CPCAP_REG_USBC2		0x0e04	/* USB Control 2 */
+#define CPCAP_REG_USBC3		0x0e08	/* USB Control 3 */
+#define CPCAP_REG_UVIDL		0x0e0c	/* ULPI Vendor ID Low */
+#define CPCAP_REG_UVIDH		0x0e10	/* ULPI Vendor ID High */
+#define CPCAP_REG_UPIDL		0x0e14	/* ULPI Product ID Low */
+#define CPCAP_REG_UPIDH		0x0e18	/* ULPI Product ID High */
+#define CPCAP_REG_UFC1		0x0e1c	/* ULPI Function Control 1 */
+#define CPCAP_REG_UFC2		0x0e20	/* ULPI Function Control 2 */
+#define CPCAP_REG_UFC3		0x0e24	/* ULPI Function Control 3 */
+#define CPCAP_REG_UIC1		0x0e28	/* ULPI Interface Control 1 */
+#define CPCAP_REG_UIC2		0x0e2c	/* ULPI Interface Control 2 */
+#define CPCAP_REG_UIC3		0x0e30	/* ULPI Interface Control 3 */
+#define CPCAP_REG_USBOTG1	0x0e34	/* USB OTG Control 1 */
+#define CPCAP_REG_USBOTG2	0x0e38	/* USB OTG Control 2 */
+#define CPCAP_REG_USBOTG3	0x0e3c	/* USB OTG Control 3 */
+#define CPCAP_REG_UIER1		0x0e40	/* USB Interrupt Enable Rising 1 */
+#define CPCAP_REG_UIER2		0x0e44	/* USB Interrupt Enable Rising 2 */
+#define CPCAP_REG_UIER3		0x0e48	/* USB Interrupt Enable Rising 3 */
+#define CPCAP_REG_UIEF1		0x0e4c	/* USB Interrupt Enable Falling 1 */
+#define CPCAP_REG_UIEF2		0x0e50	/* USB Interrupt Enable Falling 1 */
+#define CPCAP_REG_UIEF3		0x0e54	/* USB Interrupt Enable Falling 1 */
+#define CPCAP_REG_UIS		0x0e58	/* USB Interrupt Status */
+#define CPCAP_REG_UIL		0x0e5c	/* USB Interrupt Latch */
+#define CPCAP_REG_USBD		0x0e60	/* USB Debug */
+#define CPCAP_REG_SCR1		0x0e64	/* Scratch 1 */
+#define CPCAP_REG_SCR2		0x0e68	/* Scratch 2 */
+#define CPCAP_REG_SCR3		0x0e6c	/* Scratch 3 */
+
+#define CPCAP_REG_VMC		0x0eac	/* Video Mux Control */
+#define CPCAP_REG_OWDC		0x0eb0	/* One Wire Device Control */
+#define CPCAP_REG_GPIO0		0x0eb4	/* GPIO 0 Control */
+
+#define CPCAP_REG_GPIO1		0x0ebc	/* GPIO 1 Control */
+
+#define CPCAP_REG_GPIO2		0x0ec4	/* GPIO 2 Control */
+
+#define CPCAP_REG_GPIO3		0x0ecc	/* GPIO 3 Control */
+
+#define CPCAP_REG_GPIO4		0x0ed4	/* GPIO 4 Control */
+
+#define CPCAP_REG_GPIO5		0x0edc	/* GPIO 5 Control */
+
+#define CPCAP_REG_GPIO6		0x0ee4	/* GPIO 6 Control */
+
+#define CPCAP_REG_MDLC		0x1000	/* Main Display Lighting Control */
+#define CPCAP_REG_KLC		0x1004	/* Keypad Lighting Control */
+#define CPCAP_REG_ADLC		0x1008	/* Aux Display Lighting Control */
+#define CPCAP_REG_REDC		0x100c	/* Red Triode Control */
+#define CPCAP_REG_GREENC	0x1010	/* Green Triode Control */
+#define CPCAP_REG_BLUEC		0x1014	/* Blue Triode Control */
+#define CPCAP_REG_CFC		0x1018	/* Camera Flash Control */
+#define CPCAP_REG_ABC		0x101c	/* Adaptive Boost Control */
+#define CPCAP_REG_BLEDC		0x1020	/* Bluetooth LED Control */
+#define CPCAP_REG_CLEDC		0x1024	/* Camera Privacy LED Control */
+
+#define CPCAP_REG_OW1C		0x1200	/* One Wire 1 Command */
+#define CPCAP_REG_OW1D		0x1204	/* One Wire 1 Data */
+#define CPCAP_REG_OW1I		0x1208	/* One Wire 1 Interrupt */
+#define CPCAP_REG_OW1IE		0x120c	/* One Wire 1 Interrupt Enable */
+
+#define CPCAP_REG_OW1		0x1214	/* One Wire 1 Control */
+
+#define CPCAP_REG_OW2C		0x1220	/* One Wire 2 Command */
+#define CPCAP_REG_OW2D		0x1224	/* One Wire 2 Data */
+#define CPCAP_REG_OW2I		0x1228	/* One Wire 2 Interrupt */
+#define CPCAP_REG_OW2IE		0x122c	/* One Wire 2 Interrupt Enable */
+
+#define CPCAP_REG_OW2		0x1234	/* One Wire 2 Control */
+
+#define CPCAP_REG_OW3C		0x1240	/* One Wire 3 Command */
+#define CPCAP_REG_OW3D		0x1244	/* One Wire 3 Data */
+#define CPCAP_REG_OW3I		0x1248	/* One Wire 3 Interrupt */
+#define CPCAP_REG_OW3IE		0x124c	/* One Wire 3 Interrupt Enable */
+
+#define CPCAP_REG_OW3		0x1254	/* One Wire 3 Control */
+#define CPCAP_REG_GCAIC		0x1258	/* GCAI Clock Control */
+#define CPCAP_REG_GCAIM		0x125c	/* GCAI GPIO Mode */
+#define CPCAP_REG_LGDIR		0x1260	/* LMR GCAI GPIO Direction */
+#define CPCAP_REG_LGPU		0x1264	/* LMR GCAI GPIO Pull-up */
+#define CPCAP_REG_LGPIN		0x1268	/* LMR GCAI GPIO Pin */
+#define CPCAP_REG_LGMASK	0x126c	/* LMR GCAI GPIO Mask */
+#define CPCAP_REG_LDEB		0x1270	/* LMR Debounce Settings */
+#define CPCAP_REG_LGDET		0x1274	/* LMR GCAI Detach Detect */
+#define CPCAP_REG_LMISC		0x1278	/* LMR Misc Bits */
+#define CPCAP_REG_LMACE		0x127c	/* LMR Mace IC Support */
+
+#define CPCAP_REG_TEST		0x7c00	/* Test */
+
+#define CPCAP_REG_ST_TEST1	0x7d08	/* ST Test1 */
+
+#define CPCAP_REG_ST_TEST2	0x7d18	/* ST Test2 */
+
+/*
+ * Helpers for child devices to check the revision and vendor.
+ *
+ * REVISIT: No documentation for the bits below, please update
+ * to use proper names for defines when available.
+ */
+
+static inline int cpcap_get_revision(struct device *dev,
+				     struct regmap *regmap,
+				     u16 *revision)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(regmap, CPCAP_REG_VERSC1, &val);
+	if (ret) {
+		dev_err(dev, "Could not read revision\n");
+
+		return ret;
+	}
+
+	*revision = ((val >> 3) & 0x7) | ((val << 3) & 0x38);
+
+	return 0;
+}
+
+static inline int cpcap_get_vendor(struct device *dev,
+				   struct regmap *regmap,
+				   u16 *vendor)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(regmap, CPCAP_REG_VERSC1, &val);
+	if (ret) {
+		dev_err(dev, "Could not read vendor\n");
+
+		return ret;
+	}
+
+	*vendor = (val >> 6) & 0x7;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 46bc5c408e4e325efbfff26609c76099979180a7 Mon Sep 17 00:00:00 2001
From: Jungseung Lee <js07.lee@samsung.com>
Date: Thu, 22 Dec 2016 12:37:34 +0900
Subject: mmc: core: Export device lifetime information through sysfs

In the eMMC 5.0 version of the spec, several EXT_CSD fields about
device lifetime are added.

 - Two types of estimated indications reflected by averaged wear out of memory
 - An indication reflected by average reserved blocks

Export the information through sysfs.

Signed-off-by: Jungseung Lee <js07.lee@samsung.com>
Reviewed-by: Jaehoon Chung <jh80.chung@samsung.com>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/card.h | 3 +++
 include/linux/mmc/mmc.h  | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 95d69d498296..00449e51838a 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -121,6 +121,9 @@ struct mmc_ext_csd {
 	u8			raw_pwr_cl_ddr_200_360;	/* 253 */
 	u8			raw_bkops_status;	/* 246 */
 	u8			raw_sectors[4];		/* 212 - 4 bytes */
+	u8			pre_eol_info;		/* 267 */
+	u8			device_life_time_est_typ_a;	/* 268 */
+	u8			device_life_time_est_typ_b;	/* 269 */
 
 	unsigned int            feature_support;
 #define MMC_DISCARD_FEATURE	BIT(0)                  /* CMD38 feature */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 672730acc705..a0740829ff00 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -339,6 +339,9 @@ struct _mmc_csd {
 #define EXT_CSD_CACHE_SIZE		249	/* RO, 4 bytes */
 #define EXT_CSD_PWR_CL_DDR_200_360	253	/* RO */
 #define EXT_CSD_FIRMWARE_VERSION	254	/* RO, 8 bytes */
+#define EXT_CSD_PRE_EOL_INFO		267	/* RO */
+#define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_A	268	/* RO */
+#define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_B	269	/* RO */
 #define EXT_CSD_CMDQ_DEPTH		307	/* RO */
 #define EXT_CSD_CMDQ_SUPPORT		308	/* RO */
 #define EXT_CSD_SUPPORTED_MODE		493	/* RO */
-- 
cgit v1.2.3


From 13f00f9f2460cfa106de6c30512b648d85831468 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 30 Dec 2016 13:47:15 +0100
Subject: mmc: Removed the unused public mmc boot.h header

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/mmc/boot.h | 7 -------
 1 file changed, 7 deletions(-)
 delete mode 100644 include/linux/mmc/boot.h

(limited to 'include/linux')

diff --git a/include/linux/mmc/boot.h b/include/linux/mmc/boot.h
deleted file mode 100644
index 23acc3baa07d..000000000000
--- a/include/linux/mmc/boot.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef LINUX_MMC_BOOT_H
-#define LINUX_MMC_BOOT_H
-
-enum { MMC_PROGRESS_ENTER, MMC_PROGRESS_INIT,
-       MMC_PROGRESS_LOAD, MMC_PROGRESS_DONE };
-
-#endif /* LINUX_MMC_BOOT_H */
-- 
cgit v1.2.3


From 95cc4df716a210a19f0611215c49484d460250fd Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 30 Dec 2016 13:47:16 +0100
Subject: mmc: sh_mmcif: Remove unused use_cd_gpio/cd_gpio from platform data

Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/mmc/sh_mmcif.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index ccd8fb2cad52..5ce5a2c1a1f5 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -35,10 +35,8 @@ struct sh_mmcif_plat_data {
 	int (*get_cd)(struct platform_device *pdef);
 	unsigned int		slave_id_tx;	/* embedded slave_id_[tr]x */
 	unsigned int		slave_id_rx;
-	bool			use_cd_gpio : 1;
 	bool			ccs_unsupported : 1;
 	bool			clk_ctrl2_present : 1;
-	unsigned int		cd_gpio;
 	u8			sup_pclk;	/* 1 :SH7757, 0: SH7724/SH7372 */
 	unsigned long		caps;
 	u32			ocr;
-- 
cgit v1.2.3


From 5957eeba530747e9d77daf2f300a186758be51d9 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 30 Dec 2016 13:47:17 +0100
Subject: mmc: sh_mmcif: Remove unused ->get_cd() platform callback

Removing the callback also enables us to remove the sh_mmcif_get_cd()
altogether, as we convert to use mmc_gpio_get_cd() to the same kind of
work.

Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/sh_mmcif.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index 5ce5a2c1a1f5..7cafc95a2b4e 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -32,7 +32,6 @@
  */
 
 struct sh_mmcif_plat_data {
-	int (*get_cd)(struct platform_device *pdef);
 	unsigned int		slave_id_tx;	/* embedded slave_id_[tr]x */
 	unsigned int		slave_id_rx;
 	bool			ccs_unsupported : 1;
-- 
cgit v1.2.3


From 8020f71117042ed82287e4f51c48b57ce4c783df Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 30 Dec 2016 13:47:18 +0100
Subject: mmc: sh_mmcif: Remove unused ccs_unsupported from the platform data

There are currently no users of the ccs_unsupported member from the
platform data, so let's remove it.

Note, as some of the sh_mmcif variants may not support ccs, let's keep the
current code in the driver, which deals with this. For future support, we
should invent a DT binding instead, but let's leave that until it's needed.

Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/mmc/sh_mmcif.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index 7cafc95a2b4e..384b86b90473 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -34,7 +34,6 @@
 struct sh_mmcif_plat_data {
 	unsigned int		slave_id_tx;	/* embedded slave_id_[tr]x */
 	unsigned int		slave_id_rx;
-	bool			ccs_unsupported : 1;
 	bool			clk_ctrl2_present : 1;
 	u8			sup_pclk;	/* 1 :SH7757, 0: SH7724/SH7372 */
 	unsigned long		caps;
-- 
cgit v1.2.3


From dba4bb484e9e495478f2bcf474393d33f7e0ec27 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 30 Dec 2016 13:47:19 +0100
Subject: mmc: sh_mmcif: Remove unused clk_ctrl2_present from the platform data

There are currently no users of the clk_ctrl2_present member from the
platform data, so let's remove it.

Note, as some of the sh_mmcif variants may support clk_ctrl2, let's keep
the current code in the driver, which deals with this. For future support,
we should invent a DT binding instead, but let's leave that until it's
needed.

Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/mmc/sh_mmcif.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index 384b86b90473..a7baa29484c3 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -34,7 +34,6 @@
 struct sh_mmcif_plat_data {
 	unsigned int		slave_id_tx;	/* embedded slave_id_[tr]x */
 	unsigned int		slave_id_rx;
-	bool			clk_ctrl2_present : 1;
 	u8			sup_pclk;	/* 1 :SH7757, 0: SH7724/SH7372 */
 	unsigned long		caps;
 	u32			ocr;
-- 
cgit v1.2.3


From 0f21c58c63add705065a75495e7a1860a62470ed Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 30 Dec 2016 13:47:20 +0100
Subject: mmc: dw_mmc: Remove the public dw_mmc header file

There are currently no external users of the public dw_mmc header file,
except the dw_mmc driver itself. Therefore let's move the definitions from
the public dw_mmc header file into the existing private dw_mmc header file
and then remove the public one.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/mmc/dw_mmc.h | 274 ---------------------------------------------
 1 file changed, 274 deletions(-)
 delete mode 100644 include/linux/mmc/dw_mmc.h

(limited to 'include/linux')

diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
deleted file mode 100644
index 15db6f83f53f..000000000000
--- a/include/linux/mmc/dw_mmc.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Synopsys DesignWare Multimedia Card Interface driver
- *  (Based on NXP driver for lpc 31xx)
- *
- * Copyright (C) 2009 NXP Semiconductors
- * Copyright (C) 2009, 2010 Imagination Technologies Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef LINUX_MMC_DW_MMC_H
-#define LINUX_MMC_DW_MMC_H
-
-#include <linux/scatterlist.h>
-#include <linux/mmc/core.h>
-#include <linux/dmaengine.h>
-#include <linux/reset.h>
-
-#define MAX_MCI_SLOTS	2
-
-enum dw_mci_state {
-	STATE_IDLE = 0,
-	STATE_SENDING_CMD,
-	STATE_SENDING_DATA,
-	STATE_DATA_BUSY,
-	STATE_SENDING_STOP,
-	STATE_DATA_ERROR,
-	STATE_SENDING_CMD11,
-	STATE_WAITING_CMD11_DONE,
-};
-
-enum {
-	EVENT_CMD_COMPLETE = 0,
-	EVENT_XFER_COMPLETE,
-	EVENT_DATA_COMPLETE,
-	EVENT_DATA_ERROR,
-};
-
-enum dw_mci_cookie {
-	COOKIE_UNMAPPED,
-	COOKIE_PRE_MAPPED,	/* mapped by pre_req() of dwmmc */
-	COOKIE_MAPPED,		/* mapped by prepare_data() of dwmmc */
-};
-
-struct mmc_data;
-
-enum {
-	TRANS_MODE_PIO = 0,
-	TRANS_MODE_IDMAC,
-	TRANS_MODE_EDMAC
-};
-
-struct dw_mci_dma_slave {
-	struct dma_chan *ch;
-	enum dma_transfer_direction direction;
-};
-
-/**
- * struct dw_mci - MMC controller state shared between all slots
- * @lock: Spinlock protecting the queue and associated data.
- * @irq_lock: Spinlock protecting the INTMASK setting.
- * @regs: Pointer to MMIO registers.
- * @fifo_reg: Pointer to MMIO registers for data FIFO
- * @sg: Scatterlist entry currently being processed by PIO code, if any.
- * @sg_miter: PIO mapping scatterlist iterator.
- * @cur_slot: The slot which is currently using the controller.
- * @mrq: The request currently being processed on @cur_slot,
- *	or NULL if the controller is idle.
- * @cmd: The command currently being sent to the card, or NULL.
- * @data: The data currently being transferred, or NULL if no data
- *	transfer is in progress.
- * @stop_abort: The command currently prepared for stoping transfer.
- * @prev_blksz: The former transfer blksz record.
- * @timing: Record of current ios timing.
- * @use_dma: Whether DMA channel is initialized or not.
- * @using_dma: Whether DMA is in use for the current transfer.
- * @dma_64bit_address: Whether DMA supports 64-bit address mode or not.
- * @sg_dma: Bus address of DMA buffer.
- * @sg_cpu: Virtual address of DMA buffer.
- * @dma_ops: Pointer to platform-specific DMA callbacks.
- * @cmd_status: Snapshot of SR taken upon completion of the current
- * @ring_size: Buffer size for idma descriptors.
- *	command. Only valid when EVENT_CMD_COMPLETE is pending.
- * @dms: structure of slave-dma private data.
- * @phy_regs: physical address of controller's register map
- * @data_status: Snapshot of SR taken upon completion of the current
- *	data transfer. Only valid when EVENT_DATA_COMPLETE or
- *	EVENT_DATA_ERROR is pending.
- * @stop_cmdr: Value to be loaded into CMDR when the stop command is
- *	to be sent.
- * @dir_status: Direction of current transfer.
- * @tasklet: Tasklet running the request state machine.
- * @pending_events: Bitmask of events flagged by the interrupt handler
- *	to be processed by the tasklet.
- * @completed_events: Bitmask of events which the state machine has
- *	processed.
- * @state: Tasklet state.
- * @queue: List of slots waiting for access to the controller.
- * @bus_hz: The rate of @mck in Hz. This forms the basis for MMC bus
- *	rate and timeout calculations.
- * @current_speed: Configured rate of the controller.
- * @num_slots: Number of slots available.
- * @fifoth_val: The value of FIFOTH register.
- * @verid: Denote Version ID.
- * @dev: Device associated with the MMC controller.
- * @pdata: Platform data associated with the MMC controller.
- * @drv_data: Driver specific data for identified variant of the controller
- * @priv: Implementation defined private data.
- * @biu_clk: Pointer to bus interface unit clock instance.
- * @ciu_clk: Pointer to card interface unit clock instance.
- * @slot: Slots sharing this MMC controller.
- * @fifo_depth: depth of FIFO.
- * @data_shift: log2 of FIFO item size.
- * @part_buf_start: Start index in part_buf.
- * @part_buf_count: Bytes of partial data in part_buf.
- * @part_buf: Simple buffer for partial fifo reads/writes.
- * @push_data: Pointer to FIFO push function.
- * @pull_data: Pointer to FIFO pull function.
- * @vqmmc_enabled: Status of vqmmc, should be true or false.
- * @irq_flags: The flags to be passed to request_irq.
- * @irq: The irq value to be passed to request_irq.
- * @sdio_id0: Number of slot0 in the SDIO interrupt registers.
- * @cmd11_timer: Timer for SD3.0 voltage switch over scheme.
- * @dto_timer: Timer for broken data transfer over scheme.
- *
- * Locking
- * =======
- *
- * @lock is a softirq-safe spinlock protecting @queue as well as
- * @cur_slot, @mrq and @state. These must always be updated
- * at the same time while holding @lock.
- *
- * @irq_lock is an irq-safe spinlock protecting the INTMASK register
- * to allow the interrupt handler to modify it directly.  Held for only long
- * enough to read-modify-write INTMASK and no other locks are grabbed when
- * holding this one.
- *
- * The @mrq field of struct dw_mci_slot is also protected by @lock,
- * and must always be written at the same time as the slot is added to
- * @queue.
- *
- * @pending_events and @completed_events are accessed using atomic bit
- * operations, so they don't need any locking.
- *
- * None of the fields touched by the interrupt handler need any
- * locking. However, ordering is important: Before EVENT_DATA_ERROR or
- * EVENT_DATA_COMPLETE is set in @pending_events, all data-related
- * interrupts must be disabled and @data_status updated with a
- * snapshot of SR. Similarly, before EVENT_CMD_COMPLETE is set, the
- * CMDRDY interrupt must be disabled and @cmd_status updated with a
- * snapshot of SR, and before EVENT_XFER_COMPLETE can be set, the
- * bytes_xfered field of @data must be written. This is ensured by
- * using barriers.
- */
-struct dw_mci {
-	spinlock_t		lock;
-	spinlock_t		irq_lock;
-	void __iomem		*regs;
-	void __iomem		*fifo_reg;
-
-	struct scatterlist	*sg;
-	struct sg_mapping_iter	sg_miter;
-
-	struct dw_mci_slot	*cur_slot;
-	struct mmc_request	*mrq;
-	struct mmc_command	*cmd;
-	struct mmc_data		*data;
-	struct mmc_command	stop_abort;
-	unsigned int		prev_blksz;
-	unsigned char		timing;
-
-	/* DMA interface members*/
-	int			use_dma;
-	int			using_dma;
-	int			dma_64bit_address;
-
-	dma_addr_t		sg_dma;
-	void			*sg_cpu;
-	const struct dw_mci_dma_ops	*dma_ops;
-	/* For idmac */
-	unsigned int		ring_size;
-
-	/* For edmac */
-	struct dw_mci_dma_slave *dms;
-	/* Registers's physical base address */
-	resource_size_t		phy_regs;
-
-	u32			cmd_status;
-	u32			data_status;
-	u32			stop_cmdr;
-	u32			dir_status;
-	struct tasklet_struct	tasklet;
-	unsigned long		pending_events;
-	unsigned long		completed_events;
-	enum dw_mci_state	state;
-	struct list_head	queue;
-
-	u32			bus_hz;
-	u32			current_speed;
-	u32			num_slots;
-	u32			fifoth_val;
-	u16			verid;
-	struct device		*dev;
-	struct dw_mci_board	*pdata;
-	const struct dw_mci_drv_data	*drv_data;
-	void			*priv;
-	struct clk		*biu_clk;
-	struct clk		*ciu_clk;
-	struct dw_mci_slot	*slot[MAX_MCI_SLOTS];
-
-	/* FIFO push and pull */
-	int			fifo_depth;
-	int			data_shift;
-	u8			part_buf_start;
-	u8			part_buf_count;
-	union {
-		u16		part_buf16;
-		u32		part_buf32;
-		u64		part_buf;
-	};
-	void (*push_data)(struct dw_mci *host, void *buf, int cnt);
-	void (*pull_data)(struct dw_mci *host, void *buf, int cnt);
-
-	bool			vqmmc_enabled;
-	unsigned long		irq_flags; /* IRQ flags */
-	int			irq;
-
-	int			sdio_id0;
-
-	struct timer_list       cmd11_timer;
-	struct timer_list       dto_timer;
-};
-
-/* DMA ops for Internal/External DMAC interface */
-struct dw_mci_dma_ops {
-	/* DMA Ops */
-	int (*init)(struct dw_mci *host);
-	int (*start)(struct dw_mci *host, unsigned int sg_len);
-	void (*complete)(void *host);
-	void (*stop)(struct dw_mci *host);
-	void (*cleanup)(struct dw_mci *host);
-	void (*exit)(struct dw_mci *host);
-};
-
-struct dma_pdata;
-
-/* Board platform data */
-struct dw_mci_board {
-	u32 num_slots;
-
-	unsigned int bus_hz; /* Clock speed at the cclk_in pad */
-
-	u32 caps;	/* Capabilities */
-	u32 caps2;	/* More capabilities */
-	u32 pm_caps;	/* PM capabilities */
-	/*
-	 * Override fifo depth. If 0, autodetect it from the FIFOTH register,
-	 * but note that this may not be reliable after a bootloader has used
-	 * it.
-	 */
-	unsigned int fifo_depth;
-
-	/* delay in mS before detecting cards after interrupt */
-	u32 detect_delay_ms;
-
-	struct reset_control *rstc;
-	struct dw_mci_dma_ops *dma_ops;
-	struct dma_pdata *data;
-};
-
-#endif /* LINUX_MMC_DW_MMC_H */
-- 
cgit v1.2.3


From 8336bf68b57e1e736be8d4ef1f46a789fe7b9bde Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 11 Jan 2017 12:28:15 +0100
Subject: mmc: mxcmmc: Include interrupt.h in the platform data header

The mxcmmc platform data header depends on interrupt.h. Don't rely on the
public mmc header host.h to include it, bud instead make that dependency
explicit.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/platform_data/mmc-mxcmmc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/mmc-mxcmmc.h b/include/linux/platform_data/mmc-mxcmmc.h
index 29115f405af9..b0fdaa9bd185 100644
--- a/include/linux/platform_data/mmc-mxcmmc.h
+++ b/include/linux/platform_data/mmc-mxcmmc.h
@@ -1,6 +1,7 @@
 #ifndef ASMARM_ARCH_MMC_H
 #define ASMARM_ARCH_MMC_H
 
+#include <linux/interrupt.h>
 #include <linux/mmc/host.h>
 
 struct device;
-- 
cgit v1.2.3


From 8da007348bf52a91e5137d27d7dcd528edbb80ce Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Jan 2017 14:14:06 +0100
Subject: mmc: core: First step in cleaning up public mmc header files

This is the first step in cleaning up the public mmc header files. In this
change we makes sure each header file builds standalone, as that helps to
resolve dependencies.

While changing this, it also seems reasonable to stop including other
headers from inside a header itself which it don't depend upon.
Additionally, in some cases such dependencies are better resolved by
forward declaring the needed struct.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
---
 include/linux/mmc/card.h      | 2 --
 include/linux/mmc/core.h      | 3 +--
 include/linux/mmc/host.h      | 9 ++-------
 include/linux/mmc/mmc.h       | 2 ++
 include/linux/mmc/slot-gpio.h | 3 +++
 5 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 00449e51838a..ca64f5b67983 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -11,7 +11,6 @@
 #define LINUX_MMC_CARD_H
 
 #include <linux/device.h>
-#include <linux/mmc/core.h>
 #include <linux/mod_devicetable.h>
 
 struct mmc_cid {
@@ -206,7 +205,6 @@ struct sdio_cis {
 };
 
 struct mmc_host;
-struct mmc_ios;
 struct sdio_func;
 struct sdio_func_tuple;
 
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index e33cc748dcfe..64e2ddf33f38 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -8,10 +8,9 @@
 #ifndef LINUX_MMC_CORE_H
 #define LINUX_MMC_CORE_H
 
-#include <linux/interrupt.h>
 #include <linux/completion.h>
+#include <linux/types.h>
 
-struct request;
 struct mmc_data;
 struct mmc_request;
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8bc884121465..8d38c76b06d6 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -10,16 +10,12 @@
 #ifndef LINUX_MMC_HOST_H
 #define LINUX_MMC_HOST_H
 
-#include <linux/leds.h>
-#include <linux/mutex.h>
-#include <linux/timer.h>
 #include <linux/sched.h>
 #include <linux/device.h>
 #include <linux/fault-inject.h>
 
 #include <linux/mmc/core.h>
 #include <linux/mmc/card.h>
-#include <linux/mmc/mmc.h>
 #include <linux/mmc/pm.h>
 
 struct mmc_ios {
@@ -82,6 +78,8 @@ struct mmc_ios {
 	bool enhanced_strobe;			/* hs400es selection */
 };
 
+struct mmc_host;
+
 struct mmc_host_ops {
 	/*
 	 * It is optional for the host to implement pre_req and post_req in
@@ -162,9 +160,6 @@ struct mmc_host_ops {
 				  unsigned int direction, int blk_size);
 };
 
-struct mmc_card;
-struct device;
-
 struct mmc_async_req {
 	/* active mmc request */
 	struct mmc_request	*mrq;
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index a0740829ff00..261772e3effe 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -24,6 +24,8 @@
 #ifndef LINUX_MMC_MMC_H
 #define LINUX_MMC_MMC_H
 
+#include <linux/types.h>
+
 /* Standard MMC commands (4.1)           type  argument     response */
    /* class 1 */
 #define MMC_GO_IDLE_STATE         0   /* bc                          */
diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index a7972cd3bc14..82f0d289f110 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -11,6 +11,9 @@
 #ifndef MMC_SLOT_GPIO_H
 #define MMC_SLOT_GPIO_H
 
+#include <linux/types.h>
+#include <linux/irqreturn.h>
+
 struct mmc_host;
 
 int mmc_gpio_get_ro(struct mmc_host *host);
-- 
cgit v1.2.3


From 55244c5659b5e73a969b285a2e763223d8aab979 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Jan 2017 14:14:08 +0100
Subject: mmc: core: Move public functions from core.h to private headers

A significant amount of functions are available through the public mmc
core.h header file. Let's slim down this public mmc interface, as to
prevent users from abusing it, by moving some of the functions to private
mmc header files.

This change concentrates on moving the functions into private mmc headers,
following changes may continue with additional clean-ups, as an example
some functions can be turned into static.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
---
 include/linux/mmc/core.h | 50 ------------------------------------------------
 1 file changed, 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 64e2ddf33f38..e679a8678594 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -158,25 +158,13 @@ struct mmc_request {
 struct mmc_card;
 struct mmc_async_req;
 
-extern int mmc_stop_bkops(struct mmc_card *);
-extern int mmc_read_bkops_status(struct mmc_card *);
 extern struct mmc_async_req *mmc_start_req(struct mmc_host *,
 					   struct mmc_async_req *,
 					   enum mmc_blk_status *);
-extern int mmc_interrupt_hpi(struct mmc_card *);
 extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
-extern void mmc_wait_for_req_done(struct mmc_host *host,
-				  struct mmc_request *mrq);
-extern bool mmc_is_req_done(struct mmc_host *host, struct mmc_request *mrq);
 extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int);
-extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *);
-extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *,
-	struct mmc_command *, int);
-extern void mmc_start_bkops(struct mmc_card *card, bool from_exception);
-extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int);
 extern int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
 extern int mmc_abort_tuning(struct mmc_host *host, u32 opcode);
-extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
 
 #define MMC_ERASE_ARG		0x00000000
 #define MMC_SECURE_ERASE_ARG	0x80000000
@@ -188,46 +176,8 @@ extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
 #define MMC_SECURE_ARGS		0x80000000
 #define MMC_TRIM_ARGS		0x00008001
 
-extern int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
-		     unsigned int arg);
-extern int mmc_can_erase(struct mmc_card *card);
-extern int mmc_can_trim(struct mmc_card *card);
-extern int mmc_can_discard(struct mmc_card *card);
-extern int mmc_can_sanitize(struct mmc_card *card);
-extern int mmc_can_secure_erase_trim(struct mmc_card *card);
-extern int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from,
-				   unsigned int nr);
-extern unsigned int mmc_calc_max_discard(struct mmc_card *card);
-
-extern int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen);
-extern int mmc_set_blockcount(struct mmc_card *card, unsigned int blockcount,
-			      bool is_rel_write);
 extern int mmc_hw_reset(struct mmc_host *host);
-extern int mmc_can_reset(struct mmc_card *card);
-
 extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *);
-extern unsigned int mmc_align_data_size(struct mmc_card *, unsigned int);
-
-extern int __mmc_claim_host(struct mmc_host *host, atomic_t *abort);
-extern void mmc_release_host(struct mmc_host *host);
-
-extern void mmc_get_card(struct mmc_card *card);
-extern void mmc_put_card(struct mmc_card *card);
-
-extern int mmc_flush_cache(struct mmc_card *);
-
-extern int mmc_detect_card_removed(struct mmc_host *host);
-
-/**
- *	mmc_claim_host - exclusively claim a host
- *	@host: mmc host to claim
- *
- *	Claim a host for a set of operations.
- */
-static inline void mmc_claim_host(struct mmc_host *host)
-{
-	__mmc_claim_host(host, NULL);
-}
 
 struct device_node;
 extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
-- 
cgit v1.2.3


From 9e1bbc72727506c173cc6ed1d704cb2d7911d9d5 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Jan 2017 14:14:09 +0100
Subject: mmc: core: Move some host specific public functions to host.h

Ideally the public mmc header file, core.h, shouldn't contain interfaces
particularly intended to be used by host drivers. Instead those should
remain in the host.h header file. Therefore, let's move a couple functions
from core.h to host.h.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
---
 include/linux/mmc/core.h | 6 ------
 include/linux/mmc/host.h | 6 ++++++
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index e679a8678594..faacc90f8711 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -163,8 +163,6 @@ extern struct mmc_async_req *mmc_start_req(struct mmc_host *,
 					   enum mmc_blk_status *);
 extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
 extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int);
-extern int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
-extern int mmc_abort_tuning(struct mmc_host *host, u32 opcode);
 
 #define MMC_ERASE_ARG		0x00000000
 #define MMC_SECURE_ERASE_ARG	0x80000000
@@ -179,8 +177,4 @@ extern int mmc_abort_tuning(struct mmc_host *host, u32 opcode);
 extern int mmc_hw_reset(struct mmc_host *host);
 extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *);
 
-struct device_node;
-extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
-extern int mmc_of_parse_voltage(struct device_node *np, u32 *mask);
-
 #endif /* LINUX_MMC_CORE_H */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8d38c76b06d6..7de05195bff6 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -392,11 +392,14 @@ struct mmc_host {
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
+struct device_node;
+
 struct mmc_host *mmc_alloc_host(int extra, struct device *);
 int mmc_add_host(struct mmc_host *);
 void mmc_remove_host(struct mmc_host *);
 void mmc_free_host(struct mmc_host *);
 int mmc_of_parse(struct mmc_host *host);
+int mmc_of_parse_voltage(struct device_node *np, u32 *mask);
 
 static inline void *mmc_priv(struct mmc_host *host)
 {
@@ -452,6 +455,7 @@ static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc,
 }
 #endif
 
+u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
 int mmc_regulator_get_supply(struct mmc_host *mmc);
 
 static inline int mmc_card_is_removable(struct mmc_host *host)
@@ -538,6 +542,8 @@ static inline bool mmc_can_retune(struct mmc_host *host)
 	return host->can_retune == 1;
 }
 
+int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
+int mmc_abort_tuning(struct mmc_host *host, u32 opcode);
 void mmc_retune_pause(struct mmc_host *host);
 void mmc_retune_unpause(struct mmc_host *host);
 
-- 
cgit v1.2.3


From c0a3e080f9294117a7d510a4f01a0b7c6dbcadae Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Jan 2017 14:14:10 +0100
Subject: mmc: core: Move erase/trim/discard defines from public core.h to
 mmc.h

As the public mmc.h header already contains similar defines for other mmc
commands and arguments, let's move those for erase/trim/discard into here
as well.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
---
 include/linux/mmc/core.h | 10 ----------
 include/linux/mmc/mmc.h  | 13 ++++++++++++-
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index faacc90f8711..6440e10c86cd 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -164,16 +164,6 @@ extern struct mmc_async_req *mmc_start_req(struct mmc_host *,
 extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
 extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int);
 
-#define MMC_ERASE_ARG		0x00000000
-#define MMC_SECURE_ERASE_ARG	0x80000000
-#define MMC_TRIM_ARG		0x00000001
-#define MMC_DISCARD_ARG		0x00000003
-#define MMC_SECURE_TRIM1_ARG	0x80000001
-#define MMC_SECURE_TRIM2_ARG	0x80008000
-
-#define MMC_SECURE_ARGS		0x80000000
-#define MMC_TRIM_ARGS		0x00008001
-
 extern int mmc_hw_reset(struct mmc_host *host);
 extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *);
 
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 261772e3effe..8f7854324d2b 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -462,12 +462,23 @@ struct _mmc_csd {
 /*
  * MMC_SWITCH access modes
  */
-
 #define MMC_SWITCH_MODE_CMD_SET		0x00	/* Change the command set */
 #define MMC_SWITCH_MODE_SET_BITS	0x01	/* Set bits which are 1 in value */
 #define MMC_SWITCH_MODE_CLEAR_BITS	0x02	/* Clear bits which are 1 in value */
 #define MMC_SWITCH_MODE_WRITE_BYTE	0x03	/* Set target to value */
 
+/*
+ * Erase/trim/discard
+ */
+#define MMC_ERASE_ARG			0x00000000
+#define MMC_SECURE_ERASE_ARG		0x80000000
+#define MMC_TRIM_ARG			0x00000001
+#define MMC_DISCARD_ARG			0x00000003
+#define MMC_SECURE_TRIM1_ARG		0x80000001
+#define MMC_SECURE_TRIM2_ARG		0x80008000
+#define MMC_SECURE_ARGS			0x80000000
+#define MMC_TRIM_ARGS			0x00008001
+
 #define mmc_driver_type_mask(n)		(1 << (n))
 
 #endif /* LINUX_MMC_MMC_H */
-- 
cgit v1.2.3


From 009b0fcea5dfe77663f7ec3927c7c50a97a7675c Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Jan 2017 14:14:11 +0100
Subject: mmc: core: Remove unused struct _mmc_csd from public mmc.h header

The struct _mmc_csd isn't being used and has been lurking around for a
while. Let's kill it.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
---
 include/linux/mmc/mmc.h | 44 --------------------------------------------
 1 file changed, 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 8f7854324d2b..7406d9badda0 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -184,50 +184,6 @@ static inline bool mmc_op_multi(u32 opcode)
 #define R2_SPI_OUT_OF_RANGE	(1 << 15)	/* or CSD overwrite */
 #define R2_SPI_CSD_OVERWRITE	R2_SPI_OUT_OF_RANGE
 
-/* These are unpacked versions of the actual responses */
-
-struct _mmc_csd {
-	u8  csd_structure;
-	u8  spec_vers;
-	u8  taac;
-	u8  nsac;
-	u8  tran_speed;
-	u16 ccc;
-	u8  read_bl_len;
-	u8  read_bl_partial;
-	u8  write_blk_misalign;
-	u8  read_blk_misalign;
-	u8  dsr_imp;
-	u16 c_size;
-	u8  vdd_r_curr_min;
-	u8  vdd_r_curr_max;
-	u8  vdd_w_curr_min;
-	u8  vdd_w_curr_max;
-	u8  c_size_mult;
-	union {
-		struct { /* MMC system specification version 3.1 */
-			u8  erase_grp_size;
-			u8  erase_grp_mult;
-		} v31;
-		struct { /* MMC system specification version 2.2 */
-			u8  sector_size;
-			u8  erase_grp_size;
-		} v22;
-	} erase;
-	u8  wp_grp_size;
-	u8  wp_grp_enable;
-	u8  default_ecc;
-	u8  r2w_factor;
-	u8  write_bl_len;
-	u8  write_bl_partial;
-	u8  file_format_grp;
-	u8  copy;
-	u8  perm_write_protect;
-	u8  tmp_write_protect;
-	u8  file_format;
-	u8  ecc;
-};
-
 /*
  * OCR bits are mostly in host.h
  */
-- 
cgit v1.2.3


From 4facdde11394d44b3869807841042d059f074a07 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Jan 2017 14:14:14 +0100
Subject: mmc: core: Move public functions from card.h to private headers

A significant amount of functions and other definitions are available
through the public mmc card.h header file. Let's slim down this public mmc
interface, as to prevent users from abusing it, by moving some of the
functions/definitions to private mmc header files.

This change concentrates on moving the functions into private mmc headers,
following changes may continue with additional clean-ups.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
---
 include/linux/mmc/card.h | 240 -----------------------------------------------
 1 file changed, 240 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index ca64f5b67983..29d00c91c25c 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -248,13 +248,6 @@ struct mmc_card {
 #define MMC_TYPE_SDIO		2		/* SDIO card */
 #define MMC_TYPE_SD_COMBO	3		/* SD combo (IO+mem) card */
 	unsigned int		state;		/* (our) card state */
-#define MMC_STATE_PRESENT	(1<<0)		/* present in sysfs */
-#define MMC_STATE_READONLY	(1<<1)		/* card is read-only */
-#define MMC_STATE_BLOCKADDR	(1<<2)		/* card uses block-addressing */
-#define MMC_CARD_SDXC		(1<<3)		/* card is SDXC */
-#define MMC_CARD_REMOVED	(1<<4)		/* card has been removed */
-#define MMC_STATE_DOING_BKOPS	(1<<5)		/* card is doing BKOPS */
-#define MMC_STATE_SUSPENDED	(1<<6)		/* card is suspended */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -273,7 +266,6 @@ struct mmc_card {
 #define MMC_QUIRK_TRIM_BROKEN	(1<<12)		/* Skip trim */
 #define MMC_QUIRK_BROKEN_HPI	(1<<13)		/* Disable broken HPI support */
 
-
 	unsigned int		erase_size;	/* erase size in sectors */
  	unsigned int		erase_shift;	/* if erase unit is power 2 */
  	unsigned int		pref_erase;	/* in sectors */
@@ -309,245 +301,13 @@ struct mmc_card {
 	unsigned int    nr_parts;
 };
 
-/*
- * This function fill contents in mmc_part.
- */
-static inline void mmc_part_add(struct mmc_card *card, unsigned int size,
-			unsigned int part_cfg, char *name, int idx, bool ro,
-			int area_type)
-{
-	card->part[card->nr_parts].size = size;
-	card->part[card->nr_parts].part_cfg = part_cfg;
-	sprintf(card->part[card->nr_parts].name, name, idx);
-	card->part[card->nr_parts].force_ro = ro;
-	card->part[card->nr_parts].area_type = area_type;
-	card->nr_parts++;
-}
-
 static inline bool mmc_large_sector(struct mmc_card *card)
 {
 	return card->ext_csd.data_sector_size == 4096;
 }
 
-/*
- *  The world is not perfect and supplies us with broken mmc/sdio devices.
- *  For at least some of these bugs we need a work-around.
- */
-
-struct mmc_fixup {
-	/* CID-specific fields. */
-	const char *name;
-
-	/* Valid revision range */
-	u64 rev_start, rev_end;
-
-	unsigned int manfid;
-	unsigned short oemid;
-
-	/* SDIO-specfic fields. You can use SDIO_ANY_ID here of course */
-	u16 cis_vendor, cis_device;
-
-	/* for MMC cards */
-	unsigned int ext_csd_rev;
-
-	void (*vendor_fixup)(struct mmc_card *card, int data);
-	int data;
-};
-
-#define CID_MANFID_ANY (-1u)
-#define CID_OEMID_ANY ((unsigned short) -1)
-#define CID_NAME_ANY (NULL)
-
-#define EXT_CSD_REV_ANY (-1u)
-
-#define CID_MANFID_SANDISK      0x2
-#define CID_MANFID_TOSHIBA      0x11
-#define CID_MANFID_MICRON       0x13
-#define CID_MANFID_SAMSUNG      0x15
-#define CID_MANFID_KINGSTON     0x70
-#define CID_MANFID_HYNIX	0x90
-
-#define END_FIXUP { NULL }
-
-#define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end,	\
-		   _cis_vendor, _cis_device,				\
-		   _fixup, _data, _ext_csd_rev)				\
-	{						   \
-		.name = (_name),			   \
-		.manfid = (_manfid),			   \
-		.oemid = (_oemid),			   \
-		.rev_start = (_rev_start),		   \
-		.rev_end = (_rev_end),			   \
-		.cis_vendor = (_cis_vendor),		   \
-		.cis_device = (_cis_device),		   \
-		.vendor_fixup = (_fixup),		   \
-		.data = (_data),			   \
-		.ext_csd_rev = (_ext_csd_rev),		   \
-	 }
-
-#define MMC_FIXUP_REV(_name, _manfid, _oemid, _rev_start, _rev_end,	\
-		      _fixup, _data, _ext_csd_rev)			\
-	_FIXUP_EXT(_name, _manfid,					\
-		   _oemid, _rev_start, _rev_end,			\
-		   SDIO_ANY_ID, SDIO_ANY_ID,				\
-		   _fixup, _data, _ext_csd_rev)				\
-
-#define MMC_FIXUP(_name, _manfid, _oemid, _fixup, _data) \
-	MMC_FIXUP_REV(_name, _manfid, _oemid, 0, -1ull, _fixup, _data,	\
-		      EXT_CSD_REV_ANY)
-
-#define MMC_FIXUP_EXT_CSD_REV(_name, _manfid, _oemid, _fixup, _data,	\
-			      _ext_csd_rev)				\
-	MMC_FIXUP_REV(_name, _manfid, _oemid, 0, -1ull, _fixup, _data,	\
-		      _ext_csd_rev)
-
-#define SDIO_FIXUP(_vendor, _device, _fixup, _data)			\
-	_FIXUP_EXT(CID_NAME_ANY, CID_MANFID_ANY,			\
-		    CID_OEMID_ANY, 0, -1ull,				\
-		   _vendor, _device,					\
-		   _fixup, _data, EXT_CSD_REV_ANY)			\
-
-#define cid_rev(hwrev, fwrev, year, month)	\
-	(((u64) hwrev) << 40 |                  \
-	 ((u64) fwrev) << 32 |                  \
-	 ((u64) year) << 16 |                   \
-	 ((u64) month))
-
-#define cid_rev_card(card)		  \
-	cid_rev(card->cid.hwrev,	  \
-		    card->cid.fwrev,      \
-		    card->cid.year,	  \
-		    card->cid.month)
-
-/*
- * Unconditionally quirk add/remove.
- */
-
-static inline void __maybe_unused add_quirk(struct mmc_card *card, int data)
-{
-	card->quirks |= data;
-}
-
-static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
-{
-	card->quirks &= ~data;
-}
-
 #define mmc_card_mmc(c)		((c)->type == MMC_TYPE_MMC)
 #define mmc_card_sd(c)		((c)->type == MMC_TYPE_SD)
 #define mmc_card_sdio(c)	((c)->type == MMC_TYPE_SDIO)
 
-#define mmc_card_present(c)	((c)->state & MMC_STATE_PRESENT)
-#define mmc_card_readonly(c)	((c)->state & MMC_STATE_READONLY)
-#define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
-#define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
-#define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
-#define mmc_card_doing_bkops(c)	((c)->state & MMC_STATE_DOING_BKOPS)
-#define mmc_card_suspended(c)	((c)->state & MMC_STATE_SUSPENDED)
-
-#define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
-#define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
-#define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
-#define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
-#define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
-#define mmc_card_set_doing_bkops(c)	((c)->state |= MMC_STATE_DOING_BKOPS)
-#define mmc_card_clr_doing_bkops(c)	((c)->state &= ~MMC_STATE_DOING_BKOPS)
-#define mmc_card_set_suspended(c) ((c)->state |= MMC_STATE_SUSPENDED)
-#define mmc_card_clr_suspended(c) ((c)->state &= ~MMC_STATE_SUSPENDED)
-
-/*
- * Quirk add/remove for MMC products.
- */
-
-static inline void __maybe_unused add_quirk_mmc(struct mmc_card *card, int data)
-{
-	if (mmc_card_mmc(card))
-		card->quirks |= data;
-}
-
-static inline void __maybe_unused remove_quirk_mmc(struct mmc_card *card,
-						   int data)
-{
-	if (mmc_card_mmc(card))
-		card->quirks &= ~data;
-}
-
-/*
- * Quirk add/remove for SD products.
- */
-
-static inline void __maybe_unused add_quirk_sd(struct mmc_card *card, int data)
-{
-	if (mmc_card_sd(card))
-		card->quirks |= data;
-}
-
-static inline void __maybe_unused remove_quirk_sd(struct mmc_card *card,
-						   int data)
-{
-	if (mmc_card_sd(card))
-		card->quirks &= ~data;
-}
-
-static inline int mmc_card_lenient_fn0(const struct mmc_card *c)
-{
-	return c->quirks & MMC_QUIRK_LENIENT_FN0;
-}
-
-static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c)
-{
-	return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE;
-}
-
-static inline int mmc_card_disable_cd(const struct mmc_card *c)
-{
-	return c->quirks & MMC_QUIRK_DISABLE_CD;
-}
-
-static inline int mmc_card_nonstd_func_interface(const struct mmc_card *c)
-{
-	return c->quirks & MMC_QUIRK_NONSTD_FUNC_IF;
-}
-
-static inline int mmc_card_broken_byte_mode_512(const struct mmc_card *c)
-{
-	return c->quirks & MMC_QUIRK_BROKEN_BYTE_MODE_512;
-}
-
-static inline int mmc_card_long_read_time(const struct mmc_card *c)
-{
-	return c->quirks & MMC_QUIRK_LONG_READ_TIME;
-}
-
-static inline int mmc_card_broken_irq_polling(const struct mmc_card *c)
-{
-	return c->quirks & MMC_QUIRK_BROKEN_IRQ_POLLING;
-}
-
-static inline int mmc_card_broken_hpi(const struct mmc_card *c)
-{
-	return c->quirks & MMC_QUIRK_BROKEN_HPI;
-}
-
-#define mmc_card_name(c)	((c)->cid.prod_name)
-#define mmc_card_id(c)		(dev_name(&(c)->dev))
-
-#define mmc_dev_to_card(d)	container_of(d, struct mmc_card, dev)
-
-/*
- * MMC device driver (e.g., Flash card, I/O card...)
- */
-struct mmc_driver {
-	struct device_driver drv;
-	int (*probe)(struct mmc_card *);
-	void (*remove)(struct mmc_card *);
-	void (*shutdown)(struct mmc_card *);
-};
-
-extern int mmc_register_driver(struct mmc_driver *);
-extern void mmc_unregister_driver(struct mmc_driver *);
-
-extern void mmc_fixup_device(struct mmc_card *card,
-			     const struct mmc_fixup *table);
-
 #endif /* LINUX_MMC_CARD_H */
-- 
cgit v1.2.3


From 5857b29b96dcf208e4903ec6f20d132e6a77cac2 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Jan 2017 14:14:15 +0100
Subject: mmc: core: Move public functions from host.h to private headers

A significant amount of functions are available through the public mmc
host.h header file. Let's slim down this public mmc interface, as to
prevent users from abusing it, by moving some of the functions to private
mmc host.h header file.

This change concentrates on moving the functions into private mmc headers,
following changes may continue with additional clean-ups.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
---
 include/linux/mmc/host.h | 48 ++----------------------------------------------
 1 file changed, 2 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 7de05195bff6..97699d55b2ae 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -473,56 +473,20 @@ static inline int mmc_card_wake_sdio_irq(struct mmc_host *host)
 	return host->pm_flags & MMC_PM_WAKE_SDIO_IRQ;
 }
 
-static inline int mmc_host_cmd23(struct mmc_host *host)
-{
-	return host->caps & MMC_CAP_CMD23;
-}
-
-static inline int mmc_boot_partition_access(struct mmc_host *host)
-{
-	return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC);
-}
-
-static inline int mmc_host_uhs(struct mmc_host *host)
-{
-	return host->caps &
-		(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
-		 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
-		 MMC_CAP_UHS_DDR50);
-}
-
+/* TODO: Move to private header */
 static inline int mmc_card_hs(struct mmc_card *card)
 {
 	return card->host->ios.timing == MMC_TIMING_SD_HS ||
 		card->host->ios.timing == MMC_TIMING_MMC_HS;
 }
 
+/* TODO: Move to private header */
 static inline int mmc_card_uhs(struct mmc_card *card)
 {
 	return card->host->ios.timing >= MMC_TIMING_UHS_SDR12 &&
 		card->host->ios.timing <= MMC_TIMING_UHS_DDR50;
 }
 
-static inline bool mmc_card_hs200(struct mmc_card *card)
-{
-	return card->host->ios.timing == MMC_TIMING_MMC_HS200;
-}
-
-static inline bool mmc_card_ddr52(struct mmc_card *card)
-{
-	return card->host->ios.timing == MMC_TIMING_MMC_DDR52;
-}
-
-static inline bool mmc_card_hs400(struct mmc_card *card)
-{
-	return card->host->ios.timing == MMC_TIMING_MMC_HS400;
-}
-
-static inline bool mmc_card_hs400es(struct mmc_card *card)
-{
-	return card->host->ios.enhanced_strobe;
-}
-
 void mmc_retune_timer_stop(struct mmc_host *host);
 
 static inline void mmc_retune_needed(struct mmc_host *host)
@@ -531,12 +495,6 @@ static inline void mmc_retune_needed(struct mmc_host *host)
 		host->need_retune = 1;
 }
 
-static inline void mmc_retune_recheck(struct mmc_host *host)
-{
-	if (host->hold_retune <= 1)
-		host->retune_now = 1;
-}
-
 static inline bool mmc_can_retune(struct mmc_host *host)
 {
 	return host->can_retune == 1;
@@ -544,7 +502,5 @@ static inline bool mmc_can_retune(struct mmc_host *host)
 
 int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
 int mmc_abort_tuning(struct mmc_host *host, u32 opcode);
-void mmc_retune_pause(struct mmc_host *host);
-void mmc_retune_unpause(struct mmc_host *host);
 
 #endif /* LINUX_MMC_HOST_H */
-- 
cgit v1.2.3


From 23888bfe9bd730d42e326cc5f6f1188beb948f97 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 13 Jan 2017 14:14:16 +0100
Subject: mmc: core: Don't use extern declarations of public mmc functions

Using extern when declaring functions in the public header, core.h, is
redundant. Let's just remove the use of it.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
---
 include/linux/mmc/core.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 6440e10c86cd..6dcb339fcd45 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -158,13 +158,14 @@ struct mmc_request {
 struct mmc_card;
 struct mmc_async_req;
 
-extern struct mmc_async_req *mmc_start_req(struct mmc_host *,
-					   struct mmc_async_req *,
-					   enum mmc_blk_status *);
-extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
-extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int);
-
-extern int mmc_hw_reset(struct mmc_host *host);
-extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *);
+struct mmc_async_req *mmc_start_req(struct mmc_host *host,
+				struct mmc_async_req *areq,
+				enum mmc_blk_status *ret_stat);
+void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq);
+int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd,
+		int retries);
+
+int mmc_hw_reset(struct mmc_host *host);
+void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card);
 
 #endif /* LINUX_MMC_CORE_H */
-- 
cgit v1.2.3


From 20dd03734cac41a0545dd24f5e81d8ff0c80874b Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 19 Jan 2017 21:07:17 +0100
Subject: mmc: host: tmio: SDIO_STATUS_QUIRK is rather SDIO_STATUS_SETBITS

QUIRK sounds like there is something wrong, but actually there are just
some bits which need to be 1. Rename it to be more clear.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mfd/tmio.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index fba44abd05ba..a1520d88ebf3 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -94,10 +94,8 @@
  */
 #define TMIO_MMC_HAVE_CMD12_CTRL	(1 << 7)
 
-/*
- * Some controllers needs to set 1 on SDIO status reserved bits
- */
-#define TMIO_MMC_SDIO_STATUS_QUIRK	(1 << 8)
+/* Controller has some SDIO status bits which must be 1 */
+#define TMIO_MMC_SDIO_STATUS_SETBITS	(1 << 8)
 
 /*
  * Some controllers have a 32-bit wide data port register
-- 
cgit v1.2.3


From 20f921bb01140207983e9fc3af4bb60f1f9fc0d5 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 25 Jan 2017 12:04:17 +0100
Subject: mmc: core: Invent MMC_CAP_3_3V_DDR

According the JEDEC specification an eMMC card supporting 1.8V vccq in DDR
mode should also be capable of 3.3V. However, it's been reported that some
mmc hosts supports 3.3V, but not 1.8V.

Currently the mmc core implements an error handling when the host fails to
set 1.8V for vccq, by falling back to 3.3V. Unfortunate, this seems to be
insufficient for some mmc hosts. To enable these to use eMMC DDR mode let's
invent a new mmc cap, MMC_CAP_3_3V_DDR, which tells whether they support
the eMMC 3.3V DDR mode.

In case MMC_CAP_3_3V_DDR is set, but not MMC_CAP_1_8V_DDR, let's change to
remain on the 3.3V, as it's the default voltage level for vccq, set by the
earlier power up sequence.

As this change introduces MMC_CAP_3_3V_DDR, let's take the opportunity to
do some re-formatting of the related defines in the header file.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Tested-by: Jan Glauber <jglauber@cavium.com>
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
---
 include/linux/mmc/host.h | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 97699d55b2ae..83f1c4a9f03b 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -259,17 +259,16 @@ struct mmc_host {
 #define MMC_CAP_NONREMOVABLE	(1 << 8)	/* Nonremovable e.g. eMMC */
 #define MMC_CAP_WAIT_WHILE_BUSY	(1 << 9)	/* Waits while card is busy */
 #define MMC_CAP_ERASE		(1 << 10)	/* Allow erase/trim commands */
-#define MMC_CAP_1_8V_DDR	(1 << 11)	/* can support */
-						/* DDR mode at 1.8V */
-#define MMC_CAP_1_2V_DDR	(1 << 12)	/* can support */
-						/* DDR mode at 1.2V */
-#define MMC_CAP_POWER_OFF_CARD	(1 << 13)	/* Can power off after boot */
-#define MMC_CAP_BUS_WIDTH_TEST	(1 << 14)	/* CMD14/CMD19 bus width ok */
-#define MMC_CAP_UHS_SDR12	(1 << 15)	/* Host supports UHS SDR12 mode */
-#define MMC_CAP_UHS_SDR25	(1 << 16)	/* Host supports UHS SDR25 mode */
-#define MMC_CAP_UHS_SDR50	(1 << 17)	/* Host supports UHS SDR50 mode */
-#define MMC_CAP_UHS_SDR104	(1 << 18)	/* Host supports UHS SDR104 mode */
-#define MMC_CAP_UHS_DDR50	(1 << 19)	/* Host supports UHS DDR50 mode */
+#define MMC_CAP_3_3V_DDR	(1 << 11)	/* Host supports eMMC DDR 3.3V */
+#define MMC_CAP_1_8V_DDR	(1 << 12)	/* Host supports eMMC DDR 1.8V */
+#define MMC_CAP_1_2V_DDR	(1 << 13)	/* Host supports eMMC DDR 1.2V */
+#define MMC_CAP_POWER_OFF_CARD	(1 << 14)	/* Can power off after boot */
+#define MMC_CAP_BUS_WIDTH_TEST	(1 << 15)	/* CMD14/CMD19 bus width ok */
+#define MMC_CAP_UHS_SDR12	(1 << 16)	/* Host supports UHS SDR12 mode */
+#define MMC_CAP_UHS_SDR25	(1 << 17)	/* Host supports UHS SDR25 mode */
+#define MMC_CAP_UHS_SDR50	(1 << 18)	/* Host supports UHS SDR50 mode */
+#define MMC_CAP_UHS_SDR104	(1 << 19)	/* Host supports UHS SDR104 mode */
+#define MMC_CAP_UHS_DDR50	(1 << 20)	/* Host supports UHS DDR50 mode */
 #define MMC_CAP_DRIVER_TYPE_A	(1 << 23)	/* Host supports Driver Type A */
 #define MMC_CAP_DRIVER_TYPE_C	(1 << 24)	/* Host supports Driver Type C */
 #define MMC_CAP_DRIVER_TYPE_D	(1 << 25)	/* Host supports Driver Type D */
-- 
cgit v1.2.3


From c3399ef55d8e8295293808eba32e3f7056526324 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 1 Feb 2017 13:47:53 +0100
Subject: mmc: core: rename mmc_start_req() to *areq()

With the coexisting __mmc_start_request(), mmc_start_request()
and __mmc_start_req() it is a bit confusing that mmc_start_req()
actually does not start a normal request, but an asynchronous
request.

Rename it to mmc_start_areq() to make it explicit what the
function is doing, also fix the kerneldoc for this function
while we're at it.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 6dcb339fcd45..a0c63ea28796 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -158,7 +158,7 @@ struct mmc_request {
 struct mmc_card;
 struct mmc_async_req;
 
-struct mmc_async_req *mmc_start_req(struct mmc_host *host,
+struct mmc_async_req *mmc_start_areq(struct mmc_host *host,
 				struct mmc_async_req *areq,
 				enum mmc_blk_status *ret_stat);
 void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq);
-- 
cgit v1.2.3


From 30b888ba950d9f77326b50a4aa2d7d99557d5718 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sat, 28 Jan 2017 09:55:20 -0500
Subject: radix-tree: Add radix_tree_iter_tag_clear()

The counterpart to radix_tree_iter_tag_set(), used by the IDR code

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 include/linux/radix-tree.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 32683c7c2e0d..8bf4ef448ce1 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -332,7 +332,9 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
 int radix_tree_tag_get(const struct radix_tree_root *,
 			unsigned long index, unsigned int tag);
-void radix_tree_iter_tag_set(struct radix_tree_root *root,
+void radix_tree_iter_tag_set(struct radix_tree_root *,
+		const struct radix_tree_iter *iter, unsigned int tag);
+void radix_tree_iter_tag_clear(struct radix_tree_root *,
 		const struct radix_tree_iter *iter, unsigned int tag);
 unsigned int
 radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results,
-- 
cgit v1.2.3


From 0ac398ef391b53122976325ab6953456ce8e8310 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sat, 28 Jan 2017 09:56:22 -0500
Subject: radix-tree: Add radix_tree_iter_delete

Factor the deletion code out into __radix_tree_delete() and provide a
nice iterator-based wrapper around it.  If we free the node, advance
the iterator to avoid reading from freed memory.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/radix-tree.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 8bf4ef448ce1..05f715cb8062 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -311,6 +311,8 @@ void __radix_tree_delete_node(struct radix_tree_root *root,
 			      struct radix_tree_node *node,
 			      radix_tree_update_node_t update_node,
 			      void *private);
+void radix_tree_iter_delete(struct radix_tree_root *,
+				struct radix_tree_iter *iter, void **slot);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
 void *radix_tree_delete(struct radix_tree_root *, unsigned long);
 void radix_tree_clear_tags(struct radix_tree_root *root,
-- 
cgit v1.2.3


From ca86cad7380e373fa17bc0ee8aff121380323e69 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Sat, 4 Feb 2017 13:10:38 -0500
Subject: audit: log module name on init_module

This adds a new auxiliary record MODULE_INIT to the SYSCALL event.

We get finit_module for free since it made most sense to hook this in to
load_module().

https://github.com/linux-audit/audit-kernel/issues/7
https://github.com/linux-audit/audit-kernel/wiki/RFE-Module-Load-Record-Format

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: Jessica Yu <jeyu@redhat.com>
[PM: corrected links in the commit description]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/audit.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 2be99b276d29..aba3a2684300 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -360,6 +360,7 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *old);
 extern void __audit_log_capset(const struct cred *new, const struct cred *old);
 extern void __audit_mmap_fd(int fd, int flags);
+extern void __audit_log_kern_module(char *name);
 
 static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
@@ -450,6 +451,12 @@ static inline void audit_mmap_fd(int fd, int flags)
 		__audit_mmap_fd(fd, flags);
 }
 
+static inline void audit_log_kern_module(char *name)
+{
+	if (!audit_dummy_context())
+		__audit_log_kern_module(name);
+}
+
 extern int audit_n_rules;
 extern int audit_signals;
 #else /* CONFIG_AUDITSYSCALL */
@@ -561,6 +568,11 @@ static inline void audit_log_capset(const struct cred *new,
 { }
 static inline void audit_mmap_fd(int fd, int flags)
 { }
+
+static inline void audit_log_kern_module(char *name)
+{
+}
+
 static inline void audit_ptrace(struct task_struct *t)
 { }
 #define audit_n_rules 0
-- 
cgit v1.2.3


From 251af29c320d86071664f02c76f0d063a19fefdf Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 11 Feb 2017 10:37:38 -0500
Subject: nlm: Ensure callback code also checks that the files match

It is not sufficient to just check that the lock pids match when
granting a callback, we also need to ensure that we're granting
the callback on the right file.

Reported-by: Pankaj Singh <psingh.ait@gmail.com>
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/lockd/lockd.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index c15373894a42..b37dee3acaba 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -355,7 +355,8 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
 static inline int nlm_compare_locks(const struct file_lock *fl1,
 				    const struct file_lock *fl2)
 {
-	return	fl1->fl_pid   == fl2->fl_pid
+	return file_inode(fl1->fl_file) == file_inode(fl2->fl_file)
+	     && fl1->fl_pid   == fl2->fl_pid
 	     && fl1->fl_owner == fl2->fl_owner
 	     && fl1->fl_start == fl2->fl_start
 	     && fl1->fl_end   == fl2->fl_end
-- 
cgit v1.2.3


From 0a835c4f090af2c76fc2932c539c3b32fd21fbbb Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Tue, 20 Dec 2016 10:27:56 -0500
Subject: Reimplement IDR and IDA using the radix tree

The IDR is very similar to the radix tree.  It has some functionality that
the radix tree did not have (alloc next free, cyclic allocation, a
callback-based for_each, destroy tree), which is readily implementable on
top of the radix tree.  A few small changes were needed in order to use a
tag to represent nodes with free space below them.  More extensive
changes were needed to support storing NULL as a valid entry in an IDR.
Plain radix trees still interpret NULL as a not-present entry.

The IDA is reimplemented as a client of the newly enhanced radix tree.  As
in the current implementation, it uses a bitmap at the last level of the
tree.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/idr.h        | 145 ++++++++++++++++++++-------------------------
 include/linux/radix-tree.h |  49 +++++++++++++--
 2 files changed, 110 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 3c01b89aed67..f58c0a3addc3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -12,47 +12,28 @@
 #ifndef __IDR_H__
 #define __IDR_H__
 
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <linux/init.h>
-#include <linux/rcupdate.h>
+#include <linux/radix-tree.h>
+#include <linux/gfp.h>
+
+struct idr {
+	struct radix_tree_root	idr_rt;
+	unsigned int		idr_next;
+};
 
 /*
- * Using 6 bits at each layer allows us to allocate 7 layers out of each page.
- * 8 bits only gave us 3 layers out of every pair of pages, which is less
- * efficient except for trees with a largest element between 192-255 inclusive.
+ * The IDR API does not expose the tagging functionality of the radix tree
+ * to users.  Use tag 0 to track whether a node has free space below it.
  */
-#define IDR_BITS 6
-#define IDR_SIZE (1 << IDR_BITS)
-#define IDR_MASK ((1 << IDR_BITS)-1)
-
-struct idr_layer {
-	int			prefix;	/* the ID prefix of this idr_layer */
-	int			layer;	/* distance from leaf */
-	struct idr_layer __rcu	*ary[1<<IDR_BITS];
-	int			count;	/* When zero, we can release it */
-	union {
-		/* A zero bit means "space here" */
-		DECLARE_BITMAP(bitmap, IDR_SIZE);
-		struct rcu_head		rcu_head;
-	};
-};
+#define IDR_FREE	0
 
-struct idr {
-	struct idr_layer __rcu	*hint;	/* the last layer allocated from */
-	struct idr_layer __rcu	*top;
-	int			layers;	/* only valid w/o concurrent changes */
-	int			cur;	/* current pos for cyclic allocation */
-	spinlock_t		lock;
-	int			id_free_cnt;
-	struct idr_layer	*id_free;
-};
+/* Set the IDR flag and the IDR_FREE tag */
+#define IDR_RT_MARKER		((__force gfp_t)(3 << __GFP_BITS_SHIFT))
 
-#define IDR_INIT(name)							\
+#define IDR_INIT							\
 {									\
-	.lock			= __SPIN_LOCK_UNLOCKED(name.lock),	\
+	.idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER)			\
 }
-#define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
+#define DEFINE_IDR(name)	struct idr name = IDR_INIT
 
 /**
  * idr_get_cursor - Return the current position of the cyclic allocator
@@ -62,9 +43,9 @@ struct idr {
  * idr_alloc_cyclic() if it is free (otherwise the search will start from
  * this position).
  */
-static inline unsigned int idr_get_cursor(struct idr *idr)
+static inline unsigned int idr_get_cursor(const struct idr *idr)
 {
-	return READ_ONCE(idr->cur);
+	return READ_ONCE(idr->idr_next);
 }
 
 /**
@@ -77,7 +58,7 @@ static inline unsigned int idr_get_cursor(struct idr *idr)
  */
 static inline void idr_set_cursor(struct idr *idr, unsigned int val)
 {
-	WRITE_ONCE(idr->cur, val);
+	WRITE_ONCE(idr->idr_next, val);
 }
 
 /**
@@ -97,22 +78,31 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
  * period).
  */
 
-/*
- * This is what we export.
- */
-
-void *idr_find_slowpath(struct idr *idp, int id);
 void idr_preload(gfp_t gfp_mask);
-int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask);
-int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask);
-int idr_for_each(struct idr *idp,
+int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t);
+int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t);
+int idr_for_each(const struct idr *,
 		 int (*fn)(int id, void *p, void *data), void *data);
-void *idr_get_next(struct idr *idp, int *nextid);
-void *idr_replace(struct idr *idp, void *ptr, int id);
-void idr_remove(struct idr *idp, int id);
-void idr_destroy(struct idr *idp);
-void idr_init(struct idr *idp);
-bool idr_is_empty(struct idr *idp);
+void *idr_get_next(struct idr *, int *nextid);
+void *idr_replace(struct idr *, void *, int id);
+void idr_destroy(struct idr *);
+
+static inline void idr_remove(struct idr *idr, int id)
+{
+	radix_tree_delete(&idr->idr_rt, id);
+}
+
+static inline void idr_init(struct idr *idr)
+{
+	INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
+	idr->idr_next = 0;
+}
+
+static inline bool idr_is_empty(const struct idr *idr)
+{
+	return radix_tree_empty(&idr->idr_rt) &&
+		radix_tree_tagged(&idr->idr_rt, IDR_FREE);
+}
 
 /**
  * idr_preload_end - end preload section started with idr_preload()
@@ -137,19 +127,14 @@ static inline void idr_preload_end(void)
  * This function can be called under rcu_read_lock(), given that the leaf
  * pointers lifetimes are correctly managed.
  */
-static inline void *idr_find(struct idr *idr, int id)
+static inline void *idr_find(const struct idr *idr, int id)
 {
-	struct idr_layer *hint = rcu_dereference_raw(idr->hint);
-
-	if (hint && (id & ~IDR_MASK) == hint->prefix)
-		return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
-
-	return idr_find_slowpath(idr, id);
+	return radix_tree_lookup(&idr->idr_rt, id);
 }
 
 /**
  * idr_for_each_entry - iterate over an idr's elements of a given type
- * @idp:     idr handle
+ * @idr:     idr handle
  * @entry:   the type * to use as cursor
  * @id:      id entry's key
  *
@@ -157,57 +142,60 @@ static inline void *idr_find(struct idr *idr, int id)
  * after normal terminatinon @entry is left with the value NULL.  This
  * is convenient for a "not found" value.
  */
-#define idr_for_each_entry(idp, entry, id)			\
-	for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
+#define idr_for_each_entry(idr, entry, id)			\
+	for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id)
 
 /**
- * idr_for_each_entry - continue iteration over an idr's elements of a given type
- * @idp:     idr handle
+ * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
+ * @idr:     idr handle
  * @entry:   the type * to use as cursor
  * @id:      id entry's key
  *
  * Continue to iterate over list of given type, continuing after
  * the current position.
  */
-#define idr_for_each_entry_continue(idp, entry, id)			\
-	for ((entry) = idr_get_next((idp), &(id));			\
+#define idr_for_each_entry_continue(idr, entry, id)			\
+	for ((entry) = idr_get_next((idr), &(id));			\
 	     entry;							\
-	     ++id, (entry) = idr_get_next((idp), &(id)))
+	     ++id, (entry) = idr_get_next((idr), &(id)))
 
 /*
  * IDA - IDR based id allocator, use when translation from id to
  * pointer isn't necessary.
- *
- * IDA_BITMAP_LONGS is calculated to be one less to accommodate
- * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes.
  */
 #define IDA_CHUNK_SIZE		128	/* 128 bytes per chunk */
-#define IDA_BITMAP_LONGS	(IDA_CHUNK_SIZE / sizeof(long) - 1)
+#define IDA_BITMAP_LONGS	(IDA_CHUNK_SIZE / sizeof(long))
 #define IDA_BITMAP_BITS 	(IDA_BITMAP_LONGS * sizeof(long) * 8)
 
 struct ida_bitmap {
-	long			nr_busy;
 	unsigned long		bitmap[IDA_BITMAP_LONGS];
 };
 
 struct ida {
-	struct idr		idr;
+	struct radix_tree_root	ida_rt;
 	struct ida_bitmap	*free_bitmap;
 };
 
-#define IDA_INIT(name)		{ .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
-#define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
+#define IDA_INIT	{						\
+	.ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),		\
+}
+#define DEFINE_IDA(name)	struct ida name = IDA_INIT
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
 void ida_remove(struct ida *ida, int id);
 void ida_destroy(struct ida *ida);
-void ida_init(struct ida *ida);
 
 int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
 		   gfp_t gfp_mask);
 void ida_simple_remove(struct ida *ida, unsigned int id);
 
+static inline void ida_init(struct ida *ida)
+{
+	INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
+	ida->free_bitmap = NULL;
+}
+
 /**
  * ida_get_new - allocate new ID
  * @ida:	idr handle
@@ -220,11 +208,8 @@ static inline int ida_get_new(struct ida *ida, int *p_id)
 	return ida_get_new_above(ida, 0, p_id);
 }
 
-static inline bool ida_is_empty(struct ida *ida)
+static inline bool ida_is_empty(const struct ida *ida)
 {
-	return idr_is_empty(&ida->idr);
+	return radix_tree_empty(&ida->ida_rt);
 }
-
-void __init idr_init_cache(void);
-
 #endif /* __IDR_H__ */
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 05f715cb8062..2ba0c1f46c84 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -105,7 +105,10 @@ struct radix_tree_node {
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
-/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
+/* The top bits of gfp_mask are used to store the root tags and the IDR flag */
+#define ROOT_IS_IDR	((__force gfp_t)(1 << __GFP_BITS_SHIFT))
+#define ROOT_TAG_SHIFT	(__GFP_BITS_SHIFT + 1)
+
 struct radix_tree_root {
 	gfp_t			gfp_mask;
 	struct radix_tree_node	__rcu *rnode;
@@ -358,10 +361,14 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index,
 			unsigned new_order);
 int radix_tree_join(struct radix_tree_root *, unsigned long index,
 			unsigned new_order, void *);
+void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *,
+			gfp_t, int end);
 
-#define RADIX_TREE_ITER_TAG_MASK	0x00FF	/* tag index in lower byte */
-#define RADIX_TREE_ITER_TAGGED		0x0100	/* lookup tagged slots */
-#define RADIX_TREE_ITER_CONTIG		0x0200	/* stop at first hole */
+enum {
+	RADIX_TREE_ITER_TAG_MASK = 0x0f,	/* tag index in lower nybble */
+	RADIX_TREE_ITER_TAGGED   = 0x10,	/* lookup tagged slots */
+	RADIX_TREE_ITER_CONTIG   = 0x20,	/* stop at first hole */
+};
 
 /**
  * radix_tree_iter_init - initialize radix tree iterator
@@ -402,6 +409,40 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
 void **radix_tree_next_chunk(const struct radix_tree_root *,
 			     struct radix_tree_iter *iter, unsigned flags);
 
+/**
+ * radix_tree_iter_lookup - look up an index in the radix tree
+ * @root: radix tree root
+ * @iter: iterator state
+ * @index: key to look up
+ *
+ * If @index is present in the radix tree, this function returns the slot
+ * containing it and updates @iter to describe the entry.  If @index is not
+ * present, it returns NULL.
+ */
+static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root,
+			struct radix_tree_iter *iter, unsigned long index)
+{
+	radix_tree_iter_init(iter, index);
+	return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG);
+}
+
+/**
+ * radix_tree_iter_find - find a present entry
+ * @root: radix tree root
+ * @iter: iterator state
+ * @index: start location
+ *
+ * This function returns the slot containing the entry with the lowest index
+ * which is at least @index.  If @index is larger than any present entry, this
+ * function returns NULL.  The @iter is updated to describe the entry found.
+ */
+static inline void **radix_tree_iter_find(const struct radix_tree_root *root,
+			struct radix_tree_iter *iter, unsigned long index)
+{
+	radix_tree_iter_init(iter, index);
+	return radix_tree_next_chunk(root, iter, 0);
+}
+
 /**
  * radix_tree_iter_retry - retry this chunk of the iteration
  * @iter:	iterator state
-- 
cgit v1.2.3


From 7ad3d4d85c7af9632055a6ac0aa15b6b6a321c6b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 16 Dec 2016 11:55:56 -0500
Subject: ida: Move ida_bitmap to a percpu variable

When we preload the IDA, we allocate an IDA bitmap.  Instead of storing
that preallocated bitmap in the IDA, we store it in a percpu variable.
Generally there are more IDAs in the system than CPUs, so this cuts down
on the number of preallocated bitmaps that are unused, and about half
of the IDA users did not call ida_destroy() so they were leaking IDA
bitmaps.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/idr.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index f58c0a3addc3..2027c7aba50d 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -14,6 +14,7 @@
 
 #include <linux/radix-tree.h>
 #include <linux/gfp.h>
+#include <linux/percpu.h>
 
 struct idr {
 	struct radix_tree_root	idr_rt;
@@ -171,9 +172,10 @@ struct ida_bitmap {
 	unsigned long		bitmap[IDA_BITMAP_LONGS];
 };
 
+DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap);
+
 struct ida {
 	struct radix_tree_root	ida_rt;
-	struct ida_bitmap	*free_bitmap;
 };
 
 #define IDA_INIT	{						\
@@ -193,7 +195,6 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
 static inline void ida_init(struct ida *ida)
 {
 	INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
-	ida->free_bitmap = NULL;
 }
 
 /**
-- 
cgit v1.2.3


From d3e709e63e97e5f3f129b639991cfe266da60bae Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Thu, 22 Dec 2016 13:30:22 -0500
Subject: idr: Return the deleted entry from idr_remove

It is a relatively common idiom (8 instances) to first look up an IDR
entry, and then remove it from the tree if it is found, possibly doing
further operations upon the entry afterwards.  If we change idr_remove()
to return the removed object, all of these users can save themselves a
walk of the IDR tree.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/idr.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 2027c7aba50d..bf70b3ef0a07 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -88,9 +88,9 @@ void *idr_get_next(struct idr *, int *nextid);
 void *idr_replace(struct idr *, void *, int id);
 void idr_destroy(struct idr *);
 
-static inline void idr_remove(struct idr *idr, int id)
+static inline void *idr_remove(struct idr *idr, int id)
 {
-	radix_tree_delete(&idr->idr_rt, id);
+	return radix_tree_delete_item(&idr->idr_rt, id, NULL);
 }
 
 static inline void idr_init(struct idr *idr)
-- 
cgit v1.2.3


From d58275bc96ae933b1b3888af80920dd6b020c505 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 16 Jan 2017 17:10:21 -0500
Subject: radix-tree: Store a pointer to the root in each node

Instead of having this mysterious private_data in each radix_tree_node,
store a pointer to the root, which can be useful for debugging.  This also
relieves the mm code from the duty of updating it.

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/radix-tree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 2ba0c1f46c84..d250059bbfa4 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -96,7 +96,7 @@ struct radix_tree_node {
 	unsigned char	count;		/* Total entry count */
 	unsigned char	exceptional;	/* Exceptional entry count */
 	struct radix_tree_node *parent;		/* Used when ascending tree */
-	void *private_data;			/* For tree user */
+	struct radix_tree_root *root;		/* The tree we belong to */
 	union {
 		struct list_head private_list;	/* For tree user */
 		struct rcu_head	rcu_head;	/* Used when freeing node */
-- 
cgit v1.2.3


From 12320d0ff1c9d5582f5c35e4bb8b9c70c475fd71 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 13 Feb 2017 15:22:48 -0500
Subject: radix-tree: Add rcu_dereference and rcu_assign_pointer calls

Some of these have been missing for many years.  Others were recently
introduced by me.  Fortunately, we have tools that help us find such
things.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/radix-tree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index d250059bbfa4..0b502de7d23a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -561,7 +561,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 	return NULL;
 
  found:
-	if (unlikely(radix_tree_is_internal_node(*slot)))
+	if (unlikely(radix_tree_is_internal_node(rcu_dereference_raw(*slot))))
 		return __radix_tree_next_slot(slot, iter, flags);
 	return slot;
 }
-- 
cgit v1.2.3


From d7b627277b57370223d682cede979a279284b12a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 13 Feb 2017 15:58:24 -0500
Subject: radix-tree: Fix __rcu annotations

Many places were missing __rcu annotations.  A few places needed a few
lines of explanation about why it was safe to not use RCU accessors.
Add a custom CFLAGS setting to the Makefile to ensure that new patches
don't miss RCU annotations.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/radix-tree.h | 110 ++++++++++++++++++++++-----------------------
 1 file changed, 54 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 0b502de7d23a..3e5735064b71 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -221,10 +221,8 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter)
  */
 
 /**
- * radix_tree_deref_slot	- dereference a slot
- * @pslot:	pointer to slot, returned by radix_tree_lookup_slot
- * Returns:	item that was stored in that slot with any direct pointer flag
- *		removed.
+ * radix_tree_deref_slot - dereference a slot
+ * @slot: slot pointer, returned by radix_tree_lookup_slot
  *
  * For use with radix_tree_lookup_slot().  Caller must hold tree at least read
  * locked across slot lookup and dereference. Not required if write lock is
@@ -232,26 +230,27 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter)
  *
  * radix_tree_deref_retry must be used to confirm validity of the pointer if
  * only the read lock is held.
+ *
+ * Return: entry stored in that slot.
  */
-static inline void *radix_tree_deref_slot(void **pslot)
+static inline void *radix_tree_deref_slot(void __rcu **slot)
 {
-	return rcu_dereference(*pslot);
+	return rcu_dereference(*slot);
 }
 
 /**
- * radix_tree_deref_slot_protected	- dereference a slot without RCU lock but with tree lock held
- * @pslot:	pointer to slot, returned by radix_tree_lookup_slot
- * Returns:	item that was stored in that slot with any direct pointer flag
- *		removed.
- *
- * Similar to radix_tree_deref_slot but only used during migration when a pages
- * mapping is being moved. The caller does not hold the RCU read lock but it
- * must hold the tree lock to prevent parallel updates.
+ * radix_tree_deref_slot_protected - dereference a slot with tree lock held
+ * @slot: slot pointer, returned by radix_tree_lookup_slot
+ *
+ * Similar to radix_tree_deref_slot.  The caller does not hold the RCU read
+ * lock but it must hold the tree lock to prevent parallel updates.
+ *
+ * Return: entry stored in that slot.
  */
-static inline void *radix_tree_deref_slot_protected(void **pslot,
+static inline void *radix_tree_deref_slot_protected(void __rcu **slot,
 							spinlock_t *treelock)
 {
-	return rcu_dereference_protected(*pslot, lockdep_is_held(treelock));
+	return rcu_dereference_protected(*slot, lockdep_is_held(treelock));
 }
 
 /**
@@ -287,9 +286,9 @@ static inline int radix_tree_exception(void *arg)
 	return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
 }
 
-int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
+int __radix_tree_create(struct radix_tree_root *, unsigned long index,
 			unsigned order, struct radix_tree_node **nodep,
-			void ***slotp);
+			void __rcu ***slotp);
 int __radix_tree_insert(struct radix_tree_root *, unsigned long index,
 			unsigned order, void *);
 static inline int radix_tree_insert(struct radix_tree_root *root,
@@ -298,42 +297,41 @@ static inline int radix_tree_insert(struct radix_tree_root *root,
 	return __radix_tree_insert(root, index, 0, entry);
 }
 void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index,
-			  struct radix_tree_node **nodep, void ***slotp);
+			  struct radix_tree_node **nodep, void __rcu ***slotp);
 void *radix_tree_lookup(const struct radix_tree_root *, unsigned long);
-void **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long);
+void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *,
+					unsigned long index);
 typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *);
-void __radix_tree_replace(struct radix_tree_root *root,
-			  struct radix_tree_node *node,
-			  void **slot, void *item,
+void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *,
+			  void __rcu **slot, void *entry,
 			  radix_tree_update_node_t update_node, void *private);
 void radix_tree_iter_replace(struct radix_tree_root *,
-		const struct radix_tree_iter *, void **slot, void *item);
-void radix_tree_replace_slot(struct radix_tree_root *root,
-			     void **slot, void *item);
-void __radix_tree_delete_node(struct radix_tree_root *root,
-			      struct radix_tree_node *node,
+		const struct radix_tree_iter *, void __rcu **slot, void *entry);
+void radix_tree_replace_slot(struct radix_tree_root *,
+			     void __rcu **slot, void *entry);
+void __radix_tree_delete_node(struct radix_tree_root *,
+			      struct radix_tree_node *,
 			      radix_tree_update_node_t update_node,
 			      void *private);
 void radix_tree_iter_delete(struct radix_tree_root *,
-				struct radix_tree_iter *iter, void **slot);
+			struct radix_tree_iter *iter, void __rcu **slot);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
 void *radix_tree_delete(struct radix_tree_root *, unsigned long);
-void radix_tree_clear_tags(struct radix_tree_root *root,
-			   struct radix_tree_node *node,
-			   void **slot);
+void radix_tree_clear_tags(struct radix_tree_root *, struct radix_tree_node *,
+			   void __rcu **slot);
 unsigned int radix_tree_gang_lookup(const struct radix_tree_root *,
 			void **results, unsigned long first_index,
 			unsigned int max_items);
 unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *,
-			void ***results, unsigned long *indices,
+			void __rcu ***results, unsigned long *indices,
 			unsigned long first_index, unsigned int max_items);
 int radix_tree_preload(gfp_t gfp_mask);
 int radix_tree_maybe_preload(gfp_t gfp_mask);
 int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order);
 void radix_tree_init(void);
-void *radix_tree_tag_set(struct radix_tree_root *root,
+void *radix_tree_tag_set(struct radix_tree_root *,
 			unsigned long index, unsigned int tag);
-void *radix_tree_tag_clear(struct radix_tree_root *root,
+void *radix_tree_tag_clear(struct radix_tree_root *,
 			unsigned long index, unsigned int tag);
 int radix_tree_tag_get(const struct radix_tree_root *,
 			unsigned long index, unsigned int tag);
@@ -341,15 +339,13 @@ void radix_tree_iter_tag_set(struct radix_tree_root *,
 		const struct radix_tree_iter *iter, unsigned int tag);
 void radix_tree_iter_tag_clear(struct radix_tree_root *,
 		const struct radix_tree_iter *iter, unsigned int tag);
-unsigned int
-radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results,
-		unsigned long first_index, unsigned int max_items,
-		unsigned int tag);
-unsigned int
-radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, void ***results,
-		unsigned long first_index, unsigned int max_items,
-		unsigned int tag);
-int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag);
+unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *,
+		void **results, unsigned long first_index,
+		unsigned int max_items, unsigned int tag);
+unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *,
+		void __rcu ***results, unsigned long first_index,
+		unsigned int max_items, unsigned int tag);
+int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag);
 
 static inline void radix_tree_preload_end(void)
 {
@@ -361,7 +357,7 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index,
 			unsigned new_order);
 int radix_tree_join(struct radix_tree_root *, unsigned long index,
 			unsigned new_order, void *);
-void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *,
+void __rcu **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *,
 			gfp_t, int end);
 
 enum {
@@ -377,7 +373,7 @@ enum {
  * @start:	iteration starting index
  * Returns:	NULL
  */
-static __always_inline void **
+static __always_inline void __rcu **
 radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
 {
 	/*
@@ -406,7 +402,7 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
  * Also it fills @iter with data about chunk: position in the tree (index),
  * its end (next_index), and constructs a bit mask for tagged iterating (tags).
  */
-void **radix_tree_next_chunk(const struct radix_tree_root *,
+void __rcu **radix_tree_next_chunk(const struct radix_tree_root *,
 			     struct radix_tree_iter *iter, unsigned flags);
 
 /**
@@ -419,7 +415,8 @@ void **radix_tree_next_chunk(const struct radix_tree_root *,
  * containing it and updates @iter to describe the entry.  If @index is not
  * present, it returns NULL.
  */
-static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root,
+static inline void __rcu **
+radix_tree_iter_lookup(const struct radix_tree_root *root,
 			struct radix_tree_iter *iter, unsigned long index)
 {
 	radix_tree_iter_init(iter, index);
@@ -436,7 +433,8 @@ static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root,
  * which is at least @index.  If @index is larger than any present entry, this
  * function returns NULL.  The @iter is updated to describe the entry found.
  */
-static inline void **radix_tree_iter_find(const struct radix_tree_root *root,
+static inline void __rcu **
+radix_tree_iter_find(const struct radix_tree_root *root,
 			struct radix_tree_iter *iter, unsigned long index)
 {
 	radix_tree_iter_init(iter, index);
@@ -453,7 +451,7 @@ static inline void **radix_tree_iter_find(const struct radix_tree_root *root,
  * and continue the iteration.
  */
 static inline __must_check
-void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+void __rcu **radix_tree_iter_retry(struct radix_tree_iter *iter)
 {
 	iter->next_index = iter->index;
 	iter->tags = 0;
@@ -476,7 +474,7 @@ __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots)
  * have been invalidated by an insertion or deletion.  Call this function
  * before releasing the lock to continue the iteration from the next index.
  */
-void **__must_check radix_tree_iter_resume(void **slot,
+void __rcu **__must_check radix_tree_iter_resume(void __rcu **slot,
 					struct radix_tree_iter *iter);
 
 /**
@@ -492,11 +490,11 @@ radix_tree_chunk_size(struct radix_tree_iter *iter)
 }
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
-void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
-				unsigned flags);
+void __rcu **__radix_tree_next_slot(void __rcu **slot,
+				struct radix_tree_iter *iter, unsigned flags);
 #else
 /* Can't happen without sibling entries, but the compiler can't tell that */
-static inline void ** __radix_tree_next_slot(void **slot,
+static inline void __rcu **__radix_tree_next_slot(void __rcu **slot,
 				struct radix_tree_iter *iter, unsigned flags)
 {
 	return slot;
@@ -522,8 +520,8 @@ static inline void ** __radix_tree_next_slot(void **slot,
  * b) we are doing non-tagged iteration, and iter->index and iter->next_index
  *    have been set up so that radix_tree_chunk_size() returns 1 or 0.
  */
-static __always_inline void **
-radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
+static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot,
+				struct radix_tree_iter *iter, unsigned flags)
 {
 	if (flags & RADIX_TREE_ITER_TAGGED) {
 		iter->tags >>= 1;
-- 
cgit v1.2.3


From 40137906c5f55c252194ef5834130383e639536f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 11 Feb 2017 19:26:47 +0800
Subject: rhashtable: Add nested tables

This patch adds code that handles GFP_ATOMIC kmalloc failure on
insertion.  As we cannot use vmalloc, we solve it by making our
hash table nested.  That is, we allocate single pages at each level
and reach our desired table size by nesting them.

When a nested table is created, only a single page is allocated
at the top-level.  Lower levels are allocated on demand during
insertion.  Therefore for each insertion to succeed, only two
(non-consecutive) pages are needed.

After a nested table is created, a rehash will be scheduled in
order to switch to a vmalloced table as soon as possible.  Also,
the rehash code will never rehash into a nested table.  If we
detect a nested table during a rehash, the rehash will be aborted
and a new rehash will be scheduled.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 78 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 56 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 5c132d3188be..f2e12a845910 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -61,6 +61,7 @@ struct rhlist_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
+ * @nest: Number of bits of first-level nested table.
  * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
  * @locks_mask: Mask to apply before accessing locks[]
@@ -68,10 +69,12 @@ struct rhlist_head {
  * @walkers: List of active walkers
  * @rcu: RCU structure for freeing the table
  * @future_tbl: Table under construction during rehashing
+ * @ntbl: Nested table used when out of memory.
  * @buckets: size * hash buckets
  */
 struct bucket_table {
 	unsigned int		size;
+	unsigned int		nest;
 	unsigned int		rehash;
 	u32			hash_rnd;
 	unsigned int		locks_mask;
@@ -81,7 +84,7 @@ struct bucket_table {
 
 	struct bucket_table __rcu *future_tbl;
 
-	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
+	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
 /**
@@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
+struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+					    unsigned int hash);
+struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+						   struct bucket_table *tbl,
+						   unsigned int hash);
+
 #define rht_dereference(p, ht) \
 	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
 
@@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht);
 #define rht_entry(tpos, pos, member) \
 	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 
+static inline struct rhash_head __rcu *const *rht_bucket(
+	const struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
+static inline struct rhash_head __rcu **rht_bucket_var(
+	struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
+static inline struct rhash_head __rcu **rht_bucket_insert(
+	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
 /**
  * rht_for_each_continue - continue iterating over hash chain
  * @pos:	the &struct rhash_head to use as a loop cursor.
@@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * @hash:	the hash value / bucket index
  */
 #define rht_for_each(pos, tbl, hash) \
-	rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
+	rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
 
 /**
  * rht_for_each_entry_continue - continue iterating over hash chain
@@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * @member:	name of the &struct rhash_head within the hashable struct.
  */
 #define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash],	\
+	rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash),	\
 				    tbl, hash, member)
 
 /**
@@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht);
  * This hash chain list-traversal primitive allows for the looped code to
  * remove the loop cursor from the list.
  */
-#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	    \
-	for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \
-	     next = !rht_is_a_nulls(pos) ?				    \
-		       rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
-	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
-	     pos = next,						    \
-	     next = !rht_is_a_nulls(pos) ?				    \
+#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
+	for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
+	     next = !rht_is_a_nulls(pos) ?				      \
+		       rht_dereference_bucket(pos->next, tbl, hash) : NULL;   \
+	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
+	     pos = next,						      \
+	     next = !rht_is_a_nulls(pos) ?				      \
 		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)
 
 /**
@@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * traversal is guarded by rcu_read_lock().
  */
 #define rht_for_each_rcu(pos, tbl, hash)				\
-	rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
+	rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
 
 /**
  * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
@@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht);
  * the _rcu mutation primitives such as rhashtable_insert() as long as the
  * traversal is guarded by rcu_read_lock().
  */
-#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
+	rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
 					tbl, hash, member)
 
 /**
@@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
 
@@ -697,8 +727,12 @@ slow_path:
 	}
 
 	elasticity = ht->elasticity;
-	pprev = &tbl->buckets[hash];
-	rht_for_each(head, tbl, hash) {
+	pprev = rht_bucket_insert(ht, tbl, hash);
+	data = ERR_PTR(-ENOMEM);
+	if (!pprev)
+		goto out;
+
+	rht_for_each_continue(head, *pprev, tbl, hash) {
 		struct rhlist_head *plist;
 		struct rhlist_head *list;
 
@@ -736,7 +770,7 @@ slow_path:
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		goto slow_path;
 
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+	head = rht_dereference_bucket(*pprev, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (rhlist) {
@@ -746,7 +780,7 @@ slow_path:
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(tbl->buckets[hash], obj);
+	rcu_assign_pointer(*pprev, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one(
 
 	spin_lock_bh(lock);
 
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(he, *pprev, tbl, hash) {
 		struct rhlist_head *list;
 
 		list = container_of(he, struct rhlist_head, rhead);
@@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast(
 
 	spin_lock_bh(lock);
 
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(he, *pprev, tbl, hash) {
 		if (he != obj_old) {
 			pprev = &he->next;
 			continue;
-- 
cgit v1.2.3


From fb585b44383c4cff85f92e67377ee1c5f07d6dc1 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 10 Feb 2017 16:43:50 +0100
Subject: net: make net_device members garp_port and mrp_port conditional

garp_port is only used in net/802/garp.c which is only compiled with
CONFIG_GARP enabled. Same goes for mrp_port which is only used in
net/802/mrp.c with CONFIG_MRP enabled.

Only include the two members in struct net_device if their respective
CONFIG_* is enabled. This saves a few bytes in struct net_device in case
CONFIG_GARP or CONFIG_MRP are not enabled.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 54c82b5df0ba..98f65ed8f8b0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1865,8 +1865,12 @@ struct net_device {
 		struct pcpu_vstats __percpu		*vstats;
 	};
 
+#if IS_ENABLED(CONFIG_GARP)
 	struct garp_port __rcu	*garp_port;
+#endif
+#if IS_ENABLED(CONFIG_MRP)
 	struct mrp_port __rcu	*mrp_port;
+#endif
 
 	struct device		dev;
 	const struct attribute_group *sysfs_groups[4];
-- 
cgit v1.2.3


From efff8e7879b8b7f8a077f495262f0bb9cfaa0b06 Mon Sep 17 00:00:00 2001
From: Uri Yanai <uri.yanai@sandisk.com>
Date: Tue, 7 Feb 2017 18:00:01 +0200
Subject: mmc: Adding AUTO_BKOPS_EN bit set for Auto BKOPS support

Adding dedicated flag for AUTO_BKOPS in card->ext_csd structure.
Read AUTO_BKOPS bit value from the device EXT_CSD and set to the
card->ext_csd structure.
In mmc_decode_ext_csd() add a print message in case the AUTO_BKOPS
is enabled

Signed-off-by: Uri Yanai <uri.yanai@sandisk.com>
Signed-off-by: Alex Lemberg <alex.lemberg@sandisk.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/card.h | 1 +
 include/linux/mmc/mmc.h  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 29d00c91c25c..77e61e0a216a 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -83,6 +83,7 @@ struct mmc_ext_csd {
 	unsigned int		hpi_cmd;		/* cmd used as HPI */
 	bool			bkops;		/* background support bit */
 	bool			man_bkops_en;	/* manual bkops enable bit */
+	bool			auto_bkops_en;	/* auto bkops enable bit */
 	unsigned int            data_sector_size;       /* 512 bytes or 4KB */
 	unsigned int            data_tag_unit_size;     /* DATA TAG UNIT size */
 	unsigned int		boot_ro_lock;		/* ro lock support */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 7406d9badda0..3ffc27aaeeaf 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -407,6 +407,7 @@ static inline bool mmc_op_multi(u32 opcode)
  * BKOPS modes
  */
 #define EXT_CSD_MANUAL_BKOPS_MASK	0x01
+#define EXT_CSD_AUTO_BKOPS_MASK		0x02
 
 /*
  * Command Queue
-- 
cgit v1.2.3


From c43f1112c068f3b4b20a0a9d461c341d9caeb376 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Wed, 18 Jan 2017 14:10:33 +0200
Subject: IB/mlx5: Add additional checks before processing MADs

Check the has_smi bit in vport context and class version of MADs
before allowing MADs processing to take place.
MAD_IFC SMI commands can be executed only if smi bit is set.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Parvi Kaustubhi <parvik@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx5/driver.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3a309f6a4a15..b8d69aeb1784 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -289,6 +289,7 @@ struct mlx5_port_caps {
 	int	gid_table_len;
 	int	pkey_table_len;
 	u8	ext_port_cap;
+	bool	has_smi;
 };
 
 struct mlx5_cmd_mailbox {
-- 
cgit v1.2.3


From 853fe1bf7554155376bb3b231112cdff9ff79177 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 13 Feb 2017 16:25:26 -0800
Subject: cdrom: Make device operations read-only

Since function tables are a common target for attackers, it's best to keep
them in read-only memory. As such, this makes the CDROM device ops tables
const. This drops additionally n_minors, since it isn't used meaningfully,
and sets the only user of cdrom_dummy_generic_packet explicitly so the
variables can all be const.

Inspired by similar changes in grsecurity/PaX.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/cdrom.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 8609d577bb66..6e8f209a6dff 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -36,7 +36,7 @@ struct packet_command
 
 /* Uniform cdrom data structures for cdrom.c */
 struct cdrom_device_info {
-	struct cdrom_device_ops  *ops;  /* link to device_ops */
+	const struct cdrom_device_ops *ops; /* link to device_ops */
 	struct list_head list;		/* linked list of all device_info */
 	struct gendisk *disk;		/* matching block layer disk */
 	void *handle;		        /* driver-dependent data */
@@ -87,7 +87,6 @@ struct cdrom_device_ops {
 
 /* driver specifications */
 	const int capability;   /* capability flags */
-	int n_minors;           /* number of active minor devices */
 	/* handle uniform packets for scsi type devices (scsi,atapi) */
 	int (*generic_packet) (struct cdrom_device_info *,
 			       struct packet_command *);
@@ -123,6 +122,8 @@ extern int cdrom_mode_sense(struct cdrom_device_info *cdi,
 			    int page_code, int page_control);
 extern void init_cdrom_command(struct packet_command *cgc,
 			       void *buffer, int len, int type);
+extern int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
+				      struct packet_command *cgc);
 
 /* The SCSI spec says there could be 256 slots. */
 #define CDROM_MAX_SLOTS	256
-- 
cgit v1.2.3


From 23a6964e3adb0796e1633562a574839b92360cb6 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Wed, 18 Jan 2017 15:25:10 +0200
Subject: IB/mlx5: Add port counter support for Receive WQs

Counters weren't updated due to Receive WQs' traffic since the
counter-id was not associated with the RQ.

Added support for associating the q-counter-id with the Receive WQ.
The attachment is done only when changing WQ's state from RESET to
READY in modify-WQ command.

FW support is required for the above, without this support
Receive WQ counters will not count.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 37327f6ba9cb..2d197d8a7025 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4937,7 +4937,7 @@ struct mlx5_ifc_modify_rq_out_bits {
 
 enum {
 	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
-	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3,
+	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID = 1ULL << 3,
 };
 
 struct mlx5_ifc_modify_rq_in_bits {
-- 
cgit v1.2.3


From 49780d42dfc9ec0f4090c32ca59688449da1a1cd Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Wed, 18 Jan 2017 16:58:10 +0200
Subject: IB/mlx5: Expose MR cache for mlx5_ib

Allow other parts of mlx5_ib to use MR cache mechanism.
* Add new functions mlx5_mr_cache_alloc and mlx5_mr_cache_free
* Traditional MTT MKey buckets are limited by MAX_UMR_CACHE_ENTRY
  Additinal buckets may be added above.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx5/driver.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b8d69aeb1784..2534b8a0fd7b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1052,7 +1052,8 @@ enum {
 };
 
 enum {
-	MAX_MR_CACHE_ENTRIES    = 21,
+	MAX_UMR_CACHE_ENTRY = 20,
+	MAX_MR_CACHE_ENTRIES
 };
 
 enum {
-- 
cgit v1.2.3


From 81713d3788d2e6bc005f15ee1c59d0eb06050a6b Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Wed, 18 Jan 2017 16:58:11 +0200
Subject: IB/mlx5: Add implicit MR support

Add implicit MR, covering entire user address space.
The MR is implemented as an indirect KSM MR consisting of
1GB direct MRs.
Pages and direct MRs are added/removed to MR by ODP.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx5/driver.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2534b8a0fd7b..886ff2b00500 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1053,6 +1053,8 @@ enum {
 
 enum {
 	MAX_UMR_CACHE_ENTRY = 20,
+	MLX5_IMR_MTT_CACHE_ENTRY,
+	MLX5_IMR_KSM_CACHE_ENTRY,
 	MAX_MR_CACHE_ENTRIES
 };
 
-- 
cgit v1.2.3


From 51c6ce2ae35980c755af33461c3138570ded615e Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 11 Feb 2017 23:02:18 -0700
Subject: vmbus: callback is in softirq not workqueue

The callback is done via tasklet not workqueue.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e208e6437f5b..c9b6d533958f 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -32,7 +32,6 @@
 #include <linux/scatterlist.h>
 #include <linux/list.h>
 #include <linux/timer.h>
-#include <linux/workqueue.h>
 #include <linux/completion.h>
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
@@ -743,9 +742,7 @@ struct vmbus_channel {
 
 	struct vmbus_close_msg close_msg;
 
-	/* Channel callback are invoked in this workqueue context */
-	/* HANDLE dataWorkQueue; */
-
+	/* Channel callback's invoked in softirq context */
 	void (*onchannel_callback)(void *context);
 	void *channel_callback_context;
 
-- 
cgit v1.2.3


From 631e63a9f346cb657761ae22138f294718696501 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 11 Feb 2017 23:02:20 -0700
Subject: vmbus: change to per channel tasklet

Make the event handling tasklet per channel rather than per-cpu.
This allows for better fairness when getting lots of data on the same
cpu.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index c9b6d533958f..69afc9337c0d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -35,7 +35,7 @@
 #include <linux/completion.h>
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
-
+#include <linux/interrupt.h>
 
 #define MAX_PAGE_BUFFER_COUNT				32
 #define MAX_MULTIPAGE_BUFFER_COUNT			32 /* 128K */
@@ -743,6 +743,7 @@ struct vmbus_channel {
 	struct vmbus_close_msg close_msg;
 
 	/* Channel callback's invoked in softirq context */
+	struct tasklet_struct callback_event;
 	void (*onchannel_callback)(void *context);
 	void *channel_callback_context;
 
-- 
cgit v1.2.3


From b71e328297a3a578c482fb4814e737a0ec185839 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 11 Feb 2017 23:02:21 -0700
Subject: vmbus: add direct isr callback mode

Change the simple boolean batched_reading into a tri-value.
For future NAPI support in netvsc driver, the callback needs to
occur directly in interrupt handler.

Batched mode is also changed to disable host interrupts immediately
in interrupt routine (to avoid unnecessary host signals), and the
tasklet is rescheduled if more data is detected.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 69afc9337c0d..e5aac5c051f7 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -748,19 +748,21 @@ struct vmbus_channel {
 	void *channel_callback_context;
 
 	/*
-	 * A channel can be marked for efficient (batched)
-	 * reading:
-	 * If batched_reading is set to "true", we read until the
-	 * channel is empty and hold off interrupts from the host
-	 * during the entire read process.
-	 * If batched_reading is set to "false", the client is not
-	 * going to perform batched reading.
-	 *
-	 * By default we will enable batched reading; specific
-	 * drivers that don't want this behavior can turn it off.
+	 * A channel can be marked for one of three modes of reading:
+	 *   BATCHED - callback called from taslket and should read
+	 *            channel until empty. Interrupts from the host
+	 *            are masked while read is in process (default).
+	 *   DIRECT - callback called from tasklet (softirq).
+	 *   ISR - callback called in interrupt context and must
+	 *         invoke its own deferred processing.
+	 *         Host interrupts are disabled and must be re-enabled
+	 *         when ring is empty.
 	 */
-
-	bool batched_reading;
+	enum hv_callback_mode {
+		HV_CALL_BATCHED,
+		HV_CALL_DIRECT,
+		HV_CALL_ISR
+	} callback_mode;
 
 	bool is_dedicated_interrupt;
 	struct hv_input_signal_event_buffer sig_buf;
@@ -910,9 +912,10 @@ static inline void set_channel_affinity_state(struct vmbus_channel *c,
 	c->affinity_policy = policy;
 }
 
-static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
+static inline void set_channel_read_mode(struct vmbus_channel *c,
+					enum hv_callback_mode mode)
 {
-	c->batched_reading = state;
+	c->callback_mode = mode;
 }
 
 static inline void set_per_channel_state(struct vmbus_channel *c, void *s)
-- 
cgit v1.2.3


From 5529eaf6e79a61e0ca7ade257f31d2ababc7f6c9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 11 Feb 2017 23:02:22 -0700
Subject: vmbus: remove conditional locking of vmbus_write

All current usage of vmbus write uses the acquire_lock flag, therefore
having it be optional is unnecessary. This also fixes a sparse warning
since sparse doesn't like when a function has conditional locking.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e5aac5c051f7..466374dbc98f 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -845,16 +845,6 @@ struct vmbus_channel {
 	 * link up channels based on their CPU affinity.
 	 */
 	struct list_head percpu_list;
-	/*
-	 * On the channel send side, many of the VMBUS
-	 * device drivers explicity serialize access to the
-	 * outgoing ring buffer. Give more control to the
-	 * VMBUS device drivers in terms how to serialize
-	 * accesss to the outgoing ring buffer.
-	 * The default behavior will be to aquire the
-	 * ring lock to preserve the current behavior.
-	 */
-	bool acquire_ring_lock;
 	/*
 	 * For performance critical channels (storage, networking
 	 * etc,), Hyper-V has a mechanism to enhance the throughput
@@ -895,11 +885,6 @@ struct vmbus_channel {
 
 };
 
-static inline void set_channel_lock_state(struct vmbus_channel *c, bool state)
-{
-	c->acquire_ring_lock = state;
-}
-
 static inline bool is_hvsock_channel(const struct vmbus_channel *c)
 {
 	return !!(c->offermsg.offer.chn_flags &
-- 
cgit v1.2.3


From 6e47dd3e2938f41d75045bbcb64aa9df3a463b2a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 11 Feb 2017 23:02:23 -0700
Subject: vmbus: expose hv_begin/end_read

In order to implement NAPI in netvsc, the driver needs access to
control host interrupt mask.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 466374dbc98f..08eb71a22c14 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1512,6 +1512,36 @@ init_cached_read_index(struct vmbus_channel *channel)
 	rbi->cached_read_index = rbi->ring_buffer->read_index;
 }
 
+/*
+ * Mask off host interrupt callback notifications
+ */
+static inline void hv_begin_read(struct hv_ring_buffer_info *rbi)
+{
+	rbi->ring_buffer->interrupt_mask = 1;
+
+	/* make sure mask update is not reordered */
+	virt_mb();
+}
+
+/*
+ * Re-enable host callback and return number of outstanding bytes
+ */
+static inline u32 hv_end_read(struct hv_ring_buffer_info *rbi)
+{
+
+	rbi->ring_buffer->interrupt_mask = 0;
+
+	/* make sure mask update is not reordered */
+	virt_mb();
+
+	/*
+	 * Now check to see if the ring buffer is still empty.
+	 * If it is not, we raced and we need to process new
+	 * incoming messages.
+	 */
+	return hv_get_bytes_to_read(rbi);
+}
+
 /*
  * An API to support in-place processing of incoming VMBUS packets.
  */
-- 
cgit v1.2.3


From e4165a0fad0963bf8b4a59f54d3360ccb6a6d1ea Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 11 Feb 2017 23:02:24 -0700
Subject: vmbus: constify parameters where possible

Functions that just query state of ring buffer can have parameters
marked const.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 08eb71a22c14..62bbf3c1aa4a 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -138,8 +138,8 @@ struct hv_ring_buffer_info {
  * for the specified ring buffer
  */
 static inline void
-hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
-			  u32 *read, u32 *write)
+hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
+			     u32 *read, u32 *write)
 {
 	u32 read_loc, write_loc, dsize;
 
@@ -153,7 +153,7 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
 	*read = dsize - *write;
 }
 
-static inline u32 hv_get_bytes_to_read(struct hv_ring_buffer_info *rbi)
+static inline u32 hv_get_bytes_to_read(const struct hv_ring_buffer_info *rbi)
 {
 	u32 read_loc, write_loc, dsize, read;
 
@@ -167,7 +167,7 @@ static inline u32 hv_get_bytes_to_read(struct hv_ring_buffer_info *rbi)
 	return read;
 }
 
-static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi)
+static inline u32 hv_get_bytes_to_write(const struct hv_ring_buffer_info *rbi)
 {
 	u32 read_loc, write_loc, dsize, write;
 
@@ -1448,9 +1448,9 @@ void vmbus_set_event(struct vmbus_channel *channel);
 
 /* Get the start of the ring buffer. */
 static inline void *
-hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
+hv_get_ring_buffer(const struct hv_ring_buffer_info *ring_info)
 {
-	return (void *)ring_info->ring_buffer->buffer;
+	return ring_info->ring_buffer->buffer;
 }
 
 /*
-- 
cgit v1.2.3


From e225c20eb0fd0b6657e640408f11ee392dc82b5b Mon Sep 17 00:00:00 2001
From: Scott Bauer <scott.bauer@intel.com>
Date: Tue, 14 Feb 2017 17:29:36 -0700
Subject: Move stack parameters for sed_ioctl to prevent oversized stack with
 CONFIG_KASAN

When CONFIG_KASAN is enabled, compilation fails:

block/sed-opal.c: In function 'sed_ioctl':
block/sed-opal.c:2447:1: error: the frame size of 2256 bytes is larger than 2048 bytes [-Werror=frame-larger-than=]

Moved all the ioctl structures off the stack and dynamically allocate
using _IOC_SIZE()

Fixes: 455a7b238cd6 ("block: Add Sed-opal library")

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Scott Bauer <scott.bauer@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/sed-opal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index af1a85eae193..205d520ea688 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -132,7 +132,7 @@ struct opal_dev {
 #ifdef CONFIG_BLK_SED_OPAL
 bool opal_unlock_from_suspend(struct opal_dev *dev);
 void init_opal_dev(struct opal_dev *opal_dev, sec_send_recv *send_recv);
-int sed_ioctl(struct opal_dev *dev, unsigned int cmd, unsigned long ptr);
+int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr);
 
 static inline bool is_sed_ioctl(unsigned int cmd)
 {
@@ -160,7 +160,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
 }
 
 static inline int sed_ioctl(struct opal_dev *dev, unsigned int cmd,
-			    unsigned long ptr)
+			    void __user *ioctl_ptr)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 5f114163f2f5eb2edbb49c4d3e0b405c7a8a7e2a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 15 Feb 2017 09:39:39 +0100
Subject: net: Add a skb_gro_flush_final helper.

Add a skb_gro_flush_final helper to prepare for  consuming
skbs in call_gro_receive. We will extend this helper to not
touch the skb if the skb is consumed by a gro callback with
a followup patch. We need this to handle the upcomming IPsec
ESP callbacks as they reinject the skb to the napi_gro_receive
asynchronous. The handler is used in all gro_receive functions
that can call the ESP gro handlers.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/linux/netdevice.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 58afbd1cc659..f9da3acfee9a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2661,6 +2661,11 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
 	remcsum_unadjust((__sum16 *)ptr, grc->delta);
 }
 
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
+{
+	NAPI_GRO_CB(skb)->flush |= flush;
+}
+
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
 				  const void *daddr, const void *saddr,
-- 
cgit v1.2.3


From 25393d3fc055b76587fcc91627aee8c345400c3a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 15 Feb 2017 09:39:44 +0100
Subject: net: Prepare gro for packet consuming gro callbacks

The upcomming IPsec ESP gro callbacks will consume the skb,
so prepare for that.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/linux/netdevice.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f9da3acfee9a..9e5d1cd9d975 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -352,6 +352,7 @@ enum gro_result {
 	GRO_HELD,
 	GRO_NORMAL,
 	GRO_DROP,
+	GRO_CONSUMED,
 };
 typedef enum gro_result gro_result_t;
 
@@ -2661,10 +2662,18 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
 	remcsum_unadjust((__sum16 *)ptr, grc->delta);
 }
 
+#ifdef CONFIG_XFRM_OFFLOAD
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
+{
+	if (PTR_ERR(pp) != -EINPROGRESS)
+		NAPI_GRO_CB(skb)->flush |= flush;
+}
+#else
 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
 {
 	NAPI_GRO_CB(skb)->flush |= flush;
 }
+#endif
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
-- 
cgit v1.2.3


From 884f38607897cb4a963ea8a65296f0973a2828d0 Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Wed, 15 Feb 2017 16:35:29 +0800
Subject: mmc: core: move some sdio IDs out of quirks file

Consolidate all the sdio devices' IDs into sdio_ids.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/sdio_ids.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index d43ef96bf075..46794a7a531c 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -51,6 +51,7 @@
 #define SDIO_DEVICE_ID_MARVELL_LIBERTAS		0x9103
 #define SDIO_DEVICE_ID_MARVELL_8688WLAN		0x9104
 #define SDIO_DEVICE_ID_MARVELL_8688BT		0x9105
+#define SDIO_DEVICE_ID_MARVELL_8797_F0		0x9128
 
 #define SDIO_VENDOR_ID_SIANO			0x039a
 #define SDIO_DEVICE_ID_SIANO_NOVA_B0		0x0201
@@ -60,4 +61,10 @@
 #define SDIO_DEVICE_ID_SIANO_NOVA_A0		0x1100
 #define SDIO_DEVICE_ID_SIANO_STELLAR 		0x5347
 
+#define SDIO_VENDOR_ID_TI			0x0097
+#define SDIO_DEVICE_ID_TI_WL1271		0x4076
+
+#define SDIO_VENDOR_ID_STE			0x0020
+#define SDIO_DEVICE_ID_STE_CW1200		0x2280
+
 #endif /* LINUX_MMC_SDIO_IDS_H */
-- 
cgit v1.2.3


From 8a58a34ba479d42b3ef28f8ffd9e245a81a7786f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Feb 2017 16:41:15 +0100
Subject: timers: Make flags output in the timer_start tracepoint useful

The timer flags in the timer_start trace event contain lots of useful
information, but the meaning is not clear in the trace output. Making tools
rely on the bit positions is bad as they might change over time.

Decode the flags in the print out. Tools can retrieve the bits and their
meaning from the trace format file.

Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1702101639290.4036@nanos

Requested-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/timer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 51d601f192d4..a17f915f9456 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -66,6 +66,8 @@ struct timer_list {
 #define TIMER_ARRAYSHIFT	22
 #define TIMER_ARRAYMASK		0xFFC00000
 
+#define TIMER_TRACE_FLAGMASK	(TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE)
+
 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
 		.entry = { .next = TIMER_ENTRY_STATIC },	\
 		.function = (_function),			\
-- 
cgit v1.2.3


From 3821fd35b58dba449bd894014fbf4e1c43c9e951 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Fri, 3 Feb 2017 15:42:24 -0500
Subject: jump_label: Reduce the size of struct static_key

The static_key->next field goes mostly unused. The field is used for
associating module uses with a static key. Most uses of struct static_key
define a static key in the core kernel and make use of it entirely within
the core kernel, or define the static key in a module and make use of it
only from within that module. In fact, of the ~3,000 static keys defined,
I found only about 5 or so that did not fit this pattern.

Thus, we can remove the static_key->next field entirely and overload
the static_key->entries field. That is, when all the static_key uses
are contained within the same module, static_key->entries continues
to point to those uses. However, if the static_key uses are not contained
within the module where the static_key is defined, then we allocate a
struct static_key_mod, store a pointer to the uses within that
struct static_key_mod, and have the static key point at the static_key_mod.
This does incur some extra memory usage when a static_key is used in a
module that does not define it, but since there are only a handful of such
cases there is a net savings.

In order to identify if the static_key->entries pointer contains a
struct static_key_mod or a struct jump_entry pointer, bit 1 of
static_key->entries is set to 1 if it points to a struct static_key_mod and
is 0 if it points to a struct jump_entry. We were already using bit 0 in a
similar way to store the initial value of the static_key. This does mean
that allocations of struct static_key_mod and that the struct jump_entry
tables need to be at least 4-byte aligned in memory. As far as I can tell
all arches meet this criteria.

For my .config, the patch increased the text by 778 bytes, but reduced
the data + bss size by 14912, for a net savings of 14,134 bytes.

   text	   data	    bss	    dec	    hex	filename
8092427	5016512	 790528	13899467	 d416cb	vmlinux.pre
8093205	5001600	 790528	13885333	 d3df95	vmlinux.post

Link: http://lkml.kernel.org/r/1486154544-4321-1-git-send-email-jbaron@akamai.com

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/jump_label.h | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a0547c571800..680c98b2f41c 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -89,11 +89,17 @@ extern bool static_key_initialized;
 
 struct static_key {
 	atomic_t enabled;
-/* Set lsb bit to 1 if branch is default true, 0 ot */
-	struct jump_entry *entries;
-#ifdef CONFIG_MODULES
-	struct static_key_mod *next;
-#endif
+/*
+ * bit 0 => 1 if key is initially true
+ *	    0 if initially false
+ * bit 1 => 1 if points to struct static_key_mod
+ *	    0 if points to struct jump_entry
+ */
+	union {
+		unsigned long type;
+		struct jump_entry *entries;
+		struct static_key_mod *next;
+	};
 };
 
 #else
@@ -118,9 +124,10 @@ struct module;
 
 #ifdef HAVE_JUMP_LABEL
 
-#define JUMP_TYPE_FALSE	0UL
-#define JUMP_TYPE_TRUE	1UL
-#define JUMP_TYPE_MASK	1UL
+#define JUMP_TYPE_FALSE		0UL
+#define JUMP_TYPE_TRUE		1UL
+#define JUMP_TYPE_LINKED	2UL
+#define JUMP_TYPE_MASK		3UL
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
-- 
cgit v1.2.3


From b85ad494098bf881c3713218fbd74193e5d5c488 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 3 Feb 2017 12:39:03 -0600
Subject: of: introduce of_graph_get_remote_node

The OF graph API leaves too much of the graph walking to clients when
in many cases the driver doesn't care about accessing the port or
endpoint nodes. The drivers typically just want the device connected via
a particular graph connection. of_graph_get_remote_node provides this
functionality.

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/of_graph.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h
index bb3a5a2cd570..abdb02eaef06 100644
--- a/include/linux/of_graph.h
+++ b/include/linux/of_graph.h
@@ -51,6 +51,8 @@ struct device_node *of_graph_get_endpoint_by_regs(
 struct device_node *of_graph_get_remote_port_parent(
 					const struct device_node *node);
 struct device_node *of_graph_get_remote_port(const struct device_node *node);
+struct device_node *of_graph_get_remote_node(const struct device_node *node,
+					     u32 port, u32 endpoint);
 #else
 
 static inline int of_graph_parse_endpoint(const struct device_node *node,
@@ -89,6 +91,12 @@ static inline struct device_node *of_graph_get_remote_port(
 {
 	return NULL;
 }
+static inline struct device_node *of_graph_get_remote_node(
+					const struct device_node *node,
+					u32 port, u32 endpoint)
+{
+	return NULL;
+}
 
 #endif /* CONFIG_OF */
 
-- 
cgit v1.2.3


From c78c70fa30e23dc6cdb394f6c13880919499fba5 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.Kalluru@cavium.com>
Date: Wed, 15 Feb 2017 10:24:10 +0200
Subject: qed: Add infrastructure for PTP support

The patch adds the required qed interfaces for configuring/reading
the PTP clock on the adapter.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_eth_if.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 3613d63cd5d0..4cd1f0ccfa36 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -96,6 +96,7 @@ struct qed_update_vport_params {
 
 struct qed_start_vport_params {
 	bool remove_inner_vlan;
+	bool handle_ptp_pkts;
 	bool gro_enable;
 	bool drop_ttl0;
 	u8 vport_id;
@@ -159,6 +160,15 @@ struct qed_eth_cb_ops {
 	void (*force_mac) (void *dev, u8 *mac, bool forced);
 };
 
+#define QED_MAX_PHC_DRIFT_PPB   291666666
+
+enum qed_ptp_filter_type {
+	QED_PTP_FILTER_L2,
+	QED_PTP_FILTER_IPV4,
+	QED_PTP_FILTER_IPV4_IPV6,
+	QED_PTP_FILTER_L2_IPV4_IPV6
+};
+
 #ifdef CONFIG_DCB
 /* Prototype declaration of qed_eth_dcbnl_ops should match with the declaration
  * of dcbnl_rtnl_ops structure.
@@ -218,6 +228,17 @@ struct qed_eth_dcbnl_ops {
 };
 #endif
 
+struct qed_eth_ptp_ops {
+	int (*hwtstamp_tx_on)(struct qed_dev *);
+	int (*cfg_rx_filters)(struct qed_dev *, enum qed_ptp_filter_type);
+	int (*read_rx_ts)(struct qed_dev *, u64 *);
+	int (*read_tx_ts)(struct qed_dev *, u64 *);
+	int (*read_cc)(struct qed_dev *, u64 *);
+	int (*disable)(struct qed_dev *);
+	int (*adjfreq)(struct qed_dev *, s32);
+	int (*enable)(struct qed_dev *);
+};
+
 struct qed_eth_ops {
 	const struct qed_common_ops *common;
 #ifdef CONFIG_QED_SRIOV
@@ -226,6 +247,7 @@ struct qed_eth_ops {
 #ifdef CONFIG_DCB
 	const struct qed_eth_dcbnl_ops *dcb;
 #endif
+	const struct qed_eth_ptp_ops *ptp;
 
 	int (*fill_dev_info)(struct qed_dev *cdev,
 			     struct qed_dev_eth_info *info);
-- 
cgit v1.2.3


From c18a1e09008de7d8bd82b046d38a88f4285d53f9 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Tue, 14 Feb 2017 23:28:59 +0800
Subject: block: introduce bio_clone_bioset_partial()

md still need bio clone(not the fast version) for behind write,
and it is more efficient to use bio_clone_bioset_partial().

The idea is simple and just copy the bvecs range specified from
parameters.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 include/linux/bio.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7cf8a6c70a3f..8e521194f6fc 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -183,7 +183,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 
 #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
 
-static inline unsigned bio_segments(struct bio *bio)
+static inline unsigned __bio_segments(struct bio *bio, struct bvec_iter *bvec)
 {
 	unsigned segs = 0;
 	struct bio_vec bv;
@@ -205,12 +205,17 @@ static inline unsigned bio_segments(struct bio *bio)
 		break;
 	}
 
-	bio_for_each_segment(bv, bio, iter)
+	__bio_for_each_segment(bv, bio, iter, *bvec)
 		segs++;
 
 	return segs;
 }
 
+static inline unsigned bio_segments(struct bio *bio)
+{
+	return __bio_segments(bio, &bio->bi_iter);
+}
+
 /*
  * get a reference to a bio, so it won't disappear. the intended use is
  * something like:
@@ -384,6 +389,8 @@ extern void bio_put(struct bio *);
 extern void __bio_clone_fast(struct bio *, struct bio *);
 extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
 extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
+extern struct bio *bio_clone_bioset_partial(struct bio *, gfp_t,
+					    struct bio_set *, int, int);
 
 extern struct bio_set *fs_bio_set;
 
-- 
cgit v1.2.3


From bf3f14d6342cfb37eab8f0cddd0e4d4063fd9fc9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 15 Feb 2017 22:29:51 -0500
Subject: rhashtable: Revert nested table changes.

This reverts commits:

6a25478077d987edc5e2f880590a2bc5fcab4441
9dbbfb0ab6680c6a85609041011484e6658e7d3c
40137906c5f55c252194ef5834130383e639536f

It's too risky to put in this late in the release
cycle.  We'll put these changes into the next merge
window instead.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 78 +++++++++++++---------------------------------
 1 file changed, 22 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index f2e12a845910..5c132d3188be 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -61,7 +61,6 @@ struct rhlist_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
- * @nest: Number of bits of first-level nested table.
  * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
  * @locks_mask: Mask to apply before accessing locks[]
@@ -69,12 +68,10 @@ struct rhlist_head {
  * @walkers: List of active walkers
  * @rcu: RCU structure for freeing the table
  * @future_tbl: Table under construction during rehashing
- * @ntbl: Nested table used when out of memory.
  * @buckets: size * hash buckets
  */
 struct bucket_table {
 	unsigned int		size;
-	unsigned int		nest;
 	unsigned int		rehash;
 	u32			hash_rnd;
 	unsigned int		locks_mask;
@@ -84,7 +81,7 @@ struct bucket_table {
 
 	struct bucket_table __rcu *future_tbl;
 
-	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
+	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
 };
 
 /**
@@ -377,12 +374,6 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
-struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
-					    unsigned int hash);
-struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
-						   struct bucket_table *tbl,
-						   unsigned int hash);
-
 #define rht_dereference(p, ht) \
 	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
 
@@ -398,27 +389,6 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
 #define rht_entry(tpos, pos, member) \
 	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 
-static inline struct rhash_head __rcu *const *rht_bucket(
-	const struct bucket_table *tbl, unsigned int hash)
-{
-	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
-				     &tbl->buckets[hash];
-}
-
-static inline struct rhash_head __rcu **rht_bucket_var(
-	struct bucket_table *tbl, unsigned int hash)
-{
-	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
-				     &tbl->buckets[hash];
-}
-
-static inline struct rhash_head __rcu **rht_bucket_insert(
-	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
-{
-	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
-				     &tbl->buckets[hash];
-}
-
 /**
  * rht_for_each_continue - continue iterating over hash chain
  * @pos:	the &struct rhash_head to use as a loop cursor.
@@ -438,7 +408,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * @hash:	the hash value / bucket index
  */
 #define rht_for_each(pos, tbl, hash) \
-	rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
+	rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
 
 /**
  * rht_for_each_entry_continue - continue iterating over hash chain
@@ -463,7 +433,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * @member:	name of the &struct rhash_head within the hashable struct.
  */
 #define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash),	\
+	rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash],	\
 				    tbl, hash, member)
 
 /**
@@ -478,13 +448,13 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * This hash chain list-traversal primitive allows for the looped code to
  * remove the loop cursor from the list.
  */
-#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
-	for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
-	     next = !rht_is_a_nulls(pos) ?				      \
-		       rht_dereference_bucket(pos->next, tbl, hash) : NULL;   \
-	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
-	     pos = next,						      \
-	     next = !rht_is_a_nulls(pos) ?				      \
+#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	    \
+	for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \
+	     next = !rht_is_a_nulls(pos) ?				    \
+		       rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
+	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
+	     pos = next,						    \
+	     next = !rht_is_a_nulls(pos) ?				    \
 		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)
 
 /**
@@ -515,7 +485,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * traversal is guarded by rcu_read_lock().
  */
 #define rht_for_each_rcu(pos, tbl, hash)				\
-	rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
+	rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
 
 /**
  * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
@@ -548,8 +518,8 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * the _rcu mutation primitives such as rhashtable_insert() as long as the
  * traversal is guarded by rcu_read_lock().
  */
-#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
-	rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		\
+	rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
 					tbl, hash, member)
 
 /**
@@ -595,7 +565,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
-	struct bucket_table *tbl;
+	const struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
 
@@ -727,12 +697,8 @@ slow_path:
 	}
 
 	elasticity = ht->elasticity;
-	pprev = rht_bucket_insert(ht, tbl, hash);
-	data = ERR_PTR(-ENOMEM);
-	if (!pprev)
-		goto out;
-
-	rht_for_each_continue(head, *pprev, tbl, hash) {
+	pprev = &tbl->buckets[hash];
+	rht_for_each(head, tbl, hash) {
 		struct rhlist_head *plist;
 		struct rhlist_head *list;
 
@@ -770,7 +736,7 @@ slow_path:
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		goto slow_path;
 
-	head = rht_dereference_bucket(*pprev, tbl, hash);
+	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (rhlist) {
@@ -780,7 +746,7 @@ slow_path:
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(*pprev, obj);
+	rcu_assign_pointer(tbl->buckets[hash], obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -989,8 +955,8 @@ static inline int __rhashtable_remove_fast_one(
 
 	spin_lock_bh(lock);
 
-	pprev = rht_bucket_var(tbl, hash);
-	rht_for_each_continue(he, *pprev, tbl, hash) {
+	pprev = &tbl->buckets[hash];
+	rht_for_each(he, tbl, hash) {
 		struct rhlist_head *list;
 
 		list = container_of(he, struct rhlist_head, rhead);
@@ -1141,8 +1107,8 @@ static inline int __rhashtable_replace_fast(
 
 	spin_lock_bh(lock);
 
-	pprev = rht_bucket_var(tbl, hash);
-	rht_for_each_continue(he, *pprev, tbl, hash) {
+	pprev = &tbl->buckets[hash];
+	rht_for_each(he, tbl, hash) {
 		if (he != obj_old) {
 			pprev = &he->next;
 			continue;
-- 
cgit v1.2.3


From b802c8410ca915e7772e738e68420115f1a3c811 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 15 Feb 2017 18:42:09 -0800
Subject: async_tx: deprecate broken support for channel switching

Back in 2011, Russell pointed out that the "async_tx channel switch"
capability was violating expectations of the dma mapping api [1]. At the
time the existing uses were reviewed as still usable, but that longer
term we needed a rework of the raid offload implementation. While some
of the framework for a fixed implementation was introduced in 2012 [2],
the wider rewrite never materialized.

There continues to be interest in raid offload with new dma/raid engine
drivers being submitted. Those drivers must not build on top of the
broken channel switching capability.

Prevent async_tx from using an offload engine if the channel switching
capability is enabled. This still allows the engine to be used for other
purposes, but the broken way async_tx uses these engines for raid will
be disabled. For configurations where this causes a performance
regression the only solution is to start the work of eliminating the
async_tx api and moving channel management into the raid code directly
where it can manage marshalling an operation stream between multiple dma
channels.

[1]: http://lists.infradead.org/pipermail/linux-arm-kernel/2011-January/036753.html
[2]: https://lkml.org/lkml/2012/12/6/71

Cc: Anatolij Gustschin <agust@denx.de>
Cc: Anup Patel <anup.patel@broadcom.com>
Cc: Rameshwar Prasad Sahu <rsahu@apm.com>
Cc: Saeed Bishara <saeed.bishara@gmail.com>
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reported-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/async_tx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 388574ea38ed..28e3cf1465ab 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -87,7 +87,7 @@ struct async_submit_ctl {
 	void *scribble;
 };
 
-#ifdef CONFIG_DMA_ENGINE
+#if defined(CONFIG_DMA_ENGINE) && !defined(CONFIG_ASYNC_TX_CHANNEL_SWITCH)
 #define async_tx_issue_pending_all dma_issue_pending_all
 
 /**
-- 
cgit v1.2.3


From 2f44e29cef006a4b0a4ecf7d4c5aac7d0fbb505c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Feb 2017 22:53:12 +0100
Subject: genirq/msi: Add stubs for get_cached_msi_msg/pci_write_msi_msg

A bug fix to the MSIx handling in vfio added references to functions
that may not be defined if MSI is disabled in the kernel, resulting in
this link error:

drivers/built-in.o: In function `vfio_msi_set_vector_signal':
:(.text+0x450808): undefined reference to `get_cached_msi_msg'
:(.text+0x45080c): undefined reference to `write_msi_msg'

As suggested by Alex Williamson, add stub implementations for
get_cached_msi_msg() and pci_write_msi_msg().

In case this bugfix gets backported, please note that the #ifdef
has changed over time, originally both functions were implemented
in drivers/pci/msi.c and controlled by CONFIG_PCI_MSI, while nowadays
get_cached_msi_msg() is part of the generic MSI support and can be
used without PCI.

Fixes: b8f02af096b1 ("vfio/pci: Restore MSIx message prior to enabling")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Bart Van Assche <bart.vanassche@sandisk.com>
Link: http://lkml.kernel.org/r/1413190208.4202.34.camel@ul30vt.home
Link: http://lkml.kernel.org/r/20170214215343.3307861-1-arnd@arndb.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/msi.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 0db320b7bb15..a83b84ff70e5 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -17,7 +17,13 @@ struct msi_desc;
 struct pci_dev;
 struct platform_msi_priv_data;
 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
+#ifdef CONFIG_GENERIC_MSI_IRQ
 void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
+#else
+static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+}
+#endif
 
 typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc,
 				    struct msi_msg *msg);
@@ -116,11 +122,15 @@ struct msi_desc {
 
 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc);
 void *msi_desc_to_pci_sysdata(struct msi_desc *desc);
+void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg);
 #else /* CONFIG_PCI_MSI */
 static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
 {
 	return NULL;
 }
+static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+}
 #endif /* CONFIG_PCI_MSI */
 
 struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
@@ -128,7 +138,6 @@ struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
 void free_msi_entry(struct msi_desc *entry);
 void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
-void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg);
 
 u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag);
 u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag);
-- 
cgit v1.2.3


From bbd6411513aa8ef3ea02abab61318daf87c1af1e Mon Sep 17 00:00:00 2001
From: "Cao, Lei" <Lei.Cao@stratus.com>
Date: Fri, 3 Feb 2017 20:04:35 +0000
Subject: KVM: Support vCPU-based gfn->hva cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Provide versions of struct gfn_to_hva_cache functions that
take vcpu as a parameter instead of struct kvm.  The existing functions
are not needed anymore, so delete them.  This allows dirty pages to
be logged in the vcpu dirty ring, instead of the global dirty ring,
for ring-based dirty memory tracking.

Signed-off-by: Lei Cao <lei.cao@stratus.com>
Message-Id: <CY1PR08MB19929BD2AC47A291FD680E83F04F0@CY1PR08MB1992.namprd08.prod.outlook.com>
Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cda457bcedc1..2db458ee94b0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -641,18 +641,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 			  unsigned long len);
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
-int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
-			   void *data, unsigned long len);
+int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
+			       void *data, unsigned long len);
 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
 			 int offset, int len);
 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
 		    unsigned long len);
-int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
-			   void *data, unsigned long len);
-int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
-			   void *data, int offset, unsigned long len);
-int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
-			      gpa_t gpa, unsigned long len);
+int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc,
+				void *data, unsigned long len);
+int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc,
+				       void *data, int offset, unsigned long len);
+int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc,
+				   gpa_t gpa, unsigned long len);
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
-- 
cgit v1.2.3


From 55f0cd3fb9c29c20fb94c47e28a9ec8cf704f8c2 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hsweeten@visionengravers.com>
Date: Thu, 16 Feb 2017 13:07:37 -0700
Subject: spi: spi-ep93xx: simplify GPIO chip selects

This driver requires a GPIO line to be used for the chip select of
each SPI device.

Remove the ep93xx_spi_chip_ops definition from the platform data
and use the spi core GPIO handling for the chip selects.

Fix all the ep93xx platforms that use this driver and remove the
old Documentation.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/platform_data/spi-ep93xx.h | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/spi-ep93xx.h b/include/linux/platform_data/spi-ep93xx.h
index 9bb63ac13f04..171a271c2cbd 100644
--- a/include/linux/platform_data/spi-ep93xx.h
+++ b/include/linux/platform_data/spi-ep93xx.h
@@ -5,25 +5,14 @@ struct spi_device;
 
 /**
  * struct ep93xx_spi_info - EP93xx specific SPI descriptor
- * @num_chipselect: number of chip selects on this board, must be
- *                  at least one
+ * @chipselect: array of gpio numbers to use as chip selects
+ * @num_chipselect: ARRAY_SIZE(chipselect)
  * @use_dma: use DMA for the transfers
  */
 struct ep93xx_spi_info {
+	int	*chipselect;
 	int	num_chipselect;
 	bool	use_dma;
 };
 
-/**
- * struct ep93xx_spi_chip_ops - operation callbacks for SPI slave device
- * @setup: setup the chip select mechanism
- * @cleanup: cleanup the chip select mechanism
- * @cs_control: control the device chip select
- */
-struct ep93xx_spi_chip_ops {
-	int	(*setup)(struct spi_device *spi);
-	void	(*cleanup)(struct spi_device *spi);
-	void	(*cs_control)(struct spi_device *spi, int value);
-};
-
 #endif /* __ASM_MACH_EP93XX_SPI_H */
-- 
cgit v1.2.3


From bd7e5b0899a429445cc6e3037c13f8b5ae3be903 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 3 Feb 2017 21:18:52 -0800
Subject: KVM: x86: remove code for lazy FPU handling

The FPU is always active now when running KVM.

Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2db458ee94b0..8d69d5150748 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -221,7 +221,6 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;
 
-	int fpu_active;
 	int guest_fpu_loaded, guest_xcr0_loaded;
 	struct swait_queue_head wq;
 	struct pid *pid;
-- 
cgit v1.2.3


From da20420f83ea0fbcf3d03afda08d971ea1d8a356 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 11 Feb 2017 19:26:47 +0800
Subject: rhashtable: Add nested tables

This patch adds code that handles GFP_ATOMIC kmalloc failure on
insertion.  As we cannot use vmalloc, we solve it by making our
hash table nested.  That is, we allocate single pages at each level
and reach our desired table size by nesting them.

When a nested table is created, only a single page is allocated
at the top-level.  Lower levels are allocated on demand during
insertion.  Therefore for each insertion to succeed, only two
(non-consecutive) pages are needed.

After a nested table is created, a rehash will be scheduled in
order to switch to a vmalloced table as soon as possible.  Also,
the rehash code will never rehash into a nested table.  If we
detect a nested table during a rehash, the rehash will be aborted
and a new rehash will be scheduled.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 78 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 56 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 5c132d3188be..f2e12a845910 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -61,6 +61,7 @@ struct rhlist_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
+ * @nest: Number of bits of first-level nested table.
  * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
  * @locks_mask: Mask to apply before accessing locks[]
@@ -68,10 +69,12 @@ struct rhlist_head {
  * @walkers: List of active walkers
  * @rcu: RCU structure for freeing the table
  * @future_tbl: Table under construction during rehashing
+ * @ntbl: Nested table used when out of memory.
  * @buckets: size * hash buckets
  */
 struct bucket_table {
 	unsigned int		size;
+	unsigned int		nest;
 	unsigned int		rehash;
 	u32			hash_rnd;
 	unsigned int		locks_mask;
@@ -81,7 +84,7 @@ struct bucket_table {
 
 	struct bucket_table __rcu *future_tbl;
 
-	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
+	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
 /**
@@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
+struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+					    unsigned int hash);
+struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+						   struct bucket_table *tbl,
+						   unsigned int hash);
+
 #define rht_dereference(p, ht) \
 	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
 
@@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht);
 #define rht_entry(tpos, pos, member) \
 	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 
+static inline struct rhash_head __rcu *const *rht_bucket(
+	const struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
+static inline struct rhash_head __rcu **rht_bucket_var(
+	struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
+static inline struct rhash_head __rcu **rht_bucket_insert(
+	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
 /**
  * rht_for_each_continue - continue iterating over hash chain
  * @pos:	the &struct rhash_head to use as a loop cursor.
@@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * @hash:	the hash value / bucket index
  */
 #define rht_for_each(pos, tbl, hash) \
-	rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
+	rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
 
 /**
  * rht_for_each_entry_continue - continue iterating over hash chain
@@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * @member:	name of the &struct rhash_head within the hashable struct.
  */
 #define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash],	\
+	rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash),	\
 				    tbl, hash, member)
 
 /**
@@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht);
  * This hash chain list-traversal primitive allows for the looped code to
  * remove the loop cursor from the list.
  */
-#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	    \
-	for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \
-	     next = !rht_is_a_nulls(pos) ?				    \
-		       rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
-	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
-	     pos = next,						    \
-	     next = !rht_is_a_nulls(pos) ?				    \
+#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
+	for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
+	     next = !rht_is_a_nulls(pos) ?				      \
+		       rht_dereference_bucket(pos->next, tbl, hash) : NULL;   \
+	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
+	     pos = next,						      \
+	     next = !rht_is_a_nulls(pos) ?				      \
 		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)
 
 /**
@@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * traversal is guarded by rcu_read_lock().
  */
 #define rht_for_each_rcu(pos, tbl, hash)				\
-	rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
+	rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
 
 /**
  * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
@@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht);
  * the _rcu mutation primitives such as rhashtable_insert() as long as the
  * traversal is guarded by rcu_read_lock().
  */
-#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
+	rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
 					tbl, hash, member)
 
 /**
@@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
 
@@ -697,8 +727,12 @@ slow_path:
 	}
 
 	elasticity = ht->elasticity;
-	pprev = &tbl->buckets[hash];
-	rht_for_each(head, tbl, hash) {
+	pprev = rht_bucket_insert(ht, tbl, hash);
+	data = ERR_PTR(-ENOMEM);
+	if (!pprev)
+		goto out;
+
+	rht_for_each_continue(head, *pprev, tbl, hash) {
 		struct rhlist_head *plist;
 		struct rhlist_head *list;
 
@@ -736,7 +770,7 @@ slow_path:
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		goto slow_path;
 
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+	head = rht_dereference_bucket(*pprev, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (rhlist) {
@@ -746,7 +780,7 @@ slow_path:
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(tbl->buckets[hash], obj);
+	rcu_assign_pointer(*pprev, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one(
 
 	spin_lock_bh(lock);
 
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(he, *pprev, tbl, hash) {
 		struct rhlist_head *list;
 
 		list = container_of(he, struct rhlist_head, rhead);
@@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast(
 
 	spin_lock_bh(lock);
 
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(he, *pprev, tbl, hash) {
 		if (he != obj_old) {
 			pprev = &he->next;
 			continue;
-- 
cgit v1.2.3


From 9383191da4e40360a5d880fbe6bb03911c61621b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 16 Feb 2017 22:24:49 +0100
Subject: bpf: remove stubs for cBPF from arch code

Remove the dummy bpf_jit_compile() stubs for eBPF JITs and make
that a single __weak function in the core that can be overridden
similarly to the eBPF one. Also remove stale pr_err() mentions
of bpf_jit_compile.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index e4eb2546339a..c7a70e0cc3a0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -607,6 +607,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
+void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_helper_changes_pkt_data(void *func);
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
@@ -625,7 +626,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 		     bpf_jit_fill_hole_t bpf_fill_ill_insns);
 void bpf_jit_binary_free(struct bpf_binary_header *hdr);
 
-void bpf_jit_compile(struct bpf_prog *fp);
 void bpf_jit_free(struct bpf_prog *fp);
 
 struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp);
@@ -669,10 +669,6 @@ static inline bool bpf_jit_blinding_enabled(void)
 	return true;
 }
 #else
-static inline void bpf_jit_compile(struct bpf_prog *fp)
-{
-}
-
 static inline void bpf_jit_free(struct bpf_prog *fp)
 {
 	bpf_prog_unlock_free(fp);
-- 
cgit v1.2.3


From 74451e66d516c55e309e8d89a4a1e7596e46aacd Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 16 Feb 2017 22:24:50 +0100
Subject: bpf: make jited programs visible in traces

Long standing issue with JITed programs is that stack traces from
function tracing check whether a given address is kernel code
through {__,}kernel_text_address(), which checks for code in core
kernel, modules and dynamically allocated ftrace trampolines. But
what is still missing is BPF JITed programs (interpreted programs
are not an issue as __bpf_prog_run() will be attributed to them),
thus when a stack trace is triggered, the code walking the stack
won't see any of the JITed ones. The same for address correlation
done from user space via reading /proc/kallsyms. This is read by
tools like perf, but the latter is also useful for permanent live
tracing with eBPF itself in combination with stack maps when other
eBPF types are part of the callchain. See offwaketime example on
dumping stack from a map.

This work tries to tackle that issue by making the addresses and
symbols known to the kernel. The lookup from *kernel_text_address()
is implemented through a latched RB tree that can be read under
RCU in fast-path that is also shared for symbol/size/offset lookup
for a specific given address in kallsyms. The slow-path iteration
through all symbols in the seq file done via RCU list, which holds
a tiny fraction of all exported ksyms, usually below 0.1 percent.
Function symbols are exported as bpf_prog_<tag>, in order to aide
debugging and attribution. This facility is currently enabled for
root-only when bpf_jit_kallsyms is set to 1, and disabled if hardening
is active in any mode. The rationale behind this is that still a lot
of systems ship with world read permissions on kallsyms thus addresses
should not get suddenly exposed for them. If that situation gets
much better in future, we always have the option to change the
default on this. Likewise, unprivileged programs are not allowed
to add entries there either, but that is less of a concern as most
such programs types relevant in this context are for root-only anyway.
If enabled, call graphs and stack traces will then show a correct
attribution; one example is illustrated below, where the trace is
now visible in tooling such as perf script --kallsyms=/proc/kallsyms
and friends.

Before:

  7fff8166889d bpf_clone_redirect+0x80007f0020ed (/lib/modules/4.9.0-rc8+/build/vmlinux)
         f5d80 __sendmsg_nocancel+0xffff006451f1a007 (/usr/lib64/libc-2.18.so)

After:

  7fff816688b7 bpf_clone_redirect+0x80007f002107 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fffa0575728 bpf_prog_33c45a467c9e061a+0x8000600020fb (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fffa07ef1fc cls_bpf_classify+0x8000600020dc (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff81678b68 tc_classify+0x80007f002078 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8164d40b __netif_receive_skb_core+0x80007f0025fb (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8164d718 __netif_receive_skb+0x80007f002018 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8164e565 process_backlog+0x80007f002095 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8164dc71 net_rx_action+0x80007f002231 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff81767461 __softirqentry_text_start+0x80007f0020d1 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff817658ac do_softirq_own_stack+0x80007f00201c (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff810a2c20 do_softirq+0x80007f002050 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff810a2cb5 __local_bh_enable_ip+0x80007f002085 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8168d452 ip_finish_output2+0x80007f002152 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8168ea3d ip_finish_output+0x80007f00217d (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff8168f2af ip_output+0x80007f00203f (/lib/modules/4.9.0-rc8+/build/vmlinux)
  [...]
  7fff81005854 do_syscall_64+0x80007f002054 (/lib/modules/4.9.0-rc8+/build/vmlinux)
  7fff817649eb return_from_SYSCALL_64+0x80007f002000 (/lib/modules/4.9.0-rc8+/build/vmlinux)
         f5d80 __sendmsg_nocancel+0xffff01c484812007 (/usr/lib64/libc-2.18.so)

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h    |   4 ++
 include/linux/filter.h | 112 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 57d60dc5b600..909fc033173a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -8,10 +8,12 @@
 #define _LINUX_BPF_H 1
 
 #include <uapi/linux/bpf.h>
+
 #include <linux/workqueue.h>
 #include <linux/file.h>
 #include <linux/percpu.h>
 #include <linux/err.h>
+#include <linux/rbtree_latch.h>
 
 struct perf_event;
 struct bpf_map;
@@ -177,6 +179,8 @@ struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
 	u32 max_ctx_offset;
+	struct latch_tree_node ksym_tnode;
+	struct list_head ksym_lnode;
 	const struct bpf_verifier_ops *ops;
 	struct bpf_map **used_maps;
 	struct bpf_prog *prog;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c7a70e0cc3a0..0c1cc9143cb2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -54,6 +54,12 @@ struct bpf_prog_aux;
 #define BPF_REG_AX		MAX_BPF_REG
 #define MAX_BPF_JIT_REG		(MAX_BPF_REG + 1)
 
+/* As per nm, we expose JITed images as text (code) section for
+ * kallsyms. That way, tools like perf can find it to match
+ * addresses.
+ */
+#define BPF_SYM_ELF_TYPE	't'
+
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
@@ -555,6 +561,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
 	set_memory_rw((unsigned long)fp, fp->pages);
 }
+
+static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
+{
+	set_memory_rw((unsigned long)hdr, hdr->pages);
+}
 #else
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
@@ -563,8 +574,21 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
 }
+
+static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
+{
+}
 #endif /* CONFIG_DEBUG_SET_MODULE_RONX */
 
+static inline struct bpf_binary_header *
+bpf_jit_binary_hdr(const struct bpf_prog *fp)
+{
+	unsigned long real_start = (unsigned long)fp->bpf_func;
+	unsigned long addr = real_start & PAGE_MASK;
+
+	return (void *)addr;
+}
+
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
@@ -617,6 +641,7 @@ void bpf_warn_invalid_xdp_action(u32 act);
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
 extern int bpf_jit_harden;
+extern int bpf_jit_kallsyms;
 
 typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
 
@@ -651,6 +676,11 @@ static inline bool bpf_jit_is_ebpf(void)
 # endif
 }
 
+static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
+{
+	return fp->jited && bpf_jit_is_ebpf();
+}
+
 static inline bool bpf_jit_blinding_enabled(void)
 {
 	/* These are the prerequisites, should someone ever have the
@@ -668,11 +698,91 @@ static inline bool bpf_jit_blinding_enabled(void)
 
 	return true;
 }
-#else
+
+static inline bool bpf_jit_kallsyms_enabled(void)
+{
+	/* There are a couple of corner cases where kallsyms should
+	 * not be enabled f.e. on hardening.
+	 */
+	if (bpf_jit_harden)
+		return false;
+	if (!bpf_jit_kallsyms)
+		return false;
+	if (bpf_jit_kallsyms == 1)
+		return true;
+
+	return false;
+}
+
+const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
+				 unsigned long *off, char *sym);
+bool is_bpf_text_address(unsigned long addr);
+int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+		    char *sym);
+
+static inline const char *
+bpf_address_lookup(unsigned long addr, unsigned long *size,
+		   unsigned long *off, char **modname, char *sym)
+{
+	const char *ret = __bpf_address_lookup(addr, size, off, sym);
+
+	if (ret && modname)
+		*modname = NULL;
+	return ret;
+}
+
+void bpf_prog_kallsyms_add(struct bpf_prog *fp);
+void bpf_prog_kallsyms_del(struct bpf_prog *fp);
+
+#else /* CONFIG_BPF_JIT */
+
+static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
+{
+	return false;
+}
+
 static inline void bpf_jit_free(struct bpf_prog *fp)
 {
 	bpf_prog_unlock_free(fp);
 }
+
+static inline bool bpf_jit_kallsyms_enabled(void)
+{
+	return false;
+}
+
+static inline const char *
+__bpf_address_lookup(unsigned long addr, unsigned long *size,
+		     unsigned long *off, char *sym)
+{
+	return NULL;
+}
+
+static inline bool is_bpf_text_address(unsigned long addr)
+{
+	return false;
+}
+
+static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
+				  char *type, char *sym)
+{
+	return -ERANGE;
+}
+
+static inline const char *
+bpf_address_lookup(unsigned long addr, unsigned long *size,
+		   unsigned long *off, char **modname, char *sym)
+{
+	return NULL;
+}
+
+static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
+{
+}
+
+static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
+{
+}
 #endif /* CONFIG_BPF_JIT */
 
 #define BPF_ANC		BIT(15)
-- 
cgit v1.2.3


From 4f1244c8298606b8fae64b4d78b820ae6b896e3c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Feb 2017 13:59:39 +0100
Subject: block/sed-opal: allocate struct opal_dev dynamically

Insted of bloating the containing structure with it all the time this
allocates struct opal_dev dynamically.  Additionally this allows moving
the definition of struct opal_dev into sed-opal.c.  For this a new
private data field is added to it that is passed to the send/receive
callback.  After that a lot of internals can be made private as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Scott Bauer <scott.bauer@intel.com>
Reviewed-by: Scott Bauer <scott.bauer@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/sed-opal.h | 116 ++---------------------------------------------
 1 file changed, 4 insertions(+), 112 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 205d520ea688..deee23d012e7 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -21,117 +21,14 @@
 #include <uapi/linux/sed-opal.h>
 #include <linux/kernel.h>
 
-/*
- * These constant values come from:
- * SPC-4 section
- * 6.30 SECURITY PROTOCOL IN command / table 265.
- */
-enum {
-	TCG_SECP_00 = 0,
-	TCG_SECP_01,
-};
 struct opal_dev;
 
-#define IO_BUFFER_LENGTH 2048
-#define MAX_TOKS 64
-
-typedef int (*opal_step)(struct opal_dev *dev);
-typedef int (sec_send_recv)(struct opal_dev *ctx, u16 spsp, u8 secp,
-			    void *buffer, size_t len, bool send);
-
-
-enum opal_atom_width {
-	OPAL_WIDTH_TINY,
-	OPAL_WIDTH_SHORT,
-	OPAL_WIDTH_MEDIUM,
-	OPAL_WIDTH_LONG,
-	OPAL_WIDTH_TOKEN
-};
-
-/*
- * Token defs derived from:
- * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
- * 3.2.2 Data Stream Encoding
- */
-enum opal_response_token {
-	OPAL_DTA_TOKENID_BYTESTRING = 0xe0,
-	OPAL_DTA_TOKENID_SINT = 0xe1,
-	OPAL_DTA_TOKENID_UINT = 0xe2,
-	OPAL_DTA_TOKENID_TOKEN = 0xe3, /* actual token is returned */
-	OPAL_DTA_TOKENID_INVALID = 0X0
-};
-
-/*
- * On the parsed response, we don't store again the toks that are already
- * stored in the response buffer. Instead, for each token, we just store a
- * pointer to the position in the buffer where the token starts, and the size
- * of the token in bytes.
- */
-struct opal_resp_tok {
-	const u8 *pos;
-	size_t len;
-	enum opal_response_token type;
-	enum opal_atom_width width;
-	union {
-		u64 u;
-		s64 s;
-	} stored;
-};
-
-/*
- * From the response header it's not possible to know how many tokens there are
- * on the payload. So we hardcode that the maximum will be MAX_TOKS, and later
- * if we start dealing with messages that have more than that, we can increase
- * this number. This is done to avoid having to make two passes through the
- * response, the first one counting how many tokens we have and the second one
- * actually storing the positions.
- */
-struct parsed_resp {
-	int num;
-	struct opal_resp_tok toks[MAX_TOKS];
-};
-
-/**
- * struct opal_dev - The structure representing a OPAL enabled SED.
- * @supported: Whether or not OPAL is supported on this controller.
- * @send_recv: The combined sec_send/sec_recv function pointer.
- * @opal_step: A series of opal methods that are necessary to complete a command.
- * @func_data: An array of parameters for the opal methods above.
- * @state: Describes the current opal_step we're working on.
- * @dev_lock: Locks the entire opal_dev structure.
- * @parsed: Parsed response from controller.
- * @prev_data: Data returned from a method to the controller.
- * @unlk_lst: A list of Locking ranges to unlock on this device during a resume.
- */
-struct opal_dev {
-	bool initialized;
-	bool supported;
-	sec_send_recv *send_recv;
-
-	const opal_step *funcs;
-	void **func_data;
-	int state;
-	struct mutex dev_lock;
-	u16 comid;
-	u32 hsn;
-	u32 tsn;
-	u64 align;
-	u64 lowest_lba;
-
-	size_t pos;
-	u8 cmd[IO_BUFFER_LENGTH];
-	u8 resp[IO_BUFFER_LENGTH];
-
-	struct parsed_resp parsed;
-	size_t prev_d_len;
-	void *prev_data;
-
-	struct list_head unlk_lst;
-};
+typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer,
+		size_t len, bool send);
 
 #ifdef CONFIG_BLK_SED_OPAL
 bool opal_unlock_from_suspend(struct opal_dev *dev);
-void init_opal_dev(struct opal_dev *opal_dev, sec_send_recv *send_recv);
+struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv);
 int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr);
 
 static inline bool is_sed_ioctl(unsigned int cmd)
@@ -168,11 +65,6 @@ static inline bool opal_unlock_from_suspend(struct opal_dev *dev)
 {
 	return false;
 }
-static inline void init_opal_dev(struct opal_dev *opal_dev,
-				 sec_send_recv *send_recv)
-{
-	opal_dev->supported = false;
-	opal_dev->initialized = true;
-}
+#define init_opal_dev(data, send_recv)		NULL
 #endif /* CONFIG_BLK_SED_OPAL */
 #endif /* LINUX_OPAL_H */
-- 
cgit v1.2.3


From 8a9ae523282f324989850fcf41312b42a2fb9296 Mon Sep 17 00:00:00 2001
From: Scott Bauer <scott.bauer@intel.com>
Date: Fri, 17 Feb 2017 13:59:40 +0100
Subject: nvme: Check for Security send/recv support before issuing commands.

We need to verify that the controller supports the security
commands before actually trying to issue them.

Signed-off-by: Scott Bauer <scott.bauer@intel.com>
[hch: moved the check so that we don't call into the OPAL code if not
      supported]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 3d1c6f1b15c9..00eac863a9c7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -244,6 +244,7 @@ enum {
 	NVME_CTRL_ONCS_DSM			= 1 << 2,
 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
+	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
 };
 
 struct nvme_lbaf {
-- 
cgit v1.2.3


From 1e128c81290a419ab9ec8b09fe989f1c6c15a0f4 Mon Sep 17 00:00:00 2001
From: Arun Easi <arun.easi@qlogic.com>
Date: Wed, 15 Feb 2017 06:28:22 -0800
Subject: qed: Add support for hardware offloaded FCoE.

This adds the backbone required for the various HW initalizations
which are necessary for the FCoE driver (qedf) for QLogic FastLinQ
4xxxx line of adapters - FW notification, resource initializations, etc.

Signed-off-by: Arun Easi <arun.easi@cavium.com>
Signed-off-by: Yuval Mintz <yuval.mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/common_hsi.h  |  10 +-
 include/linux/qed/fcoe_common.h | 715 ++++++++++++++++++++++++++++++++++++++++
 include/linux/qed/qed_fcoe_if.h | 145 ++++++++
 include/linux/qed/qed_if.h      |  41 ++-
 4 files changed, 908 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/qed/fcoe_common.h
 create mode 100644 include/linux/qed/qed_fcoe_if.h

(limited to 'include/linux')

diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index c33080baf38c..52966b9bfde3 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -62,6 +62,7 @@
 #define COMMON_QUEUE_ENTRY_MAX_BYTE_SIZE        64
 
 #define ISCSI_CDU_TASK_SEG_TYPE       0
+#define FCOE_CDU_TASK_SEG_TYPE        0
 #define RDMA_CDU_TASK_SEG_TYPE        1
 
 #define FW_ASSERT_GENERAL_ATTN_IDX    32
@@ -205,6 +206,9 @@
 #define	DQ_XCM_ETH_TX_BD_CONS_CMD	DQ_XCM_AGG_VAL_SEL_WORD3
 #define	DQ_XCM_ETH_TX_BD_PROD_CMD	DQ_XCM_AGG_VAL_SEL_WORD4
 #define	DQ_XCM_ETH_GO_TO_BD_CONS_CMD	DQ_XCM_AGG_VAL_SEL_WORD5
+#define DQ_XCM_FCOE_SQ_CONS_CMD             DQ_XCM_AGG_VAL_SEL_WORD3
+#define DQ_XCM_FCOE_SQ_PROD_CMD             DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_FCOE_X_FERQ_PROD_CMD         DQ_XCM_AGG_VAL_SEL_WORD5
 #define DQ_XCM_ISCSI_SQ_CONS_CMD	DQ_XCM_AGG_VAL_SEL_WORD3
 #define DQ_XCM_ISCSI_SQ_PROD_CMD	DQ_XCM_AGG_VAL_SEL_WORD4
 #define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3
@@ -261,6 +265,7 @@
 #define DQ_XCM_ETH_TERMINATE_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
 #define DQ_XCM_ETH_SLOW_PATH_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
 #define DQ_XCM_ETH_TPH_EN_CMD		BIT(DQ_XCM_AGG_FLG_SHIFT_CF23)
+#define DQ_XCM_FCOE_SLOW_PATH_CMD           BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
 #define DQ_XCM_ISCSI_DQ_FLUSH_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
 #define DQ_XCM_ISCSI_SLOW_PATH_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
 #define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23)
@@ -291,6 +296,9 @@
 #define DQ_TCM_AGG_FLG_SHIFT_CF6	6
 #define DQ_TCM_AGG_FLG_SHIFT_CF7	7
 /* TCM agg counter flag selection (FW) */
+#define DQ_TCM_FCOE_FLUSH_Q0_CMD            BIT(DQ_TCM_AGG_FLG_SHIFT_CF1)
+#define DQ_TCM_FCOE_DUMMY_TIMER_CMD         BIT(DQ_TCM_AGG_FLG_SHIFT_CF2)
+#define DQ_TCM_FCOE_TIMER_STOP_ALL_CMD      BIT(DQ_TCM_AGG_FLG_SHIFT_CF3)
 #define DQ_TCM_ISCSI_FLUSH_Q0_CMD	BIT(DQ_TCM_AGG_FLG_SHIFT_CF1)
 #define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD	BIT(DQ_TCM_AGG_FLG_SHIFT_CF3)
 
@@ -728,7 +736,7 @@ enum mf_mode {
 /* Per-protocol connection types */
 enum protocol_type {
 	PROTOCOLID_ISCSI,
-	PROTOCOLID_RESERVED2,
+	PROTOCOLID_FCOE,
 	PROTOCOLID_ROCE,
 	PROTOCOLID_CORE,
 	PROTOCOLID_ETH,
diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h
new file mode 100644
index 000000000000..2e417a45c5f7
--- /dev/null
+++ b/include/linux/qed/fcoe_common.h
@@ -0,0 +1,715 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef __FCOE_COMMON__
+#define __FCOE_COMMON__
+/*********************/
+/* FCOE FW CONSTANTS */
+/*********************/
+
+#define FC_ABTS_REPLY_MAX_PAYLOAD_LEN	12
+#define FCOE_MAX_SIZE_FCP_DATA_SUPER	(8600)
+
+struct fcoe_abts_pkt {
+	__le32 abts_rsp_fc_payload_lo;
+	__le16 abts_rsp_rx_id;
+	u8 abts_rsp_rctl;
+	u8 reserved2;
+};
+
+/* FCoE additional WQE (Sq/XferQ) information */
+union fcoe_additional_info_union {
+	__le32 previous_tid;
+	__le32 parent_tid;
+	__le32 burst_length;
+	__le32 seq_rec_updated_offset;
+};
+
+struct fcoe_exp_ro {
+	__le32 data_offset;
+	__le32 reserved;
+};
+
+union fcoe_cleanup_addr_exp_ro_union {
+	struct regpair abts_rsp_fc_payload_hi;
+	struct fcoe_exp_ro exp_ro;
+};
+
+/* FCoE Ramrod Command IDs */
+enum fcoe_completion_status {
+	FCOE_COMPLETION_STATUS_SUCCESS,
+	FCOE_COMPLETION_STATUS_FCOE_VER_ERR,
+	FCOE_COMPLETION_STATUS_SRC_MAC_ADD_ARR_ERR,
+	MAX_FCOE_COMPLETION_STATUS
+};
+
+struct fc_addr_nw {
+	u8 addr_lo;
+	u8 addr_mid;
+	u8 addr_hi;
+};
+
+/* FCoE connection offload */
+struct fcoe_conn_offload_ramrod_data {
+	struct regpair sq_pbl_addr;
+	struct regpair sq_curr_page_addr;
+	struct regpair sq_next_page_addr;
+	struct regpair xferq_pbl_addr;
+	struct regpair xferq_curr_page_addr;
+	struct regpair xferq_next_page_addr;
+	struct regpair respq_pbl_addr;
+	struct regpair respq_curr_page_addr;
+	struct regpair respq_next_page_addr;
+	__le16 dst_mac_addr_lo;
+	__le16 dst_mac_addr_mid;
+	__le16 dst_mac_addr_hi;
+	__le16 src_mac_addr_lo;
+	__le16 src_mac_addr_mid;
+	__le16 src_mac_addr_hi;
+	__le16 tx_max_fc_pay_len;
+	__le16 e_d_tov_timer_val;
+	__le16 rx_max_fc_pay_len;
+	__le16 vlan_tag;
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_VLAN_ID_MASK              0xFFF
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_VLAN_ID_SHIFT             0
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_CFI_MASK                  0x1
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_CFI_SHIFT                 12
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_PRIORITY_MASK             0x7
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_PRIORITY_SHIFT            13
+	__le16 physical_q0;
+	__le16 rec_rr_tov_timer_val;
+	struct fc_addr_nw s_id;
+	u8 max_conc_seqs_c3;
+	struct fc_addr_nw d_id;
+	u8 flags;
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONT_INCR_SEQ_CNT_MASK  0x1
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONT_INCR_SEQ_CNT_SHIFT 0
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONF_REQ_MASK           0x1
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONF_REQ_SHIFT          1
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_REC_VALID_MASK          0x1
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_REC_VALID_SHIFT         2
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_VLAN_FLAG_MASK          0x1
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_VLAN_FLAG_SHIFT         3
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_MODE_MASK                 0x3
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_MODE_SHIFT                4
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_RESERVED0_MASK            0x3
+#define FCOE_CONN_OFFLOAD_RAMROD_DATA_RESERVED0_SHIFT           6
+	__le16 conn_id;
+	u8 def_q_idx;
+	u8 reserved[5];
+};
+
+/* FCoE terminate connection request */
+struct fcoe_conn_terminate_ramrod_data {
+	struct regpair terminate_params_addr;
+};
+
+struct fcoe_fast_sgl_ctx {
+	struct regpair sgl_start_addr;
+	__le32 sgl_byte_offset;
+	__le16 task_reuse_cnt;
+	__le16 init_offset_in_first_sge;
+};
+
+struct fcoe_slow_sgl_ctx {
+	struct regpair base_sgl_addr;
+	__le16 curr_sge_off;
+	__le16 remainder_num_sges;
+	__le16 curr_sgl_index;
+	__le16 reserved;
+};
+
+struct fcoe_sge {
+	struct regpair sge_addr;
+	__le16 size;
+	__le16 reserved0;
+	u8 reserved1[3];
+	u8 is_valid_sge;
+};
+
+union fcoe_data_desc_ctx {
+	struct fcoe_fast_sgl_ctx fast;
+	struct fcoe_slow_sgl_ctx slow;
+	struct fcoe_sge single_sge;
+};
+
+union fcoe_dix_desc_ctx {
+	struct fcoe_slow_sgl_ctx dix_sgl;
+	struct fcoe_sge cached_dix_sge;
+};
+
+struct fcoe_fcp_cmd_payload {
+	__le32 opaque[8];
+};
+
+struct fcoe_fcp_rsp_payload {
+	__le32 opaque[6];
+};
+
+struct fcoe_fcp_xfer_payload {
+	__le32 opaque[3];
+};
+
+/* FCoE firmware function init */
+struct fcoe_init_func_ramrod_data {
+	struct scsi_init_func_params func_params;
+	struct scsi_init_func_queues q_params;
+	__le16 mtu;
+	__le16 sq_num_pages_in_pbl;
+	__le32 reserved;
+};
+
+/* FCoE: Mode of the connection: Target or Initiator or both */
+enum fcoe_mode_type {
+	FCOE_INITIATOR_MODE = 0x0,
+	FCOE_TARGET_MODE = 0x1,
+	FCOE_BOTH_OR_NOT_CHOSEN = 0x3,
+	MAX_FCOE_MODE_TYPE
+};
+
+struct fcoe_mstorm_fcoe_task_st_ctx_fp {
+	__le16 flags;
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_MASK                 0x7FFF
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_SHIFT                0
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_MASK  0x1
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_SHIFT 15
+	__le16 difDataResidue;
+	__le16 parent_id;
+	__le16 single_sge_saved_offset;
+	__le32 data_2_trns_rem;
+	__le32 offset_in_io;
+	union fcoe_dix_desc_ctx dix_desc;
+	union fcoe_data_desc_ctx data_desc;
+};
+
+struct fcoe_mstorm_fcoe_task_st_ctx_non_fp {
+	__le16 flags;
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_MASK            0x3
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_SHIFT           0
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_MASK               0x1
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_SHIFT              2
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_MASK      0x1
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_SHIFT     3
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_MASK         0xF
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_SHIFT        4
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_MASK            0x3
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_SHIFT           8
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_MASK                  0x1
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_SHIFT                 10
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_MASK  0x1
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_SHIFT 11
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_MASK      0x1
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_SHIFT     12
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_MASK        0x1
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_SHIFT       13
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_MASK        0x1
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_SHIFT       14
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_MASK             0x1
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_SHIFT            15
+	u8 tx_rx_sgl_mode;
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_MASK               0x7
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_SHIFT              0
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_MASK               0x7
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_SHIFT              3
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_MASK                     0x3
+#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_SHIFT                    6
+	u8 rsrv2;
+	__le32 num_prm_zero_read;
+	struct regpair rsp_buf_addr;
+};
+
+struct fcoe_rx_stat {
+	struct regpair fcoe_rx_byte_cnt;
+	struct regpair fcoe_rx_data_pkt_cnt;
+	struct regpair fcoe_rx_xfer_pkt_cnt;
+	struct regpair fcoe_rx_other_pkt_cnt;
+	__le32 fcoe_silent_drop_pkt_cmdq_full_cnt;
+	__le32 fcoe_silent_drop_pkt_rq_full_cnt;
+	__le32 fcoe_silent_drop_pkt_crc_error_cnt;
+	__le32 fcoe_silent_drop_pkt_task_invalid_cnt;
+	__le32 fcoe_silent_drop_total_pkt_cnt;
+	__le32 rsrv;
+};
+
+enum fcoe_sgl_mode {
+	FCOE_SLOW_SGL,
+	FCOE_SINGLE_FAST_SGE,
+	FCOE_2_FAST_SGE,
+	FCOE_3_FAST_SGE,
+	FCOE_4_FAST_SGE,
+	FCOE_MUL_FAST_SGES,
+	MAX_FCOE_SGL_MODE
+};
+
+struct fcoe_stat_ramrod_data {
+	struct regpair stat_params_addr;
+};
+
+struct protection_info_ctx {
+	__le16 flags;
+#define PROTECTION_INFO_CTX_HOST_INTERFACE_MASK        0x3
+#define PROTECTION_INFO_CTX_HOST_INTERFACE_SHIFT       0
+#define PROTECTION_INFO_CTX_DIF_TO_PEER_MASK           0x1
+#define PROTECTION_INFO_CTX_DIF_TO_PEER_SHIFT          2
+#define PROTECTION_INFO_CTX_VALIDATE_DIX_APP_TAG_MASK  0x1
+#define PROTECTION_INFO_CTX_VALIDATE_DIX_APP_TAG_SHIFT 3
+#define PROTECTION_INFO_CTX_INTERVAL_SIZE_LOG_MASK     0xF
+#define PROTECTION_INFO_CTX_INTERVAL_SIZE_LOG_SHIFT    4
+#define PROTECTION_INFO_CTX_VALIDATE_DIX_REF_TAG_MASK  0x1
+#define PROTECTION_INFO_CTX_VALIDATE_DIX_REF_TAG_SHIFT 8
+#define PROTECTION_INFO_CTX_RESERVED0_MASK             0x7F
+#define PROTECTION_INFO_CTX_RESERVED0_SHIFT            9
+	u8 dix_block_size;
+	u8 dst_size;
+};
+
+union protection_info_union_ctx {
+	struct protection_info_ctx info;
+	__le32 value;
+};
+
+struct fcp_rsp_payload_padded {
+	struct fcoe_fcp_rsp_payload rsp_payload;
+	__le32 reserved[2];
+};
+
+struct fcp_xfer_payload_padded {
+	struct fcoe_fcp_xfer_payload xfer_payload;
+	__le32 reserved[5];
+};
+
+struct fcoe_tx_data_params {
+	__le32 data_offset;
+	__le32 offset_in_io;
+	u8 flags;
+#define FCOE_TX_DATA_PARAMS_OFFSET_IN_IO_VALID_MASK  0x1
+#define FCOE_TX_DATA_PARAMS_OFFSET_IN_IO_VALID_SHIFT 0
+#define FCOE_TX_DATA_PARAMS_DROP_DATA_MASK           0x1
+#define FCOE_TX_DATA_PARAMS_DROP_DATA_SHIFT          1
+#define FCOE_TX_DATA_PARAMS_AFTER_SEQ_REC_MASK       0x1
+#define FCOE_TX_DATA_PARAMS_AFTER_SEQ_REC_SHIFT      2
+#define FCOE_TX_DATA_PARAMS_RESERVED0_MASK           0x1F
+#define FCOE_TX_DATA_PARAMS_RESERVED0_SHIFT          3
+	u8 dif_residual;
+	__le16 seq_cnt;
+	__le16 single_sge_saved_offset;
+	__le16 next_dif_offset;
+	__le16 seq_id;
+	__le16 reserved3;
+};
+
+struct fcoe_tx_mid_path_params {
+	__le32 parameter;
+	u8 r_ctl;
+	u8 type;
+	u8 cs_ctl;
+	u8 df_ctl;
+	__le16 rx_id;
+	__le16 ox_id;
+};
+
+struct fcoe_tx_params {
+	struct fcoe_tx_data_params data;
+	struct fcoe_tx_mid_path_params mid_path;
+};
+
+union fcoe_tx_info_union_ctx {
+	struct fcoe_fcp_cmd_payload fcp_cmd_payload;
+	struct fcp_rsp_payload_padded fcp_rsp_payload;
+	struct fcp_xfer_payload_padded fcp_xfer_payload;
+	struct fcoe_tx_params tx_params;
+};
+
+struct ystorm_fcoe_task_st_ctx {
+	u8 task_type;
+	u8 sgl_mode;
+#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK  0x7
+#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_SHIFT 0
+#define YSTORM_FCOE_TASK_ST_CTX_RSRV_MASK         0x1F
+#define YSTORM_FCOE_TASK_ST_CTX_RSRV_SHIFT        3
+	u8 cached_dix_sge;
+	u8 expect_first_xfer;
+	__le32 num_pbf_zero_write;
+	union protection_info_union_ctx protection_info_union;
+	__le32 data_2_trns_rem;
+	union fcoe_tx_info_union_ctx tx_info_union;
+	union fcoe_dix_desc_ctx dix_desc;
+	union fcoe_data_desc_ctx data_desc;
+	__le16 ox_id;
+	__le16 rx_id;
+	__le32 task_rety_identifier;
+	__le32 reserved1[2];
+};
+
+struct ystorm_fcoe_task_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	__le16 word0;
+	u8 flags0;
+#define YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_MASK     0xF
+#define YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_SHIFT    0
+#define YSTORM_FCOE_TASK_AG_CTX_BIT0_MASK        0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT0_SHIFT       4
+#define YSTORM_FCOE_TASK_AG_CTX_BIT1_MASK        0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT       5
+#define YSTORM_FCOE_TASK_AG_CTX_BIT2_MASK        0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT       6
+#define YSTORM_FCOE_TASK_AG_CTX_BIT3_MASK        0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT       7
+	u8 flags1;
+#define YSTORM_FCOE_TASK_AG_CTX_CF0_MASK         0x3
+#define YSTORM_FCOE_TASK_AG_CTX_CF0_SHIFT        0
+#define YSTORM_FCOE_TASK_AG_CTX_CF1_MASK         0x3
+#define YSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT        2
+#define YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_MASK  0x3
+#define YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_SHIFT 4
+#define YSTORM_FCOE_TASK_AG_CTX_CF0EN_MASK       0x1
+#define YSTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT      6
+#define YSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK       0x1
+#define YSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT      7
+	u8 flags2;
+#define YSTORM_FCOE_TASK_AG_CTX_BIT4_MASK        0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT4_SHIFT       0
+#define YSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK     0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT    1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK     0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT    2
+#define YSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK     0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT    3
+#define YSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK     0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT    4
+#define YSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK     0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT    5
+#define YSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK     0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT    6
+#define YSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK     0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT    7
+	u8 byte2;
+	__le32 reg0;
+	u8 byte3;
+	u8 byte4;
+	__le16 rx_id;
+	__le16 word2;
+	__le16 word3;
+	__le16 word4;
+	__le16 word5;
+	__le32 reg1;
+	__le32 reg2;
+};
+
+struct tstorm_fcoe_task_ag_ctx {
+	u8 reserved;
+	u8 byte1;
+	__le16 icid;
+	u8 flags0;
+#define TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK     0xF
+#define TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT    0
+#define TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK        0x1
+#define TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT       4
+#define TSTORM_FCOE_TASK_AG_CTX_BIT1_MASK                0x1
+#define TSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT               5
+#define TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_MASK     0x1
+#define TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_SHIFT    6
+#define TSTORM_FCOE_TASK_AG_CTX_VALID_MASK               0x1
+#define TSTORM_FCOE_TASK_AG_CTX_VALID_SHIFT              7
+	u8 flags1;
+#define TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_MASK        0x1
+#define TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_SHIFT       0
+#define TSTORM_FCOE_TASK_AG_CTX_BIT5_MASK                0x1
+#define TSTORM_FCOE_TASK_AG_CTX_BIT5_SHIFT               1
+#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_MASK       0x3
+#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_SHIFT      2
+#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_MASK           0x3
+#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_SHIFT          4
+#define TSTORM_FCOE_TASK_AG_CTX_CF2_MASK                 0x3
+#define TSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT                6
+	u8 flags2;
+#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_MASK      0x3
+#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_SHIFT     0
+#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK       0x3
+#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT      2
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_MASK         0x3
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_SHIFT        4
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_MASK     0x3
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_SHIFT    6
+	u8 flags3;
+#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_MASK       0x3
+#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_SHIFT      0
+#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_MASK    0x1
+#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_SHIFT   2
+#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_MASK        0x1
+#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_SHIFT       3
+#define TSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK               0x1
+#define TSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT              4
+#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_MASK   0x1
+#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_SHIFT  5
+#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK    0x1
+#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT   6
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_MASK      0x1
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_SHIFT     7
+	u8 flags4;
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_MASK  0x1
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_SHIFT 0
+#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_MASK    0x1
+#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_SHIFT   1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK             0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT            2
+#define TSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK             0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT            3
+#define TSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK             0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT            4
+#define TSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK             0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT            5
+#define TSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK             0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT            6
+#define TSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK             0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT            7
+	u8 cleanup_state;
+	__le16 last_sent_tid;
+	__le32 rec_rr_tov_exp_timeout;
+	u8 byte3;
+	u8 byte4;
+	__le16 word2;
+	__le16 word3;
+	__le16 word4;
+	__le32 data_offset_end_of_seq;
+	__le32 data_offset_next;
+};
+
+struct fcoe_tstorm_fcoe_task_st_ctx_read_write {
+	union fcoe_cleanup_addr_exp_ro_union cleanup_addr_exp_ro_union;
+	__le16 flags;
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_MASK       0x7
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_SHIFT      0
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_MASK   0x1
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_SHIFT  3
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_MASK        0x1
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_SHIFT       4
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_MASK       0x1
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_SHIFT      5
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_MASK  0x1
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_SHIFT 6
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_MASK   0x1
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_SHIFT  7
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_MASK        0x3
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_SHIFT       8
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_MASK             0x3F
+#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_SHIFT            10
+	__le16 seq_cnt;
+	u8 seq_id;
+	u8 ooo_rx_seq_id;
+	__le16 rx_id;
+	struct fcoe_abts_pkt abts_data;
+	__le32 e_d_tov_exp_timeout_val;
+	__le16 ooo_rx_seq_cnt;
+	__le16 reserved1;
+};
+
+struct fcoe_tstorm_fcoe_task_st_ctx_read_only {
+	u8 task_type;
+	u8 dev_type;
+	u8 conf_supported;
+	u8 glbl_q_num;
+	__le32 cid;
+	__le32 fcp_cmd_trns_size;
+	__le32 rsrv;
+};
+
+struct tstorm_fcoe_task_st_ctx {
+	struct fcoe_tstorm_fcoe_task_st_ctx_read_write read_write;
+	struct fcoe_tstorm_fcoe_task_st_ctx_read_only read_only;
+};
+
+struct mstorm_fcoe_task_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	__le16 icid;
+	u8 flags0;
+#define MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK    0xF
+#define MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT   0
+#define MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK       0x1
+#define MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT      4
+#define MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_MASK         0x1
+#define MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_SHIFT        5
+#define MSTORM_FCOE_TASK_AG_CTX_BIT2_MASK               0x1
+#define MSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT              6
+#define MSTORM_FCOE_TASK_AG_CTX_BIT3_MASK               0x1
+#define MSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT              7
+	u8 flags1;
+#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK      0x3
+#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT     0
+#define MSTORM_FCOE_TASK_AG_CTX_CF1_MASK                0x3
+#define MSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT               2
+#define MSTORM_FCOE_TASK_AG_CTX_CF2_MASK                0x3
+#define MSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT               4
+#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK   0x1
+#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT  6
+#define MSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK              0x1
+#define MSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT             7
+	u8 flags2;
+#define MSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK              0x1
+#define MSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT             0
+#define MSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK            0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT           1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK            0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT           2
+#define MSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK            0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT           3
+#define MSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK            0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT           4
+#define MSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK            0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT           5
+#define MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_MASK  0x1
+#define MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_SHIFT 6
+#define MSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK            0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT           7
+	u8 cleanup_state;
+	__le32 received_bytes;
+	u8 byte3;
+	u8 glbl_q_num;
+	__le16 word1;
+	__le16 tid_to_xfer;
+	__le16 word3;
+	__le16 word4;
+	__le16 word5;
+	__le32 expected_bytes;
+	__le32 reg2;
+};
+
+struct mstorm_fcoe_task_st_ctx {
+	struct fcoe_mstorm_fcoe_task_st_ctx_non_fp non_fp;
+	struct fcoe_mstorm_fcoe_task_st_ctx_fp fp;
+};
+
+struct ustorm_fcoe_task_ag_ctx {
+	u8 reserved;
+	u8 byte1;
+	__le16 icid;
+	u8 flags0;
+#define USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK  0xF
+#define USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0
+#define USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK     0x1
+#define USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT    4
+#define USTORM_FCOE_TASK_AG_CTX_BIT1_MASK             0x1
+#define USTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT            5
+#define USTORM_FCOE_TASK_AG_CTX_CF0_MASK              0x3
+#define USTORM_FCOE_TASK_AG_CTX_CF0_SHIFT             6
+	u8 flags1;
+#define USTORM_FCOE_TASK_AG_CTX_CF1_MASK              0x3
+#define USTORM_FCOE_TASK_AG_CTX_CF1_SHIFT             0
+#define USTORM_FCOE_TASK_AG_CTX_CF2_MASK              0x3
+#define USTORM_FCOE_TASK_AG_CTX_CF2_SHIFT             2
+#define USTORM_FCOE_TASK_AG_CTX_CF3_MASK              0x3
+#define USTORM_FCOE_TASK_AG_CTX_CF3_SHIFT             4
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_MASK     0x3
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_SHIFT    6
+	u8 flags2;
+#define USTORM_FCOE_TASK_AG_CTX_CF0EN_MASK            0x1
+#define USTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT           0
+#define USTORM_FCOE_TASK_AG_CTX_CF1EN_MASK            0x1
+#define USTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT           1
+#define USTORM_FCOE_TASK_AG_CTX_CF2EN_MASK            0x1
+#define USTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT           2
+#define USTORM_FCOE_TASK_AG_CTX_CF3EN_MASK            0x1
+#define USTORM_FCOE_TASK_AG_CTX_CF3EN_SHIFT           3
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK  0x1
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT 4
+#define USTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK          0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT         5
+#define USTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK          0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT         6
+#define USTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK          0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT         7
+	u8 flags3;
+#define USTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK          0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT         0
+#define USTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK          0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT         1
+#define USTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK          0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT         2
+#define USTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK          0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT         3
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_MASK   0xF
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT  4
+	__le32 dif_err_intervals;
+	__le32 dif_error_1st_interval;
+	__le32 global_cq_num;
+	__le32 reg3;
+	__le32 reg4;
+	__le32 reg5;
+};
+
+struct fcoe_task_context {
+	struct ystorm_fcoe_task_st_ctx ystorm_st_context;
+	struct tdif_task_context tdif_context;
+	struct ystorm_fcoe_task_ag_ctx ystorm_ag_context;
+	struct tstorm_fcoe_task_ag_ctx tstorm_ag_context;
+	struct timers_context timer_context;
+	struct tstorm_fcoe_task_st_ctx tstorm_st_context;
+	struct regpair tstorm_st_padding[2];
+	struct mstorm_fcoe_task_ag_ctx mstorm_ag_context;
+	struct mstorm_fcoe_task_st_ctx mstorm_st_context;
+	struct ustorm_fcoe_task_ag_ctx ustorm_ag_context;
+	struct rdif_task_context rdif_context;
+};
+
+struct fcoe_tx_stat {
+	struct regpair fcoe_tx_byte_cnt;
+	struct regpair fcoe_tx_data_pkt_cnt;
+	struct regpair fcoe_tx_xfer_pkt_cnt;
+	struct regpair fcoe_tx_other_pkt_cnt;
+};
+
+struct fcoe_wqe {
+	__le16 task_id;
+	__le16 flags;
+#define FCOE_WQE_REQ_TYPE_MASK        0xF
+#define FCOE_WQE_REQ_TYPE_SHIFT       0
+#define FCOE_WQE_SGL_MODE_MASK        0x7
+#define FCOE_WQE_SGL_MODE_SHIFT       4
+#define FCOE_WQE_CONTINUATION_MASK    0x1
+#define FCOE_WQE_CONTINUATION_SHIFT   7
+#define FCOE_WQE_INVALIDATE_PTU_MASK  0x1
+#define FCOE_WQE_INVALIDATE_PTU_SHIFT 8
+#define FCOE_WQE_SUPER_IO_MASK        0x1
+#define FCOE_WQE_SUPER_IO_SHIFT       9
+#define FCOE_WQE_SEND_AUTO_RSP_MASK   0x1
+#define FCOE_WQE_SEND_AUTO_RSP_SHIFT  10
+#define FCOE_WQE_RESERVED0_MASK       0x1F
+#define FCOE_WQE_RESERVED0_SHIFT      11
+	union fcoe_additional_info_union additional_info_union;
+};
+
+struct xfrqe_prot_flags {
+	u8 flags;
+#define XFRQE_PROT_FLAGS_PROT_INTERVAL_SIZE_LOG_MASK  0xF
+#define XFRQE_PROT_FLAGS_PROT_INTERVAL_SIZE_LOG_SHIFT 0
+#define XFRQE_PROT_FLAGS_DIF_TO_PEER_MASK             0x1
+#define XFRQE_PROT_FLAGS_DIF_TO_PEER_SHIFT            4
+#define XFRQE_PROT_FLAGS_HOST_INTERFACE_MASK          0x3
+#define XFRQE_PROT_FLAGS_HOST_INTERFACE_SHIFT         5
+#define XFRQE_PROT_FLAGS_RESERVED_MASK                0x1
+#define XFRQE_PROT_FLAGS_RESERVED_SHIFT               7
+};
+
+struct fcoe_db_data {
+	u8 params;
+#define FCOE_DB_DATA_DEST_MASK         0x3
+#define FCOE_DB_DATA_DEST_SHIFT        0
+#define FCOE_DB_DATA_AGG_CMD_MASK      0x3
+#define FCOE_DB_DATA_AGG_CMD_SHIFT     2
+#define FCOE_DB_DATA_BYPASS_EN_MASK    0x1
+#define FCOE_DB_DATA_BYPASS_EN_SHIFT   4
+#define FCOE_DB_DATA_RESERVED_MASK     0x1
+#define FCOE_DB_DATA_RESERVED_SHIFT    5
+#define FCOE_DB_DATA_AGG_VAL_SEL_MASK  0x3
+#define FCOE_DB_DATA_AGG_VAL_SEL_SHIFT 6
+	u8 agg_flags;
+	__le16 sq_prod;
+};
+#endif /* __FCOE_COMMON__ */
diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h
new file mode 100644
index 000000000000..bd6bcb809415
--- /dev/null
+++ b/include/linux/qed/qed_fcoe_if.h
@@ -0,0 +1,145 @@
+#ifndef _QED_FCOE_IF_H
+#define _QED_FCOE_IF_H
+#include <linux/types.h>
+#include <linux/qed/qed_if.h>
+struct qed_fcoe_stats {
+	u64 fcoe_rx_byte_cnt;
+	u64 fcoe_rx_data_pkt_cnt;
+	u64 fcoe_rx_xfer_pkt_cnt;
+	u64 fcoe_rx_other_pkt_cnt;
+	u32 fcoe_silent_drop_pkt_cmdq_full_cnt;
+	u32 fcoe_silent_drop_pkt_rq_full_cnt;
+	u32 fcoe_silent_drop_pkt_crc_error_cnt;
+	u32 fcoe_silent_drop_pkt_task_invalid_cnt;
+	u32 fcoe_silent_drop_total_pkt_cnt;
+
+	u64 fcoe_tx_byte_cnt;
+	u64 fcoe_tx_data_pkt_cnt;
+	u64 fcoe_tx_xfer_pkt_cnt;
+	u64 fcoe_tx_other_pkt_cnt;
+};
+
+struct qed_dev_fcoe_info {
+	struct qed_dev_info common;
+
+	void __iomem *primary_dbq_rq_addr;
+	void __iomem *secondary_bdq_rq_addr;
+};
+
+struct qed_fcoe_params_offload {
+	dma_addr_t sq_pbl_addr;
+	dma_addr_t sq_curr_page_addr;
+	dma_addr_t sq_next_page_addr;
+
+	u8 src_mac[ETH_ALEN];
+	u8 dst_mac[ETH_ALEN];
+
+	u16 tx_max_fc_pay_len;
+	u16 e_d_tov_timer_val;
+	u16 rec_tov_timer_val;
+	u16 rx_max_fc_pay_len;
+	u16 vlan_tag;
+
+	struct fc_addr_nw s_id;
+	u8 max_conc_seqs_c3;
+	struct fc_addr_nw d_id;
+	u8 flags;
+	u8 def_q_idx;
+};
+
+#define MAX_TID_BLOCKS_FCOE (512)
+struct qed_fcoe_tid {
+	u32 size;		/* In bytes per task */
+	u32 num_tids_per_block;
+	u8 *blocks[MAX_TID_BLOCKS_FCOE];
+};
+
+struct qed_fcoe_cb_ops {
+	struct qed_common_cb_ops common;
+	 u32 (*get_login_failures)(void *cookie);
+};
+
+void qed_fcoe_set_pf_params(struct qed_dev *cdev,
+			    struct qed_fcoe_pf_params *params);
+
+/**
+ * struct qed_fcoe_ops - qed FCoE operations.
+ * @common:		common operations pointer
+ * @fill_dev_info:	fills FCoE specific information
+ *			@param cdev
+ *			@param info
+ *			@return 0 on sucesss, otherwise error value.
+ * @register_ops:	register FCoE operations
+ *			@param cdev
+ *			@param ops - specified using qed_iscsi_cb_ops
+ *			@param cookie - driver private
+ * @ll2:		light L2 operations pointer
+ * @start:		fcoe in FW
+ *			@param cdev
+ *			@param tasks - qed will fill information about tasks
+ *			return 0 on success, otherwise error value.
+ * @stop:		stops fcoe in FW
+ *			@param cdev
+ *			return 0 on success, otherwise error value.
+ * @acquire_conn:	acquire a new fcoe connection
+ *			@param cdev
+ *			@param handle - qed will fill handle that should be
+ *				used henceforth as identifier of the
+ *				connection.
+ *			@param p_doorbell - qed will fill the address of the
+ *				doorbell.
+ *			return 0 on sucesss, otherwise error value.
+ * @release_conn:	release a previously acquired fcoe connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			return 0 on success, otherwise error value.
+ * @offload_conn:	configures an offloaded connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@param conn_info - the configuration to use for the
+ *				offload.
+ *			return 0 on success, otherwise error value.
+ * @destroy_conn:	stops an offloaded connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@param terminate_params
+ *			return 0 on success, otherwise error value.
+ * @get_stats:		gets FCoE related statistics
+ *			@param cdev
+ *			@param stats - pointer to struck that would be filled
+ *				we stats
+ *			return 0 on success, error otherwise.
+ */
+struct qed_fcoe_ops {
+	const struct qed_common_ops *common;
+
+	int (*fill_dev_info)(struct qed_dev *cdev,
+			     struct qed_dev_fcoe_info *info);
+
+	void (*register_ops)(struct qed_dev *cdev,
+			     struct qed_fcoe_cb_ops *ops, void *cookie);
+
+	const struct qed_ll2_ops *ll2;
+
+	int (*start)(struct qed_dev *cdev, struct qed_fcoe_tid *tasks);
+
+	int (*stop)(struct qed_dev *cdev);
+
+	int (*acquire_conn)(struct qed_dev *cdev,
+			    u32 *handle,
+			    u32 *fw_cid, void __iomem **p_doorbell);
+
+	int (*release_conn)(struct qed_dev *cdev, u32 handle);
+
+	int (*offload_conn)(struct qed_dev *cdev,
+			    u32 handle,
+			    struct qed_fcoe_params_offload *conn_info);
+	int (*destroy_conn)(struct qed_dev *cdev,
+			    u32 handle, dma_addr_t terminate_params);
+
+	int (*get_stats)(struct qed_dev *cdev, struct qed_fcoe_stats *stats);
+};
+
+const struct qed_fcoe_ops *qed_get_fcoe_ops(void);
+void qed_put_fcoe_ops(void);
+#endif
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index d1576a2bcfc9..fde56c436f71 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -59,7 +59,6 @@ enum dcbx_protocol_type {
 
 #define QED_ROCE_PROTOCOL_INDEX (3)
 
-#ifdef CONFIG_DCB
 #define QED_LLDP_CHASSIS_ID_STAT_LEN 4
 #define QED_LLDP_PORT_ID_STAT_LEN 4
 #define QED_DCBX_MAX_APP_PROTOCOL 32
@@ -155,7 +154,6 @@ struct qed_dcbx_get {
 	struct qed_dcbx_remote_params remote;
 	struct qed_dcbx_admin_params local;
 };
-#endif
 
 enum qed_led_mode {
 	QED_LED_MODE_OFF,
@@ -182,6 +180,38 @@ struct qed_eth_pf_params {
 	u16 num_cons;
 };
 
+struct qed_fcoe_pf_params {
+	/* The following parameters are used during protocol-init */
+	u64 glbl_q_params_addr;
+	u64 bdq_pbl_base_addr[2];
+
+	/* The following parameters are used during HW-init
+	 * and these parameters need to be passed as arguments
+	 * to update_pf_params routine invoked before slowpath start
+	 */
+	u16 num_cons;
+	u16 num_tasks;
+
+	/* The following parameters are used during protocol-init */
+	u16 sq_num_pbl_pages;
+
+	u16 cq_num_entries;
+	u16 cmdq_num_entries;
+	u16 rq_buffer_log_size;
+	u16 mtu;
+	u16 dummy_icid;
+	u16 bdq_xoff_threshold[2];
+	u16 bdq_xon_threshold[2];
+	u16 rq_buffer_size;
+	u8 num_cqs;		/* num of global CQs */
+	u8 log_page_size;
+	u8 gl_rq_pi;
+	u8 gl_cmd_pi;
+	u8 debug_mode;
+	u8 is_target;
+	u8 bdq_pbl_num_entries[2];
+};
+
 /* Most of the the parameters below are described in the FW iSCSI / TCP HSI */
 struct qed_iscsi_pf_params {
 	u64 glbl_q_params_addr;
@@ -245,6 +275,7 @@ struct qed_rdma_pf_params {
 
 struct qed_pf_params {
 	struct qed_eth_pf_params eth_pf_params;
+	struct qed_fcoe_pf_params fcoe_pf_params;
 	struct qed_iscsi_pf_params iscsi_pf_params;
 	struct qed_rdma_pf_params rdma_pf_params;
 };
@@ -305,6 +336,7 @@ enum qed_sb_type {
 enum qed_protocol {
 	QED_PROTOCOL_ETH,
 	QED_PROTOCOL_ISCSI,
+	QED_PROTOCOL_FCOE,
 };
 
 enum qed_link_mode_bits {
@@ -391,6 +423,7 @@ struct qed_int_info {
 struct qed_common_cb_ops {
 	void	(*link_update)(void			*dev,
 			       struct qed_link_output	*link);
+	void	(*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
 };
 
 struct qed_selftest_ops {
@@ -494,6 +527,10 @@ struct qed_common_ops {
 
 	void		(*simd_handler_clean)(struct qed_dev *cdev,
 					      int index);
+	int (*dbg_grc)(struct qed_dev *cdev,
+		       void *buffer, u32 *num_dumped_bytes);
+
+	int (*dbg_grc_size)(struct qed_dev *cdev);
 
 	int (*dbg_all_data) (struct qed_dev *cdev, void *buffer);
 
-- 
cgit v1.2.3


From bd4b9f8b4af7be15fd162276ec9a2a1d49f10270 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 17 Feb 2017 12:45:37 +0800
Subject: sctp: add support for generating stream reconf resp chunk

This patch is to define Re-configuration Response Parameter described
in rfc6525 section 4.4. As optional fields are only for SSN/TSN Reset
Request Parameter, it uses another function to make that.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index b055788de0cf..7a4804c4a593 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -749,4 +749,28 @@ struct sctp_strreset_addstrm {
 	__u16 reserved;
 };
 
+enum {
+	SCTP_STRRESET_NOTHING_TO_DO	= 0x00,
+	SCTP_STRRESET_PERFORMED		= 0x01,
+	SCTP_STRRESET_DENIED		= 0x02,
+	SCTP_STRRESET_ERR_WRONG_SSN	= 0x03,
+	SCTP_STRRESET_ERR_IN_PROGRESS	= 0x04,
+	SCTP_STRRESET_ERR_BAD_SEQNO	= 0x05,
+	SCTP_STRRESET_IN_PROGRESS	= 0x06,
+};
+
+struct sctp_strreset_resp {
+	sctp_paramhdr_t param_hdr;
+	__u32 response_seq;
+	__u32 result;
+};
+
+struct sctp_strreset_resptsn {
+	sctp_paramhdr_t param_hdr;
+	__u32 response_seq;
+	__u32 result;
+	__u32 senders_next_tsn;
+	__u32 receivers_next_tsn;
+};
+
 #endif /* __LINUX_SCTP_H__ */
-- 
cgit v1.2.3


From d24cdcd3e40a6825135498e11c20c7976b9bf545 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Jan 2017 12:06:09 +0100
Subject: libceph: use BUG() instead of BUG_ON(1)

I ran into this compile warning, which is the result of BUG_ON(1)
not always leading to the compiler treating the code path as
unreachable:

    include/linux/ceph/osdmap.h: In function 'ceph_can_shift_osds':
    include/linux/ceph/osdmap.h:62:1: error: control reaches end of non-void function [-Werror=return-type]

Using BUG() here avoids the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 9a9041784dcf..412906609954 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -57,7 +57,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
 	case CEPH_POOL_TYPE_EC:
 		return false;
 	default:
-		BUG_ON(1);
+		BUG();
 	}
 }
 
-- 
cgit v1.2.3


From 66a0e2d579dbec5c676cfe446234ffebb267c564 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Tue, 31 Jan 2017 15:55:06 +0100
Subject: crush: remove mutable part of CRUSH map

Then add it to the working state. It would be very nice if we didn't
have to take a lock to calculate a crush placement. By moving the
permutation array into the working data, we can treat the CRUSH map as
immutable.

Reflects ceph.git commit cbcd039651c0569551cb90d26ce27e1432671f2a.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h  |  1 +
 include/linux/crush/crush.h  | 41 ++++++++++++++++++++++++++++++++++-------
 include/linux/crush/mapper.h |  4 +++-
 3 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 412906609954..cef1cab789b9 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -175,6 +175,7 @@ struct ceph_osdmap {
 
 	struct mutex crush_scratch_mutex;
 	int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3];
+	void *crush_workspace;
 };
 
 static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd)
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index be8f12b8f195..fbecbd089d75 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -135,13 +135,6 @@ struct crush_bucket {
 	__u32 size;      /* num items */
 	__s32 *items;
 
-	/*
-	 * cached random permutation: used for uniform bucket and for
-	 * the linear search fallback for the other bucket types.
-	 */
-	__u32 perm_x;  /* @x for which *perm is defined */
-	__u32 perm_n;  /* num elements of *perm that are permuted/defined */
-	__u32 *perm;
 };
 
 struct crush_bucket_uniform {
@@ -211,6 +204,21 @@ struct crush_map {
 	 * device fails. */
 	__u8 chooseleaf_stable;
 
+	/*
+	 * This value is calculated after decode or construction by
+	 * the builder. It is exposed here (rather than having a
+	 * 'build CRUSH working space' function) so that callers can
+	 * reserve a static buffer, allocate space on the stack, or
+	 * otherwise avoid calling into the heap allocator if they
+	 * want to. The size of the working space depends on the map,
+	 * while the size of the scratch vector passed to the mapper
+	 * depends on the size of the desired result set.
+	 *
+	 * Nothing stops the caller from allocating both in one swell
+	 * foop and passing in two points, though.
+	 */
+	size_t working_size;
+
 #ifndef __KERNEL__
 	/*
 	 * version 0 (original) of straw_calc has various flaws.  version 1
@@ -248,4 +256,23 @@ static inline int crush_calc_tree_node(int i)
 	return ((i+1) << 1)-1;
 }
 
+/*
+ * These data structures are private to the CRUSH implementation. They
+ * are exposed in this header file because builder needs their
+ * definitions to calculate the total working size.
+ *
+ * Moving this out of the crush map allow us to treat the CRUSH map as
+ * immutable within the mapper and removes the requirement for a CRUSH
+ * map lock.
+ */
+struct crush_work_bucket {
+	__u32 perm_x; /* @x for which *perm is defined */
+	__u32 perm_n; /* num elements of *perm that are permuted/defined */
+	__u32 *perm;  /* Permutation of the bucket's items */
+};
+
+struct crush_work {
+	struct crush_work_bucket **work; /* Per-bucket working store */
+};
+
 #endif
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index 5dfd5b1125d2..3303c7fd8a31 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -15,6 +15,8 @@ extern int crush_do_rule(const struct crush_map *map,
 			 int ruleno,
 			 int x, int *result, int result_max,
 			 const __u32 *weights, int weight_max,
-			 int *scratch);
+			 void *cwin, int *scratch);
+
+void crush_init_workspace(const struct crush_map *map, void *v);
 
 #endif
-- 
cgit v1.2.3


From 743efcffffc6620ab44ea9ec67c7e4e28dfa7742 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Tue, 31 Jan 2017 15:55:06 +0100
Subject: crush: merge working data and scratch

Much like Arlo Guthrie, I decided that one big pile is better than two
little piles.

Reflects ceph.git commit 95c2df6c7e0b22d2ea9d91db500cf8b9441c73ba.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osdmap.h  |  3 +--
 include/linux/crush/mapper.h | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index cef1cab789b9..8cebdc4158c3 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -173,8 +173,7 @@ struct ceph_osdmap {
 	 * the list of osds that store+replicate them. */
 	struct crush_map *crush;
 
-	struct mutex crush_scratch_mutex;
-	int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3];
+	struct mutex crush_workspace_mutex;
 	void *crush_workspace;
 };
 
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index 3303c7fd8a31..c95e19e1ff11 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -15,7 +15,19 @@ extern int crush_do_rule(const struct crush_map *map,
 			 int ruleno,
 			 int x, int *result, int result_max,
 			 const __u32 *weights, int weight_max,
-			 void *cwin, int *scratch);
+			 void *cwin);
+
+/*
+ * Returns the exact amount of workspace that will need to be used
+ * for a given combination of crush_map and result_max. The caller can
+ * then allocate this much on its own, either on the stack, in a
+ * per-thread long-lived buffer, or however it likes.
+ */
+static inline size_t crush_work_size(const struct crush_map *map,
+				     int result_max)
+{
+	return map->working_size + result_max * 3 * sizeof(__u32);
+}
 
 void crush_init_workspace(const struct crush_map *map, void *v);
 
-- 
cgit v1.2.3


From 083a51fbc57ca848ab087692f3cc97898fd88b54 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 9 Feb 2017 16:14:52 +0100
Subject: libceph: bump CEPH_PG_MAX_SIZE to 32

... to accommodate potentially very wide EC pools.  This increases the
size of a typical rbd ceph_osd_request by ~12% (from 1040 to 1168 bytes),
but I'd rather go future proof here.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/rados.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 5c0da61cb763..5d0018782d50 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -50,7 +50,7 @@ struct ceph_timespec {
 #define CEPH_PG_LAYOUT_LINEAR 2
 #define CEPH_PG_LAYOUT_HYBRID 3
 
-#define CEPH_PG_MAX_SIZE      16  /* max # osds in a single pg */
+#define CEPH_PG_MAX_SIZE      32  /* max # osds in a single pg */
 
 /*
  * placement group.
-- 
cgit v1.2.3


From 6c696d8560e74cd42458931375875d62ae88c6ae Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 25 Jan 2017 18:16:23 +0100
Subject: rbd: kill obj_request->object_name and rbd_segment_name_cache

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 include/linux/ceph/osdmap.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 8cebdc4158c3..938656f70807 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -81,13 +81,6 @@ void ceph_oloc_copy(struct ceph_object_locator *dest,
 		    const struct ceph_object_locator *src);
 void ceph_oloc_destroy(struct ceph_object_locator *oloc);
 
-/*
- * Maximum supported by kernel client object name length
- *
- * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
- */
-#define CEPH_MAX_OID_NAME_LEN 100
-
 /*
  * 51-char inline_name is long enough for all cephfs and all but one
  * rbd requests: <imgname> in "<imgname>.rbd"/"rbd_id.<imgname>" can be
-- 
cgit v1.2.3


From 42f82367df2cb2b71ceead6164e4415851c51fa4 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 20 Feb 2017 13:51:07 +0100
Subject: video: fbdev: fsl-diu-fb: fix spelling mistake "palette"

trivial fix to spelling mistakes of "palette"

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Timur Tabi <timur@tabi.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/linux/fsl-diu-fb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h
index a1e8277120c7..c46eab5bc893 100644
--- a/include/linux/fsl-diu-fb.h
+++ b/include/linux/fsl-diu-fb.h
@@ -73,7 +73,7 @@ struct diu_ad {
 	/* Word 0(32-bit) in DDR memory */
 /* 	__u16 comp; */
 /* 	__u16 pixel_s:2; */
-/* 	__u16 pallete:1; */
+/* 	__u16 palette:1; */
 /* 	__u16 red_c:2; */
 /* 	__u16 green_c:2; */
 /* 	__u16 blue_c:2; */
@@ -142,7 +142,7 @@ struct diu_ad {
 struct diu {
 	__be32 desc[3];
 	__be32 gamma;
-	__be32 pallete;
+	__be32 palette;
 	__be32 cursor;
 	__be32 curs_pos;
 	__be32 diu_mode;
-- 
cgit v1.2.3


From 25149ef9d25cafc4f4fe9f4461f18f876f397417 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 17 Feb 2017 16:07:34 -0800
Subject: net: phy: Check phydev->drv

There are number of function calls, originating from user-space,
typically through the Ethernet driver that can make us crash by
dereferencing phydev->drv which will be NULL once we unbind the driver
from the PHY.

There are still functional issues that prevent an unbind then rebind to
work, but these will be addressed separately.

Suggested-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index d9bdf53e0514..772476028a65 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -807,6 +807,9 @@ int phy_stop_interrupts(struct phy_device *phydev);
 
 static inline int phy_read_status(struct phy_device *phydev)
 {
+	if (!phydev->drv)
+		return -EIO;
+
 	return phydev->drv->read_status(phydev);
 }
 
-- 
cgit v1.2.3


From e71695307114335be1ed912f4a347396c2ed0e69 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 19 Feb 2017 07:17:17 +0200
Subject: ptr_ring: fix race conditions when resizing

Resizing currently drops consumer lock.  This can cause entries to be
reordered, which isn't good in itself.  More importantly, consumer can
detect a false ring empty condition and block forever.

Further, nesting of consumer within producer lock is problematic for
tun, since it produces entries in a BH, which causes a lock order
reversal:

       CPU0                    CPU1
       ----                    ----
  consume:
  lock(&(&r->consumer_lock)->rlock);
                               resize:
                               local_irq_disable();
                               lock(&(&r->producer_lock)->rlock);
                               lock(&(&r->consumer_lock)->rlock);
  <Interrupt>
  produce:
  lock(&(&r->producer_lock)->rlock);

To fix, nest producer lock within consumer lock during resize,
and keep consumer lock during the whole swap operation.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: stable@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptr_ring.h | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 2052011bf9fb..6c70444da3b9 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -111,6 +111,11 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
 	return 0;
 }
 
+/*
+ * Note: resize (below) nests producer lock within consumer lock, so if you
+ * consume in interrupt or BH context, you must disable interrupts/BH when
+ * calling this.
+ */
 static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
 {
 	int ret;
@@ -242,6 +247,11 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
 	return ptr;
 }
 
+/*
+ * Note: resize (below) nests producer lock within consumer lock, so if you
+ * call this in interrupt or BH context, you must disable interrupts/BH when
+ * producing.
+ */
 static inline void *ptr_ring_consume(struct ptr_ring *r)
 {
 	void *ptr;
@@ -357,7 +367,7 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
 	void **old;
 	void *ptr;
 
-	while ((ptr = ptr_ring_consume(r)))
+	while ((ptr = __ptr_ring_consume(r)))
 		if (producer < size)
 			queue[producer++] = ptr;
 		else if (destroy)
@@ -372,6 +382,12 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
 	return old;
 }
 
+/*
+ * Note: producer lock is nested within consumer lock, so if you
+ * resize you must make sure all uses nest correctly.
+ * In particular if you consume ring in interrupt or BH context, you must
+ * disable interrupts/BH when doing so.
+ */
 static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
 				  void (*destroy)(void *))
 {
@@ -382,17 +398,25 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
 	if (!queue)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&(r)->producer_lock, flags);
+	spin_lock_irqsave(&(r)->consumer_lock, flags);
+	spin_lock(&(r)->producer_lock);
 
 	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
 
-	spin_unlock_irqrestore(&(r)->producer_lock, flags);
+	spin_unlock(&(r)->producer_lock);
+	spin_unlock_irqrestore(&(r)->consumer_lock, flags);
 
 	kfree(old);
 
 	return 0;
 }
 
+/*
+ * Note: producer lock is nested within consumer lock, so if you
+ * resize you must make sure all uses nest correctly.
+ * In particular if you consume ring in interrupt or BH context, you must
+ * disable interrupts/BH when doing so.
+ */
 static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings,
 					   int size,
 					   gfp_t gfp, void (*destroy)(void *))
@@ -412,10 +436,12 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings,
 	}
 
 	for (i = 0; i < nrings; ++i) {
-		spin_lock_irqsave(&(rings[i])->producer_lock, flags);
+		spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
+		spin_lock(&(rings[i])->producer_lock);
 		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
 						  size, gfp, destroy);
-		spin_unlock_irqrestore(&(rings[i])->producer_lock, flags);
+		spin_unlock(&(rings[i])->producer_lock);
+		spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
 	}
 
 	for (i = 0; i < nrings; ++i)
-- 
cgit v1.2.3


From bb7462b6fd64e40809a857223bf7f0e628969f87 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Mon, 20 Feb 2017 16:51:23 +0100
Subject: vfs: use helpers for calling f_op->{read,write}_iter()

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 78c81e6f5d76..a3145cdff8f2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1715,6 +1715,18 @@ struct inode_operations {
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
 } ____cacheline_aligned;
 
+static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
+				     struct iov_iter *iter)
+{
+	return file->f_op->read_iter(kio, iter);
+}
+
+static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
+				      struct iov_iter *iter)
+{
+	return file->f_op->write_iter(kio, iter);
+}
+
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-- 
cgit v1.2.3


From f74ac01520c9f6d89bbc3c6931a72f757b742f86 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Mon, 20 Feb 2017 16:51:23 +0100
Subject: mm: use helper for calling f_op->mmap()

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a3145cdff8f2..93691b59e476 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1727,6 +1727,11 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
 	return file->f_op->write_iter(kio, iter);
 }
 
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	return file->f_op->mmap(file, vma);
+}
+
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-- 
cgit v1.2.3


From 0eb8af4916a540c362a2950e5ab54eca32eb7d58 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Mon, 20 Feb 2017 16:51:23 +0100
Subject: vfs: use helper for calling f_op->fsync()

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 93691b59e476..72a33007ec24 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1732,6 +1732,12 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 	return file->f_op->mmap(file, vma);
 }
 
+static inline int call_fsync(struct file *file, loff_t start, loff_t end,
+			     int datasync)
+{
+	return file->f_op->fsync(file, start, end, datasync);
+}
+
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-- 
cgit v1.2.3


From 3498d8694d41af701c51289e7caf3ffefc7bfb6b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 21 Feb 2017 14:19:45 +0100
Subject: gpio: reintroduce devm_get_gpiod_from_child()

We need to keep this API around for the merge window to avoid
nasty build problems in the merges.

Cc: Lee Jones <lee.jones@linaro.org>
Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index ea9b01d40017..2484b2fcc6eb 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -143,6 +143,16 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
 						struct fwnode_handle *child,
 						enum gpiod_flags flags,
 						const char *label);
+/* FIXME: delete this helper when users are switched over */
+static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
+			  const char *con_id, struct fwnode_handle *child)
+{
+	return devm_fwnode_get_index_gpiod_from_child(dev, con_id,
+						      0, child,
+						      GPIOD_ASIS,
+						      "?");
+}
+
 #else /* CONFIG_GPIOLIB */
 
 static inline int gpiod_count(struct device *dev, const char *con_id)
@@ -434,6 +444,13 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
 	return ERR_PTR(-ENOSYS);
 }
 
+/* FIXME: delete this when all users are switched over */
+static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
+			  const char *con_id, struct fwnode_handle *child)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
 #endif /* CONFIG_GPIOLIB */
 
 static inline
-- 
cgit v1.2.3


From 9d876e79df6a2f364b9f2737eacd72ceb27da53a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 21 Feb 2017 16:09:34 +0100
Subject: bpf: fix unlocking of jited image when module ronx not set

Eric and Willem reported that they recently saw random crashes when
JIT was in use and bisected this to 74451e66d516 ("bpf: make jited
programs visible in traces"). Issue was that the consolidation part
added bpf_jit_binary_unlock_ro() that would unlock previously made
read-only memory back to read-write. However, DEBUG_SET_MODULE_RONX
cannot be used for this to test for presence of set_memory_*()
functions. We need to use ARCH_HAS_SET_MEMORY instead to fix this;
also add the corresponding bpf_jit_binary_lock_ro() to filter.h.

Fixes: 74451e66d516 ("bpf: make jited programs visible in traces")
Reported-by: Eric Dumazet <edumazet@google.com>
Reported-by: Willem de Bruijn <willemb@google.com>
Bisected-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0c1cc9143cb2..0c167fdee5f7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -551,7 +551,7 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
-#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
 	set_memory_ro((unsigned long)fp, fp->pages);
@@ -562,6 +562,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 	set_memory_rw((unsigned long)fp, fp->pages);
 }
 
+static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
+{
+	set_memory_ro((unsigned long)hdr, hdr->pages);
+}
+
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
 	set_memory_rw((unsigned long)hdr, hdr->pages);
@@ -575,10 +580,14 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
 }
 
+static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
+{
+}
+
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
 }
-#endif /* CONFIG_DEBUG_SET_MODULE_RONX */
+#endif /* CONFIG_ARCH_HAS_SET_MEMORY */
 
 static inline struct bpf_binary_header *
 bpf_jit_binary_hdr(const struct bpf_prog *fp)
-- 
cgit v1.2.3


From 0500ce589aa7b5325af161d3c992ffb6be138ff9 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hsweeten@visionengravers.com>
Date: Wed, 15 Feb 2017 09:35:27 -0700
Subject: rtc: m48t86: remove unused platform_data

All users of this driver have been updated to allow the driver to
manage it's own resources and do the read/write operations internally.
The m48t86_ops are no longer used.

Remove the platform_data header and the support code in the driver.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 include/linux/platform_data/rtc-m48t86.h | 16 ----------------
 1 file changed, 16 deletions(-)
 delete mode 100644 include/linux/platform_data/rtc-m48t86.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/rtc-m48t86.h b/include/linux/platform_data/rtc-m48t86.h
deleted file mode 100644
index 915d6b4f0f89..000000000000
--- a/include/linux/platform_data/rtc-m48t86.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * ST M48T86 / Dallas DS12887 RTC driver
- * Copyright (c) 2006 Tower Technologies
- *
- * Author: Alessandro Zummo <a.zummo@towertech.it>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-struct m48t86_ops
-{
-	void (*writebyte)(unsigned char value, unsigned long addr);
-	unsigned char (*readbyte)(unsigned long addr);
-};
-- 
cgit v1.2.3


From 0ae060ca2bdfe11181094699b0f76437ec210b17 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 19 Feb 2017 16:08:25 -0500
Subject: SUNRPC: Add generic helpers for xdr_stream encode/decode

Add some generic helpers for encoding/decoding opaque structures and
basic u32/u64.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xdr.h | 177 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 56c48c884a24..dc7e51a769ac 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -242,6 +242,183 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
 extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
 
+/**
+ * xdr_align_size - Calculate padded size of an object
+ * @n: Size of an object being XDR encoded (in bytes)
+ *
+ * Return value:
+ *   Size (in bytes) of the object including xdr padding
+ */
+static inline size_t
+xdr_align_size(size_t n)
+{
+	const size_t mask = sizeof(__u32) - 1;
+
+	return (n + mask) & ~mask;
+}
+
+/**
+ * xdr_stream_encode_u32 - Encode a 32-bit integer
+ * @xdr: pointer to xdr_stream
+ * @n: integer to encode
+ *
+ * Return values:
+ *   On success, returns length in bytes of XDR buffer consumed
+ *   %-EMSGSIZE on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n)
+{
+	const size_t len = sizeof(n);
+	__be32 *p = xdr_reserve_space(xdr, len);
+
+	if (unlikely(!p))
+		return -EMSGSIZE;
+	*p = cpu_to_be32(n);
+	return len;
+}
+
+/**
+ * xdr_stream_encode_u64 - Encode a 64-bit integer
+ * @xdr: pointer to xdr_stream
+ * @n: 64-bit integer to encode
+ *
+ * Return values:
+ *   On success, returns length in bytes of XDR buffer consumed
+ *   %-EMSGSIZE on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n)
+{
+	const size_t len = sizeof(n);
+	__be32 *p = xdr_reserve_space(xdr, len);
+
+	if (unlikely(!p))
+		return -EMSGSIZE;
+	xdr_encode_hyper(p, n);
+	return len;
+}
+
+/**
+ * xdr_stream_encode_opaque_fixed - Encode fixed length opaque xdr data
+ * @xdr: pointer to xdr_stream
+ * @ptr: pointer to opaque data object
+ * @len: size of object pointed to by @ptr
+ *
+ * Return values:
+ *   On success, returns length in bytes of XDR buffer consumed
+ *   %-EMSGSIZE on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_encode_opaque_fixed(struct xdr_stream *xdr, const void *ptr, size_t len)
+{
+	__be32 *p = xdr_reserve_space(xdr, len);
+
+	if (unlikely(!p))
+		return -EMSGSIZE;
+	xdr_encode_opaque_fixed(p, ptr, len);
+	return xdr_align_size(len);
+}
+
+/**
+ * xdr_stream_encode_opaque - Encode variable length opaque xdr data
+ * @xdr: pointer to xdr_stream
+ * @ptr: pointer to opaque data object
+ * @len: size of object pointed to by @ptr
+ *
+ * Return values:
+ *   On success, returns length in bytes of XDR buffer consumed
+ *   %-EMSGSIZE on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len)
+{
+	size_t count = sizeof(__u32) + xdr_align_size(len);
+	__be32 *p = xdr_reserve_space(xdr, count);
+
+	if (unlikely(!p))
+		return -EMSGSIZE;
+	xdr_encode_opaque(p, ptr, len);
+	return count;
+}
+
+/**
+ * xdr_stream_decode_u32 - Decode a 32-bit integer
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store integer
+ *
+ * Return values:
+ *   %0 on success
+ *   %-EBADMSG on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr)
+{
+	const size_t count = sizeof(*ptr);
+	__be32 *p = xdr_inline_decode(xdr, count);
+
+	if (unlikely(!p))
+		return -EBADMSG;
+	*ptr = be32_to_cpup(p);
+	return 0;
+}
+
+/**
+ * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store data
+ * @len: size of buffer pointed to by @ptr
+ *
+ * Return values:
+ *   On success, returns size of object stored in @ptr
+ *   %-EBADMSG on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len)
+{
+	__be32 *p = xdr_inline_decode(xdr, len);
+
+	if (unlikely(!p))
+		return -EBADMSG;
+	xdr_decode_opaque_fixed(p, ptr, len);
+	return len;
+}
+
+/**
+ * xdr_stream_decode_opaque_inline - Decode variable length opaque xdr data
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store pointer to opaque data
+ * @maxlen: maximum acceptable object size
+ *
+ * Note: the pointer stored in @ptr cannot be assumed valid after the XDR
+ * buffer has been destroyed, or even after calling xdr_inline_decode()
+ * on @xdr. It is therefore expected that the object it points to should
+ * be processed immediately.
+ *
+ * Return values:
+ *   On success, returns size of object stored in *@ptr
+ *   %-EBADMSG on XDR buffer overflow
+ *   %-EMSGSIZE if the size of the object would exceed @maxlen
+ */
+static inline ssize_t
+xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxlen)
+{
+	__be32 *p;
+	__u32 len;
+
+	*ptr = NULL;
+	if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
+		return -EBADMSG;
+	if (len != 0) {
+		p = xdr_inline_decode(xdr, len);
+		if (unlikely(!p))
+			return -EBADMSG;
+		if (unlikely(len > maxlen))
+			return -EMSGSIZE;
+		*ptr = p;
+	}
+	return len;
+}
 #endif /* __KERNEL__ */
 
 #endif /* _SUNRPC_XDR_H_ */
-- 
cgit v1.2.3


From 5c741d4f2215371ae17e3dbdf3d46da850662e13 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 19 Feb 2017 16:08:31 -0500
Subject: SUNRPC: Add a helper function xdr_stream_decode_string_dup()

Create a helper function that decodes a xdr string object, allocates a memory
buffer and then store it as a NUL terminated string.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xdr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index dc7e51a769ac..054c8cde18f3 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -242,6 +242,8 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
 extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
 
+ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
+		size_t maxlen, gfp_t gfp_flags);
 /**
  * xdr_align_size - Calculate padded size of an object
  * @n: Size of an object being XDR encoded (in bytes)
-- 
cgit v1.2.3


From 986994a27587efd8ce4c595cab89b570f7475359 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Thu, 26 Jan 2017 17:17:28 +0200
Subject: nvme: Use CNS as 8-bit field and avoid endianness conversion

This patch defines CNS field as 8-bit field and avoids cpu_to/from_le
conversions.
Also initialize nvme_command cns value explicitly to NVME_ID_CNS_NS
for readability (don't rely on the fact that NVME_ID_CNS_NS = 0).

Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 0b676a02cf3e..5b32521456d6 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -644,7 +644,9 @@ struct nvme_identify {
 	__le32			nsid;
 	__u64			rsvd2[2];
 	union nvme_data_ptr	dptr;
-	__le32			cns;
+	__u8			cns;
+	__u8			rsvd3;
+	__le16			ctrlid;
 	__u32			rsvd11[5];
 };
 
-- 
cgit v1.2.3


From c5552fde102fcc3f2cf9e502b8ac90e3500d8fdf Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 7 Feb 2017 10:08:45 -0800
Subject: nvme: Enable autonomous power state transitions

NVMe devices can advertise multiple power states.  These states can
be either "operational" (the device is fully functional but possibly
slow) or "non-operational" (the device is asleep until woken up).
Some devices can automatically enter a non-operational state when
idle for a specified amount of time and then automatically wake back
up when needed.

The hardware configuration is a table.  For each state, an entry in
the table indicates the next deeper non-operational state, if any,
to autonomously transition to and the idle time required before
transitioning.

This patch teaches the driver to program APST so that each successive
non-operational state will be entered after an idle time equal to 100%
of the total latency (entry plus exit) associated with that state.
The maximum acceptable latency is controlled using dev_pm_qos
(e.g. power/pm_qos_latency_tolerance_us in sysfs); non-operational
states with total latency greater than this value will not be used.
As a special case, setting the latency tolerance to 0 will disable
APST entirely.  On hardware without APST support, the sysfs file will
not be exposed.

The latency tolerance for newly-probed devices is set by the module
parameter nvme_core.default_ps_max_latency_us.

In theory, the device can expose "default" APST table, but this
doesn't seem to function correctly on my device (Samsung 950), nor
does it seem particularly useful.  There is also an optional
mechanism by which a configuration can be "saved" so it will be
automatically loaded on reset.  This can be configured from
userspace, but it doesn't seem useful to support in the driver.

On my laptop, enabling APST seems to save nearly 1W.

The hardware tables can be decoded in userspace with nvme-cli.
'nvme id-ctrl /dev/nvmeN' will show the power state table and
'nvme get-feature -f 0x0c -H /dev/nvme0' will show the current APST
configuration.

This feature is quirked off on a known-buggy Samsung device.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 5b32521456d6..c43d435d4225 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -579,6 +579,12 @@ struct nvme_write_zeroes_cmd {
 	__le16			appmask;
 };
 
+/* Features */
+
+struct nvme_feat_auto_pst {
+	__le64 entries[32];
+};
+
 /* Admin commands */
 
 enum nvme_admin_opcode {
-- 
cgit v1.2.3


From 3ee80c3d15b61e61611903033df414a0590cfde9 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 20 Feb 2017 13:44:28 +0200
Subject: nvme-rdma: move nvme cm status helper to .h file

This will enable the usage for nvme rdma target.
Also move from a lookup array to a switch statement.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/nvme-rdma.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index bf240a3cbf99..a72fd04aa5e1 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -29,6 +29,30 @@ enum nvme_rdma_cm_status {
 	NVME_RDMA_CM_INVALID_ORD	= 0x08,
 };
 
+static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
+{
+	switch (status) {
+	case NVME_RDMA_CM_INVALID_LEN:
+		return "invalid length";
+	case NVME_RDMA_CM_INVALID_RECFMT:
+		return "invalid record format";
+	case NVME_RDMA_CM_INVALID_QID:
+		return "invalid queue ID";
+	case NVME_RDMA_CM_INVALID_HSQSIZE:
+		return "invalid host SQ size";
+	case NVME_RDMA_CM_INVALID_HRQSIZE:
+		return "invalid host RQ size";
+	case NVME_RDMA_CM_NO_RSC:
+		return "resource not found";
+	case NVME_RDMA_CM_INVALID_IRD:
+		return "invalid IRD";
+	case NVME_RDMA_CM_INVALID_ORD:
+		return "Invalid ORD";
+	default:
+		return "unrecognized reason";
+	}
+}
+
 /**
  * struct nvme_rdma_cm_req - rdma connect request
  *
-- 
cgit v1.2.3


From d3213e8fd4b0f18dfd438268ff480406ba743abb Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Wed, 22 Feb 2017 15:39:47 -0800
Subject: tracing: add __print_flags_u64()

Patch series "DAX tracepoints, mm argument simplification", v4.

This contains both my DAX tracepoint code and Dave Jiang's MM argument
simplifications.  Dave's code was written with my tracepoint code as a
baseline, so it seemed simplest to keep them together in a single series.

This patch (of 7):

Add __print_flags_u64() and the helper trace_print_flags_seq_u64() in the
same spirit as __print_symbolic_u64() and trace_print_symbols_seq_u64().
These functions allow us to print symbols associated with flags that are
64 bits wide even on 32 bit machines.

These will be used by the DAX code so that we can print the flags set in a
pfn_t such as PFN_SG_CHAIN, PFN_SG_LAST, PFN_DEV and PFN_MAP.

Without this new function I was getting errors like the following when
compiling for i386:

  include/linux/pfn_t.h:13:22: warning: large integer implicitly truncated to unsigned type [-Woverflow]
   #define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
    ^

Link: http://lkml.kernel.org/r/1484085142-2297-2-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/trace_events.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 0f165507495c..0af63c4381b9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -23,6 +23,10 @@ const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 				    const struct trace_print_flags *symbol_array);
 
 #if BITS_PER_LONG == 32
+const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
+		      unsigned long long flags,
+		      const struct trace_print_flags_u64 *flag_array);
+
 const char *trace_print_symbols_seq_u64(struct trace_seq *p,
 					unsigned long long val,
 					const struct trace_print_flags_u64
-- 
cgit v1.2.3


From 282a8e0391c377b64c7d065b18fc2f6267a24dad Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Wed, 22 Feb 2017 15:39:50 -0800
Subject: dax: add tracepoint infrastructure, PMD tracing

Tracepoints are the standard way to capture debugging and tracing
information in many parts of the kernel, including the XFS and ext4
filesystems.  Create a tracepoint header for FS DAX and add the first DAX
tracepoints to the PMD fault handler.  This allows the tracing for DAX to
be done in the same way as the filesystem tracing so that developers can
look at them together and get a coherent idea of what the system is doing.

I added both an entry and exit tracepoint because future patches will add
tracepoints to child functions of dax_iomap_pmd_fault() like
dax_pmd_load_hole() and dax_pmd_insert_mapping().  We want those messages
to be wrapped by the parent function tracepoints so the code flow is more
easily understood.  Having entry and exit tracepoints for faults also
allows us to easily see what filesystems functions were called during the
fault.  These filesystem functions get executed via iomap_begin() and
iomap_end() calls, for example, and will have their own tracepoints.

For PMD faults we primarily want to understand the type of mapping, the
fault flags, the faulting address and whether it fell back to 4k faults.
If it fell back to 4k faults the tracepoints should let us understand why.

I named the new tracepoint header file "fs_dax.h" to allow for device DAX
to have its own separate tracing header in the same directory at some
point.

Here is an example output for these events from a successful PMD fault:

  big-1441  [005] ....    32.582758: xfs_filemap_pmd_fault: dev 259:0 ino 0x1003

  big-1441  [005] ....    32.582776: dax_pmd_fault: dev 259:0 ino 0x1003
  shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400

  big-1441  [005] ....    32.583292: dax_pmd_fault_done: dev 259:0 ino 0x1003
  shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 NOPAGE

Link: http://lkml.kernel.org/r/1484085142-2297-3-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6ff66d6fe8e2..d22f7837ad90 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -285,6 +285,17 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_REMOTE	0x80	/* faulting for non current tsk/mm */
 #define FAULT_FLAG_INSTRUCTION  0x100	/* The fault was during an instruction fetch */
 
+#define FAULT_FLAG_TRACE \
+	{ FAULT_FLAG_WRITE,		"WRITE" }, \
+	{ FAULT_FLAG_MKWRITE,		"MKWRITE" }, \
+	{ FAULT_FLAG_ALLOW_RETRY,	"ALLOW_RETRY" }, \
+	{ FAULT_FLAG_RETRY_NOWAIT,	"RETRY_NOWAIT" }, \
+	{ FAULT_FLAG_KILLABLE,		"KILLABLE" }, \
+	{ FAULT_FLAG_TRIED,		"TRIED" }, \
+	{ FAULT_FLAG_USER,		"USER" }, \
+	{ FAULT_FLAG_REMOTE,		"REMOTE" }, \
+	{ FAULT_FLAG_INSTRUCTION,	"INSTRUCTION" }
+
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
@@ -1111,6 +1122,20 @@ static inline void clear_page_pfmemalloc(struct page *page)
 			 VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \
 			 VM_FAULT_FALLBACK)
 
+#define VM_FAULT_RESULT_TRACE \
+	{ VM_FAULT_OOM,			"OOM" }, \
+	{ VM_FAULT_SIGBUS,		"SIGBUS" }, \
+	{ VM_FAULT_MAJOR,		"MAJOR" }, \
+	{ VM_FAULT_WRITE,		"WRITE" }, \
+	{ VM_FAULT_HWPOISON,		"HWPOISON" }, \
+	{ VM_FAULT_HWPOISON_LARGE,	"HWPOISON_LARGE" }, \
+	{ VM_FAULT_SIGSEGV,		"SIGSEGV" }, \
+	{ VM_FAULT_NOPAGE,		"NOPAGE" }, \
+	{ VM_FAULT_LOCKED,		"LOCKED" }, \
+	{ VM_FAULT_RETRY,		"RETRY" }, \
+	{ VM_FAULT_FALLBACK,		"FALLBACK" }, \
+	{ VM_FAULT_DONE_COW,		"DONE_COW" }
+
 /* Encode hstate index for a hwpoisoned large page */
 #define VM_FAULT_SET_HINDEX(x) ((x) << 12)
 #define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)
-- 
cgit v1.2.3


From 27a7ffaccd915675c88045ba83493804a0267ab7 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Wed, 22 Feb 2017 15:40:00 -0800
Subject: dax: add tracepoints to dax_pmd_insert_mapping()

Add tracepoints to dax_pmd_insert_mapping(), following the same logging
conventions as the tracepoints in dax_iomap_pmd_fault().

Here is an example PMD fault showing the new tracepoints:

big-1504  [001] ....   326.960743: xfs_filemap_pmd_fault: dev 259:0 ino 0x1003

big-1504  [001] ....   326.960753: dax_pmd_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400

big-1504  [001] ....   326.960981: dax_pmd_insert_mapping: dev 259:0 ino 0x1003 shared write address 0x10505000 length 0x200000 pfn 0x100600 DEV|MAP radix_entry 0xc000e

big-1504  [001] ....   326.960986: dax_pmd_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 NOPAGE

Link: http://lkml.kernel.org/r/1484085142-2297-6-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pfn_t.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index a3d90b9da18d..033fc7bbcefa 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -15,6 +15,12 @@
 #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
 #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
 
+#define PFN_FLAGS_TRACE \
+	{ PFN_SG_CHAIN,	"SG_CHAIN" }, \
+	{ PFN_SG_LAST,	"SG_LAST" }, \
+	{ PFN_DEV,	"DEV" }, \
+	{ PFN_MAP,	"MAP" }
+
 static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
 {
 	pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
-- 
cgit v1.2.3


From d8a849e1bc123790bbbf1facba94452a3aef5736 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 22 Feb 2017 15:40:03 -0800
Subject: mm, dax: make pmd_fault() and friends be the same as fault()

Instead of passing in multiple parameters in the pmd_fault() handler,
a vmf can be passed in just like a fault() handler. This will simplify
code and remove the need for the actual pmd fault handlers to allocate a
vmf. Related functions are also modified to do the same.

[dave.jiang@intel.com: fix issue with xfs_tests stall when DAX option is off]
  Link: http://lkml.kernel.org/r/148469861071.195597.3619476895250028518.stgit@djiang5-desk3.ch.intel.com
Link: http://lkml.kernel.org/r/1484085142-2297-7-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dax.h | 7 +++----
 include/linux/mm.h  | 3 +--
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 24ad71173995..a829fee2b42b 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -71,16 +71,15 @@ static inline unsigned int dax_radix_order(void *entry)
 		return PMD_SHIFT - PAGE_SHIFT;
 	return 0;
 }
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
-		pmd_t *pmd, unsigned int flags, struct iomap_ops *ops);
+int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+		struct iomap_ops *ops);
 #else
 static inline unsigned int dax_radix_order(void *entry)
 {
 	return 0;
 }
 static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
-		unsigned long address, pmd_t *pmd, unsigned int flags,
-		struct iomap_ops *ops)
+		struct vm_fault *vmf, struct iomap_ops *ops)
 {
 	return VM_FAULT_FALLBACK;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d22f7837ad90..0961e95e904e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -351,8 +351,7 @@ struct vm_operations_struct {
 	void (*close)(struct vm_area_struct * area);
 	int (*mremap)(struct vm_area_struct * area);
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
-	int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
-						pmd_t *, unsigned int flags);
+	int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	void (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
 
-- 
cgit v1.2.3


From f42003917b4569a2f4f0c79c35e1e3df2859f81a Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 22 Feb 2017 15:40:06 -0800
Subject: mm, dax: change pmd_fault() to take only vmf parameter

pmd_fault() and related functions really only need the vmf parameter since
the additional parameters are all included in the vmf struct.  Remove the
additional parameter and simplify pmd_fault() and friends.

Link: http://lkml.kernel.org/r/1484085142-2297-8-git-send-email-ross.zwisler@linux.intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dax.h | 7 +++----
 include/linux/mm.h  | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index a829fee2b42b..c1bd6ab5e974 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -71,15 +71,14 @@ static inline unsigned int dax_radix_order(void *entry)
 		return PMD_SHIFT - PAGE_SHIFT;
 	return 0;
 }
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-		struct iomap_ops *ops);
+int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops);
 #else
 static inline unsigned int dax_radix_order(void *entry)
 {
 	return 0;
 }
-static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
-		struct vm_fault *vmf, struct iomap_ops *ops)
+static inline int dax_iomap_pmd_fault(struct vm_fault *vmf,
+		struct iomap_ops *ops)
 {
 	return VM_FAULT_FALLBACK;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0961e95e904e..3787f047a098 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -351,7 +351,7 @@ struct vm_operations_struct {
 	void (*close)(struct vm_area_struct * area);
 	int (*mremap)(struct vm_area_struct * area);
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
-	int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+	int (*pmd_fault)(struct vm_fault *vmf);
 	void (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
 
-- 
cgit v1.2.3


From bf5eb3de3847ebcfd1fea7bc14072ef9f21d4e8d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 22 Feb 2017 15:41:11 -0800
Subject: slub: separate out sysfs_slab_release() from sysfs_slab_remove()

Separate out slub sysfs removal and release, and call the former earlier
from __kmem_cache_shutdown().  There's no reason to defer sysfs removal
through RCU and this will later allow us to remove sysfs files way
earlier during memory cgroup offline instead of release.

Link: http://lkml.kernel.org/r/20170117235411.9408-3-tj@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 75f56c2ef2d4..07ef550c6627 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -113,9 +113,9 @@ struct kmem_cache {
 
 #ifdef CONFIG_SYSFS
 #define SLAB_SUPPORTS_SYSFS
-void sysfs_slab_remove(struct kmem_cache *);
+void sysfs_slab_release(struct kmem_cache *);
 #else
-static inline void sysfs_slab_remove(struct kmem_cache *s)
+static inline void sysfs_slab_release(struct kmem_cache *s)
 {
 }
 #endif
-- 
cgit v1.2.3


From 9eeadc8b6e0e31f9aea1f8886ef472f62c2b7f55 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 22 Feb 2017 15:41:17 -0800
Subject: slab: reorganize memcg_cache_params

We're going to change how memcg caches are iterated.  In preparation,
clean up and reorganize memcg_cache_params.

* The shared ->list is replaced by ->children in root and
  ->children_node in children.

* ->is_root_cache is removed.  Instead ->root_cache is moved out of
  the child union and now used by both root and children.  NULL
  indicates root cache.  Non-NULL a memcg one.

This patch doesn't cause any observable behavior changes.

Link: http://lkml.kernel.org/r/20170117235411.9408-5-tj@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 4c5363566815..1f611ba00f1d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -545,22 +545,37 @@ struct memcg_cache_array {
  * array to be accessed without taking any locks, on relocation we free the old
  * version only after a grace period.
  *
- * Child caches will hold extra metadata needed for its operation. Fields are:
+ * Root and child caches hold different metadata.
  *
- * @memcg: pointer to the memcg this cache belongs to
- * @root_cache: pointer to the global, root cache, this cache was derived from
+ * @root_cache:	Common to root and child caches.  NULL for root, pointer to
+ *		the root cache for children.
  *
- * Both root and child caches of the same kind are linked into a list chained
- * through @list.
+ * The following fields are specific to root caches.
+ *
+ * @memcg_caches: kmemcg ID indexed table of child caches.  This table is
+ *		used to index child cachces during allocation and cleared
+ *		early during shutdown.
+ *
+ * @children:	List of all child caches.  While the child caches are also
+ *		reachable through @memcg_caches, a child cache remains on
+ *		this list until it is actually destroyed.
+ *
+ * The following fields are specific to child caches.
+ *
+ * @memcg:	Pointer to the memcg this cache belongs to.
+ *
+ * @children_node: List node for @root_cache->children list.
  */
 struct memcg_cache_params {
-	bool is_root_cache;
-	struct list_head list;
+	struct kmem_cache *root_cache;
 	union {
-		struct memcg_cache_array __rcu *memcg_caches;
+		struct {
+			struct memcg_cache_array __rcu *memcg_caches;
+			struct list_head children;
+		};
 		struct {
 			struct mem_cgroup *memcg;
-			struct kmem_cache *root_cache;
+			struct list_head children_node;
 		};
 	};
 };
-- 
cgit v1.2.3


From bc2791f857e1984b7548d2a2de2ffb1a913dee62 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 22 Feb 2017 15:41:21 -0800
Subject: slab: link memcg kmem_caches on their associated memory cgroup

With kmem cgroup support enabled, kmem_caches can be created and
destroyed frequently and a great number of near empty kmem_caches can
accumulate if there are a lot of transient cgroups and the system is not
under memory pressure.  When memory reclaim starts under such
conditions, it can lead to consecutive deactivation and destruction of
many kmem_caches, easily hundreds of thousands on moderately large
systems, exposing scalability issues in the current slab management
code.  This is one of the patches to address the issue.

While a memcg kmem_cache is listed on its root cache's ->children list,
there is no direct way to iterate all kmem_caches which are assocaited
with a memory cgroup.  The only way to iterate them is walking all
caches while filtering out caches which don't match, which would be most
of them.

This makes memcg destruction operations O(N^2) where N is the total
number of slab caches which can be huge.  This combined with the
synchronous RCU operations can tie up a CPU and affect the whole machine
for many hours when memory reclaim triggers offlining and destruction of
the stale memcgs.

This patch adds mem_cgroup->kmem_caches list which goes through
memcg_cache_params->kmem_caches_node of all kmem_caches which are
associated with the memcg.  All memcg specific iterations, including
stat file access, are updated to use the new list instead.

Link: http://lkml.kernel.org/r/20170117235411.9408-6-tj@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jay Vana <jsvana@fb.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 1 +
 include/linux/slab.h       | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 254698856b8f..9fcece9be85d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -253,6 +253,7 @@ struct mem_cgroup {
         /* Index in the kmem_cache->memcg_params.memcg_caches array */
 	int kmemcg_id;
 	enum memcg_kmem_state kmem_state;
+	struct list_head kmem_caches;
 #endif
 
 	int last_scanned_node;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 1f611ba00f1d..a0cc7a77cda2 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -565,6 +565,8 @@ struct memcg_cache_array {
  * @memcg:	Pointer to the memcg this cache belongs to.
  *
  * @children_node: List node for @root_cache->children list.
+ *
+ * @kmem_caches_node: List node for @memcg->kmem_caches list.
  */
 struct memcg_cache_params {
 	struct kmem_cache *root_cache;
@@ -576,6 +578,7 @@ struct memcg_cache_params {
 		struct {
 			struct mem_cgroup *memcg;
 			struct list_head children_node;
+			struct list_head kmem_caches_node;
 		};
 	};
 };
-- 
cgit v1.2.3


From 510ded33e075c2bd662b1efab0110f4240325fc9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 22 Feb 2017 15:41:24 -0800
Subject: slab: implement slab_root_caches list

With kmem cgroup support enabled, kmem_caches can be created and
destroyed frequently and a great number of near empty kmem_caches can
accumulate if there are a lot of transient cgroups and the system is not
under memory pressure.  When memory reclaim starts under such
conditions, it can lead to consecutive deactivation and destruction of
many kmem_caches, easily hundreds of thousands on moderately large
systems, exposing scalability issues in the current slab management
code.  This is one of the patches to address the issue.

slab_caches currently lists all caches including root and memcg ones.
This is the only data structure which lists the root caches and
iterating root caches can only be done by walking the list while
skipping over memcg caches.  As there can be a huge number of memcg
caches, this can become very expensive.

This also can make /proc/slabinfo behave very badly.  seq_file processes
reads in 4k chunks and seeks to the previous Nth position on slab_caches
list to resume after each chunk.  With a lot of memcg cache churns on
the list, reading /proc/slabinfo can become very slow and its content
often ends up with duplicate and/or missing entries.

This patch adds a new list slab_root_caches which lists only the root
caches.  When memcg is not enabled, it becomes just an alias of
slab_caches.  memcg specific list operations are collected into
memcg_[un]link_cache().

Link: http://lkml.kernel.org/r/20170117235411.9408-7-tj@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jay Vana <jsvana@fb.com>
Acked-by: Vladimir Davydov <vdavydov@tarantool.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index a0cc7a77cda2..af1a5bef80f4 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -556,6 +556,8 @@ struct memcg_cache_array {
  *		used to index child cachces during allocation and cleared
  *		early during shutdown.
  *
+ * @root_caches_node: List node for slab_root_caches list.
+ *
  * @children:	List of all child caches.  While the child caches are also
  *		reachable through @memcg_caches, a child cache remains on
  *		this list until it is actually destroyed.
@@ -573,6 +575,7 @@ struct memcg_cache_params {
 	union {
 		struct {
 			struct memcg_cache_array __rcu *memcg_caches;
+			struct list_head __root_caches_node;
 			struct list_head children;
 		};
 		struct {
-- 
cgit v1.2.3


From 01fb58bcba63f8fba37581c24c99e9a515dd0335 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 22 Feb 2017 15:41:30 -0800
Subject: slab: remove synchronous synchronize_sched() from memcg cache
 deactivation path

With kmem cgroup support enabled, kmem_caches can be created and
destroyed frequently and a great number of near empty kmem_caches can
accumulate if there are a lot of transient cgroups and the system is not
under memory pressure.  When memory reclaim starts under such
conditions, it can lead to consecutive deactivation and destruction of
many kmem_caches, easily hundreds of thousands on moderately large
systems, exposing scalability issues in the current slab management
code.  This is one of the patches to address the issue.

slub uses synchronize_sched() to deactivate a memcg cache.
synchronize_sched() is an expensive and slow operation and doesn't scale
when a huge number of caches are destroyed back-to-back.  While there
used to be a simple batching mechanism, the batching was too restricted
to be helpful.

This patch implements slab_deactivate_memcg_cache_rcu_sched() which slub
can use to schedule sched RCU callback instead of performing
synchronize_sched() synchronously while holding cgroup_mutex.  While
this adds online cpus, mems and slab_mutex operations, operating on
these locks back-to-back from the same kworker, which is what's gonna
happen when there are many to deactivate, isn't expensive at all and
this gets rid of the scalability problem completely.

Link: http://lkml.kernel.org/r/20170117235411.9408-9-tj@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jay Vana <jsvana@fb.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index af1a5bef80f4..3c37a8c51921 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -582,6 +582,12 @@ struct memcg_cache_params {
 			struct mem_cgroup *memcg;
 			struct list_head children_node;
 			struct list_head kmem_caches_node;
+
+			void (*deact_fn)(struct kmem_cache *);
+			union {
+				struct rcu_head deact_rcu_head;
+				struct work_struct deact_work;
+			};
 		};
 	};
 };
-- 
cgit v1.2.3


From 17cc4dfeda97636d67e83de8cd41940b65a93bc7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 22 Feb 2017 15:41:36 -0800
Subject: slab: use memcg_kmem_cache_wq for slab destruction operations

If there's contention on slab_mutex, queueing the per-cache destruction
work item on the system_wq can unnecessarily create and tie up a lot of
kworkers.

Rename memcg_kmem_cache_create_wq to memcg_kmem_cache_wq and make it
global and use that workqueue for the destruction work items too.  While
at it, convert the workqueue from an unbound workqueue to a per-cpu one
with concurrency limited to 1.  It's generally preferable to use per-cpu
workqueues and concurrency limit of 1 is safe enough.

This is suggested by Joonsoo Kim.

Link: http://lkml.kernel.org/r/20170117235411.9408-11-tj@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jay Vana <jsvana@fb.com>
Acked-by: Vladimir Davydov <vdavydov@tarantool.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9fcece9be85d..5af377303880 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -830,6 +830,7 @@ void memcg_kmem_uncharge(struct page *page, int order);
 
 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
 extern struct static_key_false memcg_kmem_enabled_key;
+extern struct workqueue_struct *memcg_kmem_cache_wq;
 
 extern int memcg_nr_cache_ids;
 void memcg_get_cache_ids(void);
-- 
cgit v1.2.3


From 893e26e61d04eac974ded0c11e1647b335c8cb7b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Wed, 22 Feb 2017 15:42:27 -0800
Subject: userfaultfd: non-cooperative: Add fork() event

When the mm with uffd-ed vmas fork()-s the respective vmas notify their
uffds with the event which contains a descriptor with new uffd.  This
new descriptor can then be used to get events from the child and
populate its mm with data.  Note, that there can be different uffd-s
controlling different vmas within one mm, so first we should collect all
those uffds (and ctx-s) in a list and then notify them all one by one
but only once per fork().

The context is created at fork() time but the descriptor, file struct
and anon inode object is created at event read time.  So some trickery
is added to the userfaultfd_ctx_read() to handle the ctx queues' locking
vs file creation.

Another thing worth noticing is that the task that fork()-s waits for
the uffd event to get processed WITHOUT the mmap sem.

[aarcange@redhat.com: build warning fix]
  Link: http://lkml.kernel.org/r/20161216144821.5183-10-aarcange@redhat.com
Link: http://lkml.kernel.org/r/20161216144821.5183-9-aarcange@redhat.com
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 11b92b047a1e..79002bca1f43 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -52,6 +52,9 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 	return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP);
 }
 
+extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
+extern void dup_userfaultfd_complete(struct list_head *);
+
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
@@ -76,6 +79,16 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 	return false;
 }
 
+static inline int dup_userfaultfd(struct vm_area_struct *vma,
+				  struct list_head *l)
+{
+	return 0;
+}
+
+static inline void dup_userfaultfd_complete(struct list_head *l)
+{
+}
+
 #endif /* CONFIG_USERFAULTFD */
 
 #endif /* _LINUX_USERFAULTFD_K_H */
-- 
cgit v1.2.3


From 72f87654c69690ff4721bd9b4a39983f971de9a5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Wed, 22 Feb 2017 15:42:34 -0800
Subject: userfaultfd: non-cooperative: add mremap() event

The event denotes that an area [start:end] moves to different location.
Length change isn't reported as "new" addresses, if they appear on the
uffd reader side they will not contain any data and the latter can just
zeromap them.

Waiting for the event ACK is also done outside of mmap sem, as for fork
event.

Link: http://lkml.kernel.org/r/20161216144821.5183-12-aarcange@redhat.com
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 79002bca1f43..7f318a46044b 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -55,6 +55,12 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
 extern void dup_userfaultfd_complete(struct list_head *);
 
+extern void mremap_userfaultfd_prep(struct vm_area_struct *,
+				    struct vm_userfaultfd_ctx *);
+extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx,
+					unsigned long from, unsigned long to,
+					unsigned long len);
+
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
@@ -89,6 +95,17 @@ static inline void dup_userfaultfd_complete(struct list_head *l)
 {
 }
 
+static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
+					   struct vm_userfaultfd_ctx *ctx)
+{
+}
+
+static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx ctx,
+					       unsigned long from,
+					       unsigned long to,
+					       unsigned long len)
+{
+}
 #endif /* CONFIG_USERFAULTFD */
 
 #endif /* _LINUX_USERFAULTFD_K_H */
-- 
cgit v1.2.3


From 90794bf19dc19691a16b71bcd75c04094d9e392d Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Wed, 22 Feb 2017 15:42:37 -0800
Subject: userfaultfd: non-cooperative: optimize mremap_userfaultfd_complete()

Optimize the mremap_userfaultfd_complete() interface to pass only the
vm_userfaultfd_ctx pointer through the stack as a microoptimization.

Link: http://lkml.kernel.org/r/20161216144821.5183-13-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Acked-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 7f318a46044b..78ec197e8b47 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -57,7 +57,7 @@ extern void dup_userfaultfd_complete(struct list_head *);
 
 extern void mremap_userfaultfd_prep(struct vm_area_struct *,
 				    struct vm_userfaultfd_ctx *);
-extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx,
+extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
 					unsigned long from, unsigned long to,
 					unsigned long len);
 
@@ -100,7 +100,7 @@ static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
 {
 }
 
-static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx ctx,
+static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
 					       unsigned long from,
 					       unsigned long to,
 					       unsigned long len)
-- 
cgit v1.2.3


From 05ce77249d5068b057082d24ec22d3824f4816ac Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Wed, 22 Feb 2017 15:42:40 -0800
Subject: userfaultfd: non-cooperative: add madvise() event for MADV_DONTNEED
 request

If the page is punched out of the address space the uffd reader should
know this and zeromap the respective area in case of the #PF event.

Link: http://lkml.kernel.org/r/20161216144821.5183-14-aarcange@redhat.com
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 78ec197e8b47..f431861f22f1 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -61,6 +61,11 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
 					unsigned long from, unsigned long to,
 					unsigned long len);
 
+extern void madvise_userfault_dontneed(struct vm_area_struct *vma,
+				       struct vm_area_struct **prev,
+				       unsigned long start,
+				       unsigned long end);
+
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
@@ -106,6 +111,13 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
 					       unsigned long len)
 {
 }
+
+static inline void madvise_userfault_dontneed(struct vm_area_struct *vma,
+					      struct vm_area_struct **prev,
+					      unsigned long start,
+					      unsigned long end)
+{
+}
 #endif /* CONFIG_USERFAULTFD */
 
 #endif /* _LINUX_USERFAULTFD_K_H */
-- 
cgit v1.2.3


From fa4d75c1de13299c61b5e18a1ae46bc00888b599 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Wed, 22 Feb 2017 15:42:49 -0800
Subject: userfaultfd: hugetlbfs: add copy_huge_page_from_user for hugetlb
 userfaultfd support

userfaultfd UFFDIO_COPY allows user level code to copy data to a page at
fault time.  The data is copied from user space to a newly allocated
huge page.  The new routine copy_huge_page_from_user performs this copy.

Link: http://lkml.kernel.org/r/20161216144821.5183-17-aarcange@redhat.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3787f047a098..6bc7f2c1a6d1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2424,6 +2424,9 @@ extern void clear_huge_page(struct page *page,
 extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned long addr, struct vm_area_struct *vma,
 				unsigned int pages_per_huge_page);
+extern long copy_huge_page_from_user(struct page *dst_page,
+				const void __user *usr_src,
+				unsigned int pages_per_huge_page);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 extern struct page_ext_operations debug_guardpage_ops;
-- 
cgit v1.2.3


From 8fb5debc5fcd450470cdd789c2d80ef95ebb8cf4 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Wed, 22 Feb 2017 15:42:52 -0800
Subject: userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for userfaultfd
 support

hugetlb_mcopy_atomic_pte is the low level routine that implements the
userfaultfd UFFDIO_COPY command.  It is based on the existing
mcopy_atomic_pte routine with modifications for huge pages.

Link: http://lkml.kernel.org/r/20161216144821.5183-18-aarcange@redhat.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 48c76d612d40..aab2fff3e269 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -81,6 +81,11 @@ void hugetlb_show_meminfo(void);
 unsigned long hugetlb_total_pages(void);
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, unsigned int flags);
+int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
+				struct vm_area_struct *dst_vma,
+				unsigned long dst_addr,
+				unsigned long src_addr,
+				struct page **pagep);
 int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						struct vm_area_struct *vma,
 						vm_flags_t vm_flags);
@@ -149,6 +154,8 @@ static inline void hugetlb_show_meminfo(void)
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
+#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
+				src_addr, pagep)	({ BUG(); 0; })
 #define huge_pte_offset(mm, address)	0
 static inline int dequeue_hwpoisoned_huge_page(struct page *page)
 {
-- 
cgit v1.2.3


From 810a56b943e265bbabfcd5a8e54cb8d3b16cd6e4 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Wed, 22 Feb 2017 15:42:58 -0800
Subject: userfaultfd: hugetlbfs: fix __mcopy_atomic_hugetlb retry/error
 processing

The new routine copy_huge_page_from_user() uses kmap_atomic() to map
PAGE_SIZE pages.  However, this prevents page faults in the subsequent
call to copy_from_user().  This is OK in the case where the routine is
copied with mmap_sema held.  However, in another case we want to allow
page faults.  So, add a new argument allow_pagefault to indicate if the
routine should allow page faults.

[dan.carpenter@oracle.com: unmap the correct pointer]
  Link: http://lkml.kernel.org/r/20170113082608.GA3548@mwanda
[akpm@linux-foundation.org: kunmap() takes a page*, per Hugh]
Link: http://lkml.kernel.org/r/20161216144821.5183-20-aarcange@redhat.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6bc7f2c1a6d1..c3e2be2b3296 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2426,7 +2426,8 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned int pages_per_huge_page);
 extern long copy_huge_page_from_user(struct page *dst_page,
 				const void __user *usr_src,
-				unsigned int pages_per_huge_page);
+				unsigned int pages_per_huge_page,
+				bool allow_pagefault);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 extern struct page_ext_operations debug_guardpage_ops;
-- 
cgit v1.2.3


From 87ffc118b54dcd4cc642723603d944673248152f Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Wed, 22 Feb 2017 15:43:13 -0800
Subject: userfaultfd: hugetlbfs: gup: support VM_FAULT_RETRY

Add support for VM_FAULT_RETRY to follow_hugetlb_page() so that
get_user_pages_unlocked/locked and "nonblocking/FOLL_NOWAIT" features
will work on hugetlbfs.

This is required for fully functional userfaultfd non-present support on
hugetlbfs.

Link: http://lkml.kernel.org/r/20161216144821.5183-25-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index aab2fff3e269..503099d8aada 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -65,7 +65,8 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			 struct page **, struct vm_area_struct **,
-			 unsigned long *, unsigned long *, long, unsigned int);
+			 unsigned long *, unsigned long *, long, unsigned int,
+			 int *);
 void unmap_hugepage_range(struct vm_area_struct *,
 			  unsigned long, unsigned long, struct page *);
 void __unmap_hugepage_range_final(struct mmu_gather *tlb,
@@ -136,7 +137,7 @@ static inline unsigned long hugetlb_total_pages(void)
 	return 0;
 }
 
-#define follow_hugetlb_page(m,v,p,vs,a,b,i,w)	({ BUG(); 0; })
+#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n)	({ BUG(); 0; })
 #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 static inline void hugetlb_report_meminfo(struct seq_file *m)
-- 
cgit v1.2.3


From 4c27fe4c4c84f3afd504ecff2420cc1ad420d38e Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Wed, 22 Feb 2017 15:43:25 -0800
Subject: userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd
 support

shmem_mcopy_atomic_pte is the low level routine that implements the
userfaultfd UFFDIO_COPY command.  It is based on the existing
mcopy_atomic_pte routine with modifications for shared memory pages.

Link: http://lkml.kernel.org/r/20161216144821.5183-29-aarcange@redhat.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shmem_fs.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index ff078e7043b6..fdaac9d4d46d 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -124,4 +124,15 @@ static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
 }
 #endif
 
+#ifdef CONFIG_SHMEM
+extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+				  struct vm_area_struct *dst_vma,
+				  unsigned long dst_addr,
+				  unsigned long src_addr,
+				  struct page **pagep);
+#else
+#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
+			       src_addr, pagep)        ({ BUG(); 0; })
+#endif
+
 #endif
-- 
cgit v1.2.3


From b0506e488da5cf2f07f3a4f6d7acaa8f459ad714 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Wed, 22 Feb 2017 15:43:28 -0800
Subject: userfaultfd: shmem: introduce vma_is_shmem

Currently userfault relies on vma_is_anonymous and vma_is_hugetlb to
ensure compatibility of a VMA with userfault.  Introduction of
vma_is_shmem allows detection if tmpfs backed VMAs, so that they may be
used with userfaultfd.  Current implementation presumes usage of
vma_is_shmem only by slow path routines in userfaultfd, therefore the
vma_is_shmem is not made inline to leave the few remaining free bits in
vm_flags.

Link: http://lkml.kernel.org/r/20161216144821.5183-30-aarcange@redhat.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c3e2be2b3296..bb997493e15d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1383,6 +1383,16 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
 	return !vma->vm_ops;
 }
 
+#ifdef CONFIG_SHMEM
+/*
+ * The vma_is_shmem is not inline because it is used only by slow
+ * paths in userfault.
+ */
+bool vma_is_shmem(struct vm_area_struct *vma);
+#else
+static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+#endif
+
 static inline int stack_guard_page_start(struct vm_area_struct *vma,
 					     unsigned long addr)
 {
-- 
cgit v1.2.3


From 74d81bfae8e3f52e956367f6ed764db269b87091 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 22 Feb 2017 15:44:41 -0800
Subject: mm: un-export wake_up_page functions

These are no longer used outside mm/filemap.c, so un-export them and
make them static where possible.  These were exported specifically for
NFS use in commit a4796e37c12e ("MM: export page_wakeup functions").

Link: http://lkml.kernel.org/r/20170103182234.30141-3-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 324c8dbad1e1..b572f5530392 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -482,19 +482,11 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
 }
 
 /*
- * This is exported only for wait_on_page_locked/wait_on_page_writeback,
- * and for filesystems which need to wait on PG_private.
+ * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
+ * and should not be used directly.
  */
 extern void wait_on_page_bit(struct page *page, int bit_nr);
 extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
-extern void wake_up_page_bit(struct page *page, int bit_nr);
-
-static inline void wake_up_page(struct page *page, int bit)
-{
-	if (!PageWaiters(page))
-		return;
-	wake_up_page_bit(page, bit);
-}
 
 /* 
  * Wait for a page to be unlocked.
-- 
cgit v1.2.3


From 7f354a548d1cb6bb01b6ee74aee9264aa152f1ec Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 22 Feb 2017 15:44:50 -0800
Subject: mm, compaction: add vmstats for kcompactd work

A "compact_daemon_wake" vmstat exists that represents the number of
times kcompactd has woken up.  This doesn't represent how much work it
actually did, though.

It's useful to understand how much compaction work is being done by
kcompactd versus other methods such as direct compaction and explicitly
triggered per-node (or system) compaction.

This adds two new vmstats: "compact_daemon_migrate_scanned" and
"compact_daemon_free_scanned" to represent the number of pages kcompactd
has scanned as part of its migration scanner and freeing scanner,
respectively.

These values are still accounted for in the general
"compact_migrate_scanned" and "compact_free_scanned" for compatibility.

It could be argued that explicitly triggered compaction could also be
tracked separately, and that could be added if others find it useful.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1612071749390.69852@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 4d6ec58a8d45..6aa1b6cb5828 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -56,6 +56,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		COMPACTISOLATED,
 		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 		KCOMPACTD_WAKE,
+		KCOMPACTD_MIGRATE_SCANNED, KCOMPACTD_FREE_SCANNED,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
 		HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
-- 
cgit v1.2.3


From b92df1de5d289c0b5d653e72414bf0850b8511e0 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Wed, 22 Feb 2017 15:44:53 -0800
Subject: mm: page_alloc: skip over regions of invalid pfns where possible

When using a sparse memory model memmap_init_zone() when invoked with
the MEMMAP_EARLY context will skip over pages which aren't valid - ie.
which aren't in a populated region of the sparse memory map.  However if
the memory map is extremely sparse then it can spend a long time
linearly checking each PFN in a large non-populated region of the memory
map & skipping it in turn.

When CONFIG_HAVE_MEMBLOCK_NODE_MAP is enabled, we have sufficient
information to quickly discover the next valid PFN given an invalid one
by searching through the list of memory regions & skipping forwards to
the first PFN covered by the memory region to the right of the
non-populated region.  Implement this in order to speed up
memmap_init_zone() for systems with extremely sparse memory maps.

James said "I have tested this patch on a virtual model of a Samurai CPU
with a sparse memory map.  The kernel boot time drops from 109 to
62 seconds. "

Link: http://lkml.kernel.org/r/20161125185518.29885-1-paul.burton@imgtec.com
Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Tested-by: James Hartley <james.hartley@imgtec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5b759c9acf97..38bcf00cbed3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -203,6 +203,7 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
 			    unsigned long  *end_pfn);
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 			  unsigned long *out_end_pfn, int *out_nid);
+unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
 
 /**
  * for_each_mem_pfn_range - early memory pfn range iterator
-- 
cgit v1.2.3


From 16e72e9b30986ee15f17fbb68189ca842c32af58 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Wed, 22 Feb 2017 15:45:16 -0800
Subject: powerpc: do not make the entire heap executable

On 32-bit powerpc the ELF PLT sections of binaries (built with
--bss-plt, or with a toolchain which defaults to it) look like this:

  [17] .sbss             NOBITS          0002aff8 01aff8 000014 00  WA  0   0  4
  [18] .plt              NOBITS          0002b00c 01aff8 000084 00 WAX  0   0  4
  [19] .bss              NOBITS          0002b090 01aff8 0000a4 00  WA  0   0  4

Which results in an ELF load header:

  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  LOAD           0x019c70 0x00029c70 0x00029c70 0x01388 0x014c4 RWE 0x10000

This is all correct, the load region containing the PLT is marked as
executable.  Note that the PLT starts at 0002b00c but the file mapping
ends at 0002aff8, so the PLT falls in the 0 fill section described by
the load header, and after a page boundary.

Unfortunately the generic ELF loader ignores the X bit in the load
headers when it creates the 0 filled non-file backed mappings.  It
assumes all of these mappings are RW BSS sections, which is not the case
for PPC.

gcc/ld has an option (--secure-plt) to not do this, this is said to
incur a small performance penalty.

Currently, to support 32-bit binaries with PLT in BSS kernel maps
*entire brk area* with executable rights for all binaries, even
--secure-plt ones.

Stop doing that.

Teach the ELF loader to check the X bit in the relevant load header and
create 0 filled anonymous mappings that are executable if the load
header requests that.

Test program showing the difference in /proc/$PID/maps:

int main() {
	char buf[16*1024];
	char *p = malloc(123); /* make "[heap]" mapping appear */
	int fd = open("/proc/self/maps", O_RDONLY);
	int len = read(fd, buf, sizeof(buf));
	write(1, buf, len);
	printf("%p\n", p);
	return 0;
}

Compiled using: gcc -mbss-plt -m32 -Os test.c -otest

Unpatched ppc64 kernel:
00100000-00120000 r-xp 00000000 00:00 0                                  [vdso]
0fe10000-0ffd0000 r-xp 00000000 fd:00 67898094                           /usr/lib/libc-2.17.so
0ffd0000-0ffe0000 r--p 001b0000 fd:00 67898094                           /usr/lib/libc-2.17.so
0ffe0000-0fff0000 rw-p 001c0000 fd:00 67898094                           /usr/lib/libc-2.17.so
10000000-10010000 r-xp 00000000 fd:00 100674505                          /home/user/test
10010000-10020000 r--p 00000000 fd:00 100674505                          /home/user/test
10020000-10030000 rw-p 00010000 fd:00 100674505                          /home/user/test
10690000-106c0000 rwxp 00000000 00:00 0                                  [heap]
f7f70000-f7fa0000 r-xp 00000000 fd:00 67898089                           /usr/lib/ld-2.17.so
f7fa0000-f7fb0000 r--p 00020000 fd:00 67898089                           /usr/lib/ld-2.17.so
f7fb0000-f7fc0000 rw-p 00030000 fd:00 67898089                           /usr/lib/ld-2.17.so
ffa90000-ffac0000 rw-p 00000000 00:00 0                                  [stack]
0x10690008

Patched ppc64 kernel:
00100000-00120000 r-xp 00000000 00:00 0                                  [vdso]
0fe10000-0ffd0000 r-xp 00000000 fd:00 67898094                           /usr/lib/libc-2.17.so
0ffd0000-0ffe0000 r--p 001b0000 fd:00 67898094                           /usr/lib/libc-2.17.so
0ffe0000-0fff0000 rw-p 001c0000 fd:00 67898094                           /usr/lib/libc-2.17.so
10000000-10010000 r-xp 00000000 fd:00 100674505                          /home/user/test
10010000-10020000 r--p 00000000 fd:00 100674505                          /home/user/test
10020000-10030000 rw-p 00010000 fd:00 100674505                          /home/user/test
10180000-101b0000 rw-p 00000000 00:00 0                                  [heap]
                  ^^^^ this has changed
f7c60000-f7c90000 r-xp 00000000 fd:00 67898089                           /usr/lib/ld-2.17.so
f7c90000-f7ca0000 r--p 00020000 fd:00 67898089                           /usr/lib/ld-2.17.so
f7ca0000-f7cb0000 rw-p 00030000 fd:00 67898089                           /usr/lib/ld-2.17.so
ff860000-ff890000 rw-p 00000000 00:00 0                                  [stack]
0x10180008

The patch was originally posted in 2012 by Jason Gunthorpe
and apparently ignored:

https://lkml.org/lkml/2012/9/30/138

Lightly run-tested.

Link: http://lkml.kernel.org/r/20161215131950.23054-1-dvlasenk@redhat.com
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Tested-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bb997493e15d..dae6f58d67c8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2083,6 +2083,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {}
 
 /* These take the mm semaphore themselves */
 extern int __must_check vm_brk(unsigned long, unsigned long);
+extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
 extern int vm_munmap(unsigned long, size_t);
 extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
         unsigned long, unsigned long,
-- 
cgit v1.2.3


From 235b62176712b970c815923e36b9a9cc05d4d901 Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 22 Feb 2017 15:45:22 -0800
Subject: mm/swap: add cluster lock

This patch is to reduce the lock contention of swap_info_struct->lock
via using a more fine grained lock in swap_cluster_info for some swap
operations.  swap_info_struct->lock is heavily contended if multiple
processes reclaim pages simultaneously.  Because there is only one lock
for each swap device.  While in common configuration, there is only one
or several swap devices in the system.  The lock protects almost all
swap related operations.

In fact, many swap operations only access one element of
swap_info_struct->swap_map array.  And there is no dependency between
different elements of swap_info_struct->swap_map.  So a fine grained
lock can be used to allow parallel access to the different elements of
swap_info_struct->swap_map.

In this patch, a spinlock is added to swap_cluster_info to protect the
elements of swap_info_struct->swap_map in the swap cluster and the
fields of swap_cluster_info.  This reduced locking contention for
swap_info_struct->swap_map access greatly.

Because of the added spinlock, the size of swap_cluster_info increases
from 4 bytes to 8 bytes on the 64 bit and 32 bit system.  This will use
additional 4k RAM for every 1G swap space.

Because the size of swap_cluster_info is much smaller than the size of
the cache line (8 vs 64 on x86_64 architecture), there may be false
cache line sharing between spinlocks in swap_cluster_info.  To avoid the
false sharing in the first round of the swap cluster allocation, the
order of the swap clusters in the free clusters list is changed.  So
that, the swap_cluster_info sharing the same cache line will be placed
as far as possible.  After the first round of allocation, the order of
the clusters in free clusters list is expected to be random.  So the
false sharing should be not serious.

Compared with a previous implementation using bit_spin_lock, the
sequential swap out throughput improved about 3.2%.  Test was done on a
Xeon E5 v3 system.  The swap device used is a RAM simulated PMEM
(persistent memory) device.  To test the sequential swapping out, the
test case created 32 processes, which sequentially allocate and write to
the anonymous pages until the RAM and part of the swap device is used.

[ying.huang@intel.com: v5]
  Link: http://lkml.kernel.org/r/878tqeuuic.fsf_-_@yhuang-dev.intel.com
[minchan@kernel.org: initialize spinlock for swap_cluster_info]
  Link: http://lkml.kernel.org/r/1486434945-29753-1-git-send-email-minchan@kernel.org
[hughd@google.com: annotate nested locking for cluster lock]
  Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1702161050540.21773@eggly.anvils
Link: http://lkml.kernel.org/r/dbb860bbd825b1aaba18988015e8963f263c3f0d.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net> escreveu:
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7f47b7098b1b..279c7b84bd63 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -176,6 +176,12 @@ enum {
  * protected by swap_info_struct.lock.
  */
 struct swap_cluster_info {
+	spinlock_t lock;	/*
+				 * Protect swap_cluster_info fields
+				 * and swap_info_struct->swap_map
+				 * elements correspond to the swap
+				 * cluster
+				 */
 	unsigned int data:24;
 	unsigned int flags:8;
 };
-- 
cgit v1.2.3


From 4b3ef9daa4fc0bba742a79faecb17fdaaead083b Mon Sep 17 00:00:00 2001
From: "Huang, Ying" <ying.huang@intel.com>
Date: Wed, 22 Feb 2017 15:45:26 -0800
Subject: mm/swap: split swap cache into 64MB trunks

The patch is to improve the scalability of the swap out/in via using
fine grained locks for the swap cache.  In current kernel, one address
space will be used for each swap device.  And in the common
configuration, the number of the swap device is very small (one is
typical).  This causes the heavy lock contention on the radix tree of
the address space if multiple tasks swap out/in concurrently.

But in fact, there is no dependency between pages in the swap cache.  So
that, we can split the one shared address space for each swap device
into several address spaces to reduce the lock contention.  In the
patch, the shared address space is split into 64MB trunks.  64MB is
chosen to balance the memory space usage and effect of lock contention
reduction.

The size of struct address_space on x86_64 architecture is 408B, so with
the patch, 6528B more memory will be used for every 1GB swap space on
x86_64 architecture.

One address space is still shared for the swap entries in the same 64M
trunks.  To avoid lock contention for the first round of swap space
allocation, the order of the swap clusters in the initial free clusters
list is changed.  The swap space distance between the consecutive swap
clusters in the free cluster list is at least 64M.  After the first
round of allocation, the swap clusters are expected to be freed
randomly, so the lock contention should be reduced effectively.

Link: http://lkml.kernel.org/r/735bab895e64c930581ffb0a05b661e01da82bc5.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net> escreveu:
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 279c7b84bd63..648a32b58e3e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -343,8 +343,13 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
 		sector_t *);
 
 /* linux/mm/swap_state.c */
-extern struct address_space swapper_spaces[];
-#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
+/* One swap address space for each 64M swap space */
+#define SWAP_ADDRESS_SPACE_SHIFT	14
+#define SWAP_ADDRESS_SPACE_PAGES	(1 << SWAP_ADDRESS_SPACE_SHIFT)
+extern struct address_space *swapper_spaces[];
+#define swap_address_space(entry)			    \
+	(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
+		>> SWAP_ADDRESS_SPACE_SHIFT])
 extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *, struct list_head *list);
@@ -398,6 +403,8 @@ extern struct swap_info_struct *page_swap_info(struct page *);
 extern bool reuse_swap_page(struct page *, int *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
+extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
+extern void exit_swap_address_space(unsigned int type);
 
 #else /* CONFIG_SWAP */
 
-- 
cgit v1.2.3


From e8c26ab60598558ec3a626e7925b06e7417d7710 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Wed, 22 Feb 2017 15:45:29 -0800
Subject: mm/swap: skip readahead for unreferenced swap slots

We can avoid needlessly allocating page for swap slots that are not used
by anyone.  No pages have to be read in for these slots.

Link: http://lkml.kernel.org/r/0784b3f20b9bd3aa5552219624cb78dc4ae710c9.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net> escreveu:
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 648a32b58e3e..3116382067cd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -398,6 +398,7 @@ extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct page *, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
 extern int page_swapcount(struct page *);
+extern int __swp_swapcount(swp_entry_t entry);
 extern int swp_swapcount(swp_entry_t entry);
 extern struct swap_info_struct *page_swap_info(struct page *);
 extern bool reuse_swap_page(struct page *, int *);
@@ -492,6 +493,11 @@ static inline int page_swapcount(struct page *page)
 	return 0;
 }
 
+static inline int __swp_swapcount(swp_entry_t entry)
+{
+	return 0;
+}
+
 static inline int swp_swapcount(swp_entry_t entry)
 {
 	return 0;
-- 
cgit v1.2.3


From 36005bae205da3eef0016a5c96a34f10a68afa1e Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Wed, 22 Feb 2017 15:45:33 -0800
Subject: mm/swap: allocate swap slots in batches

Currently, the swap slots are allocated one page at a time, causing
contention to the swap_info lock protecting the swap partition on every
page being swapped.

This patch adds new functions get_swap_pages and scan_swap_map_slots to
request multiple swap slots at once.  This will reduces the lock
contention on the swap_info lock.  Also scan_swap_map_slots can operate
more efficiently as swap slots often occurs in clusters close to each
other on a swap device and it is quicker to allocate them together.

Link: http://lkml.kernel.org/r/9fec2845544371f62c3763d43510045e33d286a6.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net> escreveu:
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3116382067cd..956eae8a8edf 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -27,6 +27,7 @@ struct bio;
 #define SWAP_FLAGS_VALID	(SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
 				 SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
 				 SWAP_FLAG_DISCARD_PAGES)
+#define SWAP_BATCH 64
 
 static inline int current_is_kswapd(void)
 {
@@ -386,6 +387,7 @@ static inline long get_nr_swap_pages(void)
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
 extern swp_entry_t get_swap_page_of_type(int);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
-- 
cgit v1.2.3


From 7c00bafee87c7bac7ed9eced7c161f8e5332cb4e Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Wed, 22 Feb 2017 15:45:36 -0800
Subject: mm/swap: free swap slots in batch

Add new functions that free unused swap slots in batches without the
need to reacquire swap info lock.  This improves scalability and reduce
lock contention.

Link: http://lkml.kernel.org/r/c25e0fcdfd237ec4ca7db91631d3b9f6ed23824e.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net> escreveu:
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 956eae8a8edf..bcc0b18f96d2 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -394,6 +394,7 @@ extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
 extern void swapcache_free(swp_entry_t);
+extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
-- 
cgit v1.2.3


From 67afa38e012e9581b9b42f2a41dfc56b1280794d Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Wed, 22 Feb 2017 15:45:39 -0800
Subject: mm/swap: add cache for swap slots allocation

We add per cpu caches for swap slots that can be allocated and freed
quickly without the need to touch the swap info lock.

Two separate caches are maintained for swap slots allocated and swap
slots returned.  This is to allow the swap slots to be returned to the
global pool in a batch so they will have a chance to be coaelesced with
other slots in a cluster.  We do not reuse the slots that are returned
right away, as it may increase fragmentation of the slots.

The swap allocation cache is protected by a mutex as we may sleep when
searching for empty slots in cache.  The swap free cache is protected by
a spin lock as we cannot sleep in the free path.

We refill the swap slots cache when we run out of slots, and we disable
the swap slots cache and drain the slots if the global number of slots
fall below a low watermark threshold.  We re-enable the cache agian when
the slots available are above a high watermark.

[ying.huang@intel.com: use raw_cpu_ptr over this_cpu_ptr for swap slots access]
[tim.c.chen@linux.intel.com: add comments on locks in swap_slots.h]
  Link: http://lkml.kernel.org/r/20170118180327.GA24225@linux.intel.com
Link: http://lkml.kernel.org/r/35de301a4eaa8daa2977de6e987f2c154385eb66.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net> escreveu:
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h       |  4 ++++
 include/linux/swap_slots.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 include/linux/swap_slots.h

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index bcc0b18f96d2..45e91dd6716d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -372,6 +372,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
+extern bool has_usable_swap(void);
 
 /* Swap 50% full? Release swapcache more aggressively.. */
 static inline bool vm_swap_full(void)
@@ -410,6 +411,9 @@ struct backing_dev_info;
 extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
 extern void exit_swap_address_space(unsigned int type);
 
+extern int get_swap_slots(int n, swp_entry_t *slots);
+extern void swapcache_free_batch(swp_entry_t *entries, int n);
+
 #else /* CONFIG_SWAP */
 
 #define swap_address_space(entry)		(NULL)
diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h
new file mode 100644
index 000000000000..ba5623b27c60
--- /dev/null
+++ b/include/linux/swap_slots.h
@@ -0,0 +1,28 @@
+#ifndef _LINUX_SWAP_SLOTS_H
+#define _LINUX_SWAP_SLOTS_H
+
+#include <linux/swap.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+
+#define SWAP_SLOTS_CACHE_SIZE			SWAP_BATCH
+#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE	(5*SWAP_SLOTS_CACHE_SIZE)
+#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE	(2*SWAP_SLOTS_CACHE_SIZE)
+
+struct swap_slots_cache {
+	bool		lock_initialized;
+	struct mutex	alloc_lock; /* protects slots, nr, cur */
+	swp_entry_t	*slots;
+	int		nr;
+	int		cur;
+	spinlock_t	free_lock;  /* protects slots_ret, n_ret */
+	swp_entry_t	*slots_ret;
+	int		n_ret;
+};
+
+void disable_swap_slots_cache_lock(void);
+void reenable_swap_slots_cache_unlock(void);
+int enable_swap_slots_cache(void);
+int free_swap_slot(swp_entry_t entry);
+
+#endif /* _LINUX_SWAP_SLOTS_H */
-- 
cgit v1.2.3


From ba81f83842549871cbd7226fc11530dc464500bb Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Wed, 22 Feb 2017 15:45:46 -0800
Subject: mm/swap: skip readahead only when swap slot cache is enabled

Because during swap off, a swap entry may have swap_map[] ==
SWAP_HAS_CACHE (for example, just allocated).  If we return NULL in
__read_swap_cache_async(), the swap off will abort.  So when swap slot
cache is disabled, (for swap off), we will wait for page to be put into
swap cache in such race condition.  This should not be a problem for swap
slot cache, because swap slot cache should be drained after clearing
swap_slot_cache_enabled.

[ying.huang@intel.com: fix memory leak in __read_swap_cache_async()]
  Link: http://lkml.kernel.org/r/874lzt6znd.fsf@yhuang-dev.intel.com
Link: http://lkml.kernel.org/r/5e2c5f6abe8e6eb0797408897b1bba80938e9b9d.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net> escreveu:
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap_slots.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h
index ba5623b27c60..6ef92d17633d 100644
--- a/include/linux/swap_slots.h
+++ b/include/linux/swap_slots.h
@@ -25,4 +25,6 @@ void reenable_swap_slots_cache_unlock(void);
 int enable_swap_slots_cache(void);
 int free_swap_slot(swp_entry_t entry);
 
+extern bool swap_slot_cache_enabled;
+
 #endif /* _LINUX_SWAP_SLOTS_H */
-- 
cgit v1.2.3


From 21440d7eb9044001b7fdb71d0163689f60a0f2a1 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 22 Feb 2017 15:45:49 -0800
Subject: mm, thp: add new defer+madvise defrag option

There is no thp defrag option that currently allows MADV_HUGEPAGE
regions to do direct compaction and reclaim while all other thp
allocations simply trigger kswapd and kcompactd in the background and
fail immediately.

The "defer" setting simply triggers background reclaim and compaction
for all regions, regardless of MADV_HUGEPAGE, which makes it unusable
for our userspace where MADV_HUGEPAGE is being used to indicate the
application is willing to wait for work for thp memory to be available.

The "madvise" setting will do direct compaction and reclaim for these
MADV_HUGEPAGE regions, but does not trigger kswapd and kcompactd in the
background for anybody else.

For reasonable usage, there needs to be a mesh between the two options.
This patch introduces a fifth mode, "defer+madvise", that will do direct
reclaim and compaction for MADV_HUGEPAGE regions and trigger background
reclaim and compaction for everybody else so that hugepages may be
available in the near future.

A proposal to allow direct reclaim and compaction for MADV_HUGEPAGE
regions as part of the "defer" mode, making it a very powerful setting
and avoids breaking userspace, was offered:
     http://marc.info/?t=148236612700003
This additional mode is a compromise.

A second proposal to allow both "defer" and "madvise" to be selected at
the same time was also offered:
     http://marc.info/?t=148357345300001.
This is possible, but there was a concern that it might break existing
userspaces the parse the output of the defrag mode, so the fifth option
was introduced instead.

This patch also cleans up the helper function for storing to "enabled"
and "defrag" since the former supports three modes while the latter
supports five and triple_flag_store() was getting unnecessarily messy.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1701101614330.41805@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 97e478d6b690..f0029e786205 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -33,6 +33,7 @@ enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
 	TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
 	TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
+	TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
 	TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
 	TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
 	TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
-- 
cgit v1.2.3


From fd538803731e50367b7c59ce4ad3454426a3d671 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 22 Feb 2017 15:45:58 -0800
Subject: mm, vmscan: cleanup lru size claculations

lruvec_lru_size returns the full size of the LRU list while we sometimes
need a value reduced only to eligible zones (e.g.  for lowmem requests).
inactive_list_is_low is one such user.  Later patches will add more of
them.  Add a new parameter to lruvec_lru_size and allow it filter out
zones which are not eligible for the given context.

Link: http://lkml.kernel.org/r/20170117103702.28542-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f4aac87adcc3..82fc632fd11d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -779,7 +779,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
 #endif
 }
 
-extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru);
+extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
 
 #ifdef CONFIG_HAVE_MEMORY_PRESENT
 void memory_present(int nid, unsigned long start, unsigned long end);
-- 
cgit v1.2.3


From a8e99259e7e32b67af2b447f0a570813c0c283ec Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 22 Feb 2017 15:46:10 -0800
Subject: mm, page_alloc: warn_alloc print nodemask

warn_alloc is currently used for to report an allocation failure or an
allocation stall.  We print some details of the allocation request like
the gfp mask and the request order.  We do not print the allocation
nodemask which is important when debugging the reason for the allocation
failure as well.  We alreaddy print the nodemask in the OOM report.

Add nodemask to warn_alloc and print it in warn_alloc as well.

Link: http://lkml.kernel.org/r/20170117091543.25850-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dae6f58d67c8..28b6c3f8a7f3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1942,8 +1942,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern unsigned long arch_reserved_kernel_pages(void);
 #endif
 
-extern __printf(2, 3)
-void warn_alloc(gfp_t gfp_mask, const char *fmt, ...);
+extern __printf(3, 4)
+void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
 
 extern void setup_per_cpu_pageset(void);
 
-- 
cgit v1.2.3


From 9af744d743170b5f5ef70031dea8d772d166ab28 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 22 Feb 2017 15:46:16 -0800
Subject: lib/show_mem.c: teach show_mem to work with the given nodemask

show_mem() allows to filter out node specific data which is irrelevant
to the allocation request via SHOW_MEM_FILTER_NODES.  The filtering is
done in skip_free_areas_node which skips all nodes which are not in the
mems_allowed of the current process.  This works most of the time as
expected because the nodemask shouldn't be outside of the allocating
task but there are some exceptions.  E.g.  memory hotplug might want to
request allocations from outside of the allowed nodes (see
new_node_page).

Get rid of this hardcoded behavior and push the allocation mask down the
show_mem path and use it instead of cpuset_current_mems_allowed.  NULL
nodemask is interpreted as cpuset_current_mems_allowed.

[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/20170117091543.25850-5-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 28b6c3f8a7f3..8a67cae5a07c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1152,8 +1152,7 @@ extern void pagefault_out_of_memory(void);
  */
 #define SHOW_MEM_FILTER_NODES		(0x0001u)	/* disallowed nodes */
 
-extern void show_free_areas(unsigned int flags);
-extern bool skip_free_areas_node(unsigned int flags, int nid);
+extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
 
 int shmem_zero_setup(struct vm_area_struct *);
 #ifdef CONFIG_SHMEM
@@ -1934,7 +1933,7 @@ extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
 extern void __init mmap_init(void);
-extern void show_mem(unsigned int flags);
+extern void show_mem(unsigned int flags, nodemask_t *nodemask);
 extern long si_mem_available(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
-- 
cgit v1.2.3


From da162e9368990ed747075e2ab427da0759fc4a59 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 22 Feb 2017 15:46:31 -0800
Subject: mm: drop zap_details::ignore_dirty

The only user of ignore_dirty is oom-reaper.  But it doesn't really use
it.

ignore_dirty only has effect on file pages mapped with dirty pte.  But
oom-repear skips shared VMAs, so there's no way we can dirty file pte in
them.

Link: http://lkml.kernel.org/r/20170118122429.43661-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8a67cae5a07c..eae478a42f26 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1175,7 +1175,6 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
-	bool ignore_dirty;			/* Ignore dirty pages */
 	bool check_swap_entries;		/* Check also swap entries */
 };
 
-- 
cgit v1.2.3


From 3e8715fdc03e8df4d26d8e436166e44e3e416d3b Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 22 Feb 2017 15:46:34 -0800
Subject: mm: drop zap_details::check_swap_entries

detail == NULL would give the same functionality as
.check_swap_entries==true.

Link: http://lkml.kernel.org/r/20170118122429.43661-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index eae478a42f26..062936e8b832 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1175,7 +1175,6 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
-	bool check_swap_entries;		/* Check also swap entries */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
-- 
cgit v1.2.3


From ecf1385d72f0491400a8ceca7001196ca369aa8c Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 22 Feb 2017 15:46:37 -0800
Subject: mm: drop unused argument of zap_page_range()

There's no users of zap_page_range() who wants non-NULL 'details'.
Let's drop it.

Link: http://lkml.kernel.org/r/20170118122429.43661-3-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 062936e8b832..574bc157a27c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1185,7 +1185,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
 void zap_page_range(struct vm_area_struct *vma, unsigned long address,
-		unsigned long size, struct zap_details *);
+		unsigned long size);
 void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long start, unsigned long end);
 
-- 
cgit v1.2.3


From 083fb8edda0487d192e8c117f625563b920cf7a4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 22 Feb 2017 15:46:48 -0800
Subject: mm: fix <linux/pagemap.h> stray kernel-doc notation

Delete stray (second) function description in find_lock_page()
kernel-doc notation.

Note: scripts/kernel-doc just ignores the second function description.

Fixes: 2457aec63745e ("mm: non-atomically mark page accessed during page cache allocation where possible")
Link: http://lkml.kernel.org/r/b037e9a3-516c-ec02-6c8e-fa5479747ba6@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b572f5530392..84943e8057ef 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -266,7 +266,6 @@ static inline struct page *find_get_page_flags(struct address_space *mapping,
 
 /**
  * find_lock_page - locate, pin and lock a pagecache page
- * pagecache_get_page - find and get a page reference
  * @mapping: the address_space to search
  * @offset: the page index
  *
-- 
cgit v1.2.3


From 745d8ae4622c6808b22e33a944c7decb30074be4 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 23 Feb 2017 12:02:42 +0200
Subject: net/mlx4: Spoofcheck and zero MAC can't coexist

Spoofcheck can't be enabled if VF MAC is zero.
Vice versa, can't zero MAC if spoofcheck is on.

Fixes: 8f7ba3ca12f6 ('net/mlx4: Add set VF mac address support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/cmd.h    |  2 +-
 include/linux/mlx4/driver.h | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 1f3568694a57..7b74afcbbab2 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -308,7 +308,7 @@ int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index,
 int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx,
 		      struct ifla_vf_stats *vf_stats);
 u32 mlx4_comm_get_version(void);
-int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac);
+int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac);
 int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan,
 		     u8 qos, __be16 proto);
 int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate,
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index bd0e7075ea6d..e965e5090d96 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -104,4 +104,14 @@ static inline u64 mlx4_mac_to_u64(u8 *addr)
 	return mac;
 }
 
+static inline void mlx4_u64_to_mac(u8 *addr, u64 mac)
+{
+	int i;
+
+	for (i = ETH_ALEN; i > 0; i--) {
+		addr[i - 1] = mac && 0xFF;
+		mac >>= 8;
+	}
+}
+
 #endif /* MLX4_DRIVER_H */
-- 
cgit v1.2.3


From 7d6d15789d69856f1c5405e106a773c87277eb8c Mon Sep 17 00:00:00 2001
From: Scott Bauer <scott.bauer@intel.com>
Date: Wed, 22 Feb 2017 10:15:06 -0700
Subject: block/sed-opal: Introduce free_opal_dev to free the structure and
 clean up state

Before we free the opal structure we need to clean up any saved
locking ranges that the user had told us to unlock from a suspend.

Signed-off-by: Scott Bauer <scott.bauer@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/sed-opal.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index deee23d012e7..04b124fca51e 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -27,6 +27,7 @@ typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer,
 		size_t len, bool send);
 
 #ifdef CONFIG_BLK_SED_OPAL
+void free_opal_dev(struct opal_dev *dev);
 bool opal_unlock_from_suspend(struct opal_dev *dev);
 struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv);
 int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr);
@@ -51,6 +52,10 @@ static inline bool is_sed_ioctl(unsigned int cmd)
 	return false;
 }
 #else
+static inline void free_opal_dev(struct opal_dev *dev)
+{
+}
+
 static inline bool is_sed_ioctl(unsigned int cmd)
 {
 	return false;
-- 
cgit v1.2.3


From da55f2cc78418dee88400aafbbaed19d7ac8188e Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 22 Feb 2017 10:58:29 -0800
Subject: blk-mq: use sbq wait queues instead of restart for driver tags

Commit 50e1dab86aa2 ("blk-mq-sched: fix starvation for multiple hardware
queues and shared tags") fixed one starvation issue for shared tags.
However, we can still get into a situation where we fail to allocate a
tag because all tags are allocated but we don't have any pending
requests on any hardware queue.

One solution for this would be to restart all queues that share a tag
map, but that really sucks. Ideally, we could just block and wait for a
tag, but that isn't always possible from blk_mq_dispatch_rq_list().

However, we can still use the struct sbitmap_queue wait queues with a
custom callback instead of blocking. This has a few benefits:

1. It avoids iterating over all hardware queues when completing an I/O,
   which the current restart code has to do.
2. It benefits from the existing rolling wakeup code.
3. It avoids punting to another thread just to have it block.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-mq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8e4df3d6c8cd..001d30d727c5 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -33,6 +33,7 @@ struct blk_mq_hw_ctx {
 	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
 
+	wait_queue_t		dispatch_wait;
 	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
@@ -160,6 +161,7 @@ enum {
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
 	BLK_MQ_S_SCHED_RESTART	= 2,
+	BLK_MQ_S_TAG_WAITING	= 3,
 
 	BLK_MQ_MAX_DEPTH	= 10240,
 
-- 
cgit v1.2.3


From d08d1b27fe2a7f6923952613f5fab56ae47a6f5b Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 22 Feb 2017 13:58:52 +0530
Subject: PM / QoS: Remove global notifiers

They were never used in the kernel, so get rid of them.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_qos.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index d4d34791e463..3e2547d6e207 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -146,8 +146,6 @@ int dev_pm_qos_add_notifier(struct device *dev,
 			    struct notifier_block *notifier);
 int dev_pm_qos_remove_notifier(struct device *dev,
 			       struct notifier_block *notifier);
-int dev_pm_qos_add_global_notifier(struct notifier_block *notifier);
-int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier);
 void dev_pm_qos_constraints_init(struct device *dev);
 void dev_pm_qos_constraints_destroy(struct device *dev);
 int dev_pm_qos_add_ancestor_request(struct device *dev,
@@ -199,12 +197,6 @@ static inline int dev_pm_qos_add_notifier(struct device *dev,
 static inline int dev_pm_qos_remove_notifier(struct device *dev,
 					     struct notifier_block *notifier)
 			{ return 0; }
-static inline int dev_pm_qos_add_global_notifier(
-					struct notifier_block *notifier)
-			{ return 0; }
-static inline int dev_pm_qos_remove_global_notifier(
-					struct notifier_block *notifier)
-			{ return 0; }
 static inline void dev_pm_qos_constraints_init(struct device *dev)
 {
 	dev->power.power_state = PMSG_ON;
-- 
cgit v1.2.3


From 29dee3c03abce04cd527878ef5f9e5f91b7b83f4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 10 Feb 2017 16:27:52 +0100
Subject: locking/refcounts: Out-of-line everything

Linus asked to please make this real C code.

And since size then isn't an issue what so ever anymore, remove the
debug knob and make all WARN()s unconditional.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dwindsor@gmail.com
Cc: elena.reshetova@intel.com
Cc: gregkh@linuxfoundation.org
Cc: ishkamiel@gmail.com
Cc: keescook@chromium.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/refcount.h | 277 ++---------------------------------------------
 1 file changed, 12 insertions(+), 265 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 600aadf9cca4..0e8cfb2ce91e 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -1,55 +1,10 @@
 #ifndef _LINUX_REFCOUNT_H
 #define _LINUX_REFCOUNT_H
 
-/*
- * Variant of atomic_t specialized for reference counts.
- *
- * The interface matches the atomic_t interface (to aid in porting) but only
- * provides the few functions one should use for reference counting.
- *
- * It differs in that the counter saturates at UINT_MAX and will not move once
- * there. This avoids wrapping the counter and causing 'spurious'
- * use-after-free issues.
- *
- * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
- * and provide only what is strictly required for refcounts.
- *
- * The increments are fully relaxed; these will not provide ordering. The
- * rationale is that whatever is used to obtain the object we're increasing the
- * reference count on will provide the ordering. For locked data structures,
- * its the lock acquire, for RCU/lockless data structures its the dependent
- * load.
- *
- * Do note that inc_not_zero() provides a control dependency which will order
- * future stores against the inc, this ensures we'll never modify the object
- * if we did not in fact acquire a reference.
- *
- * The decrements will provide release order, such that all the prior loads and
- * stores will be issued before, it also provides a control dependency, which
- * will order us against the subsequent free().
- *
- * The control dependency is against the load of the cmpxchg (ll/sc) that
- * succeeded. This means the stores aren't fully ordered, but this is fine
- * because the 1->0 transition indicates no concurrency.
- *
- * Note that the allocator is responsible for ordering things between free()
- * and alloc().
- *
- */
-
 #include <linux/atomic.h>
-#include <linux/bug.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 
-#ifdef CONFIG_DEBUG_REFCOUNT
-#define REFCOUNT_WARN(cond, str) WARN_ON(cond)
-#define __refcount_check	__must_check
-#else
-#define REFCOUNT_WARN(cond, str) (void)(cond)
-#define __refcount_check
-#endif
-
 typedef struct refcount_struct {
 	atomic_t refs;
 } refcount_t;
@@ -66,229 +21,21 @@ static inline unsigned int refcount_read(const refcount_t *r)
 	return atomic_read(&r->refs);
 }
 
-static inline __refcount_check
-bool refcount_add_not_zero(unsigned int i, refcount_t *r)
-{
-	unsigned int old, new, val = atomic_read(&r->refs);
-
-	for (;;) {
-		if (!val)
-			return false;
-
-		if (unlikely(val == UINT_MAX))
-			return true;
-
-		new = val + i;
-		if (new < val)
-			new = UINT_MAX;
-		old = atomic_cmpxchg_relaxed(&r->refs, val, new);
-		if (old == val)
-			break;
-
-		val = old;
-	}
-
-	REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
-
-	return true;
-}
-
-static inline void refcount_add(unsigned int i, refcount_t *r)
-{
-	REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
-}
-
-/*
- * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
- *
- * Provides no memory ordering, it is assumed the caller has guaranteed the
- * object memory to be stable (RCU, etc.). It does provide a control dependency
- * and thereby orders future stores. See the comment on top.
- */
-static inline __refcount_check
-bool refcount_inc_not_zero(refcount_t *r)
-{
-	unsigned int old, new, val = atomic_read(&r->refs);
-
-	for (;;) {
-		new = val + 1;
-
-		if (!val)
-			return false;
-
-		if (unlikely(!new))
-			return true;
-
-		old = atomic_cmpxchg_relaxed(&r->refs, val, new);
-		if (old == val)
-			break;
-
-		val = old;
-	}
-
-	REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
-
-	return true;
-}
-
-/*
- * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
- *
- * Provides no memory ordering, it is assumed the caller already has a
- * reference on the object, will WARN when this is not so.
- */
-static inline void refcount_inc(refcount_t *r)
-{
-	REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
-}
-
-/*
- * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
- * decrement when saturated at UINT_MAX.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
- */
-static inline __refcount_check
-bool refcount_sub_and_test(unsigned int i, refcount_t *r)
-{
-	unsigned int old, new, val = atomic_read(&r->refs);
-
-	for (;;) {
-		if (unlikely(val == UINT_MAX))
-			return false;
-
-		new = val - i;
-		if (new > val) {
-			REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
-			return false;
-		}
-
-		old = atomic_cmpxchg_release(&r->refs, val, new);
-		if (old == val)
-			break;
-
-		val = old;
-	}
-
-	return !new;
-}
-
-static inline __refcount_check
-bool refcount_dec_and_test(refcount_t *r)
-{
-	return refcount_sub_and_test(1, r);
-}
+extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
+extern void refcount_add(unsigned int i, refcount_t *r);
 
-/*
- * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
- * when saturated at UINT_MAX.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before.
- */
-static inline
-void refcount_dec(refcount_t *r)
-{
-	REFCOUNT_WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
-}
-
-/*
- * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the
- * success thereof.
- *
- * Like all decrement operations, it provides release memory order and provides
- * a control dependency.
- *
- * It can be used like a try-delete operator; this explicit case is provided
- * and not cmpxchg in generic, because that would allow implementing unsafe
- * operations.
- */
-static inline __refcount_check
-bool refcount_dec_if_one(refcount_t *r)
-{
-	return atomic_cmpxchg_release(&r->refs, 1, 0) == 1;
-}
-
-/*
- * No atomic_t counterpart, it decrements unless the value is 1, in which case
- * it will return false.
- *
- * Was often done like: atomic_add_unless(&var, -1, 1)
- */
-static inline __refcount_check
-bool refcount_dec_not_one(refcount_t *r)
-{
-	unsigned int old, new, val = atomic_read(&r->refs);
+extern __must_check bool refcount_inc_not_zero(refcount_t *r);
+extern void refcount_inc(refcount_t *r);
 
-	for (;;) {
-		if (unlikely(val == UINT_MAX))
-			return true;
+extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
+extern void refcount_sub(unsigned int i, refcount_t *r);
 
-		if (val == 1)
-			return false;
+extern __must_check bool refcount_dec_and_test(refcount_t *r);
+extern void refcount_dec(refcount_t *r);
 
-		new = val - 1;
-		if (new > val) {
-			REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
-			return true;
-		}
-
-		old = atomic_cmpxchg_release(&r->refs, val, new);
-		if (old == val)
-			break;
-
-		val = old;
-	}
-
-	return true;
-}
-
-/*
- * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail
- * to decrement when saturated at UINT_MAX.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
- */
-static inline __refcount_check
-bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock)
-{
-	if (refcount_dec_not_one(r))
-		return false;
-
-	mutex_lock(lock);
-	if (!refcount_dec_and_test(r)) {
-		mutex_unlock(lock);
-		return false;
-	}
-
-	return true;
-}
-
-/*
- * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to
- * decrement when saturated at UINT_MAX.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides a control dependency such that free() must come after.
- * See the comment on top.
- */
-static inline __refcount_check
-bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
-{
-	if (refcount_dec_not_one(r))
-		return false;
-
-	spin_lock(lock);
-	if (!refcount_dec_and_test(r)) {
-		spin_unlock(lock);
-		return false;
-	}
-
-	return true;
-}
+extern __must_check bool refcount_dec_if_one(refcount_t *r);
+extern __must_check bool refcount_dec_not_one(refcount_t *r);
+extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock);
+extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock);
 
 #endif /* _LINUX_REFCOUNT_H */
-- 
cgit v1.2.3


From 318b1dedcd39012624f466d281627553e9fa2570 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Thu, 23 Feb 2017 15:09:34 +0200
Subject: locking/refcounts: Add missing kernel.h header to have UINT_MAX
 defined

Fix header dependency.

Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1487855374-21993-1-git-send-email-elena.reshetova@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/refcount.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 0e8cfb2ce91e..0023fee4bbbc 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -4,6 +4,7 @@
 #include <linux/atomic.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
+#include <linux/kernel.h>
 
 typedef struct refcount_struct {
 	atomic_t refs;
-- 
cgit v1.2.3


From d1091c7fa3d52ebce4dd3f15d04155b3469b2f90 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 21 Feb 2017 15:35:32 -0600
Subject: objtool: Improve detection of BUG() and other dead ends

The BUG() macro's use of __builtin_unreachable() via the unreachable()
macro tells gcc that the instruction is a dead end, and that it's safe
to assume the current code path will not execute past the previous
instruction.

On x86, the BUG() macro is implemented with the 'ud2' instruction.  When
objtool's branch analysis sees that instruction, it knows the current
code path has come to a dead end.

Peter Zijlstra has been working on a patch to change the WARN macros to
use 'ud2'.  That patch will break objtool's assumption that 'ud2' is
always a dead end.

Generally it's best for objtool to avoid making those kinds of
assumptions anyway.  The more ignorant it is of kernel code internals,
the better.

So create a more generic way for objtool to detect dead ends by adding
an annotation to the unreachable() macro.  The annotation stores a
pointer to the end of the unreachable code path in an '__unreachable'
section.  Objtool can read that section to find the dead ends.

Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/41a6d33971462ebd944a1c60ad4bf5be86c17b77.1487712920.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler-gcc.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 0444b1336268..8ea159fc489d 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -195,6 +195,17 @@
 #endif
 #endif
 
+#ifdef CONFIG_STACK_VALIDATION
+#define annotate_unreachable() ({					\
+	asm("1:\t\n"							\
+	    ".pushsection __unreachable, \"a\"\t\n"			\
+	    ".long 1b\t\n"						\
+	    ".popsection\t\n");						\
+})
+#else
+#define annotate_unreachable()
+#endif
+
 /*
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
@@ -204,7 +215,7 @@
  * this in the preprocessor, but we can live with this because they're
  * unreleased.  Really, we need to have autoconf for the kernel.
  */
-#define unreachable() __builtin_unreachable()
+#define unreachable() annotate_unreachable(); __builtin_unreachable()
 
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
-- 
cgit v1.2.3


From b18b9550e4059ceea0393c518eb323b95243f92f Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Sat, 11 Feb 2017 18:46:08 +0100
Subject: libceph: get rid of ack vs commit

- CEPH_OSD_FLAG_ACK shouldn't be set anymore, so assert on it
- remove support for handling ack replies (OSDs will send ack replies
  only if clients request them)
- drop the "do lingering callbacks under osd->lock" logic from
  handle_reply() -- lreq->lock is sufficient in all three cases

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/osd_client.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 03a6653d329a..2ea0c282f3dc 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -22,7 +22,6 @@ struct ceph_osd_client;
  * completion callback for async writepages
  */
 typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
-typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
 
 #define CEPH_HOMELESS_OSD	-1
 
@@ -170,15 +169,12 @@ struct ceph_osd_request {
 	unsigned int		r_num_ops;
 
 	int               r_result;
-	bool              r_got_reply;
 
 	struct ceph_osd_client *r_osdc;
 	struct kref       r_kref;
 	bool              r_mempool;
-	struct completion r_completion;
-	struct completion r_done_completion;  /* fsync waiter */
+	struct completion r_completion;       /* private to osd_client.c */
 	ceph_osdc_callback_t r_callback;
-	ceph_osdc_unsafe_callback_t r_unsafe_callback;
 	struct list_head  r_unsafe_item;
 
 	struct inode *r_inode;         	      /* for use by callbacks */
-- 
cgit v1.2.3


From 05a45a2db42543c5f1a32e08f545aebbd7cb4790 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 24 Feb 2017 13:25:22 -0500
Subject: sunrpc: turn bitfield flags in svc_version into bools

It's just simpler to read this way, IMO. Also, no need to explicitly
set vs_hidden to false in the nfsacl ones.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 7321ae933867..96467c95f02e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -400,10 +400,11 @@ struct svc_version {
 	struct svc_procedure *	vs_proc;	/* per-procedure info */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
-	unsigned int		vs_hidden : 1,	/* Don't register with portmapper.
-						 * Only used for nfsacl so far. */
-				vs_rpcb_optnl:1;/* Don't care the result of register.
-						 * Only used for nfsv4. */
+	/* Don't register with rpcbind */
+	bool			vs_hidden;
+
+	/* Don't care if the rpcbind registration fails */
+	bool			vs_rpcb_optnl;
 
 	/* Override dispatch function (e.g. when caching replies).
 	 * A return value of 0 means drop the request. 
-- 
cgit v1.2.3


From 362142b25843fb059941aaa01b91501d5d8652cc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 24 Feb 2017 13:25:23 -0500
Subject: sunrpc: flag transports as having congestion control

NFSv4 requires a transport protocol with congestion control in most
cases.

On an IP network, that means that NFSv4 over UDP should be forbidden.

The situation with RDMA is a bit more nuanced, but most RDMA transports
are suitable for this. For now, we assume that all RDMA transports are
suitable, but we may need to revise that at some point.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 7440290f64ac..ddb7f94a9d06 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -67,6 +67,7 @@ struct svc_xprt {
 #define XPT_CACHE_AUTH	11		/* cache auth info */
 #define XPT_LOCAL	12		/* connection from loopback interface */
 #define XPT_KILL_TEMP   13		/* call xpo_kill_temp_xprt before closing */
+#define XPT_CONG_CTRL	14		/* has congestion control */
 
 	struct svc_serv		*xpt_server;	/* service for transport */
 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
-- 
cgit v1.2.3


From bb292ac1c6028344013309a309b44dc691581825 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 25 Jan 2017 14:21:10 -0800
Subject: watchdog: Introduce watchdog_stop_on_unregister helper

Many watchdog drivers explicitly stop the watchdog when unregistering it.
While it is unclear if this is actually needed (the whatdog should not be
running at that time if it can be stopped), introduce a helper to
explicitly stop the watchdog in the watchdog core when unregistering it.
This helps reducing driver code size while retaining functionality.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/watchdog.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 35a4d8185b51..a786e5e8973b 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -117,6 +117,7 @@ struct watchdog_device {
 #define WDOG_NO_WAY_OUT		1	/* Is 'nowayout' feature set ? */
 #define WDOG_STOP_ON_REBOOT	2	/* Should be stopped on reboot */
 #define WDOG_HW_RUNNING		3	/* True if HW watchdog running */
+#define WDOG_STOP_ON_UNREGISTER	4	/* Should be stopped on unregister */
 	struct list_head deferred;
 };
 
@@ -151,6 +152,12 @@ static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd)
 	set_bit(WDOG_STOP_ON_REBOOT, &wdd->status);
 }
 
+/* Use the following function to stop the watchdog when unregistering it */
+static inline void watchdog_stop_on_unregister(struct watchdog_device *wdd)
+{
+	set_bit(WDOG_STOP_ON_UNREGISTER, &wdd->status);
+}
+
 /* Use the following function to check if a timeout value is invalid */
 static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t)
 {
-- 
cgit v1.2.3


From 5283b03ee5cd28d516646298bead09b238d92ddc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 24 Feb 2017 13:25:24 -0500
Subject: nfs/nfsd/sunrpc: enforce transport requirements for NFSv4

NFSv4 requires a transport "that is specified to avoid network
congestion" (RFC 7530, section 3.1, paragraph 2).  In practical terms,
that means that you should not run NFSv4 over UDP. The server has never
enforced that requirement, however.

This patchset fixes this by adding a new flag to the svc_version that
states that it has these transport requirements. With that, we can check
that the transport has XPT_CONG_CTRL set before processing an RPC. If it
doesn't we reject it with RPC_PROG_MISMATCH.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 96467c95f02e..e770abeed32d 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -406,6 +406,9 @@ struct svc_version {
 	/* Don't care if the rpcbind registration fails */
 	bool			vs_rpcb_optnl;
 
+	/* Need xprt with congestion control */
+	bool			vs_need_cong_ctrl;
+
 	/* Override dispatch function (e.g. when caching replies).
 	 * A return value of 0 means drop the request. 
 	 * vs_dispatch == NULL means use default dispatcher.
-- 
cgit v1.2.3


From 3fc21924100b13f73c734d0ce8dfcfe913fcf7a8 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 24 Feb 2017 14:55:48 -0800
Subject: mm: validate device_hotplug is held for memory hotplug

mem_hotplug_begin() assumes that it can set mem_hotplug.active_writer
and run the hotplug process without racing another thread.  Validate
this assumption with a lockdep assertion.

Link: http://lkml.kernel.org/r/148693886229.16345.1770484669403334689.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index bd684fc8ec1d..a48a7ff70164 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1139,6 +1139,7 @@ static inline bool device_supports_offline(struct device *dev)
 extern void lock_device_hotplug(void);
 extern void unlock_device_hotplug(void);
 extern int lock_device_hotplug_sysfs(void);
+void assert_held_device_hotplug(void);
 extern int device_offline(struct device *dev);
 extern int device_online(struct device *dev);
 extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
-- 
cgit v1.2.3


From 0262d9c845ec349edf93f69688a5129c36cc2232 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 24 Feb 2017 14:55:59 -0800
Subject: memblock: embed memblock type name within struct memblock_type

Provide the name of each memblock type with struct memblock_type.  This
allows to get rid of the function memblock_type_name() and duplicating
the type names in __memblock_dump_all().

The only memblock_type usage out of mm/memblock.c seems to be
arch/s390/kernel/crash_dump.c.  While at it, give it a name.

Link: http://lkml.kernel.org/r/20170120123456.46508-4-heiko.carstens@de.ibm.com
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 38bcf00cbed3..bdfc65af4152 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -42,6 +42,7 @@ struct memblock_type {
 	unsigned long max;	/* size of the allocated array */
 	phys_addr_t total_size;	/* size of all regions */
 	struct memblock_region *regions;
+	char *name;
 };
 
 struct memblock {
-- 
cgit v1.2.3


From d811914d87576c562e849c00d9f9beff45038801 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Fri, 24 Feb 2017 14:56:02 -0800
Subject: userfaultfd: non-cooperative: rename *EVENT_MADVDONTNEED to
 *EVENT_REMOVE

Patch series "userfaultfd: non-cooperative: add madvise() event for
MADV_REMOVE request".

These patches add notification of madvise(MADV_REMOVE) event to
non-cooperative userfaultfd monitor.

The first pacth renames EVENT_MADVDONTNEED to EVENT_REMOVE along with
relevant functions and structures.  Using _REMOVE instead of
_MADVDONTNEED describes the event semantics more clearly and I hope it's
not too late for such change in the ABI.

This patch (of 3):

The UFFD_EVENT_MADVDONTNEED purpose is to notify uffd monitor about
removal of certain range from address space tracked by userfaultfd.
Hence, UFFD_EVENT_REMOVE seems to better reflect the operation
semantics.  Respectively, 'madv_dn' field of uffd_msg is renamed to
'remove' and the madvise_userfault_dontneed callback is renamed to
userfaultfd_remove.

Link: http://lkml.kernel.org/r/1484814154-1557-2-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f431861f22f1..2521542f6c07 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -61,10 +61,10 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
 					unsigned long from, unsigned long to,
 					unsigned long len);
 
-extern void madvise_userfault_dontneed(struct vm_area_struct *vma,
-				       struct vm_area_struct **prev,
-				       unsigned long start,
-				       unsigned long end);
+extern void userfaultfd_remove(struct vm_area_struct *vma,
+			       struct vm_area_struct **prev,
+			       unsigned long start,
+			       unsigned long end);
 
 #else /* CONFIG_USERFAULTFD */
 
@@ -112,10 +112,10 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
 {
 }
 
-static inline void madvise_userfault_dontneed(struct vm_area_struct *vma,
-					      struct vm_area_struct **prev,
-					      unsigned long start,
-					      unsigned long end)
+static inline void userfaultfd_remove(struct vm_area_struct *vma,
+				      struct vm_area_struct **prev,
+				      unsigned long start,
+				      unsigned long end)
 {
 }
 #endif /* CONFIG_USERFAULTFD */
-- 
cgit v1.2.3


From 1276ad68e2491d1ceeb65f55d790f9277593c459 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 24 Feb 2017 14:56:11 -0800
Subject: mm: vmscan: scan dirty pages even in laptop mode

Patch series "mm: vmscan: fix kswapd writeback regression".

We noticed a regression on multiple hadoop workloads when moving from
3.10 to 4.0 and 4.6, which involves kswapd getting tangled up in page
writeout, causing direct reclaim herds that also don't make progress.

I tracked it down to the thrash avoidance efforts after 3.10 that make
the kernel better at keeping use-once cache and use-many cache sorted on
the inactive and active list, with more aggressive protection of the
active list as long as there is inactive cache.  Unfortunately, our
workload's use-once cache is mostly from streaming writes.  Waiting for
writes to avoid potential reloads in the future is not a good tradeoff.

These patches do the following:

1. Wake the flushers when kswapd sees a lump of dirty pages. It's
   possible to be below the dirty background limit and still have cache
   velocity push them through the LRU. So start a-flushin'.

2. Let kswapd only write pages that have been rotated twice. This makes
   sure we really tried to get all the clean pages on the inactive list
   before resorting to horrible LRU-order writeback.

3. Move rotating dirty pages off the inactive list. Instead of churning
   or waiting on page writeback, we'll go after clean active cache. This
   might lead to thrashing, but in this state memory demand outstrips IO
   speed anyway, and reads are faster than writes.

Mel backported the series to 4.10-rc5 with one minor conflict and ran a
couple of tests on it.  Mix of read/write random workload didn't show
anything interesting.  Write-only database didn't show much difference
in performance but there were slight reductions in IO -- probably in the
noise.

simoop did show big differences although not as big as Mel expected.
This is Chris Mason's workload that similate the VM activity of hadoop.
Mel won't go through the full details but over the samples measured
during an hour it reported

                                         4.10.0-rc5            4.10.0-rc5
                                            vanilla         johannes-v1r1
Amean    p50-Read             21346531.56 (  0.00%) 21697513.24 ( -1.64%)
Amean    p95-Read             24700518.40 (  0.00%) 25743268.98 ( -4.22%)
Amean    p99-Read             27959842.13 (  0.00%) 28963271.11 ( -3.59%)
Amean    p50-Write                1138.04 (  0.00%)      989.82 ( 13.02%)
Amean    p95-Write             1106643.48 (  0.00%)    12104.00 ( 98.91%)
Amean    p99-Write             1569213.22 (  0.00%)    36343.38 ( 97.68%)
Amean    p50-Allocation          85159.82 (  0.00%)    79120.70 (  7.09%)
Amean    p95-Allocation         204222.58 (  0.00%)   129018.43 ( 36.82%)
Amean    p99-Allocation         278070.04 (  0.00%)   183354.43 ( 34.06%)
Amean    final-p50-Read       21266432.00 (  0.00%) 21921792.00 ( -3.08%)
Amean    final-p95-Read       24870912.00 (  0.00%) 26116096.00 ( -5.01%)
Amean    final-p99-Read       28147712.00 (  0.00%) 29523968.00 ( -4.89%)
Amean    final-p50-Write          1130.00 (  0.00%)      977.00 ( 13.54%)
Amean    final-p95-Write       1033216.00 (  0.00%)     2980.00 ( 99.71%)
Amean    final-p99-Write       1517568.00 (  0.00%)    32672.00 ( 97.85%)
Amean    final-p50-Allocation    86656.00 (  0.00%)    78464.00 (  9.45%)
Amean    final-p95-Allocation   211712.00 (  0.00%)   116608.00 ( 44.92%)
Amean    final-p99-Allocation   287232.00 (  0.00%)   168704.00 ( 41.27%)

The latencies are actually completely horrific in comparison to 4.4 (and
4.10-rc5 is worse than 4.9 according to historical data for reasons Mel
hasn't analysed yet).

Still, 95% of write latency (p95-write) is halved by the series and
allocation latency is way down.  Direct reclaim activity is one fifth of
what it was according to vmstats.  Kswapd activity is higher but this is
not necessarily surprising.  Kswapd efficiency is unchanged at 99% (99%
of pages scanned were reclaimed) but direct reclaim efficiency went from
77% to 99%

In the vanilla kernel, 627MB of data was written back from reclaim
context.  With the series, no data was written back.  With or without
the patch, pages are being immediately reclaimed after writeback
completes.  However, with the patch, only 1/8th of the pages are
reclaimed like this.

This patch (of 5):

We have an elaborate dirty/writeback throttling mechanism inside the
reclaim scanner, but for that to work the pages have to go through
shrink_page_list() and get counted for what they are.  Otherwise, we
mess up the LRU order and don't match reclaim speed to writeback.

Especially during deactivation, there is never a reason to skip dirty
pages; nothing is even trying to write them out from there.  Don't mess
up the LRU order for nothing, shuffle these pages along.

Link: http://lkml.kernel.org/r/20170123181641.23938-2-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 82fc632fd11d..8e02b3750fe0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -236,8 +236,6 @@ struct lruvec {
 #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
 #define LRU_ALL	     ((1 << NR_LRU_LISTS) - 1)
 
-/* Isolate clean file */
-#define ISOLATE_CLEAN		((__force isolate_mode_t)0x1)
 /* Isolate unmapped file */
 #define ISOLATE_UNMAPPED	((__force isolate_mode_t)0x2)
 /* Isolate for asynchronous migration */
-- 
cgit v1.2.3


From 726d061fbd3658e4bfeffa1b8e82da97de2ca4dd Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 24 Feb 2017 14:56:14 -0800
Subject: mm: vmscan: kick flushers when we encounter dirty pages on the LRU

Memory pressure can put dirty pages at the end of the LRU without
anybody running into dirty limits.  Don't start writing individual pages
from kswapd while the flushers might be asleep.

Unlike the old direct reclaim flusher wakeup (removed in the next patch)
that flushes the number of pages just scanned, this patch wakes the
flushers for all outstanding dirty pages.  That seemed to perform better
in a synthetic test that pushes dirty pages to the end of the LRU and
into reclaim, because we know LRU aging outstrips writeback already, and
this way we give younger dirty pages a headstart rather than wait until
reclaim runs into them as well.  It also means less plugging and risk of
exhausting the struct request pool from reclaim.

There is a concern that this will cause temporary files that used to get
dirtied and truncated before writeback to now get written to disk under
memory pressure.  If this turns out to be a real problem, we'll have to
revisit this and tame the reclaim flusher wakeups.

[hannes@cmpxchg.org: mention dirty expiration as a condition]
  Link: http://lkml.kernel.org/r/20170126174739.GA30636@cmpxchg.org
Link: http://lkml.kernel.org/r/20170123181641.23938-3-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/writeback.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 5527d910ba3d..a3c0cbd7c888 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -46,7 +46,7 @@ enum writeback_sync_modes {
  */
 enum wb_reason {
 	WB_REASON_BACKGROUND,
-	WB_REASON_TRY_TO_FREE_PAGES,
+	WB_REASON_VMSCAN,
 	WB_REASON_SYNC,
 	WB_REASON_PERIODIC,
 	WB_REASON_LAPTOP_TIMER,
-- 
cgit v1.2.3


From c55e8d035b28b2867e68b0e2d0eee2c0f1016b43 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 24 Feb 2017 14:56:23 -0800
Subject: mm: vmscan: move dirty pages out of the way until they're flushed

We noticed a performance regression when moving hadoop workloads from
3.10 kernels to 4.0 and 4.6.  This is accompanied by increased pageout
activity initiated by kswapd as well as frequent bursts of allocation
stalls and direct reclaim scans.  Even lowering the dirty ratios to the
equivalent of less than 1% of memory would not eliminate the issue,
suggesting that dirty pages concentrate where the scanner is looking.

This can be traced back to recent efforts of thrash avoidance.  Where
3.10 would not detect refaulting pages and continuously supply clean
cache to the inactive list, a thrashing workload on 4.0+ will detect and
activate refaulting pages right away, distilling used-once pages on the
inactive list much more effectively.  This is by design, and it makes
sense for clean cache.  But for the most part our workload's cache
faults are refaults and its use-once cache is from streaming writes.  We
end up with most of the inactive list dirty, and we don't go after the
active cache as long as we have use-once pages around.

But waiting for writes to avoid reclaiming clean cache that *might*
refault is a bad trade-off.  Even if the refaults happen, reads are
faster than writes.  Before getting bogged down on writeback, reclaim
should first look at *all* cache in the system, even active cache.

To accomplish this, activate pages that are dirty or under writeback
when they reach the end of the inactive LRU.  The pages are marked for
immediate reclaim, meaning they'll get moved back to the inactive LRU
tail as soon as they're written back and become reclaimable.  But in the
meantime, by reducing the inactive list to only immediately reclaimable
pages, we allow the scanner to deactivate and refill the inactive list
with clean cache from the active list tail to guarantee forward
progress.

[hannes@cmpxchg.org: update comment]
  Link: http://lkml.kernel.org/r/20170202191957.22872-8-hannes@cmpxchg.org
Link: http://lkml.kernel.org/r/20170123181641.23938-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 41d376e7116d..e030a68ead7e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -50,6 +50,13 @@ static __always_inline void add_page_to_lru_list(struct page *page,
 	list_add(&page->lru, &lruvec->lists[lru]);
 }
 
+static __always_inline void add_page_to_lru_list_tail(struct page *page,
+				struct lruvec *lruvec, enum lru_list lru)
+{
+	update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
+	list_add_tail(&page->lru, &lruvec->lists[lru]);
+}
+
 static __always_inline void del_page_from_lru_list(struct page *page,
 				struct lruvec *lruvec, enum lru_list lru)
 {
-- 
cgit v1.2.3


From 11bac80004499ea59f361ef2a5516c84b6eab675 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 24 Feb 2017 14:56:41 -0800
Subject: mm, fs: reduce fault, page_mkwrite, and pfn_mkwrite to take only vmf

->fault(), ->page_mkwrite(), and ->pfn_mkwrite() calls do not need to
take a vma and vmf parameter when the vma already resides in vmf.

Remove the vma parameter to simplify things.

[arnd@arndb.de: fix ARM build]
  Link: http://lkml.kernel.org/r/20170125223558.1451224-1-arnd@arndb.de
Link: http://lkml.kernel.org/r/148521301778.19116.10840599906674778980.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dax.h   |  5 ++---
 include/linux/iomap.h |  3 +--
 include/linux/mm.h    | 10 +++++-----
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 1e77ff5818f1..eeb02421c848 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,8 +38,7 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops);
-int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-			const struct iomap_ops *ops);
+int dax_iomap_fault(struct vm_fault *vmf, const struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
@@ -83,7 +82,7 @@ static inline int dax_iomap_pmd_fault(struct vm_fault *vmf,
 	return VM_FAULT_FALLBACK;
 }
 #endif
-int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
+int dax_pfn_mkwrite(struct vm_fault *vmf);
 
 static inline bool vma_is_dax(struct vm_area_struct *vma)
 {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 891459caa278..7291810067eb 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -79,8 +79,7 @@ int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
 		bool *did_zero, const struct iomap_ops *ops);
 int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops);
-int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-		const struct iomap_ops *ops);
+int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		loff_t start, loff_t len, const struct iomap_ops *ops);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 574bc157a27c..3dd80ba6568a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -350,17 +350,17 @@ struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
 	int (*mremap)(struct vm_area_struct * area);
-	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+	int (*fault)(struct vm_fault *vmf);
 	int (*pmd_fault)(struct vm_fault *vmf);
 	void (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
-	int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+	int (*page_mkwrite)(struct vm_fault *vmf);
 
 	/* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
-	int (*pfn_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf);
+	int (*pfn_mkwrite)(struct vm_fault *vmf);
 
 	/* called by access_process_vm when get_user_pages() fails, typically
 	 * for use by special VMAs that can switch between memory and hardware
@@ -2124,10 +2124,10 @@ extern void truncate_inode_pages_range(struct address_space *,
 extern void truncate_inode_pages_final(struct address_space *);
 
 /* generic vm_area_ops exported for stackable file systems */
-extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
+extern int filemap_fault(struct vm_fault *vmf);
 extern void filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff);
-extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int filemap_page_mkwrite(struct vm_fault *vmf);
 
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
-- 
cgit v1.2.3


From a2d581675d485eb7188f521f36efc114639a3096 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 24 Feb 2017 14:56:59 -0800
Subject: mm,fs,dax: change ->pmd_fault to ->huge_fault

Patch series "1G transparent hugepage support for device dax", v2.

The following series implements support for 1G trasparent hugepage on
x86 for device dax.  The bulk of the code was written by Mathew Wilcox a
while back supporting transparent 1G hugepage for fs DAX.  I have
forward ported the relevant bits to 4.10-rc.  The current submission has
only the necessary code to support device DAX.

Comments from Dan Williams: So the motivation and intended user of this
functionality mirrors the motivation and users of 1GB page support in
hugetlbfs.  Given expected capacities of persistent memory devices an
in-memory database may want to reduce tlb pressure beyond what they can
already achieve with 2MB mappings of a device-dax file.  We have
customer feedback to that effect as Willy mentioned in his previous
version of these patches [1].

[1]: https://lkml.org/lkml/2016/1/31/52

Comments from Nilesh @ Oracle:

There are applications which have a process model; and if you assume
10,000 processes attempting to mmap all the 6TB memory available on a
server; we are looking at the following:

processes         : 10,000
memory            :    6TB
pte @ 4k page size: 8 bytes / 4K of memory * #processes = 6TB / 4k * 8 * 10000 = 1.5GB * 80000 = 120,000GB
pmd @ 2M page size: 120,000 / 512 = ~240GB
pud @ 1G page size: 240GB / 512 = ~480MB

As you can see with 2M pages, this system will use up an exorbitant
amount of DRAM to hold the page tables; but the 1G pages finally brings
it down to a reasonable level.  Memory sizes will keep increasing; so
this number will keep increasing.

An argument can be made to convert the applications from process model
to thread model, but in the real world that may not be always practical.
Hopefully this helps explain the use case where this is valuable.

This patch (of 3):

In preparation for adding the ability to handle PUD pages, convert
vm_operations_struct.pmd_fault to vm_operations_struct.huge_fault.  The
vm_fault structure is extended to include a union of the different page
table pointers that may be needed, and three flag bits are reserved to
indicate which type of pointer is in the union.

[ross.zwisler@linux.intel.com: remove unused function ext4_dax_huge_fault()]
  Link: http://lkml.kernel.org/r/1485813172-7284-1-git-send-email-ross.zwisler@linux.intel.com
[dave.jiang@intel.com: clear PMD or PUD size flags when in fall through path]
  Link: http://lkml.kernel.org/r/148589842696.5820.16078080610311444794.stgit@djiang5-desk3.ch.intel.com
Link: http://lkml.kernel.org/r/148545058784.17912.6353162518188733642.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Nilesh Choudhury <nilesh.choudhury@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dax.h |  6 ------
 include/linux/mm.h  | 10 +++++++++-
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index eeb02421c848..cf9af225962b 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -70,17 +70,11 @@ static inline unsigned int dax_radix_order(void *entry)
 		return PMD_SHIFT - PAGE_SHIFT;
 	return 0;
 }
-int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops);
 #else
 static inline unsigned int dax_radix_order(void *entry)
 {
 	return 0;
 }
-static inline int dax_iomap_pmd_fault(struct vm_fault *vmf,
-		const struct iomap_ops *ops)
-{
-	return VM_FAULT_FALLBACK;
-}
 #endif
 int dax_pfn_mkwrite(struct vm_fault *vmf);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3dd80ba6568a..035a688e5472 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -285,6 +285,11 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_REMOTE	0x80	/* faulting for non current tsk/mm */
 #define FAULT_FLAG_INSTRUCTION  0x100	/* The fault was during an instruction fetch */
 
+#define FAULT_FLAG_SIZE_MASK	0x7000	/* Support up to 8-level page tables */
+#define FAULT_FLAG_SIZE_PTE	0x0000	/* First level (eg 4k) */
+#define FAULT_FLAG_SIZE_PMD	0x1000	/* Second level (eg 2MB) */
+#define FAULT_FLAG_SIZE_PUD	0x2000	/* Third level (eg 1GB) */
+
 #define FAULT_FLAG_TRACE \
 	{ FAULT_FLAG_WRITE,		"WRITE" }, \
 	{ FAULT_FLAG_MKWRITE,		"MKWRITE" }, \
@@ -314,6 +319,9 @@ struct vm_fault {
 	unsigned long address;		/* Faulting virtual address */
 	pmd_t *pmd;			/* Pointer to pmd entry matching
 					 * the 'address' */
+	pud_t *pud;			/* Pointer to pud entry matching
+					 * the 'address'
+					 */
 	pte_t orig_pte;			/* Value of PTE at the time of fault */
 
 	struct page *cow_page;		/* Page handler may use for COW fault */
@@ -351,7 +359,7 @@ struct vm_operations_struct {
 	void (*close)(struct vm_area_struct * area);
 	int (*mremap)(struct vm_area_struct * area);
 	int (*fault)(struct vm_fault *vmf);
-	int (*pmd_fault)(struct vm_fault *vmf);
+	int (*huge_fault)(struct vm_fault *vmf);
 	void (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
 
-- 
cgit v1.2.3


From a00cc7d9dd93d66a3fb83fc52aa57a4bec51c517 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Fri, 24 Feb 2017 14:57:02 -0800
Subject: mm, x86: add support for PUD-sized transparent hugepages

The current transparent hugepage code only supports PMDs.  This patch
adds support for transparent use of PUDs with DAX.  It does not include
support for anonymous pages.  x86 support code also added.

Most of this patch simply parallels the work that was done for huge
PMDs.  The only major difference is how the new ->pud_entry method in
mm_walk works.  The ->pmd_entry method replaces the ->pte_entry method,
whereas the ->pud_entry method works along with either ->pmd_entry or
->pte_entry.  The pagewalk code takes care of locking the PUD before
calling ->pud_walk, so handlers do not need to worry whether the PUD is
stable.

[dave.jiang@intel.com: fix SMP x86 32bit build for native_pud_clear()]
  Link: http://lkml.kernel.org/r/148719066814.31111.3239231168815337012.stgit@djiang5-desk3.ch.intel.com
[dave.jiang@intel.com: native_pud_clear missing on i386 build]
  Link: http://lkml.kernel.org/r/148640375195.69754.3315433724330910314.stgit@djiang5-desk3.ch.intel.com
Link: http://lkml.kernel.org/r/148545059381.17912.8602162635537598445.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Alexander Kapshuk <alexander.kapshuk@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Nilesh Choudhury <nilesh.choudhury@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h      | 83 +++++++++++++++++++++++++++++++++++++++++---
 include/linux/mm.h           | 30 +++++++++++++++-
 include/linux/mmu_notifier.h | 14 ++++++++
 include/linux/pfn_t.h        | 12 +++++++
 4 files changed, 133 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f0029e786205..a3762d49ba39 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,6 +6,18 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
 extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+			 pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+			 struct vm_area_struct *vma);
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+#else
+static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
+{
+}
+#endif
+
 extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 					  unsigned long addr,
@@ -17,6 +29,9 @@ extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
 extern int zap_huge_pmd(struct mmu_gather *tlb,
 			struct vm_area_struct *vma,
 			pmd_t *pmd, unsigned long addr);
+extern int zap_huge_pud(struct mmu_gather *tlb,
+			struct vm_area_struct *vma,
+			pud_t *pud, unsigned long addr);
 extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, unsigned long end,
 			unsigned char *vec);
@@ -26,8 +41,10 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);
-int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *,
-			pfn_t pfn, bool write);
+int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+			pmd_t *pmd, pfn_t pfn, bool write);
+int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+			pud_t *pud, pfn_t pfn, bool write);
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
 	TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
@@ -58,13 +75,14 @@ extern struct kobj_attribute shmem_enabled_attr;
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, int flags);
-
 #define HPAGE_PMD_SHIFT PMD_SHIFT
 #define HPAGE_PMD_SIZE	((1UL) << HPAGE_PMD_SHIFT)
 #define HPAGE_PMD_MASK	(~(HPAGE_PMD_SIZE - 1))
 
+#define HPAGE_PUD_SHIFT PUD_SHIFT
+#define HPAGE_PUD_SIZE	((1UL) << HPAGE_PUD_SHIFT)
+#define HPAGE_PUD_MASK	(~(HPAGE_PUD_SIZE - 1))
+
 extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 
 #define transparent_hugepage_enabled(__vma)				\
@@ -118,6 +136,17 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 		bool freeze, struct page *page);
 
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+		unsigned long address);
+
+#define split_huge_pud(__vma, __pud, __address)				\
+	do {								\
+		pud_t *____pud = (__pud);				\
+		if (pud_trans_huge(*____pud)				\
+					|| pud_devmap(*____pud))	\
+			__split_huge_pud(__vma, __pud, __address);	\
+	}  while (0)
+
 extern int hugepage_madvise(struct vm_area_struct *vma,
 			    unsigned long *vm_flags, int advice);
 extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -126,6 +155,8 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
 				    long adjust_next);
 extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
 		struct vm_area_struct *vma);
+extern spinlock_t *__pud_trans_huge_lock(pud_t *pud,
+		struct vm_area_struct *vma);
 /* mmap_sem must be held on entry */
 static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 		struct vm_area_struct *vma)
@@ -136,6 +167,15 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 	else
 		return NULL;
 }
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+		struct vm_area_struct *vma)
+{
+	VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
+	if (pud_trans_huge(*pud) || pud_devmap(*pud))
+		return __pud_trans_huge_lock(pud, vma);
+	else
+		return NULL;
+}
 static inline int hpage_nr_pages(struct page *page)
 {
 	if (unlikely(PageTransHuge(page)))
@@ -143,6 +183,11 @@ static inline int hpage_nr_pages(struct page *page)
 	return 1;
 }
 
+struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, int flags);
+struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+		pud_t *pud, int flags);
+
 extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
@@ -157,6 +202,11 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
 	return is_huge_zero_page(pmd_page(pmd));
 }
 
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+	return false;
+}
+
 struct page *mm_get_huge_zero_page(struct mm_struct *mm);
 void mm_put_huge_zero_page(struct mm_struct *mm);
 
@@ -167,6 +217,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
 
+#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
+
 #define hpage_nr_pages(x) 1
 
 #define transparent_hugepage_enabled(__vma) 0
@@ -195,6 +249,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
 		unsigned long address, bool freeze, struct page *page) {}
 
+#define split_huge_pud(__vma, __pmd, __address)	\
+	do { } while (0)
+
 static inline int hugepage_madvise(struct vm_area_struct *vma,
 				   unsigned long *vm_flags, int advice)
 {
@@ -212,6 +269,11 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 {
 	return NULL;
 }
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+		struct vm_area_struct *vma)
+{
+	return NULL;
+}
 
 static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
@@ -223,6 +285,11 @@ static inline bool is_huge_zero_page(struct page *page)
 	return false;
 }
 
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+	return false;
+}
+
 static inline void mm_put_huge_zero_page(struct mm_struct *mm)
 {
 	return;
@@ -233,6 +300,12 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
 {
 	return NULL;
 }
+
+static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
+		unsigned long addr, pud_t *pud, int flags)
+{
+	return NULL;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 035a688e5472..d8b75d7d6a9e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -424,6 +424,10 @@ static inline int pmd_devmap(pmd_t pmd)
 {
 	return 0;
 }
+static inline int pud_devmap(pud_t pud)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -1199,6 +1203,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 
 /**
  * mm_walk - callbacks for walk_page_range
+ * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ *	       this handler should only handle pud_trans_huge() puds.
+ *	       the pmd_entry or pte_entry callbacks will be used for
+ *	       regular PUDs.
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
  *	       this handler is required to be able to handle
  *	       pmd_trans_huge() pmds.  They may simply choose to
@@ -1218,6 +1226,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
  * (see the comment on walk_page_range() for more details)
  */
 struct mm_walk {
+	int (*pud_entry)(pud_t *pud, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk);
 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1801,8 +1811,26 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 	return ptl;
 }
 
-extern void __init pagecache_init(void);
+/*
+ * No scalability reason to split PUD locks yet, but follow the same pattern
+ * as the PMD locks to make it easier if we decide to.  The VM should not be
+ * considered ready to switch to split PUD locks yet; there may be places
+ * which need to be converted from page_table_lock.
+ */
+static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
+{
+	return &mm->page_table_lock;
+}
+
+static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
+{
+	spinlock_t *ptl = pud_lockptr(mm, pud);
+
+	spin_lock(ptl);
+	return ptl;
+}
 
+extern void __init pagecache_init(void);
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index a1a210d59961..51891fb0d3ce 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -381,6 +381,19 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	___pmd;								\
 })
 
+#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud)		\
+({									\
+	unsigned long ___haddr = __haddr & HPAGE_PUD_MASK;		\
+	struct mm_struct *___mm = (__vma)->vm_mm;			\
+	pud_t ___pud;							\
+									\
+	___pud = pudp_huge_clear_flush(__vma, __haddr, __pud);		\
+	mmu_notifier_invalidate_range(___mm, ___haddr,			\
+				      ___haddr + HPAGE_PUD_SIZE);	\
+									\
+	___pud;								\
+})
+
 #define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)		\
 ({									\
 	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
@@ -475,6 +488,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define pmdp_clear_young_notify pmdp_test_and_clear_young
 #define	ptep_clear_flush_notify ptep_clear_flush
 #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
+#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 #define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
 #define set_pte_at_notify set_pte_at
 
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 033fc7bbcefa..a49b3259cad7 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -90,6 +90,13 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot)
 {
 	return pfn_pmd(pfn_t_to_pfn(pfn), pgprot);
 }
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
+{
+	return pfn_pud(pfn_t_to_pfn(pfn), pgprot);
+}
+#endif
 #endif
 
 #ifdef __HAVE_ARCH_PTE_DEVMAP
@@ -106,5 +113,10 @@ static inline bool pfn_t_devmap(pfn_t pfn)
 }
 pte_t pte_mkdevmap(pte_t pte);
 pmd_t pmd_mkdevmap(pmd_t pmd);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pud_mkdevmap(pud_t pud);
 #endif
+#endif /* __HAVE_ARCH_PTE_DEVMAP */
+
 #endif /* _LINUX_PFN_T_H_ */
-- 
cgit v1.2.3


From c791ace1e747371658237f0d30234fef56c39669 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 24 Feb 2017 14:57:08 -0800
Subject: mm: replace FAULT_FLAG_SIZE with parameter to huge_fault

Since the introduction of FAULT_FLAG_SIZE to the vm_fault flag, it has
been somewhat painful with getting the flags set and removed at the
correct locations.  More than one kernel oops was introduced due to
difficulties of getting the placement correctly.

Remove the flag values and introduce an input parameter to huge_fault
that indicates the size of the page entry.  This makes the code easier
to trace and should avoid the issues we see with the fault flags where
removal of the flag was necessary in the fallback paths.

Link: http://lkml.kernel.org/r/148615748258.43180.1690152053774975329.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Nilesh Choudhury <nilesh.choudhury@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dax.h |  3 ++-
 include/linux/mm.h  | 14 ++++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index cf9af225962b..d8a3dc042e1c 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,7 +38,8 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops);
-int dax_iomap_fault(struct vm_fault *vmf, const struct iomap_ops *ops);
+int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+		    const struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d8b75d7d6a9e..c65aa43b5712 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -285,11 +285,6 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_REMOTE	0x80	/* faulting for non current tsk/mm */
 #define FAULT_FLAG_INSTRUCTION  0x100	/* The fault was during an instruction fetch */
 
-#define FAULT_FLAG_SIZE_MASK	0x7000	/* Support up to 8-level page tables */
-#define FAULT_FLAG_SIZE_PTE	0x0000	/* First level (eg 4k) */
-#define FAULT_FLAG_SIZE_PMD	0x1000	/* Second level (eg 2MB) */
-#define FAULT_FLAG_SIZE_PUD	0x2000	/* Third level (eg 1GB) */
-
 #define FAULT_FLAG_TRACE \
 	{ FAULT_FLAG_WRITE,		"WRITE" }, \
 	{ FAULT_FLAG_MKWRITE,		"MKWRITE" }, \
@@ -349,6 +344,13 @@ struct vm_fault {
 					 */
 };
 
+/* page entry size for vm->huge_fault() */
+enum page_entry_size {
+	PE_SIZE_PTE = 0,
+	PE_SIZE_PMD,
+	PE_SIZE_PUD,
+};
+
 /*
  * These are the virtual MM functions - opening of an area, closing and
  * unmapping it (needed to keep files on disk up-to-date etc), pointer
@@ -359,7 +361,7 @@ struct vm_operations_struct {
 	void (*close)(struct vm_area_struct * area);
 	int (*mremap)(struct vm_area_struct * area);
 	int (*fault)(struct vm_fault *vmf);
-	int (*huge_fault)(struct vm_fault *vmf);
+	int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size);
 	void (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
 
-- 
cgit v1.2.3


From 9e5bcd610ffcedf5e485e78a72762810b25c7f25 Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Fri, 24 Feb 2017 14:57:29 -0800
Subject: mm/migration: make isolate_movable_page() return int type

Patch series "HWPOISON: soft offlining for non-lru movable page", v6.

After Minchan's commit bda807d44454 ("mm: migrate: support non-lru
movable page migration"), some type of non-lru page like zsmalloc and
virtio-balloon page also support migration.

Therefore, we can:

1) soft offlining no-lru movable pages, which means when memory
   corrected errors occur on a non-lru movable page, we can stop to use
   it by migrating data onto another page and disable the original
   (maybe half-broken) one.

2) enable memory hotplug for non-lru movable pages, i.e. we may offline
   blocks, which include such pages, by using non-lru page migration.

This patchset is heavily dependent on non-lru movable page migration.

This patch (of 4):

Change the return type of isolate_movable_page() from bool to int.  It
will return 0 when isolate movable page successfully, and return -EBUSY
when it isolates failed.

There is no functional change within this patch but prepare for later
patch.

[xieyisheng1@huawei.com: v6]
  Link: http://lkml.kernel.org/r/1486108770-630-2-git-send-email-xieyisheng1@huawei.com
Link: http://lkml.kernel.org/r/1485867981-16037-2-git-send-email-ysxie@foxmail.com
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Suggested-by: Michal Hocko <mhocko@kernel.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ae8d475a9385..43d5deb0c4cc 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -37,7 +37,7 @@ extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
 		unsigned long private, enum migrate_mode mode, int reason);
-extern bool isolate_movable_page(struct page *page, isolate_mode_t mode);
+extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
 extern void putback_movable_page(struct page *page);
 
 extern int migrate_prep(void);
-- 
cgit v1.2.3


From cbae0170e5329c79c0a36a81fedd2800146aca12 Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Fri, 24 Feb 2017 14:57:32 -0800
Subject: mm/migration: make isolate_movable_page always defined

Define isolate_movable_page as a static inline function when
CONFIG_MIGRATION is not enable.  It should return -EBUSY here which
means failed to isolate movable pages.

This patch do not have any functional change but prepare for later
patch.

Link: http://lkml.kernel.org/r/1485867981-16037-3-git-send-email-ysxie@foxmail.com
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Suggested-by: Michal Hocko <mhocko@kernel.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Taku Izumi <izumi.taku@jp.fujitsu.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 43d5deb0c4cc..fa76b516fa47 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -56,6 +56,8 @@ static inline int migrate_pages(struct list_head *l, new_page_t new,
 		free_page_t free, unsigned long private, enum migrate_mode mode,
 		int reason)
 	{ return -ENOSYS; }
+static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
+	{ return -EBUSY; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
-- 
cgit v1.2.3


From ace71a19cec5eb430207c3269d8a2683f0574306 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 24 Feb 2017 14:57:45 -0800
Subject: mm: introduce page_vma_mapped_walk()

Introduce a new interface to check if a page is mapped into a vma.  It
aims to address shortcomings of page_check_address{,_transhuge}.

Existing interface is not able to handle PTE-mapped THPs: it only finds
the first PTE.  The rest lefted unnoticed.

page_vma_mapped_walk() iterates over all possible mapping of the page in
the vma.

Link: http://lkml.kernel.org/r/20170129173858.45174-3-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 15321fb1df6b..b76343610653 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/rwsem.h>
 #include <linux/memcontrol.h>
+#include <linux/highmem.h>
 
 /*
  * The anon_vma heads a list of private "related" vmas, to scan if
@@ -232,6 +233,31 @@ static inline bool page_check_address_transhuge(struct page *page,
 }
 #endif
 
+/* Avoid racy checks */
+#define PVMW_SYNC		(1 << 0)
+/* Look for migarion entries rather than present PTEs */
+#define PVMW_MIGRATION		(1 << 1)
+
+struct page_vma_mapped_walk {
+	struct page *page;
+	struct vm_area_struct *vma;
+	unsigned long address;
+	pmd_t *pmd;
+	pte_t *pte;
+	spinlock_t *ptl;
+	unsigned int flags;
+};
+
+static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
+{
+	if (pvmw->pte)
+		pte_unmap(pvmw->pte);
+	if (pvmw->ptl)
+		spin_unlock(pvmw->ptl);
+}
+
+bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
+
 /*
  * Used by swapoff to help locate where page is expected in vma.
  */
-- 
cgit v1.2.3


From d53a8b49a626fdfce4390710da6d04b4314db25f Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 24 Feb 2017 14:58:13 -0800
Subject: mm: drop page_check_address{,_transhuge}

All users are gone. Let's drop them.

Link: http://lkml.kernel.org/r/20170129173858.45174-12-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 36 ------------------------------------
 1 file changed, 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b76343610653..8c89e902df3e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -197,42 +197,6 @@ int page_referenced(struct page *, int is_locked,
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
 
-/*
- * Used by uprobes to replace a userspace page safely
- */
-pte_t *__page_check_address(struct page *, struct mm_struct *,
-				unsigned long, spinlock_t **, int);
-
-static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-					unsigned long address,
-					spinlock_t **ptlp, int sync)
-{
-	pte_t *ptep;
-
-	__cond_lock(*ptlp, ptep = __page_check_address(page, mm, address,
-						       ptlp, sync));
-	return ptep;
-}
-
-/*
- * Used by idle page tracking to check if a page was referenced via page
- * tables.
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-bool page_check_address_transhuge(struct page *page, struct mm_struct *mm,
-				  unsigned long address, pmd_t **pmdp,
-				  pte_t **ptep, spinlock_t **ptlp);
-#else
-static inline bool page_check_address_transhuge(struct page *page,
-				struct mm_struct *mm, unsigned long address,
-				pmd_t **pmdp, pte_t **ptep, spinlock_t **ptlp)
-{
-	*ptep = page_check_address(page, mm, address, ptlp, 0);
-	*pmdp = NULL;
-	return !!*ptep;
-}
-#endif
-
 /* Avoid racy checks */
 #define PVMW_SYNC		(1 << 0)
 /* Look for migarion entries rather than present PTEs */
-- 
cgit v1.2.3


From 897ab3e0c49e24b62e2d54d165c7afec6bbca65b Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Fri, 24 Feb 2017 14:58:22 -0800
Subject: userfaultfd: non-cooperative: add event for memory unmaps

When a non-cooperative userfaultfd monitor copies pages in the
background, it may encounter regions that were already unmapped.
Addition of UFFD_EVENT_UNMAP allows the uffd monitor to track precisely
changes in the virtual memory layout.

Since there might be different uffd contexts for the affected VMAs, we
first should create a temporary representation for the unmap event for
each uffd context and then notify them one by one to the appropriate
userfault file descriptors.

The event notification occurs after the mmap_sem has been released.

[arnd@arndb.de: fix nommu build]
  Link: http://lkml.kernel.org/r/20170203165141.3665284-1-arnd@arndb.de
[mhocko@suse.com: fix nommu build]
  Link: http://lkml.kernel.org/r/20170202091503.GA22823@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/1485542673-24387-3-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h            | 14 +++++++++-----
 include/linux/userfaultfd_k.h | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c65aa43b5712..c6fcba1d1ae5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2090,18 +2090,22 @@ extern int install_special_mapping(struct mm_struct *mm,
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
-	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
+	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+	struct list_head *uf);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
-	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
-extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
+	struct list_head *uf);
+extern int do_munmap(struct mm_struct *, unsigned long, size_t,
+		     struct list_head *uf);
 
 static inline unsigned long
 do_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
-	unsigned long pgoff, unsigned long *populate)
+	unsigned long pgoff, unsigned long *populate,
+	struct list_head *uf)
 {
-	return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
+	return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf);
 }
 
 #ifdef CONFIG_MMU
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 2521542f6c07..a40be5d0661b 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -66,6 +66,12 @@ extern void userfaultfd_remove(struct vm_area_struct *vma,
 			       unsigned long start,
 			       unsigned long end);
 
+extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+				  unsigned long start, unsigned long end,
+				  struct list_head *uf);
+extern void userfaultfd_unmap_complete(struct mm_struct *mm,
+				       struct list_head *uf);
+
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
@@ -118,6 +124,18 @@ static inline void userfaultfd_remove(struct vm_area_struct *vma,
 				      unsigned long end)
 {
 }
+
+static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+					 unsigned long start, unsigned long end,
+					 struct list_head *uf)
+{
+	return 0;
+}
+
+static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
+					      struct list_head *uf)
+{
+}
 #endif /* CONFIG_USERFAULTFD */
 
 #endif /* _LINUX_USERFAULTFD_K_H */
-- 
cgit v1.2.3


From ca49ca7114553587736fe78319e22f073b631380 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Fri, 24 Feb 2017 14:58:25 -0800
Subject: userfaultfd: non-cooperative: add event for exit() notification

Allow userfaultfd monitor track termination of the processes that have
memory backed by the uffd.

[rppt@linux.vnet.ibm.com: add comment]
  Link: http://lkml.kernel.org/r/20170202135448.GB19804@rapoport-lnxLink: http://lkml.kernel.org/r/1485542673-24387-4-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index a40be5d0661b..0468548acebf 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -72,6 +72,8 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
 extern void userfaultfd_unmap_complete(struct mm_struct *mm,
 				       struct list_head *uf);
 
+extern void userfaultfd_exit(struct mm_struct *mm);
+
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
@@ -136,6 +138,11 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
 					      struct list_head *uf)
 {
 }
+
+static inline void userfaultfd_exit(struct mm_struct *mm)
+{
+}
+
 #endif /* CONFIG_USERFAULTFD */
 
 #endif /* _LINUX_USERFAULTFD_K_H */
-- 
cgit v1.2.3


From ca96b625341027f611c3e61351a70311077ebcf5 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 24 Feb 2017 14:58:37 -0800
Subject: mm: alloc_contig_range: allow to specify GFP mask

Currently alloc_contig_range assumes that the compaction should be done
with the default GFP_KERNEL flags.  This is probably right for all
current uses of this interface, but may change as CMA is used in more
use-cases (including being the default DMA memory allocator on some
platforms).

Change the function prototype, to allow for passing through the GFP mask
set by upper layers.

Also respect global restrictions by applying memalloc_noio_flags to the
passed in flags.

Link: http://lkml.kernel.org/r/20170127172328.18574-1-l.stach@pengutronix.de
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Radim Krcmar <rkrcmar@redhat.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alexander Graf <agraf@suse.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0fe0b6295ab5..db373b9d3223 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -541,7 +541,7 @@ static inline bool pm_suspended_storage(void)
 #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range(unsigned long start, unsigned long end,
-			      unsigned migratetype);
+			      unsigned migratetype, gfp_t gfp_mask);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
 #endif
 
-- 
cgit v1.2.3


From e2f466e32f56c8b3ee0d1a40cc39e1c779dfd598 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 24 Feb 2017 14:58:41 -0800
Subject: mm: cma_alloc: allow to specify GFP mask

Most users of this interface just want to use it with the default
GFP_KERNEL flags, but for cases where DMA memory is allocated it may be
called from a different context.

No functional change yet, just passing through the flag to the
underlying alloc_contig_range function.

Link: http://lkml.kernel.org/r/20170127172328.18574-2-l.stach@pengutronix.de
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Radim Krcmar <rkrcmar@redhat.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alexander Graf <agraf@suse.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cma.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 6f0a91b37f68..03f32d0bd1d8 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -29,6 +29,7 @@ extern int __init cma_declare_contiguous(phys_addr_t base,
 extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					unsigned int order_per_bit,
 					struct cma **res_cma);
-extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align);
+extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
+			      gfp_t gfp_mask);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count);
 #endif
-- 
cgit v1.2.3


From 712c604dcdf8186295e2af694adf52c6842ad100 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 24 Feb 2017 14:58:44 -0800
Subject: mm: wire up GFP flag passing in dma_alloc_from_contiguous

The callers of the DMA alloc functions already provide the proper
context GFP flags.  Make sure to pass them through to the CMA allocator,
to make the CMA compaction context aware.

Link: http://lkml.kernel.org/r/20170127172328.18574-3-l.stach@pengutronix.de
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Radim Krcmar <rkrcmar@redhat.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alexander Graf <agraf@suse.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dma-contiguous.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index fec734df1524..b67bf6ac907d 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -112,7 +112,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 }
 
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-				       unsigned int order);
+				       unsigned int order, gfp_t gfp_mask);
 bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 				 int count);
 
@@ -145,7 +145,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 
 static inline
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-				       unsigned int order)
+				       unsigned int order, gfp_t gfp_mask)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From def5efe0376501ef7bd6b53ed061512c142e59aa Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 24 Feb 2017 14:58:47 -0800
Subject: mm, madvise: fail with ENOMEM when splitting vma will hit
 max_map_count

If madvise(2) advice will result in the underlying vma being split and
the number of areas mapped by the process will exceed
/proc/sys/vm/max_map_count as a result, return ENOMEM instead of EAGAIN.

EAGAIN is returned by madvise(2) when a kernel resource, such as slab,
is temporarily unavailable.  It indicates that userspace should retry
the advice in the near future.  This is important for advice such as
MADV_DONTNEED which is often used by malloc implementations to free
memory back to the system: we really do want to free memory back when
madvise(2) returns EAGAIN because slab allocations (for vmas, anon_vmas,
or mempolicies) cannot be allocated.

Encountering /proc/sys/vm/max_map_count is not a temporary failure,
however, so return ENOMEM to indicate this is a more serious issue.  A
followup patch to the man page will specify this behavior.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1701241431120.42507@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c6fcba1d1ae5..ed233b002e33 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2041,8 +2041,10 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
 	struct mempolicy *, struct vm_userfaultfd_ctx);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
-extern int split_vma(struct mm_struct *,
-	struct vm_area_struct *, unsigned long addr, int new_below);
+extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
+	unsigned long addr, int new_below);
+extern int split_vma(struct mm_struct *, struct vm_area_struct *,
+	unsigned long addr, int new_below);
 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
-- 
cgit v1.2.3


From 3a4f8a0b3ffa733ffbb327685e83b63383127cf6 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 24 Feb 2017 14:59:36 -0800
Subject: mm: remove shmem_mapping() shmem_zero_setup() duplicates

Remove the prototypes for shmem_mapping() and shmem_zero_setup() from
linux/mm.h, since they are already provided in linux/shmem_fs.h.  But
shmem_fs.h must then provide the inline stub for shmem_mapping() when
CONFIG_SHMEM is not set, and a few more cfiles now need to #include it.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1702081658250.1549@eggly.anvils
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h       | 10 ----------
 include/linux/shmem_fs.h |  7 +++++++
 2 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ed233b002e33..0d65dd72c0f4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1168,16 +1168,6 @@ extern void pagefault_out_of_memory(void);
 
 extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
 
-int shmem_zero_setup(struct vm_area_struct *);
-#ifdef CONFIG_SHMEM
-bool shmem_mapping(struct address_space *mapping);
-#else
-static inline bool shmem_mapping(struct address_space *mapping)
-{
-	return false;
-}
-#endif
-
 extern bool can_do_mlock(void);
 extern int user_shm_lock(size_t, struct user_struct *);
 extern void user_shm_unlock(size_t, struct user_struct *);
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index fdaac9d4d46d..a7d6bd2a918f 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -57,7 +57,14 @@ extern int shmem_zero_setup(struct vm_area_struct *);
 extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+#ifdef CONFIG_SHMEM
 extern bool shmem_mapping(struct address_space *mapping);
+#else
+static inline bool shmem_mapping(struct address_space *mapping)
+{
+	return false;
+}
+#endif /* CONFIG_SHMEM */
 extern void shmem_unlock_mapping(struct address_space *mapping);
 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					pgoff_t index, gfp_t gfp_mask);
-- 
cgit v1.2.3


From dc18d706a4367454ad1fc51e06148d54e8ecfaa0 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Fri, 24 Feb 2017 15:00:02 -0800
Subject: memory-hotplug: use dev_online for memhp_auto_online

Commit 31bc3858ea3e ("add automatic onlining policy for the newly added
memory") provides the capability to have added memory automatically
onlined during add, but this appears to be slightly broken.

The current implementation uses walk_memory_range() to call
online_memory_block, which uses memory_block_change_state() to online
the memory.  Instead, we should be calling device_online() for the
memory block in online_memory_block().  This would online the memory
(the memory bus online routine memory_subsys_online() called from
device_online calls memory_block_change_state()) and properly update the
device struct offline flag.

As a result of the current implementation, attempting to remove a memory
block after adding it using auto online fails.  This is because doing a
remove, for instance

  echo offline > /sys/devices/system/memory/memoryXXX/state

uses device_offline() which checks the dev->offline flag.

Link: http://lkml.kernel.org/r/20170222220744.8119.19687.stgit@ltcalpine2-lp14.aus.stglabs.ibm.com
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory.h b/include/linux/memory.h
index 093607f90b91..b723a686fc10 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -109,9 +109,6 @@ extern void unregister_memory_notifier(struct notifier_block *nb);
 extern int register_memory_isolate_notifier(struct notifier_block *nb);
 extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
 extern int register_new_memory(int, struct mem_section *);
-extern int memory_block_change_state(struct memory_block *mem,
-				     unsigned long to_state,
-				     unsigned long from_state_req);
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern int unregister_memory_section(struct mem_section *);
 #endif
-- 
cgit v1.2.3


From f9fa1d919c696e90c887d8742198023e7639d139 Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Fri, 24 Feb 2017 15:00:05 -0800
Subject: kasan: drain quarantine of memcg slab objects

Per memcg slab accounting and kasan have a problem with kmem_cache
destruction.
 - kmem_cache_create() allocates a kmem_cache, which is used for
   allocations from processes running in root (top) memcg.
 - Processes running in non root memcg and allocating with either
   __GFP_ACCOUNT or from a SLAB_ACCOUNT cache use a per memcg
   kmem_cache.
 - Kasan catches use-after-free by having kfree() and kmem_cache_free()
   defer freeing of objects. Objects are placed in a quarantine.
 - kmem_cache_destroy() destroys root and non root kmem_caches. It takes
   care to drain the quarantine of objects from the root memcg's
   kmem_cache, but ignores objects associated with non root memcg. This
   causes leaks because quarantined per memcg objects refer to per memcg
   kmem cache being destroyed.

To see the problem:

 1) create a slab cache with kmem_cache_create(,,,SLAB_ACCOUNT,)
 2) from non root memcg, allocate and free a few objects from cache
 3) dispose of the cache with kmem_cache_destroy() kmem_cache_destroy()
    will trigger a "Slab cache still has objects" warning indicating
    that the per memcg kmem_cache structure was leaked.

Fix the leak by draining kasan quarantined objects allocated from non
root memcg.

Racing memcg deletion is tricky, but handled.  kmem_cache_destroy() =>
shutdown_memcg_caches() => __shutdown_memcg_cache() => shutdown_cache()
flushes per memcg quarantined objects, even if that memcg has been
rmdir'd and gone through memcg_deactivate_kmem_caches().

This leak only affects destroyed SLAB_ACCOUNT kmem caches when kasan is
enabled.  So I don't think it's worth patching stable kernels.

Link: http://lkml.kernel.org/r/1482257462-36948-1-git-send-email-gthelen@google.com
Signed-off-by: Greg Thelen <gthelen@google.com>
Reviewed-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 820c0ad54a01..c908b25bf5a5 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -52,7 +52,7 @@ void kasan_free_pages(struct page *page, unsigned int order);
 void kasan_cache_create(struct kmem_cache *cache, size_t *size,
 			unsigned long *flags);
 void kasan_cache_shrink(struct kmem_cache *cache);
-void kasan_cache_destroy(struct kmem_cache *cache);
+void kasan_cache_shutdown(struct kmem_cache *cache);
 
 void kasan_poison_slab(struct page *page);
 void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
@@ -98,7 +98,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
 				      size_t *size,
 				      unsigned long *flags) {}
 static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
-static inline void kasan_cache_destroy(struct kmem_cache *cache) {}
+static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
 
 static inline void kasan_poison_slab(struct page *page) {}
 static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
-- 
cgit v1.2.3


From 796f571b0c5cf3efd2f652779770fa7bbbc2bb03 Mon Sep 17 00:00:00 2001
From: Lafcadio Wluiki <wluikil@gmail.com>
Date: Fri, 24 Feb 2017 15:00:23 -0800
Subject: procfs: use an enum for possible hidepid values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, the hidepid parameter was checked by comparing literal
integers 0, 1, 2.  Let's add a proper enum for this, to make the
checking more expressive:

        0 → HIDEPID_OFF
        1 → HIDEPID_NO_ACCESS
        2 → HIDEPID_INVISIBLE

This changes the internal labelling only, the userspace-facing interface
remains unmodified, and still works with literal integers 0, 1, 2.

No functional changes.

Link: http://lkml.kernel.org/r/1484572984-13388-2-git-send-email-djalal@gmail.com
Signed-off-by: Lafcadio Wluiki <wluikil@gmail.com>
Signed-off-by: Djalal Harouni <tixxdz@gmail.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid_namespace.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 34cce96741bc..c2a989dee876 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,6 +21,12 @@ struct pidmap {
 
 struct fs_pin;
 
+enum { /* definitions for pid_namespace's hide_pid field */
+	HIDEPID_OFF	  = 0,
+	HIDEPID_NO_ACCESS = 1,
+	HIDEPID_INVISIBLE = 2,
+};
+
 struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
-- 
cgit v1.2.3


From 16f4e3195156f2f06e90d00a36bc48d7a513f253 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 24 Feb 2017 15:00:29 -0800
Subject: include/linux/iopoll.h: include <linux/ktime.h> instead of
 <linux/hrtimer.h>

The timer APIs this header needs are ktime_get(), ktime_add_us(), and
ktime_compare().  So, including <linux/ktime.h> seems enough.  This
commit will cut unnecessary header file parsing.

Link: http://lkml.kernel.org/r/1481679225-10885-1-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/iopoll.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index 1c30014ed176..d29e1e21bf3f 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -17,7 +17,7 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/hrtimer.h>
+#include <linux/ktime.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/io.h>
-- 
cgit v1.2.3


From a3f0825e7e37d99a02a8a1b1599687667ee50d04 Mon Sep 17 00:00:00 2001
From: Gideon Israel Dsouza <gidisrael@gmail.com>
Date: Fri, 24 Feb 2017 15:00:32 -0800
Subject: compiler-gcc.h: add a new macro to wrap gcc attribute

Add __mode(x) into compiler-gcc.h as part of a cleanup task I've taken
up, to replace gcc specific attributes with macros.

The next patch is a cleanup of the m68k subsystem and it requires a new
macro to wrap __attribute__ ((mode (...)))

Link: http://lkml.kernel.org/r/1485540901-1988-2-git-send-email-gidisrael@gmail.com
Signed-off-by: Gideon Israel Dsouza <gidisrael@gmail.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index fddd1a5eb322..811f7a915658 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -122,6 +122,7 @@
 #define __attribute_const__	__attribute__((__const__))
 #define __maybe_unused		__attribute__((unused))
 #define __always_unused		__attribute__((unused))
+#define __mode(x)               __attribute__((mode(x)))
 
 /* gcc version specific checks */
 
-- 
cgit v1.2.3


From 85caa95b9f19bb3a26d7e025d1134760b69e0c40 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 24 Feb 2017 15:00:38 -0800
Subject: bug: switch data corruption check to __must_check

The CHECK_DATA_CORRUPTION() macro was designed to have callers do
something meaningful/protective on failure.  However, using "return
false" in the macro too strictly limits the design patterns of callers.
Instead, let callers handle the logic test directly, but make sure that
the result IS checked by forcing __must_check (which appears to not be
able to be used directly on macro expressions).

Link: http://lkml.kernel.org/r/20170206204547.GA125312@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index baff2e8fc8a8..5828489309bb 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -124,18 +124,20 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr,
 
 /*
  * Since detected data corruption should stop operation on the affected
- * structures, this returns false if the corruption condition is found.
+ * structures. Return value must be checked and sanely acted on by caller.
  */
+static inline __must_check bool check_data_corruption(bool v) { return v; }
 #define CHECK_DATA_CORRUPTION(condition, fmt, ...)			 \
-	do {								 \
-		if (unlikely(condition)) {				 \
+	check_data_corruption(({					 \
+		bool corruption = unlikely(condition);			 \
+		if (corruption) {					 \
 			if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
 				pr_err(fmt, ##__VA_ARGS__);		 \
 				BUG();					 \
 			} else						 \
 				WARN(1, fmt, ##__VA_ARGS__);		 \
-			return false;					 \
 		}							 \
-	} while (0)
+		corruption;						 \
+	}))
 
 #endif	/* _LINUX_BUG_H */
-- 
cgit v1.2.3


From 4f5901f5a6724f4ed1641e4a94978c5039a1f8c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Fri, 24 Feb 2017 15:01:01 -0800
Subject: linux/kernel.h: fix DIV_ROUND_CLOSEST to support negative divisors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While working on a thermal driver I encounter a scenario where the
divisor could be negative, instead of adding local code to handle this I
though I first try to add support for this in DIV_ROUND_CLOSEST.

Add support to DIV_ROUND_CLOSEST for negative divisors if both dividend
and divisor variable types are signed.  This should not alter current
behavior for users of the macro as previously negative divisors where
not supported.

Before:

DIV_ROUND_CLOSEST(  59,  4) =  15
DIV_ROUND_CLOSEST(  59, -4) = -14
DIV_ROUND_CLOSEST( -59,  4) = -15
DIV_ROUND_CLOSEST( -59, -4) =  14

After:

DIV_ROUND_CLOSEST(  59,  4) =  15
DIV_ROUND_CLOSEST(  59, -4) = -15
DIV_ROUND_CLOSEST( -59,  4) = -15
DIV_ROUND_CLOSEST( -59, -4) =  15

[akpm@linux-foundation.org: fix comment, per Guenter]
Link: http://lkml.kernel.org/r/20161222102217.29011-1-niklas.soderlund+renesas@ragnatech.se
Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index cb09238f6d32..4c26dc3a8295 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -100,16 +100,18 @@
 )
 
 /*
- * Divide positive or negative dividend by positive divisor and round
- * to closest integer. Result is undefined for negative divisors and
- * for negative dividends if the divisor variable type is unsigned.
+ * Divide positive or negative dividend by positive or negative divisor
+ * and round to closest integer. Result is undefined for negative
+ * divisors if he dividend variable type is unsigned and for negative
+ * dividends if the divisor variable type is unsigned.
  */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
 {							\
 	typeof(x) __x = x;				\
 	typeof(divisor) __d = divisor;			\
 	(((typeof(x))-1) > 0 ||				\
-	 ((typeof(divisor))-1) > 0 || (__x) > 0) ?	\
+	 ((typeof(divisor))-1) > 0 ||			\
+	 (((__x) > 0) == ((__d) > 0))) ?		\
 		(((__x) + ((__d) / 2)) / (__d)) :	\
 		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
-- 
cgit v1.2.3


From f231aebfc4cae2f6ed27a46a31e2630909513d77 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 24 Feb 2017 15:01:04 -0800
Subject: rbtree: use designated initializers

Prepare to mark sensitive kernel structures for randomization by making
sure they're using designated initializers.  These were identified
during allyesconfig builds of x86, arm, and arm64, with most initializer
fixes extracted from grsecurity.

Link: http://lkml.kernel.org/r/20161217010253.GA140470@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Jie Chen <fykcee1@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index d076183e49be..9702b6e183bc 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -90,7 +90,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
 	old->rbaugmented = rbcompute(old);				\
 }									\
 rbstatic const struct rb_augment_callbacks rbname = {			\
-	rbname ## _propagate, rbname ## _copy, rbname ## _rotate	\
+	.propagate = rbname ## _propagate,				\
+	.copy = rbname ## _copy,					\
+	.rotate = rbname ## _rotate					\
 };
 
 
-- 
cgit v1.2.3


From 4e1a33b105ddf201f66dcc44490c6086a25eca0b Mon Sep 17 00:00:00 2001
From: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
Date: Fri, 24 Feb 2017 15:01:12 -0800
Subject: lib: update LZ4 compressor module

Patch series "Update LZ4 compressor module", v7.

This patchset updates the LZ4 compression module to a version based on
LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between high
compression ratio and high compression speed.

We want to use LZ4 fast in order to support compression in lustre and
(mostly, based on that) investigate data reduction techniques in behalf
of storage systems.

Also, it will be useful for other users of LZ4 compression, as with LZ4
fast it is possible to enable applications to use fast and/or high
compression depending on the usecase.  For instance, ZRAM is offering a
LZ4 backend and could benefit from an updated LZ4 in the kernel.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4 Source version: 1.7.3

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html

[PATCH 1/5] lib: Update LZ4 compressor module
[PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
[PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
[PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
[PATCH 5/5] lib/lz4: Remove back-compat wrappers

This patch (of 5):

Update the LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.  The kernel
module is inspired by the previous work by Chanho Min.  The updated LZ4
module will not break existing code since the patchset contains
appropriate changes.

API changes:

New method LZ4_compress_fast which differs from the variant available in
kernel by the new acceleration parameter, allowing to trade compression
ratio for more compression speed and vice versa.

LZ4_decompress_fast is the respective decompression method, featuring a
very fast decoder (multiple GB/s per core), able to reach RAM speed in
multi-core systems.  The decompressor allows to decompress data
compressed with LZ4 fast as well as the LZ4 HC (high compression)
algorithm.

Also the useful functions LZ4_decompress_safe_partial and
LZ4_compress_destsize were added.  The latter reverses the logic by
trying to compress as much data as possible from source to dest while
the former aims to decompress partial blocks of data.

A bunch of streaming functions were also added which allow
compressig/decompressing data in multiple steps (so called "streaming
mode").

The methods lz4_compress and lz4_decompress_unknownoutputsize are now
known as LZ4_compress_default respectivley LZ4_decompress_safe.  The old
methods will be removed since there's no callers left in the code.

[arnd@arndb.de: fix KERNEL_LZ4 support]
  Link: http://lkml.kernel.org/r/20170208211946.2839649-1-arnd@arndb.de
[akpm@linux-foundation.org: simplify]
[akpm@linux-foundation.org: fix the simplification]
[4sschmid@informatik.uni-hamburg.de: fix performance regressions]
  Link: http://lkml.kernel.org/r/1486898178-17125-2-git-send-email-4sschmid@informatik.uni-hamburg.de
[4sschmid@informatik.uni-hamburg.de: v8]
  Link: http://lkml.kernel.org/r/1487182598-15351-2-git-send-email-4sschmid@informatik.uni-hamburg.de
Link: http://lkml.kernel.org/r/1486321748-19085-2-git-send-email-4sschmid@informatik.uni-hamburg.de
Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Bongkyu Kim <bongkyu.kim@lge.com>
Cc: Rui Salvaterra <rsalvaterra@gmail.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David S. Miller <davem@davemloft.net>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lz4.h | 762 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 696 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c59f321..1b0f8ca0f9c8 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,717 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
  *
  * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  */
-#define LZ4_MEM_COMPRESS	(16384)
-#define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
 
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
+/*-************************************************************************
+ *	CONSTANTS
+ **************************************************************************/
 /*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  */
-static inline size_t lz4_compressbound(size_t isize)
+#define LZ4_MEMORY_USAGE 14
+
+#define LZ4_MAX_INPUT_SIZE	0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)	(\
+	(unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+	? 0 \
+	: (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG	 (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL			3
+#define LZ4HC_DEFAULT_CLEVEL			9
+#define LZ4HC_MAX_CLEVEL			16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE - 1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *	STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE - 3)) + 4)
+#define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64	4
+#define LZ4_STREAMDECODESIZE		 (LZ4_STREAMDECODESIZE_U64 * \
+	sizeof(unsigned long long))
+
+/*
+ * LZ4_stream_t - information structure to track an LZ4 stream.
+ */
+typedef struct {
+	uint32_t hashTable[LZ4_HASH_SIZE_U32];
+	uint32_t currentOffset;
+	uint32_t initCheck;
+	const uint8_t *dictionary;
+	uint8_t *bufferStart;
+	uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+	unsigned long long table[LZ4_STREAMSIZE_U64];
+	LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
+
+/*
+ * LZ4_streamHC_t - information structure to track an LZ4HC stream.
+ */
+typedef struct {
+	unsigned int	 hashTable[LZ4HC_HASHTABLESIZE];
+	unsigned short	 chainTable[LZ4HC_MAXD];
+	/* next block to continue on current prefix */
+	const unsigned char *end;
+	/* All index relative to this position */
+	const unsigned char *base;
+	/* alternate base for extDict */
+	const unsigned char *dictBase;
+	/* below that point, need extDict */
+	unsigned int	 dictLimit;
+	/* below that point, no more dict */
+	unsigned int	 lowLimit;
+	/* index from which to continue dict update */
+	unsigned int	 nextToUpdate;
+	unsigned int	 compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+	size_t table[LZ4_STREAMHCSIZE_SIZET];
+	LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
+
+/*
+ * LZ4_streamDecode_t - information structure to track an
+ *	LZ4 stream during decompression.
+ *
+ * init this structure using LZ4_setStreamDecode (or memset()) before first use
+ */
+typedef struct {
+	const uint8_t *externalDict;
+	size_t extDictSize;
+	const uint8_t *prefixEnd;
+	size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+	unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+	LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *	SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS	LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS	LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *	Compression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_compressBound() - Max. output size in worst case szenarios
+ * @isize: Size of the input data
+ *
+ * Return: Max. size LZ4 may output in a "worst case" szenario
+ * (data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
 {
-	return isize + (isize / 255) + 16;
+	return LZ4_COMPRESSBOUND(isize);
 }
 
-/*
- * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
- *		the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
+/**
+ * lz4_compressbound() - For backwards compatibility; see LZ4_compressBound
+ * @isize: Size of the input data
+ *
+ * Return: Max. size LZ4 may output in a "worst case" szenario
+ *	(data not compressible)
+ */
+static inline int lz4_compressbound(size_t isize)
+{
+	return LZ4_COMPRESSBOUND(isize);
+}
+
+/**
+ * LZ4_compress_default() - Compress data from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Compresses 'sourceSize' bytes from buffer 'source'
+ * into already allocated 'dest' buffer of size 'maxOutputSize'.
+ * Compression is guaranteed to succeed if
+ * 'maxOutputSize' >= LZ4_compressBound(inputSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'source' into a more limited 'dest' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * As a consequence, 'dest' content is not valid.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *	(necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem);
+
+/**
+ * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ * @acceleration: acceleration factor
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Same as LZ4_compress_default(), but allows to select an "acceleration"
+ * factor. The larger the acceleration value, the faster the algorithm,
+ * but also the lesser the compression. It's a trade-off. It can be fine tuned,
+ * with each successive value providing roughly +~3% to speed.
+ * An acceleration value of "1" is the same as regular LZ4_compress_default()
+ * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *	(necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, int acceleration, void *wrkmem);
+
+/**
+ * LZ4_compress_destSize() - Compress as much data as possible
+ *	from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @sourceSizePtr: will be modified to indicate how many bytes where read
+ *	from 'source' to fill 'dest'. New value is necessarily <= old value.
+ * @targetDestSize: Size of buffer 'dest' which must be already allocated
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * possible from 'source'.
+ *
+ * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize)
+ *	or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+	int targetDestSize, void *wrkmem);
 
 /*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
+ * lz4_compress() - For backward compatibility, see LZ4_compress_default
+ * @src: source address of the original data
+ * @src_len: size of the original data
+ * @dst: output buffer address of the compressed data. This requires 'dst'
+ *	of size LZ4_COMPRESSBOUND
+ * @dst_len: is the output size, which is returned after compress done
+ * @workmem: address of the working memory.
+ *
+ * Return: Success if return 0, Error if return < 0
  */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*-************************************************************************
+ *	Decompression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * This function fully respect memory boundaries for properly formed
+ * compressed data.
+ * It is a bit faster than LZ4_decompress_safe().
+ * However, it does not provide any protection against intentionally
+ * modified data stream (malicious input).
+ * Use this function in trusted environment only
+ * (data to decode comes from a trusted source).
+ *
+ * Return: number of bytes read from the source buffer
+ *	or a negative result if decompression fails.
+ */
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe() - Decompression protected against buffer overflow
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * Decompresses data fom 'source' into 'dest'.
+ * If the source stream is detected malformed, the function will
+ * stop decoding and return a negative result.
+ * This function is protected against buffer overflow exploits,
+ * including malicious data packets. It never writes outside output buffer,
+ * nor reads outside input buffer.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize'
+ *	at position 'source' into buffer 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the decompressed data which must be
+ *	already allocated
+ * @compressedSize: is the precise full size of the compressed block.
+ * @targetOutputSize: the decompression operation will try
+ *	to stop as soon as 'targetOutputSize' has been reached
+ * @maxDecompressedSize: is the size of destination buffer
+ *
+ * This function decompresses a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ * This function never writes outside of output buffer,
+ * and never reads outside of input buffer.
+ * It is therefore protected against malicious data packets.
+ *
+ * Return: the number of bytes decoded in the destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ *
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize);
 
 /*
- * lz4_decompress_unknownoutputsize()
- *	src     : source address of the compressed data
- *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
- *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+ * lz4_decompress_unknownoutputsize() - For backwards compatibility,
+ *	see LZ4_decompress_safe
+ * @src: source address of the compressed data
+ * @src_len: is the input size, therefore the compressed size
+ * @dest: output buffer address of the decompressed data
+ *	which must be already allocated
+ * @dest_len: is the max size of the destination buffer, which is
+ *	returned with actual size of decompressed data after decompress done
+ *
+ * Return: Success if return 0, Error if return (< 0)
  */
 int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+	unsigned char *dest, size_t *dest_len);
+
+/**
+ * lz4_decompress() - For backwards cocmpatibility, see LZ4_decompress_fast
+ * @src: source address of the compressed data
+ * @src_len: is the input size, which is returned after decompress done
+ * @dest: output buffer address of the decompressed data,
+ *	which must be already allocated
+ * @actual_dest_len: is the size of uncompressed data, supposing it's known
+ *
+ * Return: Success if return 0, Error if return (< 0)
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *	LZ4 HC Compression
+ **************************************************************************/
+
+/**
+ * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @dstCapacity: full or partial size of buffer 'dst',
+ *	which must be already allocated
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *	value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *	Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ * @wrkmem: address of the working memory.
+ *	This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Compress data from 'src' into 'dst', using the more powerful
+ * but slower "HC" algorithm. Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)
+ *
+ * Return : the number of bytes written into 'dst' or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+	int compressionLevel, void *wrkmem);
+
+/**
+ * lz4hc_compress() - For backwards compatibility, see LZ4_compress_HC
+ * @src: source address of the original data
+ * @src_len: size of the original data
+ * @dst: output buffer address of the compressed data. This requires 'dst'
+ *	of size LZ4_COMPRESSBOUND.
+ * @dst_len: is the output size, which is returned after compress done
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Return	: Success if return 0, Error if return (< 0)
+ */
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/**
+ * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *	value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *	Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *
+ * An LZ4_streamHC_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_streamHC_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+
+/**
+ * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC
+ * @streamHCPtr: pointer to the LZ4HC_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4HC_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int	LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+	int dictSize);
+
+/**
+ * LZ4_compress_HC_continue() - Compress 'src' using data from previously
+ *	compressed blocks as a dictionary using the HC algorithm
+ * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *	which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ *
+ * These functions compress data in successive blocks of any size, using
+ * previous blocks as dictionary. One key assumption is that previous
+ * blocks (up to 64 KB) remain read-accessible while
+ * compressing next blocks. There is an exception for ring buffers,
+ * which can be smaller than 64 KB.
+ * Ring buffers scenario is automatically detected and handled by
+ * LZ4_compress_HC_continue().
+ * Before starting compression, state must be properly initialized,
+ * using LZ4_resetStreamHC().
+ * A first "fictional block" can then be designated as
+ * initial dictionary, using LZ4_loadDictHC() (Optional).
+ * Then, use LZ4_compress_HC_continue()
+ * to compress each successive block. Previous memory blocks
+ * (including initial dictionary when present) must remain accessible
+ * and unmodified during compression.
+ * 'dst' buffer should be sized to handle worst case scenarios, using
+ *  LZ4_compressBound(), to ensure operation success.
+ *  If, for any reason, previous data blocks can't be preserved unmodified
+ *  in memory during next compression block,
+ *  you must save it to a safer memory space, using LZ4_saveDictHC().
+ * Return value of LZ4_saveDictHC() is the size of dictionary
+ * effectively saved into 'safeBuffer'.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize);
+
+/**
+ * LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream
+ * @streamHCPtr: pointer to the 'LZ4HC_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @maxDictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDictHC() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_HC_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= maxDictSize),
+ *	or 0 if error.
+ */
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+	int maxDictSize);
+
+/*-*********************************************
+ *	Streaming Compression Functions
+ ***********************************************/
+
+/**
+ * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure
+ * @LZ4_stream: pointer to the 'LZ4_stream_t' structure
+ *
+ * An LZ4_stream_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_stream_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/**
+ * LZ4_loadDict() - Load a static dictionary into LZ4_stream
+ * @streamPtr: pointer to the LZ4_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+	int dictSize);
+
+/**
+ * LZ4_saveDict() - Save static dictionary from LZ4_stream
+ * @streamPtr: pointer to the 'LZ4_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @dictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_fast_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= dictSize),
+ *	or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/**
+ * LZ4_compress_fast_continue() - Compress 'src' using data from previously
+ *	compressed blocks as a dictionary
+ * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *	which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ * @acceleration: acceleration factor
+ *
+ * Compress buffer content 'src', using data from previously compressed blocks
+ * as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still
+ * be present and unmodified !
+ * If maxDstSize >= LZ4_compressBound(srcSize),
+ * compression is guaranteed to succeed, and runs faster.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/**
+ * LZ4_setStreamDecode() - Instruct where to find dictionary
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @dictionary: dictionary to use
+ * @dictSize: size of dictionary
+ *
+ * Use this function to instruct where to find the dictionary.
+ *	Setting a size of 0 is allowed (same effect as reset).
+ *
+ * Return: 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * These decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * These decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode()
+ *	followed by LZ4_decompress_safe_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * These decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize, const char *dictStart,
+	int dictSize);
+
+/**
+ * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode()
+ *	followed by LZ4_decompress_fast_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * These decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize);
+
 #endif
-- 
cgit v1.2.3


From 69c78423b8f439b077929410bdf8f88e7031b891 Mon Sep 17 00:00:00 2001
From: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
Date: Fri, 24 Feb 2017 15:01:25 -0800
Subject: lib/lz4: remove back-compat wrappers

Remove the functions introduced as wrappers for providing backwards
compatibility to the prior LZ4 version.  They're not needed anymore
since there's no callers left.

Link: http://lkml.kernel.org/r/1486321748-19085-6-git-send-email-4sschmid@informatik.uni-hamburg.de
Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
Cc: Bongkyu Kim <bongkyu.kim@lge.com>
Cc: Rui Salvaterra <rsalvaterra@gmail.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David S. Miller <davem@davemloft.net>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lz4.h | 69 -----------------------------------------------------
 1 file changed, 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 1b0f8ca0f9c8..394e3d9213b8 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -172,18 +172,6 @@ static inline int LZ4_compressBound(size_t isize)
 	return LZ4_COMPRESSBOUND(isize);
 }
 
-/**
- * lz4_compressbound() - For backwards compatibility; see LZ4_compressBound
- * @isize: Size of the input data
- *
- * Return: Max. size LZ4 may output in a "worst case" szenario
- *	(data not compressible)
- */
-static inline int lz4_compressbound(size_t isize)
-{
-	return LZ4_COMPRESSBOUND(isize);
-}
-
 /**
  * LZ4_compress_default() - Compress data from source to dest
  * @source: source address of the original data
@@ -257,20 +245,6 @@ int LZ4_compress_fast(const char *source, char *dest, int inputSize,
 int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
 	int targetDestSize, void *wrkmem);
 
-/*
- * lz4_compress() - For backward compatibility, see LZ4_compress_default
- * @src: source address of the original data
- * @src_len: size of the original data
- * @dst: output buffer address of the compressed data. This requires 'dst'
- *	of size LZ4_COMPRESSBOUND
- * @dst_len: is the output size, which is returned after compress done
- * @workmem: address of the working memory.
- *
- * Return: Success if return 0, Error if return < 0
- */
-int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
 /*-************************************************************************
  *	Decompression Functions
  **************************************************************************/
@@ -346,34 +320,6 @@ int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
 int LZ4_decompress_safe_partial(const char *source, char *dest,
 	int compressedSize, int targetOutputSize, int maxDecompressedSize);
 
-/*
- * lz4_decompress_unknownoutputsize() - For backwards compatibility,
- *	see LZ4_decompress_safe
- * @src: source address of the compressed data
- * @src_len: is the input size, therefore the compressed size
- * @dest: output buffer address of the decompressed data
- *	which must be already allocated
- * @dest_len: is the max size of the destination buffer, which is
- *	returned with actual size of decompressed data after decompress done
- *
- * Return: Success if return 0, Error if return (< 0)
- */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-	unsigned char *dest, size_t *dest_len);
-
-/**
- * lz4_decompress() - For backwards cocmpatibility, see LZ4_decompress_fast
- * @src: source address of the compressed data
- * @src_len: is the input size, which is returned after decompress done
- * @dest: output buffer address of the decompressed data,
- *	which must be already allocated
- * @actual_dest_len: is the size of uncompressed data, supposing it's known
- *
- * Return: Success if return 0, Error if return (< 0)
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-	unsigned char *dest, size_t actual_dest_len);
-
 /*-************************************************************************
  *	LZ4 HC Compression
  **************************************************************************/
@@ -400,21 +346,6 @@ int lz4_decompress(const unsigned char *src, size_t *src_len,
 int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
 	int compressionLevel, void *wrkmem);
 
-/**
- * lz4hc_compress() - For backwards compatibility, see LZ4_compress_HC
- * @src: source address of the original data
- * @src_len: size of the original data
- * @dst: output buffer address of the compressed data. This requires 'dst'
- *	of size LZ4_COMPRESSBOUND.
- * @dst_len: is the output size, which is returned after compress done
- * @wrkmem: address of the working memory.
- *	This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
- *
- * Return	: Success if return 0, Error if return (< 0)
- */
-int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
 /**
  * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
  * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
-- 
cgit v1.2.3


From 3d1e236022cc1426b0834565995ddee2ca231cee Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Fri, 24 Feb 2017 22:31:02 -0600
Subject: objtool: Prevent GCC from merging annotate_unreachable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

0-day bot reported some new objtool warnings which were caused by the
new annotate_unreachable() macro:

  fs/afs/flock.o: warning: objtool: afs_do_unlk()+0x0: duplicate frame pointer save
  fs/afs/flock.o: warning: objtool: afs_do_unlk()+0x0: frame pointer state mismatch
  fs/btrfs/delayed-inode.o: warning: objtool: btrfs_delete_delayed_dir_index()+0x0: duplicate frame pointer save
  fs/btrfs/delayed-inode.o: warning: objtool: btrfs_delete_delayed_dir_index()+0x0: frame pointer state mismatch
  fs/dlm/lock.o: warning: objtool: _grant_lock()+0x0: duplicate frame pointer save
  fs/dlm/lock.o: warning: objtool: _grant_lock()+0x0: frame pointer state mismatch
  fs/ocfs2/alloc.o: warning: objtool: ocfs2_mv_path()+0x0: duplicate frame pointer save
  fs/ocfs2/alloc.o: warning: objtool: ocfs2_mv_path()+0x0: frame pointer state mismatch

It turns out that, for older versions of GCC, if a function has multiple
BUG() incantations, GCC will sometimes merge the corresponding
annotate_unreachable() inline asm statements into a single block.  That
has the undesirable effect of removing one of the entries in the
__unreachable section, confusing objtool greatly.

A workaround for this issue is to ensure that each instance of the
inline asm statement uses a different label, so that GCC sees the
statements are unique and leaves them alone.  The inline asm ‘%=’ token
could be used for that, but unfortunately older versions of GCC don't
support it.  So I implemented a poor man's version of it with the
__LINE__ macro.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")
Link: http://lkml.kernel.org/r/0c14b00baf9f68d1b0221ddb6c88b925181c8be8.1487997036.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler-gcc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 8ea159fc489d..de471346b365 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -197,10 +197,10 @@
 
 #ifdef CONFIG_STACK_VALIDATION
 #define annotate_unreachable() ({					\
-	asm("1:\t\n"							\
+	asm("%c0:\t\n"							\
 	    ".pushsection __unreachable, \"a\"\t\n"			\
-	    ".long 1b\t\n"						\
-	    ".popsection\t\n");						\
+	    ".long %c0b\t\n"						\
+	    ".popsection\t\n" : : "i" (__LINE__));			\
 })
 #else
 #define annotate_unreachable()
-- 
cgit v1.2.3


From 6dbf5cea05a7098a69f294c96b6d76f08562cae5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 24 Feb 2017 13:25:14 +0100
Subject: cpuidle: menu: Avoid taking spinlock for accessing QoS values

After commit 9908859acaa9 (cpuidle/menu: add per CPU PM QoS resume
latency consideration) the cpuidle menu governor calls
dev_pm_qos_read_value() on CPU devices to read the current resume
latency QoS constraint values for them.  That function takes a spinlock
to prevent the device's power.qos pointer from becoming NULL during
the access which is a problem for the RT patchset where spinlocks are
converted into mutexes and the idle loop stops working.

However, it is not even necessary for the menu governor to take
that spinlock, because the power.qos pointer accessed under it
cannot be modified during the access anyway.

For this reason, introduce a "raw" routine for accessing device
QoS resume latency constraints without locking and use it in the
menu governor.

Fixes: 9908859acaa9 (cpuidle/menu: add per CPU PM QoS resume latency consideration)
Acked-by: Alex Shi <alex.shi@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_qos.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 0f65d36c2a75..5aba9f47899d 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -173,6 +173,12 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev)
 {
 	return dev->power.qos->flags_req->data.flr.flags;
 }
+
+static inline s32 dev_pm_qos_raw_read_value(struct device *dev)
+{
+	return IS_ERR_OR_NULL(dev->power.qos) ?
+		0 : pm_qos_read_value(&dev->power.qos->resume_latency);
+}
 #else
 static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev,
 							  s32 mask)
@@ -237,6 +243,7 @@ static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {}
 
 static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return 0; }
 static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; }
+static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return 0; }
 #endif
 
 #endif
-- 
cgit v1.2.3


From 51be7a9a261ce18c520fb3928b168feb77522745 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 12 Jan 2017 23:36:32 +0200
Subject: virtio_mmio: expose header to userspace

It's handy for userspace emulators like QEMU.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_mmio.h | 141 --------------------------------------------
 1 file changed, 141 deletions(-)
 delete mode 100644 include/linux/virtio_mmio.h

(limited to 'include/linux')

diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h
deleted file mode 100644
index c4b09689ab64..000000000000
--- a/include/linux/virtio_mmio.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Virtio platform device driver
- *
- * Copyright 2011, ARM Ltd.
- *
- * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007
- *
- * This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of IBM nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _LINUX_VIRTIO_MMIO_H
-#define _LINUX_VIRTIO_MMIO_H
-
-/*
- * Control registers
- */
-
-/* Magic value ("virt" string) - Read Only */
-#define VIRTIO_MMIO_MAGIC_VALUE		0x000
-
-/* Virtio device version - Read Only */
-#define VIRTIO_MMIO_VERSION		0x004
-
-/* Virtio device ID - Read Only */
-#define VIRTIO_MMIO_DEVICE_ID		0x008
-
-/* Virtio vendor ID - Read Only */
-#define VIRTIO_MMIO_VENDOR_ID		0x00c
-
-/* Bitmask of the features supported by the device (host)
- * (32 bits per set) - Read Only */
-#define VIRTIO_MMIO_DEVICE_FEATURES	0x010
-
-/* Device (host) features set selector - Write Only */
-#define VIRTIO_MMIO_DEVICE_FEATURES_SEL	0x014
-
-/* Bitmask of features activated by the driver (guest)
- * (32 bits per set) - Write Only */
-#define VIRTIO_MMIO_DRIVER_FEATURES	0x020
-
-/* Activated features set selector - Write Only */
-#define VIRTIO_MMIO_DRIVER_FEATURES_SEL	0x024
-
-
-#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */
-
-/* Guest's memory page size in bytes - Write Only */
-#define VIRTIO_MMIO_GUEST_PAGE_SIZE	0x028
-
-#endif
-
-
-/* Queue selector - Write Only */
-#define VIRTIO_MMIO_QUEUE_SEL		0x030
-
-/* Maximum size of the currently selected queue - Read Only */
-#define VIRTIO_MMIO_QUEUE_NUM_MAX	0x034
-
-/* Queue size for the currently selected queue - Write Only */
-#define VIRTIO_MMIO_QUEUE_NUM		0x038
-
-
-#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */
-
-/* Used Ring alignment for the currently selected queue - Write Only */
-#define VIRTIO_MMIO_QUEUE_ALIGN		0x03c
-
-/* Guest's PFN for the currently selected queue - Read Write */
-#define VIRTIO_MMIO_QUEUE_PFN		0x040
-
-#endif
-
-
-/* Ready bit for the currently selected queue - Read Write */
-#define VIRTIO_MMIO_QUEUE_READY		0x044
-
-/* Queue notifier - Write Only */
-#define VIRTIO_MMIO_QUEUE_NOTIFY	0x050
-
-/* Interrupt status - Read Only */
-#define VIRTIO_MMIO_INTERRUPT_STATUS	0x060
-
-/* Interrupt acknowledge - Write Only */
-#define VIRTIO_MMIO_INTERRUPT_ACK	0x064
-
-/* Device status register - Read Write */
-#define VIRTIO_MMIO_STATUS		0x070
-
-/* Selected queue's Descriptor Table address, 64 bits in two halves */
-#define VIRTIO_MMIO_QUEUE_DESC_LOW	0x080
-#define VIRTIO_MMIO_QUEUE_DESC_HIGH	0x084
-
-/* Selected queue's Available Ring address, 64 bits in two halves */
-#define VIRTIO_MMIO_QUEUE_AVAIL_LOW	0x090
-#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH	0x094
-
-/* Selected queue's Used Ring address, 64 bits in two halves */
-#define VIRTIO_MMIO_QUEUE_USED_LOW	0x0a0
-#define VIRTIO_MMIO_QUEUE_USED_HIGH	0x0a4
-
-/* Configuration atomicity value */
-#define VIRTIO_MMIO_CONFIG_GENERATION	0x0fc
-
-/* The config space is defined by each driver as
- * the per-driver configuration space - Read Write */
-#define VIRTIO_MMIO_CONFIG		0x100
-
-
-
-/*
- * Interrupt flags (re: interrupt status & acknowledge registers)
- */
-
-#define VIRTIO_MMIO_INT_VRING		(1 << 0)
-#define VIRTIO_MMIO_INT_CONFIG		(1 << 1)
-
-#endif
-- 
cgit v1.2.3


From 22ad0b6ab46683975c6da032f1c2593066c7b3bd Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 9 Feb 2017 10:38:09 -0800
Subject: f2fs: add bitmaps for empty or full NAT blocks

This patches adds bitmaps to represent empty or full NAT blocks containing
free nid entries.

If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
use these bitmaps when building free nids. In order to avoid checkpointing
burden, up-to-date bitmaps will be flushed only during umount time. So,
normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs
which recovers this bitmap again.

After this patch, we build free nids from nid #0 at mount time to make more
full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
without loading any NAT pages from disk.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index f0748524ca8c..1c92ace2e8f8 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -114,6 +114,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_NAT_BITS_FLAG	0x00000080
 #define CP_CRC_RECOVERY_FLAG	0x00000040
 #define CP_FASTBOOT_FLAG	0x00000020
 #define CP_FSCK_FLAG		0x00000010
-- 
cgit v1.2.3


From 4ac912427c4214d8031d9ad6fbc3bc75e71512df Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Thu, 23 Feb 2017 10:53:49 +0800
Subject: f2fs: introduce free nid bitmap

In scenario of intensively node allocation, free nids will be ran out
soon, then it needs to stop to load free nids by traversing NAT blocks,
in worse case, if NAT blocks does not be cached in memory, it generates
IOs which slows down our foreground operations.

In order to speed up node allocation, in this patch we introduce a new
free_nid_bitmap array, so there is an bitmap table for each NAT block,
Once the NAT block is loaded, related bitmap cache will be switched on,
and bitmap will be set during traversing nat entries in NAT block, later
we can query and update nid usage status in memory completely.

With such implementation, I expect performance of node allocation can be
improved in the long-term after filesystem image is mounted.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1c92ace2e8f8..e2d239ed4c60 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -279,6 +279,7 @@ struct f2fs_node {
  * For NAT entries
  */
 #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
+#define NAT_ENTRY_BITMAP_SIZE	((NAT_ENTRY_PER_BLOCK + 7) / 8)
 
 struct f2fs_nat_entry {
 	__u8 version;		/* latest version of cached nat entry */
-- 
cgit v1.2.3


From fb5e31d970ce8b4941f03ed765d7dbefc39f22d9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 5 Feb 2017 18:15:22 +0100
Subject: virtio: allow drivers to request IRQ affinity when creating VQs

Add a struct irq_affinity pointer to the find_vqs methods, which if set
is used to tell the PCI layer to create the MSI-X vectors for our I/O
virtqueues with the proper affinity from the start.  Compared to after
the fact affinity hints this gives us an instantly working setup and
allows to allocate the irq descritors node-local and avoid interconnect
traffic.  Last but not least this will allow blk-mq queues are created
based on the interrupt affinity for storage drivers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 26c155bb639b..2ebe506fe41a 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -7,6 +7,8 @@
 #include <linux/virtio_byteorder.h>
 #include <uapi/linux/virtio_config.h>
 
+struct irq_affinity;
+
 /**
  * virtio_config_ops - operations for configuring a virtio device
  * @get: read the value of a configuration field
@@ -68,9 +70,8 @@ struct virtio_config_ops {
 	void (*set_status)(struct virtio_device *vdev, u8 status);
 	void (*reset)(struct virtio_device *vdev);
 	int (*find_vqs)(struct virtio_device *, unsigned nvqs,
-			struct virtqueue *vqs[],
-			vq_callback_t *callbacks[],
-			const char * const names[]);
+			struct virtqueue *vqs[], vq_callback_t *callbacks[],
+			const char * const names[], struct irq_affinity *desc);
 	void (*del_vqs)(struct virtio_device *);
 	u64 (*get_features)(struct virtio_device *vdev);
 	int (*finalize_features)(struct virtio_device *vdev);
@@ -169,7 +170,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 	vq_callback_t *callbacks[] = { c };
 	const char *names[] = { n };
 	struct virtqueue *vq;
-	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names);
+	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL);
 	if (err < 0)
 		return ERR_PTR(err);
 	return vq;
-- 
cgit v1.2.3


From bbaba479563910aaa51e59bb9027a09e396d3a3c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 5 Feb 2017 18:15:23 +0100
Subject: virtio: provide a method to get the IRQ affinity mask for a virtqueue

This basically passed up the pci_irq_get_affinity information through
virtio through an optional get_vq_affinity method.  It is only implemented
by the PCI backend for now, and only when we use per-virtqueue IRQs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 2ebe506fe41a..8355bab175e1 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -58,6 +58,7 @@ struct irq_affinity;
  *      This returns a pointer to the bus name a la pci_name from which
  *      the caller can then copy.
  * @set_vq_affinity: set the affinity for a virtqueue.
+ * @get_vq_affinity: get the affinity for a virtqueue (optional).
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -77,6 +78,8 @@ struct virtio_config_ops {
 	int (*finalize_features)(struct virtio_device *vdev);
 	const char *(*bus_name)(struct virtio_device *vdev);
 	int (*set_vq_affinity)(struct virtqueue *vq, int cpu);
+	const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev,
+			int index);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
-- 
cgit v1.2.3


From 73473427bb551686e4b68ecd99bfd27e6635286a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 5 Feb 2017 18:15:24 +0100
Subject: blk-mq: provide a default queue mapping for virtio device

Similar to the PCI version, just calling into virtio instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/blk-mq-virtio.h | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 include/linux/blk-mq-virtio.h

(limited to 'include/linux')

diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h
new file mode 100644
index 000000000000..b1ef6e14744f
--- /dev/null
+++ b/include/linux/blk-mq-virtio.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_BLK_MQ_VIRTIO_H
+#define _LINUX_BLK_MQ_VIRTIO_H
+
+struct blk_mq_tag_set;
+struct virtio_device;
+
+int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set,
+		struct virtio_device *vdev, int first_vec);
+
+#endif /* _LINUX_BLK_MQ_VIRTIO_H */
-- 
cgit v1.2.3


From 0d9f0a52c8b9f7a003fe1650b7d5fb8518efabe0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 5 Feb 2017 18:15:26 +0100
Subject: virtio_scsi: use virtio IRQ affinity

Use automatic IRQ affinity assignment in the virtio layer if available,
and build the blk-mq queues based on it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/cpuhotplug.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 921acaaa1601..01aea80a503e 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -26,7 +26,6 @@ enum cpuhp_state {
 	CPUHP_ARM_OMAP_WAKE_DEAD,
 	CPUHP_IRQ_POLL_DEAD,
 	CPUHP_BLOCK_SOFTIRQ_DEAD,
-	CPUHP_VIRT_SCSI_DEAD,
 	CPUHP_ACPI_CPUDRV_DEAD,
 	CPUHP_S390_PFAULT_DEAD,
 	CPUHP_BLK_MQ_DEAD,
-- 
cgit v1.2.3


From 7d134b2ce639448199052fd573a324f7e7cd5ed8 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 27 Feb 2017 14:26:56 -0800
Subject: kprobes: move kprobe declarations to asm-generic/kprobes.h

Often all is needed is these small helpers, instead of compiler.h or a
full kprobes.h.  This is important for asm helpers, in fact even some
asm/kprobes.h make use of these helpers...  instead just keep a generic
asm file with helpers useful for asm code with the least amount of
clutter as possible.

Likewise we need now to also address what to do about this file for both
when architectures have CONFIG_HAVE_KPROBES, and when they do not.  Then
for when architectures have CONFIG_HAVE_KPROBES but have disabled
CONFIG_KPROBES.

Right now most asm/kprobes.h do not have guards against CONFIG_KPROBES,
this means most architecture code cannot include asm/kprobes.h safely.
Correct this and add guards for architectures missing them.
Additionally provide architectures that not have kprobes support with
the default asm-generic solution.  This lets us force asm/kprobes.h on
the header include/linux/kprobes.h always, but most importantly we can
now safely include just asm/kprobes.h on architecture code without
bringing the full kitchen sink of header files.

Two architectures already provided a guard against CONFIG_KPROBES on its
kprobes.h: sh, arch.  The rest of the architectures needed gaurds added.
We avoid including any not-needed headers on asm/kprobes.h unless
kprobes have been enabled.

In a subsequent atomic change we can try now to remove compiler.h from
include/linux/kprobes.h.

During this sweep I've also identified a few architectures defining a
common macro needed for both kprobes and ftrace, that of the definition
of the breakput instruction up.  Some refer to this as
BREAKPOINT_INSTRUCTION.  This must be kept outside of the #ifdef
CONFIG_KPROBES guard.

[mcgrof@kernel.org: fix arm64 build]
  Link: http://lkml.kernel.org/r/CAB=NE6X1WMByuARS4mZ1g9+W=LuVBnMDnh_5zyN0CLADaVh=Jw@mail.gmail.com
[sfr@canb.auug.org.au: fixup for kprobes declarations moving]
  Link: http://lkml.kernel.org/r/20170214165933.13ebd4f4@canb.auug.org.au
Link: http://lkml.kernel.org/r/20170203233139.32682-1-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h |  8 --------
 include/linux/kprobes.h  | 19 +++----------------
 2 files changed, 3 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 91c30cba984e..b2eb9c0a68c4 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -570,12 +570,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	(_________p1); \
 })
 
-/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
-#ifdef CONFIG_KPROBES
-# define __kprobes	__attribute__((__section__(".kprobes.text")))
-# define nokprobe_inline	__always_inline
-#else
-# define __kprobes
-# define nokprobe_inline	inline
-#endif
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 16ddfb8b304a..c328e4f7dcad 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -29,7 +29,7 @@
  *		<jkenisto@us.ibm.com>  and Prasanna S Panchamukhi
  *		<prasanna@in.ibm.com> added function-return probes.
  */
-#include <linux/compiler.h>	/* for __kprobes */
+#include <linux/compiler.h>
 #include <linux/linkage.h>
 #include <linux/list.h>
 #include <linux/notifier.h>
@@ -40,9 +40,9 @@
 #include <linux/rcupdate.h>
 #include <linux/mutex.h>
 #include <linux/ftrace.h>
+#include <asm/kprobes.h>
 
 #ifdef CONFIG_KPROBES
-#include <asm/kprobes.h>
 
 /* kprobe_status settings */
 #define KPROBE_HIT_ACTIVE	0x00000001
@@ -51,6 +51,7 @@
 #define KPROBE_HIT_SSDONE	0x00000008
 
 #else /* CONFIG_KPROBES */
+#include <asm-generic/kprobes.h>
 typedef int kprobe_opcode_t;
 struct arch_specific_insn {
 	int dummy;
@@ -509,18 +510,4 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr)
 }
 #endif
 
-#ifdef CONFIG_KPROBES
-/*
- * Blacklist ganerating macro. Specify functions which is not probed
- * by using this macro.
- */
-#define __NOKPROBE_SYMBOL(fname)			\
-static unsigned long __used				\
-	__attribute__((section("_kprobe_blacklist")))	\
-	_kbl_addr_##fname = (unsigned long)fname;
-#define NOKPROBE_SYMBOL(fname)	__NOKPROBE_SYMBOL(fname)
-#else
-#define NOKPROBE_SYMBOL(fname)
-#endif
-
 #endif /* _LINUX_KPROBES_H */
-- 
cgit v1.2.3


From 441398d378f29a5ad6d0fcda07918e54e4961800 Mon Sep 17 00:00:00 2001
From: Stas Sergeev <stsp@list.ru>
Date: Mon, 27 Feb 2017 14:27:25 -0800
Subject: sigaltstack: support SS_AUTODISARM for CONFIG_COMPAT

Currently SS_AUTODISARM is not supported in compatibility mode, but does
not return -EINVAL either.  This makes dosemu built with -m32 on x86_64
to crash.  Also the kernel's sigaltstack selftest fails if compiled with
-m32.

This patch adds the needed support.

Link: http://lkml.kernel.org/r/20170205101213.8163-2-stsp@list.ru
Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
Cc: Milosz Tanski <milosz@adfin.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Waiman Long <Waiman.Long@hpe.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: Wang Xiaoqiang <wangxq10@lzu.edu.cn>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compat.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 9e40be522793..aef47be2a5c1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -711,8 +711,10 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long);
 	compat_stack_t __user *__uss = uss; \
 	struct task_struct *t = current; \
 	put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \
-	put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \
+	put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \
 	put_user_ex(t->sas_ss_size, &__uss->ss_size); \
+	if (t->sas_ss_flags & SS_AUTODISARM) \
+		sas_ss_reset(t); \
 } while (0);
 
 asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-- 
cgit v1.2.3


From e3b5a342ab9643b4079c8d417d90519388469341 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 27 Feb 2017 14:27:37 -0800
Subject: include/linux/pid.h: use for_each_thread() in do_each_pid_thread()

while_each_pid_thread() is using while_each_thread(), which is unsafe
under RCU lock according to commit 0c740d0afc3b ("introduce
for_each_thread() to replace the buggy while_each_thread()").  Use
for_each_thread() in do_each_pid_thread() which is safe under RCU lock.

Link: http://lkml.kernel.org/r/201702011947.DBD56740.OMVHOLOtSJFFFQ@I-love.SAKURA.ne.jp
Link: http://lkml.kernel.org/r/1486041779-4401-2-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 23705a53abba..298ead5512e5 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -191,10 +191,10 @@ pid_t pid_vnr(struct pid *pid);
 #define do_each_pid_thread(pid, type, task)				\
 	do_each_pid_task(pid, type, task) {				\
 		struct task_struct *tg___ = task;			\
-		do {
+		for_each_thread(tg___, task) {
 
 #define while_each_pid_thread(pid, type, task)				\
-		} while_each_thread(tg___, task);			\
+		}							\
 		task = tg___;						\
 	} while_each_pid_task(pid, type, task)
 #endif /* _LINUX_PID_H */
-- 
cgit v1.2.3


From 9de5ab8a2eeea9ae4b63b6f6353b415b93e020c0 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Mon, 27 Feb 2017 14:28:18 -0800
Subject: ipc/sem: add hysteresis

sysv sem has two lock modes: One with per-semaphore locks, one lock mode
with a single global lock for the whole array.  When switching from the
per-semaphore locks to the global lock, all per-semaphore locks must be
scanned for ongoing operations.

The patch adds a hysteresis for switching from the global lock to the
per semaphore locks.  This reduces how often the per-semaphore locks
must be scanned.

Compared to the initial patch, this is a simplified solution: Setting
USE_GLOBAL_LOCK_HYSTERESIS to 1 restores the current behavior.

In theory, a workload with exactly 10 simple sops and then one complex
op now scales a bit worse, but this is pure theory: If there is
concurrency, the it won't be exactly 10:1:10:1:10:1:...  If there is no
concurrency, then there is no need for scalability.

Link: http://lkml.kernel.org/r/1476851896-3590-3-git-send-email-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: <1vier1@web.de>
Cc: kernel test robot <xiaolong.ye@intel.com>
Cc: <felixh@informatik.uni-bremen.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sem.h b/include/linux/sem.h
index d0efd6e6c20a..4fc222f8755d 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -21,7 +21,7 @@ struct sem_array {
 	struct list_head	list_id;	/* undo requests on this array */
 	int			sem_nsems;	/* no. of semaphores in array */
 	int			complex_count;	/* pending complex operations */
-	bool			complex_mode;	/* no parallel simple ops */
+	unsigned int		use_global_lock;/* >0: global lock required */
 };
 
 #ifdef CONFIG_SYSVIPC
-- 
cgit v1.2.3


From 93407472a21b82f39c955ea7787e5bc7da100642 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Mon, 27 Feb 2017 14:28:32 -0800
Subject: fs: add i_blocksize()

Replace all 1 << inode->i_blkbits and (1 << inode->i_blkbits) in fs
branch.

This patch also fixes multiple checkpatch warnings: WARNING: Prefer
'unsigned int' to bare use of 'unsigned'

Thanks to Andrew Morton for suggesting more appropriate function instead
of macro.

[geliangtang@gmail.com: truncate: use i_blocksize()]
  Link: http://lkml.kernel.org/r/9c8b2cd83c8f5653805d43debde9fa8817e02fc4.1484895804.git.geliangtang@gmail.com
Link: http://lkml.kernel.org/r/1481319905-10126-1-git-send-email-fabf@skynet.be
Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c930cbc19342..c64f2cb7d364 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -655,6 +655,11 @@ struct inode {
 	void			*i_private; /* fs or device private pointer */
 };
 
+static inline unsigned int i_blocksize(const struct inode *node)
+{
+	return (1 << node->i_blkbits);
+}
+
 static inline int inode_unhashed(struct inode *inode)
 {
 	return hlist_unhashed(&inode->i_hash);
-- 
cgit v1.2.3


From 03440c4e5e2f167764997a7e0f2dbb279d8078e6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 27 Feb 2017 14:28:49 -0800
Subject: scripts/spelling.txt: add "an union" pattern and fix typo instances

Fix typos and add the following to the scripts/spelling.txt:

  an union||a union

Link: http://lkml.kernel.org/r/1481573103-11329-5-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dcache.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c965e4469499..591b6c16f9c1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -562,7 +562,7 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * @inode: inode to select the dentry from multiple layers (can be NULL)
  * @flags: open flags to control copy-up behavior
  *
- * If dentry is on an union/overlay, then return the underlying, real dentry.
+ * If dentry is on a union/overlay, then return the underlying, real dentry.
  * Otherwise return the dentry itself.
  *
  * See also: Documentation/filesystems/vfs.txt
@@ -581,7 +581,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
  * d_real_inode - Return the real inode
  * @dentry: The dentry to query
  *
- * If dentry is on an union/overlay, then return the underlying, real inode.
+ * If dentry is on a union/overlay, then return the underlying, real inode.
  * Otherwise return d_inode().
  */
 static inline struct inode *d_real_inode(const struct dentry *dentry)
-- 
cgit v1.2.3


From 8ab102d60a0c19df602f3758848d55f0703bf9bb Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 27 Feb 2017 14:28:55 -0800
Subject: scripts/spelling.txt: add "partiton" pattern and fix typo instances

Fix typos and add the following to the scripts/spelling.txt:

  partiton||partition

Link: http://lkml.kernel.org/r/1481573103-11329-7-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mtd/qinfo.h | 2 +-
 include/linux/spi/flash.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h
index 7b3d487d8b3f..b532ce524dae 100644
--- a/include/linux/mtd/qinfo.h
+++ b/include/linux/mtd/qinfo.h
@@ -14,7 +14,7 @@
  * @DevId - Chip Device ID
  * @qinfo - pointer to qinfo records describing the chip
  * @numchips - number of chips including virual RWW partitions
- * @chipshift - Chip/partiton size 2^chipshift
+ * @chipshift - Chip/partition size 2^chipshift
  * @chips - per-chip data structure
  */
 struct lpddr_private {
diff --git a/include/linux/spi/flash.h b/include/linux/spi/flash.h
index 3f22932e67a4..f4199e758f97 100644
--- a/include/linux/spi/flash.h
+++ b/include/linux/spi/flash.h
@@ -7,7 +7,7 @@ struct mtd_partition;
  * struct flash_platform_data: board-specific flash data
  * @name: optional flash device name (eg, as used with mtdparts=)
  * @parts: optional array of mtd_partitions for static partitioning
- * @nr_parts: number of mtd_partitions for static partitoning
+ * @nr_parts: number of mtd_partitions for static partitioning
  * @type: optional flash device type (e.g. m25p80 vs m25p64), for use
  *	with chips that can't be queried for JEDEC or other IDs
  *
-- 
cgit v1.2.3


From 4091fb95b5f8dea37568d1a94c8227244bade891 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 27 Feb 2017 14:29:56 -0800
Subject: scripts/spelling.txt: add "followings" pattern and fix typo instances

Fix typos and add the following to the scripts/spelling.txt:

  followings||following

While we are here, add a missing colon in the boilerplate in DT binding
documents.  The "you SoC" in allwinner,sunxi-pinctrl.txt was fixed as
well.

I reworded "as the followings:" to "as follows:" for
drivers/usb/gadget/udc/renesas_usb3.c.

Link: http://lkml.kernel.org/r/1481573103-11329-32-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kconfig.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h
index 8f2e059e4d45..4d748603e818 100644
--- a/include/linux/kconfig.h
+++ b/include/linux/kconfig.h
@@ -8,7 +8,7 @@
 
 /*
  * The use of "&&" / "||" is limited in certain expressions.
- * The followings enable to calculate "and" / "or" with macro expansion only.
+ * The following enable to calculate "and" / "or" with macro expansion only.
  */
 #define __and(x, y)			___and(x, y)
 #define ___and(x, y)			____and(__ARG_PLACEHOLDER_##x, y)
-- 
cgit v1.2.3


From f1f1007644ffc8051a4c11427d58b1967ae7b75a Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Mon, 27 Feb 2017 14:30:07 -0800
Subject: mm: add new mmgrab() helper

Apart from adding the helper function itself, the rest of the kernel is
converted mechanically using:

  git grep -l 'atomic_inc.*mm_count' | xargs sed -i 's/atomic_inc(&\(.*\)->mm_count);/mmgrab\(\1\);/'
  git grep -l 'atomic_inc.*mm_count' | xargs sed -i 's/atomic_inc(&\(.*\)\.mm_count);/mmgrab\(\&\1\);/'

This is needed for a later patch that hooks into the helper, but might
be a worthwhile cleanup on its own.

(Michal Hocko provided most of the kerneldoc comment.)

Link: http://lkml.kernel.org/r/20161218123229.22952-1-vegard.nossum@oracle.com
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 451e241f32c5..7cfa5546c840 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2904,6 +2904,28 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
  */
 extern struct mm_struct * mm_alloc(void);
 
+/**
+ * mmgrab() - Pin a &struct mm_struct.
+ * @mm: The &struct mm_struct to pin.
+ *
+ * Make sure that @mm will not get freed even after the owning task
+ * exits. This doesn't guarantee that the associated address space
+ * will still exist later on and mmget_not_zero() has to be used before
+ * accessing it.
+ *
+ * This is a preferred way to to pin @mm for a longer/unbounded amount
+ * of time.
+ *
+ * Use mmdrop() to release the reference acquired by mmgrab().
+ *
+ * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * of &mm_struct.mm_count vs &mm_struct.mm_users.
+ */
+static inline void mmgrab(struct mm_struct *mm)
+{
+	atomic_inc(&mm->mm_count);
+}
+
 /* mmdrop drops the mm and the page tables */
 extern void __mmdrop(struct mm_struct *);
 static inline void mmdrop(struct mm_struct *mm)
-- 
cgit v1.2.3


From 3fce371bfac2be0396ffc1e763600e6c6b1bb52a Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Mon, 27 Feb 2017 14:30:10 -0800
Subject: mm: add new mmget() helper

Apart from adding the helper function itself, the rest of the kernel is
converted mechanically using:

  git grep -l 'atomic_inc.*mm_users' | xargs sed -i 's/atomic_inc(&\(.*\)->mm_users);/mmget\(\1\);/'
  git grep -l 'atomic_inc.*mm_users' | xargs sed -i 's/atomic_inc(&\(.*\)\.mm_users);/mmget\(\&\1\);/'

This is needed for a later patch that hooks into the helper, but might
be a worthwhile cleanup on its own.

(Michal Hocko provided most of the kerneldoc comment.)

Link: http://lkml.kernel.org/r/20161218123229.22952-2-vegard.nossum@oracle.com
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7cfa5546c840..4a28deb5f210 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2948,6 +2948,27 @@ static inline void mmdrop_async(struct mm_struct *mm)
 	}
 }
 
+/**
+ * mmget() - Pin the address space associated with a &struct mm_struct.
+ * @mm: The address space to pin.
+ *
+ * Make sure that the address space of the given &struct mm_struct doesn't
+ * go away. This does not protect against parts of the address space being
+ * modified or freed, however.
+ *
+ * Never use this function to pin this address space for an
+ * unbounded/indefinite amount of time.
+ *
+ * Use mmput() to release the reference acquired by mmget().
+ *
+ * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * of &mm_struct.mm_count vs &mm_struct.mm_users.
+ */
+static inline void mmget(struct mm_struct *mm)
+{
+	atomic_inc(&mm->mm_users);
+}
+
 static inline bool mmget_not_zero(struct mm_struct *mm)
 {
 	return atomic_inc_not_zero(&mm->mm_users);
-- 
cgit v1.2.3


From b279ddc3382426f8e05068de3488a2993f68dc26 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Mon, 27 Feb 2017 14:30:16 -0800
Subject: mm: clarify mm_struct.mm_{users,count} documentation

Clarify documentation relating to mm_users and mm_count, and switch to
kernel-doc syntax.

Link: http://lkml.kernel.org/r/20161218123229.22952-4-vegard.nossum@oracle.com
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 808751d7b737..4f6d440ad785 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -407,8 +407,27 @@ struct mm_struct {
 	unsigned long task_size;		/* size of task vm space */
 	unsigned long highest_vm_end;		/* highest vma end address */
 	pgd_t * pgd;
-	atomic_t mm_users;			/* How many users with user space? */
-	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
+
+	/**
+	 * @mm_users: The number of users including userspace.
+	 *
+	 * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops
+	 * to 0 (i.e. when the task exits and there are no other temporary
+	 * reference holders), we also release a reference on @mm_count
+	 * (which may then free the &struct mm_struct if @mm_count also
+	 * drops to 0).
+	 */
+	atomic_t mm_users;
+
+	/**
+	 * @mm_count: The number of references to &struct mm_struct
+	 * (@mm_users count as 1).
+	 *
+	 * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
+	 * &struct mm_struct is freed.
+	 */
+	atomic_t mm_count;
+
 	atomic_long_t nr_ptes;			/* PTE page table pages */
 #if CONFIG_PGTABLE_LEVELS > 2
 	atomic_long_t nr_pmds;			/* PMD page table pages */
-- 
cgit v1.2.3


From 2959a5f726f6510d6dd7c958f8877e08d0cf589c Mon Sep 17 00:00:00 2001
From: Jinbum Park <jinb.park7@gmail.com>
Date: Mon, 27 Feb 2017 14:30:22 -0800
Subject: mm: add arch-independent testcases for RODATA

This patch makes arch-independent testcases for RODATA.  Both x86 and
x86_64 already have testcases for RODATA, But they are arch-specific
because using inline assembly directly.

And cacheflush.h is not a suitable location for rodata-test related
things.  Since they were in cacheflush.h, If someone change the state of
CONFIG_DEBUG_RODATA_TEST, It cause overhead of kernel build.

To solve the above issues, write arch-independent testcases and move it
to shared location.

[jinb.park7@gmail.com: fix config dependency]
  Link: http://lkml.kernel.org/r/20170209131625.GA16954@pjb1027-Latitude-E5410
Link: http://lkml.kernel.org/r/20170129105436.GA9303@pjb1027-Latitude-E5410
Signed-off-by: Jinbum Park <jinb.park7@gmail.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Valentin Rothberg <valentinrothberg@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rodata_test.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 include/linux/rodata_test.h

(limited to 'include/linux')

diff --git a/include/linux/rodata_test.h b/include/linux/rodata_test.h
new file mode 100644
index 000000000000..ea05f6c51413
--- /dev/null
+++ b/include/linux/rodata_test.h
@@ -0,0 +1,23 @@
+/*
+ * rodata_test.h: functional test for mark_rodata_ro function
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifndef _RODATA_TEST_H
+#define _RODATA_TEST_H
+
+#ifdef CONFIG_DEBUG_RODATA_TEST
+extern const int rodata_test_data;
+void rodata_test(void);
+#else
+static inline void rodata_test(void) {}
+#endif
+
+#endif /* _RODATA_TEST_H */
-- 
cgit v1.2.3


From 4e4636cf981b5b629fbfb78aa9f232e015f7d521 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 27 Feb 2017 22:21:16 -0600
Subject: objtool: Enclose contents of unreachable() macro in a block

Guenter Roeck reported a boot failure in mips64.  It was bisected to the
following commit:

  d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")

The unreachable() macro was formerly only composed of a single
statement.  The above commit added a second statement, but neglected to
enclose the statements in a block.

Suggested-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")
Link: http://lkml.kernel.org/r/20170228042116.glmwmwiohcix7o4a@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler-gcc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index de471346b365..f457b520ead6 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -215,7 +215,8 @@
  * this in the preprocessor, but we can live with this because they're
  * unreleased.  Really, we need to have autoconf for the kernel.
  */
-#define unreachable() annotate_unreachable(); __builtin_unreachable()
+#define unreachable() \
+	do { annotate_unreachable(); __builtin_unreachable(); } while (0)
 
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
-- 
cgit v1.2.3


From 7807e086a2d1f69cc1a57958cac04fea79fc2112 Mon Sep 17 00:00:00 2001
From: Ladislav Michl <ladis@linux-mips.org>
Date: Sat, 11 Feb 2017 14:02:49 +0100
Subject: ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure

gpmc_probe_onenand_child returns success even on gpmc_onenand_init
failure. Fix that.

Signed-off-by: Ladislav Michl <ladis@linux-mips.org>
Acked-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/omap-gpmc.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h
index 35d0fd7a4948..e821a3132a3e 100644
--- a/include/linux/omap-gpmc.h
+++ b/include/linux/omap-gpmc.h
@@ -88,10 +88,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d,
 #endif
 
 #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2)
-extern void gpmc_onenand_init(struct omap_onenand_platform_data *d);
+extern int gpmc_onenand_init(struct omap_onenand_platform_data *d);
 #else
 #define board_onenand_data	NULL
-static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d)
+static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d)
 {
+	return 0;
 }
 #endif
-- 
cgit v1.2.3


From ac28e47ccc3ff8dabce1aec6b224760c3e524044 Mon Sep 17 00:00:00 2001
From: Ladislav Michl <ladis@linux-mips.org>
Date: Tue, 21 Feb 2017 10:44:45 +0100
Subject: ARM: OMAP2+: Remove legacy gpmc-nand.c

This code is no longer used and can be removed as we
are using device tree.

Removing this code also removes a dependency between
drivers/mtd and arch/arm/mach-omap2 making furhter driver
changes easier.

Signed-off-by: Ladislav Michl <ladis@linux-mips.org>
[tony@atomide.com: removed from header too, updated comments]
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/omap-gpmc.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h
index e821a3132a3e..fd0de00c0d77 100644
--- a/include/linux/omap-gpmc.h
+++ b/include/linux/omap-gpmc.h
@@ -76,17 +76,6 @@ struct gpmc_timings;
 struct omap_nand_platform_data;
 struct omap_onenand_platform_data;
 
-#if IS_ENABLED(CONFIG_MTD_NAND_OMAP2)
-extern int gpmc_nand_init(struct omap_nand_platform_data *d,
-			  struct gpmc_timings *gpmc_t);
-#else
-static inline int gpmc_nand_init(struct omap_nand_platform_data *d,
-				 struct gpmc_timings *gpmc_t)
-{
-	return 0;
-}
-#endif
-
 #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2)
 extern int gpmc_onenand_init(struct omap_onenand_platform_data *d);
 #else
-- 
cgit v1.2.3


From 86ef58a4e35e8fa66afb5898cf6dec6a3bb29f67 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 28 Feb 2017 18:32:48 -0800
Subject: nfit, libnvdimm: fix interleave set cookie calculation

The interleave-set cookie is a sum that sanity checks the composition of
an interleave set has not changed from when the namespace was initially
created.  The checksum is calculated by sorting the DIMMs by their
location in the interleave-set. The comparison for the sort must be
64-bit wide, not byte-by-byte as performed by memcmp() in the broken
case.

Fix the implementation to accept correct cookie values in addition to
the Linux "memcmp" order cookies, but only allow correct cookies to be
generated going forward. It does mean that namespaces created by
third-party-tooling, or created by newer kernels with this fix, will not
validate on older kernels. However, there are a couple mitigating
conditions:

    1/ platforms with namespace-label capable NVDIMMs are not widely
       available.

    2/ interleave-sets with a single-dimm are by definition not affected
       (nothing to sort). This covers the QEMU-KVM NVDIMM emulation case.

The cookie stored in the namespace label will be fixed by any write the
namespace label, the most straightforward way to achieve this is to
write to the "alt_name" attribute of a namespace in sysfs.

Cc: <stable@vger.kernel.org>
Fixes: eaf961536e16 ("libnvdimm, nfit: add interleave-set state-tracking infrastructure")
Reported-by: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
Tested-by: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 8458c5351e56..77e7af32543f 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -70,6 +70,8 @@ struct nd_cmd_desc {
 
 struct nd_interleave_set {
 	u64 cookie;
+	/* compatibility with initial buggy Linux implementation */
+	u64 altcookie;
 };
 
 struct nd_mapping_desc {
-- 
cgit v1.2.3


From 55149d06534ae2a7ba5f7a078353deb89b3fe891 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 1 Mar 2017 00:05:04 -0600
Subject: objtool, compiler.h: Fix __unreachable section relocation size

Linus reported the following commit broke module loading on his laptop:

  d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")

It showed errors like the following:

  module: overflow in relocation type 10 val ffffffffc02afc81
  module: 'nvme' likely not compiled with -mcmodel=kernel

The problem is that the __unreachable section addresses are stored using
the '.long' asm directive, which isn't big enough for .text section
kernel addresses.  Use relative addresses instead:

  ".long %c0b - .\t\n"

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")
Link: http://lkml.kernel.org/r/20170301060504.oltm3iws6fmubnom@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler-gcc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 76e28c229805..b6bb9019d87f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -201,7 +201,7 @@
 #define annotate_unreachable() ({					\
 	asm("%c0:\t\n"							\
 	    ".pushsection __unreachable, \"a\"\t\n"			\
-	    ".long %c0b\t\n"						\
+	    ".long %c0b - .\t\n"					\
 	    ".popsection\t\n" : : "i" (__LINE__));			\
 })
 #else
-- 
cgit v1.2.3


From e3736c3eb3a6f7c0966923b629c9f92b558aa9c7 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Mon, 20 Feb 2017 13:06:21 +0200
Subject: kvm: convert kvm.users_count from atomic_t to refcount_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Hans Liljestrand <ishkamiel@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David Windsor <dwindsor@gmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 include/linux/kvm_host.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8d69d5150748..2c14ad9809da 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -26,6 +26,7 @@
 #include <linux/context_tracking.h>
 #include <linux/irqbypass.h>
 #include <linux/swait.h>
+#include <linux/refcount.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -401,7 +402,7 @@ struct kvm {
 #endif
 	struct kvm_vm_stat stat;
 	struct kvm_arch arch;
-	atomic_t users_count;
+	refcount_t users_count;
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 	spinlock_t ring_lock;
-- 
cgit v1.2.3


From b2d0fe35471d1a71471f99147ffb5986bd60e744 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 28 Feb 2017 15:02:15 +0300
Subject: net/mlx4: && vs & typo

Bitwise & was obviously intended here.

Fixes: 745d8ae4622c ("net/mlx4: Spoofcheck and zero MAC can't coexist")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index e965e5090d96..a858bcb6220b 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -109,7 +109,7 @@ static inline void mlx4_u64_to_mac(u8 *addr, u64 mac)
 	int i;
 
 	for (i = ETH_ALEN; i > 0; i--) {
-		addr[i - 1] = mac && 0xFF;
+		addr[i - 1] = mac & 0xFF;
 		mac >>= 8;
 	}
 }
-- 
cgit v1.2.3


From 39e6c8208d7b6fb9d2047850fb3327db567b564b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 28 Feb 2017 10:34:50 -0800
Subject: net: solve a NAPI race

While playing with mlx4 hardware timestamping of RX packets, I found
that some packets were received by TCP stack with a ~200 ms delay...

Since the timestamp was provided by the NIC, and my probe was added
in tcp_v4_rcv() while in BH handler, I was confident it was not
a sender issue, or a drop in the network.

This would happen with a very low probability, but hurting RPC
workloads.

A NAPI driver normally arms the IRQ after the napi_complete_done(),
after NAPI_STATE_SCHED is cleared, so that the hard irq handler can grab
it.

Problem is that if another point in the stack grabs NAPI_STATE_SCHED bit
while IRQ are not disabled, we might have later an IRQ firing and
finding this bit set, right before napi_complete_done() clears it.

This can happen with busy polling users, or if gro_flush_timeout is
used. But some other uses of napi_schedule() in drivers can cause this
as well.

thread 1                                 thread 2 (could be on same cpu, or not)

// busy polling or napi_watchdog()
napi_schedule();
...
napi->poll()

device polling:
read 2 packets from ring buffer
                                          Additional 3rd packet is
available.
                                          device hard irq

                                          // does nothing because
NAPI_STATE_SCHED bit is owned by thread 1
                                          napi_schedule();

napi_complete_done(napi, 2);
rearm_irq();

Note that rearm_irq() will not force the device to send an additional
IRQ for the packet it already signaled (3rd packet in my example)

This patch adds a new NAPI_STATE_MISSED bit, that napi_schedule_prep()
can set if it could not grab NAPI_STATE_SCHED

Then napi_complete_done() properly reschedules the napi to make sure
we do not miss something.

Since we manipulate multiple bits at once, use cmpxchg() like in
sk_busy_loop() to provide proper transactions.

In v2, I changed napi_watchdog() to use a relaxed variant of
napi_schedule_prep() : No need to set NAPI_STATE_MISSED from this point.

In v3, I added more details in the changelog and clears
NAPI_STATE_MISSED in busy_poll_stop()

In v4, I added the ideas given by Alexander Duyck in v3 review

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f40f0ab3847a..97456b2539e4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -330,6 +330,7 @@ struct napi_struct {
 
 enum {
 	NAPI_STATE_SCHED,	/* Poll is scheduled */
+	NAPI_STATE_MISSED,	/* reschedule a napi */
 	NAPI_STATE_DISABLE,	/* Disable pending */
 	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
 	NAPI_STATE_HASHED,	/* In NAPI hash (busy polling possible) */
@@ -338,12 +339,13 @@ enum {
 };
 
 enum {
-	NAPIF_STATE_SCHED	 = (1UL << NAPI_STATE_SCHED),
-	NAPIF_STATE_DISABLE	 = (1UL << NAPI_STATE_DISABLE),
-	NAPIF_STATE_NPSVC	 = (1UL << NAPI_STATE_NPSVC),
-	NAPIF_STATE_HASHED	 = (1UL << NAPI_STATE_HASHED),
-	NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
-	NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
+	NAPIF_STATE_SCHED	 = BIT(NAPI_STATE_SCHED),
+	NAPIF_STATE_MISSED	 = BIT(NAPI_STATE_MISSED),
+	NAPIF_STATE_DISABLE	 = BIT(NAPI_STATE_DISABLE),
+	NAPIF_STATE_NPSVC	 = BIT(NAPI_STATE_NPSVC),
+	NAPIF_STATE_HASHED	 = BIT(NAPI_STATE_HASHED),
+	NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
+	NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
 };
 
 enum gro_result {
@@ -414,20 +416,7 @@ static inline bool napi_disable_pending(struct napi_struct *n)
 	return test_bit(NAPI_STATE_DISABLE, &n->state);
 }
 
-/**
- *	napi_schedule_prep - check if NAPI can be scheduled
- *	@n: NAPI context
- *
- * Test if NAPI routine is already running, and if not mark
- * it as running.  This is used as a condition variable to
- * insure only one NAPI poll instance runs.  We also make
- * sure there is no pending NAPI disable.
- */
-static inline bool napi_schedule_prep(struct napi_struct *n)
-{
-	return !napi_disable_pending(n) &&
-		!test_and_set_bit(NAPI_STATE_SCHED, &n->state);
-}
+bool napi_schedule_prep(struct napi_struct *n);
 
 /**
  *	napi_schedule - schedule NAPI poll
-- 
cgit v1.2.3


From e390f9a9689a42f477a6073e2e7df530a4c1b740 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 1 Mar 2017 12:04:44 -0600
Subject: objtool, modules: Discard objtool annotation sections for modules

The '__unreachable' and '__func_stack_frame_non_standard' sections are
only used at compile time.  They're discarded for vmlinux but they
should also be discarded for modules.

Since this is a recurring pattern, prefix the section names with
".discard.".  It's a nice convention and vmlinux.lds.h already discards
such sections.

Also remove the 'a' (allocatable) flag from the __unreachable section
since it doesn't make sense for a discarded section.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends")
Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler-gcc.h | 2 +-
 include/linux/frame.h        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index b6bb9019d87f..0efef9cf014f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -200,7 +200,7 @@
 #ifdef CONFIG_STACK_VALIDATION
 #define annotate_unreachable() ({					\
 	asm("%c0:\t\n"							\
-	    ".pushsection __unreachable, \"a\"\t\n"			\
+	    ".pushsection .discard.unreachable\t\n"			\
 	    ".long %c0b - .\t\n"					\
 	    ".popsection\t\n" : : "i" (__LINE__));			\
 })
diff --git a/include/linux/frame.h b/include/linux/frame.h
index e6baaba3f1ae..d772c61c31da 100644
--- a/include/linux/frame.h
+++ b/include/linux/frame.h
@@ -11,7 +11,7 @@
  * For more information, see tools/objtool/Documentation/stack-validation.txt.
  */
 #define STACK_FRAME_NON_STANDARD(func) \
-	static void __used __section(__func_stack_frame_non_standard) \
+	static void __used __section(.discard.func_stack_frame_non_standard) \
 		*__func_stack_frame_non_standard_##func = func
 
 #else /* !CONFIG_STACK_VALIDATION */
-- 
cgit v1.2.3


From 0837e49ab3fa8d903a499984575d71efee8097ce Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 1 Mar 2017 15:11:23 +0000
Subject: KEYS: Differentiate uses of rcu_dereference_key() and
 user_key_payload()

rcu_dereference_key() and user_key_payload() are currently being used in
two different, incompatible ways:

 (1) As a wrapper to rcu_dereference() - when only the RCU read lock used
     to protect the key.

 (2) As a wrapper to rcu_dereference_protected() - when the key semaphor is
     used to protect the key and the may be being modified.

Fix this by splitting both of the key wrappers to produce:

 (1) RCU accessors for keys when caller has the key semaphore locked:

	dereference_key_locked()
	user_key_payload_locked()

 (2) RCU accessors for keys when caller holds the RCU read lock:

	dereference_key_rcu()
	user_key_payload_rcu()

This should fix following warning in the NFS idmapper

  ===============================
  [ INFO: suspicious RCU usage. ]
  4.10.0 #1 Tainted: G        W
  -------------------------------
  ./include/keys/user-type.h:53 suspicious rcu_dereference_protected() usage!
  other info that might help us debug this:
  rcu_scheduler_active = 2, debug_locks = 0
  1 lock held by mount.nfs/5987:
    #0:  (rcu_read_lock){......}, at: [<d000000002527abc>] nfs_idmap_get_key+0x15c/0x420 [nfsv4]
  stack backtrace:
  CPU: 1 PID: 5987 Comm: mount.nfs Tainted: G        W       4.10.0 #1
  Call Trace:
    dump_stack+0xe8/0x154 (unreliable)
    lockdep_rcu_suspicious+0x140/0x190
    nfs_idmap_get_key+0x380/0x420 [nfsv4]
    nfs_map_name_to_uid+0x2a0/0x3b0 [nfsv4]
    decode_getfattr_attrs+0xfac/0x16b0 [nfsv4]
    decode_getfattr_generic.constprop.106+0xbc/0x150 [nfsv4]
    nfs4_xdr_dec_lookup_root+0xac/0xb0 [nfsv4]
    rpcauth_unwrap_resp+0xe8/0x140 [sunrpc]
    call_decode+0x29c/0x910 [sunrpc]
    __rpc_execute+0x140/0x8f0 [sunrpc]
    rpc_run_task+0x170/0x200 [sunrpc]
    nfs4_call_sync_sequence+0x68/0xa0 [nfsv4]
    _nfs4_lookup_root.isra.44+0xd0/0xf0 [nfsv4]
    nfs4_lookup_root+0xe0/0x350 [nfsv4]
    nfs4_lookup_root_sec+0x70/0xa0 [nfsv4]
    nfs4_find_root_sec+0xc4/0x100 [nfsv4]
    nfs4_proc_get_rootfh+0x5c/0xf0 [nfsv4]
    nfs4_get_rootfh+0x6c/0x190 [nfsv4]
    nfs4_server_common_setup+0xc4/0x260 [nfsv4]
    nfs4_create_server+0x278/0x3c0 [nfsv4]
    nfs4_remote_mount+0x50/0xb0 [nfsv4]
    mount_fs+0x74/0x210
    vfs_kern_mount+0x78/0x220
    nfs_do_root_mount+0xb0/0x140 [nfsv4]
    nfs4_try_mount+0x60/0x100 [nfsv4]
    nfs_fs_mount+0x5ec/0xda0 [nfs]
    mount_fs+0x74/0x210
    vfs_kern_mount+0x78/0x220
    do_mount+0x254/0xf70
    SyS_mount+0x94/0x100
    system_call+0x38/0xe0

Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/key.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 722914798f37..e45212f2777e 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -354,7 +354,10 @@ static inline bool key_is_instantiated(const struct key *key)
 		!test_bit(KEY_FLAG_NEGATIVE, &key->flags);
 }
 
-#define rcu_dereference_key(KEY)					\
+#define dereference_key_rcu(KEY)					\
+	(rcu_dereference((KEY)->payload.rcu_data0))
+
+#define dereference_key_locked(KEY)					\
 	(rcu_dereference_protected((KEY)->payload.rcu_data0,		\
 				   rwsem_is_locked(&((struct key *)(KEY))->sem)))
 
-- 
cgit v1.2.3


From eb1e011a14748a1d9df9a7d7df9a5711721a1bdb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 Feb 2017 09:49:26 +0100
Subject: average: change to declare precision, not factor

Declaring the factor is counter-intuitive, and people are prone
to using small(-ish) values even when that makes no sense.

Change the DECLARE_EWMA() macro to take the fractional precision,
in bits, rather than a factor, and update all users.

While at it, add some more documentation.

Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/average.h | 61 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/average.h b/include/linux/average.h
index d04aa58280de..7ddaf340d2ac 100644
--- a/include/linux/average.h
+++ b/include/linux/average.h
@@ -1,45 +1,66 @@
 #ifndef _LINUX_AVERAGE_H
 #define _LINUX_AVERAGE_H
 
-/* Exponentially weighted moving average (EWMA) */
+/*
+ * Exponentially weighted moving average (EWMA)
+ *
+ * This implements a fixed-precision EWMA algorithm, with both the
+ * precision and fall-off coefficient determined at compile-time
+ * and built into the generated helper funtions.
+ *
+ * The first argument to the macro is the name that will be used
+ * for the struct and helper functions.
+ *
+ * The second argument, the precision, expresses how many bits are
+ * used for the fractional part of the fixed-precision values.
+ *
+ * The third argument, the weight reciprocal, determines how the
+ * new values will be weighed vs. the old state, new values will
+ * get weight 1/weight_rcp and old values 1-1/weight_rcp. Note
+ * that this parameter must be a power of two for efficiency.
+ */
 
-#define DECLARE_EWMA(name, _factor, _weight)				\
+#define DECLARE_EWMA(name, _precision, _weight_rcp)			\
 	struct ewma_##name {						\
 		unsigned long internal;					\
 	};								\
 	static inline void ewma_##name##_init(struct ewma_##name *e)	\
 	{								\
-		BUILD_BUG_ON(!__builtin_constant_p(_factor));		\
-		BUILD_BUG_ON(!__builtin_constant_p(_weight));		\
-		BUILD_BUG_ON_NOT_POWER_OF_2(_factor);			\
-		BUILD_BUG_ON_NOT_POWER_OF_2(_weight);			\
+		BUILD_BUG_ON(!__builtin_constant_p(_precision));	\
+		BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp));	\
+		/*							\
+		 * Even if you want to feed it just 0/1 you should have	\
+		 * some bits for the non-fractional part...		\
+		 */							\
+		BUILD_BUG_ON((_precision) > 30);			\
+		BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp);		\
 		e->internal = 0;					\
 	}								\
 	static inline unsigned long					\
 	ewma_##name##_read(struct ewma_##name *e)			\
 	{								\
-		BUILD_BUG_ON(!__builtin_constant_p(_factor));		\
-		BUILD_BUG_ON(!__builtin_constant_p(_weight));		\
-		BUILD_BUG_ON_NOT_POWER_OF_2(_factor);			\
-		BUILD_BUG_ON_NOT_POWER_OF_2(_weight);			\
-		return e->internal >> ilog2(_factor);			\
+		BUILD_BUG_ON(!__builtin_constant_p(_precision));	\
+		BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp));	\
+		BUILD_BUG_ON((_precision) > 30);			\
+		BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp);		\
+		return e->internal >> (_precision);			\
 	}								\
 	static inline void ewma_##name##_add(struct ewma_##name *e,	\
 					     unsigned long val)		\
 	{								\
 		unsigned long internal = ACCESS_ONCE(e->internal);	\
-		unsigned long weight = ilog2(_weight);			\
-		unsigned long factor = ilog2(_factor);			\
+		unsigned long weight_rcp = ilog2(_weight_rcp);		\
+		unsigned long precision = _precision;			\
 									\
-		BUILD_BUG_ON(!__builtin_constant_p(_factor));		\
-		BUILD_BUG_ON(!__builtin_constant_p(_weight));		\
-		BUILD_BUG_ON_NOT_POWER_OF_2(_factor);			\
-		BUILD_BUG_ON_NOT_POWER_OF_2(_weight);			\
+		BUILD_BUG_ON(!__builtin_constant_p(_precision));	\
+		BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp));	\
+		BUILD_BUG_ON((_precision) > 30);			\
+		BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp);		\
 									\
 		ACCESS_ONCE(e->internal) = internal ?			\
-			(((internal << weight) - internal) +		\
-				(val << factor)) >> weight :		\
-			(val << factor);				\
+			(((internal << weight_rcp) - internal) +	\
+				(val << precision)) >> weight_rcp :	\
+			(val << precision);				\
 	}
 
 #endif /* _LINUX_AVERAGE_H */
-- 
cgit v1.2.3


From 9ccd27cc2e8bfbec502d7c64a353dc4489536b81 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 21:11:09 +0100
Subject: sched/headers: Make all include/linux/sched/*.h headers build
 standalone

Make each header self-sufficient, so that it can be built successfully
both in an allnoconfig and allyesconfig kernel.

Also standardize the naming of their header guards.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/deadline.h |  8 +++++---
 include/linux/sched/prio.h     |  6 +++---
 include/linux/sched/rt.h       | 10 ++++++----
 include/linux/sched/sysctl.h   | 10 +++++++---
 4 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 9089a2ae913d..975be862e083 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -1,5 +1,7 @@
-#ifndef _SCHED_DEADLINE_H
-#define _SCHED_DEADLINE_H
+#ifndef _LINUX_SCHED_DEADLINE_H
+#define _LINUX_SCHED_DEADLINE_H
+
+#include <linux/sched.h>
 
 /*
  * SCHED_DEADLINE tasks has negative priorities, reflecting
@@ -26,4 +28,4 @@ static inline bool dl_time_before(u64 a, u64 b)
 	return (s64)(a - b) < 0;
 }
 
-#endif /* _SCHED_DEADLINE_H */
+#endif /* _LINUX_SCHED_DEADLINE_H */
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
index d9cf5a5762d9..2cc450f6ec54 100644
--- a/include/linux/sched/prio.h
+++ b/include/linux/sched/prio.h
@@ -1,5 +1,5 @@
-#ifndef _SCHED_PRIO_H
-#define _SCHED_PRIO_H
+#ifndef _LINUX_SCHED_PRIO_H
+#define _LINUX_SCHED_PRIO_H
 
 #define MAX_NICE	19
 #define MIN_NICE	-20
@@ -57,4 +57,4 @@ static inline long rlimit_to_nice(long prio)
 	return (MAX_NICE - prio + 1);
 }
 
-#endif /* _SCHED_PRIO_H */
+#endif /* _LINUX_SCHED_PRIO_H */
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index a30b172df6e1..3bd668414f61 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -1,7 +1,9 @@
-#ifndef _SCHED_RT_H
-#define _SCHED_RT_H
+#ifndef _LINUX_SCHED_RT_H
+#define _LINUX_SCHED_RT_H
 
-#include <linux/sched/prio.h>
+#include <linux/sched.h>
+
+struct task_struct;
 
 static inline int rt_prio(int prio)
 {
@@ -57,4 +59,4 @@ extern void normalize_rt_tasks(void);
  */
 #define RR_TIMESLICE		(100 * HZ / 1000)
 
-#endif /* _SCHED_RT_H */
+#endif /* _LINUX_SCHED_RT_H */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 49308e142aae..0f5ecd4d298e 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -1,5 +1,9 @@
-#ifndef _SCHED_SYSCTL_H
-#define _SCHED_SYSCTL_H
+#ifndef _LINUX_SCHED_SYSCTL_H
+#define _LINUX_SCHED_SYSCTL_H
+
+#include <linux/types.h>
+
+struct ctl_table;
 
 #ifdef CONFIG_DETECT_HUNG_TASK
 extern int	     sysctl_hung_task_check_count;
@@ -78,4 +82,4 @@ extern int sysctl_schedstats(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos);
 
-#endif /* _SCHED_SYSCTL_H */
+#endif /* _LINUX_SCHED_SYSCTL_H */
-- 
cgit v1.2.3


From c930b2c0de32f45ce8f67affe936ce7a05b07b00 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 12:22:54 +0100
Subject: sched/core: Convert ___assert_task_state() link time assert to
 BUILD_BUG_ON()

The length of TASK_STATE_TO_CHAR_STR was still checked using the old
link-time manual error method - convert it to BUILD_BUG_ON(). This
has a couple of advantages:

 - it's more obvious what's going on

 - it reduces the size and complexity of <linux/sched.h>

 - BUILD_BUG_ON() will fail during compilation, with a clearer
   error message than the link time assert.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4a28deb5f210..c204613396cd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -223,9 +223,6 @@ extern void proc_sched_set_task(struct task_struct *p);
 
 #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"
 
-extern char ___assert_task_state[1 - 2*!!(
-		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
-
 /* Convenience macros for the sake of set_current_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
-- 
cgit v1.2.3


From 59ddbcb2f45b958cf1f11f122b666cbcf50cd57b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 23:37:48 +0100
Subject: sched/core: Move the get_preempt_disable_ip() inline to sched/core.c

It's defined in <linux/sched.h>, but nothing outside the scheduler
uses it - so move it to the sched/core.c usage site.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c204613396cd..df42cac04243 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3419,15 +3419,6 @@ static inline void cond_resched_rcu(void)
 #endif
 }
 
-static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
-{
-#ifdef CONFIG_DEBUG_PREEMPT
-	return p->preempt_disable_ip;
-#else
-	return 0;
-#endif
-}
-
 /*
  * Does a critical section need to be broken due to another
  * task waiting?: (technically does not depend on CONFIG_PREEMPT,
-- 
cgit v1.2.3


From 0c98d344fe5c27f6e4bce42ac503e9e9a51c7d1d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 15:38:10 +0100
Subject: sched/core: Remove the tsk_cpus_allowed() wrapper

So the original intention of tsk_cpus_allowed() was to 'future-proof'
the field - but it's pretty ineffectual at that, because half of
the code uses ->cpus_allowed directly ...

Also, the wrapper makes the code longer than the original expression!

So just get rid of it. This also shrinks <linux/sched.h> a bit.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index df42cac04243..6d1cc20cc477 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1995,9 +1995,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
 }
 #endif
 
-/* Future-safe accessor for struct task_struct's cpus_allowed. */
-#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
-
 static inline int tsk_nr_cpus_allowed(struct task_struct *p)
 {
 	return p->nr_cpus_allowed;
-- 
cgit v1.2.3


From 4b53a3412d6663214ce9c754eff9373a9cff9dee Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 15:41:03 +0100
Subject: sched/core: Remove the tsk_nr_cpus_allowed() wrapper

tsk_nr_cpus_allowed() too is a pretty pointless wrapper that
is not used consistently and which makes the code both harder
to read and longer as well.

So remove it - this also shrinks <linux/sched.h> a bit.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d1cc20cc477..e732881517f2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1995,11 +1995,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
 }
 #endif
 
-static inline int tsk_nr_cpus_allowed(struct task_struct *p)
-{
-	return p->nr_cpus_allowed;
-}
-
 #define TNF_MIGRATED	0x01
 #define TNF_NO_GROUP	0x02
 #define TNF_SHARED	0x04
-- 
cgit v1.2.3


From f9411ebe3d85cbbea06298241e6053d031d281fc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 6 Feb 2017 09:50:49 +0100
Subject: rcu: Separate the RCU synchronization types and APIs into
 <linux/rcupdate_wait.h>

So rcupdate.h is a pretty complex header, in particular it includes
<linux/completion.h> which includes <linux/wait.h> - creating a
dependency that includes <linux/wait.h> in <linux/sched.h>,
which prevents the isolation of <linux/sched.h> from the derived
<linux/wait.h> header.

Solve part of the problem by decoupling rcupdate.h from completions:
this can be done by separating out the rcu_synchronize types and APIs,
and updating their usage sites.

Since this is a mostly RCU-internal types this will not just simplify
<linux/sched.h>'s dependencies, but will make all the hundreds of
.c files that include rcupdate.h but not completions or wait.h build
faster.

( For rcutiny this means that two dependent APIs have to be uninlined,
  but that shouldn't be much of a problem as they are rare variants. )

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/dcache.h        |  1 +
 include/linux/rcupdate.h      | 40 ----------------------------------
 include/linux/rcupdate_wait.h | 50 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/rcutiny.h       | 11 ++--------
 4 files changed, 53 insertions(+), 49 deletions(-)
 create mode 100644 include/linux/rcupdate_wait.h

(limited to 'include/linux')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 591b6c16f9c1..d2e38dc6172c 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -11,6 +11,7 @@
 #include <linux/rcupdate.h>
 #include <linux/lockref.h>
 #include <linux/stringhash.h>
+#include <linux/wait.h>
 
 struct path;
 struct vfsmount;
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6ade6a52d9d4..de88b33c0974 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -40,7 +40,6 @@
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
 #include <linux/lockdep.h>
-#include <linux/completion.h>
 #include <linux/debugobjects.h>
 #include <linux/bug.h>
 #include <linux/compiler.h>
@@ -226,45 +225,6 @@ void call_rcu_sched(struct rcu_head *head,
 
 void synchronize_sched(void);
 
-/*
- * Structure allowing asynchronous waiting on RCU.
- */
-struct rcu_synchronize {
-	struct rcu_head head;
-	struct completion completion;
-};
-void wakeme_after_rcu(struct rcu_head *head);
-
-void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
-		   struct rcu_synchronize *rs_array);
-
-#define _wait_rcu_gp(checktiny, ...) \
-do {									\
-	call_rcu_func_t __crcu_array[] = { __VA_ARGS__ };		\
-	struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)];	\
-	__wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array),		\
-			__crcu_array, __rs_array);			\
-} while (0)
-
-#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__)
-
-/**
- * synchronize_rcu_mult - Wait concurrently for multiple grace periods
- * @...: List of call_rcu() functions for the flavors to wait on.
- *
- * This macro waits concurrently for multiple flavors of RCU grace periods.
- * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait
- * on concurrent RCU and RCU-bh grace periods.  Waiting on a give SRCU
- * domain requires you to write a wrapper function for that SRCU domain's
- * call_srcu() function, supplying the corresponding srcu_struct.
- *
- * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU
- * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called
- * is automatically a grace period.
- */
-#define synchronize_rcu_mult(...) \
-	_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)
-
 /**
  * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
  * @head: structure to be used for queueing the RCU updates.
diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
new file mode 100644
index 000000000000..e774b4f5f220
--- /dev/null
+++ b/include/linux/rcupdate_wait.h
@@ -0,0 +1,50 @@
+#ifndef _LINUX_SCHED_RCUPDATE_WAIT_H
+#define _LINUX_SCHED_RCUPDATE_WAIT_H
+
+/*
+ * RCU synchronization types and methods:
+ */
+
+#include <linux/rcupdate.h>
+#include <linux/completion.h>
+
+/*
+ * Structure allowing asynchronous waiting on RCU.
+ */
+struct rcu_synchronize {
+	struct rcu_head head;
+	struct completion completion;
+};
+void wakeme_after_rcu(struct rcu_head *head);
+
+void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
+		   struct rcu_synchronize *rs_array);
+
+#define _wait_rcu_gp(checktiny, ...) \
+do {									\
+	call_rcu_func_t __crcu_array[] = { __VA_ARGS__ };		\
+	struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)];	\
+	__wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array),		\
+			__crcu_array, __rs_array);			\
+} while (0)
+
+#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__)
+
+/**
+ * synchronize_rcu_mult - Wait concurrently for multiple grace periods
+ * @...: List of call_rcu() functions for the flavors to wait on.
+ *
+ * This macro waits concurrently for multiple flavors of RCU grace periods.
+ * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait
+ * on concurrent RCU and RCU-bh grace periods.  Waiting on a give SRCU
+ * domain requires you to write a wrapper function for that SRCU domain's
+ * call_srcu() function, supplying the corresponding srcu_struct.
+ *
+ * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU
+ * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called
+ * is automatically a grace period.
+ */
+#define synchronize_rcu_mult(...) \
+	_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)
+
+#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 4f9b2fa2173d..b452953e21c8 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -53,15 +53,8 @@ static inline void cond_synchronize_sched(unsigned long oldstate)
 	might_sleep();
 }
 
-static inline void rcu_barrier_bh(void)
-{
-	wait_rcu_gp(call_rcu_bh);
-}
-
-static inline void rcu_barrier_sched(void)
-{
-	wait_rcu_gp(call_rcu_sched);
-}
+extern void rcu_barrier_bh(void);
+extern void rcu_barrier_sched(void);
 
 static inline void synchronize_rcu_expedited(void)
 {
-- 
cgit v1.2.3


From 780de9dd2720debc14c501dab4dc80d1f75ad50e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 11:50:56 +0100
Subject: sched/headers, cgroups: Remove the threadgroup_change_*() wrappery

threadgroup_change_begin()/end() is a pointless wrapper around
cgroup_threadgroup_change_begin()/end(), minus a might_sleep()
in the !CONFIG_CGROUPS=y case.

Remove the wrappery, move the might_sleep() (the down_read()
already has a might_sleep() check).

This debloats <linux/sched.h> a bit and simplifies this API.

Update all call sites.

No change in functionality.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cgroup-defs.h | 13 ++++++++-----
 include/linux/sched.h       | 28 ----------------------------
 2 files changed, 8 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 3c02404cfce9..6a3f850cabab 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -531,8 +531,8 @@ extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
  * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
  * @tsk: target task
  *
- * Called from threadgroup_change_begin() and allows cgroup operations to
- * synchronize against threadgroup changes using a percpu_rw_semaphore.
+ * Allows cgroup operations to synchronize against threadgroup changes
+ * using a percpu_rw_semaphore.
  */
 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
 {
@@ -543,8 +543,7 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
  * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups
  * @tsk: target task
  *
- * Called from threadgroup_change_end().  Counterpart of
- * cgroup_threadcgroup_change_begin().
+ * Counterpart of cgroup_threadcgroup_change_begin().
  */
 static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
 {
@@ -555,7 +554,11 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
 
 #define CGROUP_SUBSYS_COUNT 0
 
-static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {}
+static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
+{
+	might_sleep();
+}
+
 static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
 
 #endif	/* CONFIG_CGROUPS */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e732881517f2..3f61baac928b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3162,34 +3162,6 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
 	spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
 }
 
-/**
- * threadgroup_change_begin - mark the beginning of changes to a threadgroup
- * @tsk: task causing the changes
- *
- * All operations which modify a threadgroup - a new thread joining the
- * group, death of a member thread (the assertion of PF_EXITING) and
- * exec(2) dethreading the process and replacing the leader - are wrapped
- * by threadgroup_change_{begin|end}().  This is to provide a place which
- * subsystems needing threadgroup stability can hook into for
- * synchronization.
- */
-static inline void threadgroup_change_begin(struct task_struct *tsk)
-{
-	might_sleep();
-	cgroup_threadgroup_change_begin(tsk);
-}
-
-/**
- * threadgroup_change_end - mark the end of changes to a threadgroup
- * @tsk: task causing the changes
- *
- * See threadgroup_change_begin().
- */
-static inline void threadgroup_change_end(struct task_struct *tsk)
-{
-	cgroup_threadgroup_change_end(tsk);
-}
-
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 
 static inline struct thread_info *task_thread_info(struct task_struct *task)
-- 
cgit v1.2.3


From 314ff7851fc8ea66cbf48eaa93d8ebfb5ca084a9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 11:03:31 +0100
Subject: mm/vmacache, sched/headers: Introduce 'struct vmacache' and move it
 from <linux/sched.h> to <linux/mm_types>

The <linux/sched.h> header includes various vmacache related defines,
which are arguably misplaced.

Move them to mm_types.h and minimize the sched.h impact by putting
all task vmacache state into a new 'struct vmacache' structure.

No change in functionality.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm_types.h | 12 ++++++++++++
 include/linux/sched.h    | 11 ++++-------
 include/linux/vmacache.h |  2 +-
 3 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4f6d440ad785..137797cd7b50 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -360,6 +360,18 @@ struct vm_area_struct {
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
 };
 
+/*
+ * The per task VMA cache array:
+ */
+#define VMACACHE_BITS 2
+#define VMACACHE_SIZE (1U << VMACACHE_BITS)
+#define VMACACHE_MASK (VMACACHE_SIZE - 1)
+
+struct vmacache {
+	u32 seqnum;
+	struct vm_area_struct *vmas[VMACACHE_SIZE];
+};
+
 struct core_thread {
 	struct task_struct *task;
 	struct core_thread *next;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3f61baac928b..e87c97e1a947 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -134,10 +134,6 @@ struct blk_plug;
 struct filename;
 struct nameidata;
 
-#define VMACACHE_BITS 2
-#define VMACACHE_SIZE (1U << VMACACHE_BITS)
-#define VMACACHE_MASK (VMACACHE_SIZE - 1)
-
 /*
  * These are the constant used to fake the fixed-point load-average
  * counting. Some notes:
@@ -1550,9 +1546,10 @@ struct task_struct {
 #endif
 
 	struct mm_struct *mm, *active_mm;
-	/* per-thread vma caching */
-	u32 vmacache_seqnum;
-	struct vm_area_struct *vmacache[VMACACHE_SIZE];
+
+	/* Per-thread vma caching: */
+	struct vmacache vmacache;
+
 #if defined(SPLIT_RSS_COUNTING)
 	struct task_rss_stat	rss_stat;
 #endif
diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
index c3fa0fd43949..1081db987391 100644
--- a/include/linux/vmacache.h
+++ b/include/linux/vmacache.h
@@ -12,7 +12,7 @@
 
 static inline void vmacache_flush(struct task_struct *tsk)
 {
-	memset(tsk->vmacache, 0, sizeof(tsk->vmacache));
+	memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas));
 }
 
 extern void vmacache_flush_all(struct mm_struct *mm);
-- 
cgit v1.2.3


From af8601ad420f6afa6445c927ad9f36d9700d96d6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 09:57:00 +0100
Subject: kasan, sched/headers: Uninline kasan_enable/disable_current()

<linux/kasan.h> is a low level header that is included early
in affected kernel headers. But it includes <linux/sched.h>
which complicates the cleanup of sched.h dependencies.

But kasan.h has almost no need for sched.h: its only use of
scheduler functionality is in two inline functions which are
not used very frequently - so uninline kasan_enable_current()
and kasan_disable_current().

Also add a <linux/sched.h> dependency to a .c file that depended
on kasan.h including it.

This paves the way to remove the <linux/sched.h> include from kasan.h.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kasan.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index c908b25bf5a5..7793036eac80 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -30,16 +30,10 @@ static inline void *kasan_mem_to_shadow(const void *addr)
 }
 
 /* Enable reporting bugs after kasan_disable_current() */
-static inline void kasan_enable_current(void)
-{
-	current->kasan_depth++;
-}
+extern void kasan_enable_current(void);
 
 /* Disable reporting bugs for current task */
-static inline void kasan_disable_current(void)
-{
-	current->kasan_depth--;
-}
+extern void kasan_disable_current(void);
 
 void kasan_unpoison_shadow(const void *address, size_t size);
 
-- 
cgit v1.2.3


From 105ab3d8ce7269887d24d224054677125e18037c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:36:40 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/topology.h>

We are going to split <linux/sched/topology.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/topology.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuset.h         | 1 +
 include/linux/sched/topology.h | 6 ++++++
 2 files changed, 7 insertions(+)
 create mode 100644 include/linux/sched/topology.h

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index bfc204e70338..c608c39cb161 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -9,6 +9,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/sched/topology.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/mm.h>
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
new file mode 100644
index 000000000000..4f1159f76f92
--- /dev/null
+++ b/include/linux/sched/topology.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_TOPOLOGY_H
+#define _LINUX_SCHED_TOPOLOGY_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_TOPOLOGY_H */
-- 
cgit v1.2.3


From 4c822698cba8bdd93724117eded12bf34eb80252 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:36:40 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/idle.h>

We are going to split  <linux/sched/idle.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/idle.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/idle.h     | 6 ++++++
 include/linux/sched/topology.h | 2 ++
 2 files changed, 8 insertions(+)
 create mode 100644 include/linux/sched/idle.h

(limited to 'include/linux')

diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
new file mode 100644
index 000000000000..a3cf79b86986
--- /dev/null
+++ b/include/linux/sched/idle.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_IDLE_H
+#define _LINUX_SCHED_IDLE_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_IDLE_H */
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 4f1159f76f92..184b56b8aa64 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -3,4 +3,6 @@
 
 #include <linux/sched.h>
 
+#include <linux/sched/idle.h>
+
 #endif /* _LINUX_SCHED_TOPOLOGY_H */
-- 
cgit v1.2.3


From 84f001e15737f8214b0f5f0f7dfec0fb1027938f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:36:40 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/wake_q.h>

We are going to split <linux/sched/wake_q.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/wake_q.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/wake_q.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/wake_q.h

(limited to 'include/linux')

diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
new file mode 100644
index 000000000000..6383b35e4eba
--- /dev/null
+++ b/include/linux/sched/wake_q.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_WAKE_Q_H
+#define _LINUX_SCHED_WAKE_Q_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_WAKE_Q_H */
-- 
cgit v1.2.3


From e601757102cfd3eeae068f53b3bc1234f3a2b2e9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:36:40 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/clock.h>

We are going to split <linux/sched/clock.h> out of <linux/sched.h>, which
will have to be picked up from other headers and .c files.

Create a trivial placeholder <linux/sched/clock.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/blkdev.h      | 1 +
 include/linux/sched/clock.h | 6 ++++++
 include/linux/skbuff.h      | 1 +
 3 files changed, 8 insertions(+)
 create mode 100644 include/linux/sched/clock.h

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aecca0e7d9ca..796016e63c1d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -2,6 +2,7 @@
 #define _LINUX_BLKDEV_H
 
 #include <linux/sched.h>
+#include <linux/sched/clock.h>
 
 #ifdef CONFIG_BLOCK
 
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
new file mode 100644
index 000000000000..7cb7d79b2cf9
--- /dev/null
+++ b/include/linux/sched/clock.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_CLOCK_H
+#define _LINUX_SCHED_CLOCK_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_CLOCK_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 69ccd2636911..c776abd86937 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -34,6 +34,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/netdev_features.h>
 #include <linux/sched.h>
+#include <linux/sched/clock.h>
 #include <net/flow_dissector.h>
 #include <linux/splice.h>
 #include <linux/in6.h>
-- 
cgit v1.2.3


From 4f17722c7256af8e17c2c4f29f170247264bdf48 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 08:45:17 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/loadavg.h>

We are going to split <linux/sched/loadavg.h> out of <linux/sched.h>, which
will have to be picked up from a couple of .c files.

Create a trivial placeholder <linux/sched/topology.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/loadavg.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/loadavg.h

(limited to 'include/linux')

diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h
new file mode 100644
index 000000000000..3ae74be327e8
--- /dev/null
+++ b/include/linux/sched/loadavg.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_LOADAVG_H
+#define _LINUX_SCHED_LOADAVG_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_LOADAVG_H */
-- 
cgit v1.2.3


From 4eb5aaa3af8a54e5e9bac90e2b42bbab1f1ee5a3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:29 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/autogroup.h>

We are going to split <linux/sched/autogroup.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/autogroup.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/autogroup.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/autogroup.h

(limited to 'include/linux')

diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h
new file mode 100644
index 000000000000..d745f22e57dc
--- /dev/null
+++ b/include/linux/sched/autogroup.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_AUTOGROUP_H
+#define _LINUX_SCHED_AUTOGROUP_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_AUTOGROUP_H */
-- 
cgit v1.2.3


From 6e84f31522f931027bf695752087ece278c10d3f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:29 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/mm.h>

We are going to split <linux/sched/mm.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/mm.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

The APIs that are going to be moved first are:

   mm_alloc()
   __mmdrop()
   mmdrop()
   mmdrop_async_fn()
   mmdrop_async()
   mmget_not_zero()
   mmput()
   mmput_async()
   get_task_mm()
   mm_access()
   mm_release()

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/mm.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/mm.h

(limited to 'include/linux')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
new file mode 100644
index 000000000000..a7adba1cd0a9
--- /dev/null
+++ b/include/linux/sched/mm.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_MM_H
+#define _LINUX_SCHED_MM_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_MM_H */
-- 
cgit v1.2.3


From f7ccbae45c5e2c1077654b0e857e7efb1aa31c92 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:30 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/coredump.h>

We are going to split <linux/sched/coredump.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/coredump.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/khugepaged.h     | 3 ++-
 include/linux/ksm.h            | 1 +
 include/linux/sched/coredump.h | 6 ++++++
 3 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/sched/coredump.h

(limited to 'include/linux')

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 1e032a1ddb3e..5d9a400af509 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -1,7 +1,8 @@
 #ifndef _LINUX_KHUGEPAGED_H
 #define _LINUX_KHUGEPAGED_H
 
-#include <linux/sched.h> /* MMF_VM_HUGEPAGE */
+#include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */
+
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern struct attribute_group khugepaged_attr_group;
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 481c8c4627ca..e1cfda4bee58 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -12,6 +12,7 @@
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
 #include <linux/sched.h>
+#include <linux/sched/coredump.h>
 
 struct stable_node;
 struct mem_cgroup;
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
new file mode 100644
index 000000000000..ab81e564e076
--- /dev/null
+++ b/include/linux/sched/coredump.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_COREDUMP_H
+#define _LINUX_SCHED_COREDUMP_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_COREDUMP_H */
-- 
cgit v1.2.3


From 3f07c0144132e4f59d88055ac8ff3e691a5fa2b8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:30 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/signal.h>

We are going to split <linux/sched/signal.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/signal.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/oom.h            | 2 +-
 include/linux/ptrace.h         | 1 +
 include/linux/sched/signal.h   | 6 ++++++
 include/linux/signalfd.h       | 2 +-
 include/linux/taskstats_kern.h | 2 +-
 5 files changed, 10 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/sched/signal.h

(limited to 'include/linux')

diff --git a/include/linux/oom.h b/include/linux/oom.h
index b4e36e92bc87..8a266e2be5a6 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -2,7 +2,7 @@
 #define __INCLUDE_LINUX_OOM_H
 
 
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/types.h>
 #include <linux/nodemask.h>
 #include <uapi/linux/oom.h>
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index e0e539321ab9..422bc2e4cb6a 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -3,6 +3,7 @@
 
 #include <linux/compiler.h>		/* For unlikely.  */
 #include <linux/sched.h>		/* For struct task_struct.  */
+#include <linux/sched/signal.h>		/* For send_sig(), same_thread_group(), etc. */
 #include <linux/err.h>			/* for IS_ERR_VALUE */
 #include <linux/bug.h>			/* For BUG_ON.  */
 #include <linux/pid_namespace.h>	/* For task_active_pid_ns.  */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
new file mode 100644
index 000000000000..da69cd05cd68
--- /dev/null
+++ b/include/linux/sched/signal.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_SIGNAL_H
+#define _LINUX_SCHED_SIGNAL_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_SIGNAL_H */
diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h
index eadbe227c256..4985048640a7 100644
--- a/include/linux/signalfd.h
+++ b/include/linux/signalfd.h
@@ -8,7 +8,7 @@
 #define _LINUX_SIGNALFD_H
 
 #include <uapi/linux/signalfd.h>
-
+#include <linux/sched/signal.h>
 
 #ifdef CONFIG_SIGNALFD
 
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
index 58de6edf751f..e2a5daf8d14f 100644
--- a/include/linux/taskstats_kern.h
+++ b/include/linux/taskstats_kern.h
@@ -8,7 +8,7 @@
 #define _LINUX_TASKSTATS_KERN_H
 
 #include <linux/taskstats.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/slab.h>
 
 #ifdef CONFIG_TASKSTATS
-- 
cgit v1.2.3


From 8703e8a465b1e9cadc3680b4b1248f5987e54518 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:30 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/user.h>

We are going to split <linux/sched/user.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/user.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cred.h       | 2 +-
 include/linux/sched/user.h | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/sched/user.h

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index f0e70a1bb3ac..045d33e48069 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -18,8 +18,8 @@
 #include <linux/selinux.h>
 #include <linux/atomic.h>
 #include <linux/uidgid.h>
+#include <linux/sched/user.h>
 
-struct user_struct;
 struct cred;
 struct inode;
 
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
new file mode 100644
index 000000000000..83238e5ddadd
--- /dev/null
+++ b/include/linux/sched/user.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_USER_H
+#define _LINUX_SCHED_USER_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_USER_H */
-- 
cgit v1.2.3


From 55687da166bf51129ed6b110d7711f4c7560abe2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:31 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/cpufreq.h>

We are going to split <linux/sched/cpufreq.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/cpufreq.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/cpufreq.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/cpufreq.h

(limited to 'include/linux')

diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
new file mode 100644
index 000000000000..07ed9664fa20
--- /dev/null
+++ b/include/linux/sched/cpufreq.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_CPUFREQ_H
+#define _LINUX_SCHED_CPUFREQ_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_CPUFREQ_H */
-- 
cgit v1.2.3


From 6a3827d7509cbf96b7e961f8957c1f01d1bcf894 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:31 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/numa_balancing.h>

We are going to split <linux/sched/numa_balancing.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/numa_balancing.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/numa_balancing.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/numa_balancing.h

(limited to 'include/linux')

diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
new file mode 100644
index 000000000000..999182279f78
--- /dev/null
+++ b/include/linux/sched/numa_balancing.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_NUMA_BALANCING_H
+#define _LINUX_SCHED_NUMA_BALANCING_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_NUMA_BALANCING_H */
-- 
cgit v1.2.3


From 1e4bae6468375f81a3d4bdf436deaa9ea294e12b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:32 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/jobctl.h>

We are going to split <linux/sched/jobctl.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/jobctl.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the file that is going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/jobctl.h | 4 ++++
 include/linux/sched/signal.h | 1 +
 2 files changed, 5 insertions(+)
 create mode 100644 include/linux/sched/jobctl.h

(limited to 'include/linux')

diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
new file mode 100644
index 000000000000..228e4644937b
--- /dev/null
+++ b/include/linux/sched/jobctl.h
@@ -0,0 +1,4 @@
+#ifndef _LINUX_SCHED_JOBCTL_H
+#define _LINUX_SCHED_JOBCTL_H
+
+#endif /* _LINUX_SCHED_JOBCTL_H */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index da69cd05cd68..ce93c4a02b79 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -2,5 +2,6 @@
 #define _LINUX_SCHED_SIGNAL_H
 
 #include <linux/sched.h>
+#include <linux/sched/jobctl.h>
 
 #endif /* _LINUX_SCHED_SIGNAL_H */
-- 
cgit v1.2.3


From 5b825c3af1d8a0af4deb4a5eb349d0d0050c62e5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 17:54:15 +0100
Subject: sched/headers: Prepare to remove <linux/cred.h> inclusion from
 <linux/sched.h>

Add #include <linux/cred.h> dependencies to all .c files rely on sched.h
doing that for them.

Note that even if the count where we need to add extra headers seems high,
it's still a net win, because <linux/sched.h> is included in over
2,200 files ...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cred.h         | 1 +
 include/linux/sched/signal.h | 1 +
 include/linux/wait.h         | 1 +
 3 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 045d33e48069..b03e7d049a64 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -18,6 +18,7 @@
 #include <linux/selinux.h>
 #include <linux/atomic.h>
 #include <linux/uidgid.h>
+#include <linux/sched.h>
 #include <linux/sched/user.h>
 
 struct cred;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ce93c4a02b79..0f4e9f4a43fd 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_SCHED_SIGNAL_H
 #define _LINUX_SCHED_SIGNAL_H
 
+#include <linux/cred.h>
 #include <linux/sched.h>
 #include <linux/sched/jobctl.h>
 
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 1421132e9086..aacb1282d19a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -6,6 +6,7 @@
 #include <linux/list.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
+
 #include <asm/current.h>
 #include <uapi/linux/wait.h>
 
-- 
cgit v1.2.3


From 037741a6d4ab2e4b0580dae97aca963d8a8dc68f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 10:08:30 +0100
Subject: sched/headers: Prepare for the removal of <linux/rtmutex.h> from
 <linux/sched.h>

Fix up missing #includes in other places that rely on sched.h doing that for them.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/i2c.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index bed8fbb45f31..6b183521c616 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -30,6 +30,7 @@
 #include <linux/device.h>	/* for struct device */
 #include <linux/sched.h>	/* for completion */
 #include <linux/mutex.h>
+#include <linux/rtmutex.h>
 #include <linux/irqdomain.h>		/* for Host Notify IRQ */
 #include <linux/of.h>		/* for struct device_node */
 #include <linux/swab.h>		/* for swab16 */
-- 
cgit v1.2.3


From cc5efc2323a89dcf1a02c17b9b9f255c5a6e0492 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 10:06:45 +0100
Subject: sched/headers: Prepare for the removal of various unrelated headers
 from <linux/sched.h>

We are going to remove the following header inclusions from <linux/sched.h>:

	#include <asm/param.h>
	#include <linux/threads.h>
	#include <linux/kernel.h>
	#include <linux/types.h>
	#include <linux/timex.h>
	#include <linux/jiffies.h>
	#include <linux/rbtree.h>
	#include <linux/thread_info.h>
	#include <linux/cpumask.h>
	#include <linux/errno.h>
	#include <linux/nodemask.h>
	#include <linux/preempt.h>
	#include <asm/page.h>
	#include <linux/smp.h>
	#include <linux/compiler.h>
	#include <linux/completion.h>
	#include <linux/percpu.h>
	#include <linux/topology.h>
	#include <linux/rcupdate.h>
	#include <linux/time.h>
	#include <linux/timer.h>
	#include <linux/llist.h>
	#include <linux/uidgid.h>
	#include <asm/processor.h>

Fix up a single .h file that got hold of <linux/sysctl.h> via one of these headers.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/user_namespace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 363e0e8082a9..08264641b502 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -5,6 +5,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ns_common.h>
 #include <linux/sched.h>
+#include <linux/sysctl.h>
 #include <linux/err.h>
 
 #define UID_GID_MAP_MAX_EXTENTS 5
-- 
cgit v1.2.3


From b12fb7f46af7d548503d75be59f493f958c6d1b3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:33 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/xacct.h>

We are going to split <linux/sched/xacct.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/xacct.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the .c file that is going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/xacct.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/xacct.h

(limited to 'include/linux')

diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h
new file mode 100644
index 000000000000..890f7ce5cd27
--- /dev/null
+++ b/include/linux/sched/xacct.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_XACCT_H
+#define _LINUX_SCHED_XACCT_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_XACCT_H */
-- 
cgit v1.2.3


From 174cd4b1e5fbd0d74c68cf3a74f5bd4923485512 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 19:15:33 +0100
Subject: sched/headers: Prepare to move signal wakeup & sigpending methods
 from <linux/sched.h> into <linux/sched/signal.h>

Fix up affected files that include this signal functionality via sched.h.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sunrpc/types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/types.h b/include/linux/sunrpc/types.h
index d222f47550af..11a7536c0fd2 100644
--- a/include/linux/sunrpc/types.h
+++ b/include/linux/sunrpc/types.h
@@ -10,6 +10,7 @@
 #define _LINUX_SUNRPC_TYPES_H_
 
 #include <linux/timer.h>
+#include <linux/sched/signal.h>
 #include <linux/workqueue.h>
 #include <linux/sunrpc/debug.h>
 #include <linux/list.h>
-- 
cgit v1.2.3


From fd7712337ff09a248df424c5843c149586a3f017 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 20:56:33 +0100
Subject: sched/headers: Prepare to remove the <linux/gfp.h> include from
 <linux/sched.h>

<linux/topology.h> is still needed - also update other headers
and .c files that depend on sched.h including gfp.h (and its
sub-headers) for them.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h    | 1 +
 include/linux/sched/mm.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e87c97e1a947..8ae7b3d85658 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -56,6 +56,7 @@ struct sched_param {
 #include <linux/llist.h>
 #include <linux/uidgid.h>
 #include <linux/gfp.h>
+#include <linux/topology.h>
 #include <linux/magic.h>
 #include <linux/cgroup-defs.h>
 
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a7adba1cd0a9..1cf7941bb946 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -2,5 +2,6 @@
 #define _LINUX_SCHED_MM_H
 
 #include <linux/sched.h>
+#include <linux/gfp.h>
 
 #endif /* _LINUX_SCHED_MM_H */
-- 
cgit v1.2.3


From 03441a3482a31462c93509939a388877e3cd9261 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:35 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/stat.h>

We are going to split <linux/sched/stat.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/stat.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/stat.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/stat.h

(limited to 'include/linux')

diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
new file mode 100644
index 000000000000..30fe012aecb0
--- /dev/null
+++ b/include/linux/sched/stat.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_STAT_H
+#define _LINUX_SCHED_STAT_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_STAT_H */
-- 
cgit v1.2.3


From 370c91355c76cbcaad8ff79b4bb15a7f2ea59433 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:35 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/nohz.h>

We are going to split <linux/sched/nohz.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/nohz.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/nohz.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/nohz.h

(limited to 'include/linux')

diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
new file mode 100644
index 000000000000..fe6caf0dab10
--- /dev/null
+++ b/include/linux/sched/nohz.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_NOHZ_H
+#define _LINUX_SCHED_NOHZ_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_NOHZ_H */
-- 
cgit v1.2.3


From b17b01533b719e9949e437abf66436a875739b40 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:35 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/debug.h>

We are going to split <linux/sched/debug.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/debug.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/debug.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/debug.h

(limited to 'include/linux')

diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
new file mode 100644
index 000000000000..55bc0cd35fd5
--- /dev/null
+++ b/include/linux/sched/debug.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_DEBUG_H
+#define _LINUX_SCHED_DEBUG_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_DEBUG_H */
-- 
cgit v1.2.3


From ef8bd77f332bb0a4e467d7171bbfc6c57aa08a88 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:36 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/hotplug.h>

We are going to split <linux/sched/hotplug.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/hotplug.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/hotplug.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/hotplug.h

(limited to 'include/linux')

diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h
new file mode 100644
index 000000000000..34670ed24894
--- /dev/null
+++ b/include/linux/sched/hotplug.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_HOTPLUG_H
+#define _LINUX_SCHED_HOTPLUG_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_HOTPLUG_H */
-- 
cgit v1.2.3


From 299300258d1bc4e997b7db340a2e06636757fe2e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:36 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/task.h>

We are going to split <linux/sched/task.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/task.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/task.h | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 include/linux/sched/task.h

(limited to 'include/linux')

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
new file mode 100644
index 000000000000..0023c91ff821
--- /dev/null
+++ b/include/linux/sched/task.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_TASK_H
+#define _LINUX_SCHED_TASK_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_TASK_H */
-- 
cgit v1.2.3


From 68db0cf10678630d286f4bbbbdfa102951a35faa Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:37 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/task_stack.h>

We are going to split <linux/sched/task_stack.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/task_stack.h> file that just
maps to <linux/sched.h> to make this patch obviously correct and
bisectable.

Include the new header in the files that are going to need it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/elfcore.h          | 2 ++
 include/linux/perf_regs.h        | 2 ++
 include/linux/sched/task_stack.h | 6 ++++++
 3 files changed, 10 insertions(+)
 create mode 100644 include/linux/sched/task_stack.h

(limited to 'include/linux')

diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 698d51a0eea3..c8240a12c42d 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -3,6 +3,8 @@
 
 #include <linux/user.h>
 #include <linux/bug.h>
+#include <linux/sched/task_stack.h>
+
 #include <asm/elf.h>
 #include <uapi/linux/elfcore.h>
 
diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
index a5f98d53d732..9b7dd59fe28d 100644
--- a/include/linux/perf_regs.h
+++ b/include/linux/perf_regs.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_PERF_REGS_H
 #define _LINUX_PERF_REGS_H
 
+#include <linux/sched/task_stack.h>
+
 struct perf_regs {
 	__u64		abi;
 	struct pt_regs	*regs;
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
new file mode 100644
index 000000000000..933cd49824c2
--- /dev/null
+++ b/include/linux/sched/task_stack.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_TASK_STACK_H
+#define _LINUX_SCHED_TASK_STACK_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_TASK_STACK_H */
-- 
cgit v1.2.3


From dfc3401a33086a3fd465468e171ea0e82430569b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 23:15:21 +0100
Subject: sched/headers: Prepare to move the 'root_task_group' declaration to
 <linux/sched/autogroup.h>

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3a85d61f7614..452c9799318c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,6 +12,7 @@
 #include <linux/securebits.h>
 #include <linux/seqlock.h>
 #include <linux/rbtree.h>
+#include <linux/sched/autogroup.h>
 #include <net/net_namespace.h>
 #include <linux/sched/rt.h>
 
-- 
cgit v1.2.3


From e6d930b4e0115eb0732eec0efb11c3b09a8000fc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 23:43:50 +0100
Subject: signals: Prepare to split out <linux/signal_types.h> from
 <linux/signal.h>

Introduce dummy header and add dependencies to places that will depend on it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/signal.h       | 3 +--
 include/linux/signal_types.h | 7 +++++++
 2 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/signal_types.h

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 5308304993be..d1c2b05a7a55 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -1,9 +1,8 @@
 #ifndef _LINUX_SIGNAL_H
 #define _LINUX_SIGNAL_H
 
-#include <linux/list.h>
 #include <linux/bug.h>
-#include <uapi/linux/signal.h>
+#include <linux/signal_types.h>
 
 struct task_struct;
 
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
new file mode 100644
index 000000000000..28799c88f490
--- /dev/null
+++ b/include/linux/signal_types.h
@@ -0,0 +1,7 @@
+#ifndef _LINUX_SIGNAL_TYPES_H
+#define _LINUX_SIGNAL_TYPES_H
+
+#include <linux/list.h>
+#include <uapi/linux/signal.h>
+
+#endif /* _LINUX_SIGNAL_TYPES_H */
-- 
cgit v1.2.3


From f361bf4a66c9bfabace46f6ff5d97005c9b524fe Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 23:47:37 +0100
Subject: sched/headers: Prepare for the reduction of <linux/sched.h>'s signal
 API dependency

Instead of including the full <linux/signal.h>, we are going to include the
types-only <linux/signal_types.h> header in <linux/sched.h>, to further
decouple the scheduler header from the signal headers.

This means that various files which relied on the full <linux/signal.h> need
to be updated to gain an explicit dependency on it.

Update the code that relies on sched.h's inclusion of the <linux/signal.h> header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h        | 1 +
 include/linux/sched/signal.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8ae7b3d85658..ea05116bc3c2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -36,6 +36,7 @@ struct sched_param {
 #include <linux/signal.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
+#include <linux/signal_types.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 0f4e9f4a43fd..7e10a7824523 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_SCHED_SIGNAL_H
 #define _LINUX_SCHED_SIGNAL_H
 
+#include <linux/signal.h>
 #include <linux/cred.h>
 #include <linux/sched.h>
 #include <linux/sched/jobctl.h>
-- 
cgit v1.2.3


From 2e58f173ab89b29a1373088b8727133dbf7322b0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 4 Feb 2017 00:12:19 +0100
Subject: mm/headers, sched/headers: Prepare to split <linux/mm_types_task.h>
 out of <linux/mm_types.h>

We are going to separate all the MM types that are embedded directly in 'struct task_struct'
into the new <linux/mm_types_task.h> header.

Create a new <linux/mm_types_task.h> that only contains some includes from mm_types.h itself.

This should be trivially correct and easy to bisect to.

(This patch does not materially move the types yet.)

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm_types.h      |  6 +++---
 include/linux/mm_types_task.h | 10 ++++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/mm_types_task.h

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 137797cd7b50..6daaf0a51968 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1,9 +1,9 @@
 #ifndef _LINUX_MM_TYPES_H
 #define _LINUX_MM_TYPES_H
 
+#include <linux/mm_types_task.h>
+
 #include <linux/auxvec.h>
-#include <linux/types.h>
-#include <linux/threads.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
@@ -13,7 +13,7 @@
 #include <linux/uprobes.h>
 #include <linux/page-flags-layout.h>
 #include <linux/workqueue.h>
-#include <asm/page.h>
+
 #include <asm/mmu.h>
 
 #ifndef AT_VECTOR_SIZE_ARCH
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
new file mode 100644
index 000000000000..2419d302f03c
--- /dev/null
+++ b/include/linux/mm_types_task.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_MM_TYPES_TASK_H
+#define _LINUX_MM_TYPES_TASK_H
+
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/atomic.h>
+
+#include <asm/page.h>
+
+#endif /* _LINUX_MM_TYPES_TASK_H */
-- 
cgit v1.2.3


From 589ee62844e042b0b7d19ef57fb4cff77f3ca294 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 4 Feb 2017 00:16:44 +0100
Subject: sched/headers: Prepare to remove the <linux/mm_types.h> dependency
 from <linux/sched.h>

Update code that relied on sched.h including various MM types for them.

This will allow us to remove the <linux/mm_types.h> include from <linux/sched.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h | 1 +
 include/linux/sched/mm.h  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 452c9799318c..f6841c19c913 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -15,6 +15,7 @@
 #include <linux/sched/autogroup.h>
 #include <net/net_namespace.h>
 #include <linux/sched/rt.h>
+#include <linux/mm_types.h>
 
 #include <asm/thread_info.h>
 
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 1cf7941bb946..d32e3932b2e3 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -2,6 +2,7 @@
 #define _LINUX_SCHED_MM_H
 
 #include <linux/sched.h>
+#include <linux/mm_types.h>
 #include <linux/gfp.h>
 
 #endif /* _LINUX_SCHED_MM_H */
-- 
cgit v1.2.3


From 9164bb4a18dfa592cd0aca455ea57abf89ca4526 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 4 Feb 2017 01:20:53 +0100
Subject: sched/headers: Prepare to move 'init_task' and 'init_thread_union'
 from <linux/sched.h> to <linux/sched/task.h>

Update all usage sites first.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/signal.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 7e10a7824523..320eca0446cd 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -5,5 +5,6 @@
 #include <linux/cred.h>
 #include <linux/sched.h>
 #include <linux/sched/jobctl.h>
+#include <linux/sched/task.h>
 
 #endif /* _LINUX_SCHED_SIGNAL_H */
-- 
cgit v1.2.3


From b2d091031075ac9a1598e3cc3a29c28f02e64c0d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 4 Feb 2017 01:27:20 +0100
Subject: sched/headers: Prepare to use <linux/rcuupdate.h> instead of
 <linux/rculist.h> in <linux/sched.h>

We don't actually need the full rculist.h header in sched.h anymore,
we will be able to include the smaller rcupdate.h header instead.

But first update code that relied on the implicit header inclusion.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/dmar.h         | 2 +-
 include/linux/pid.h          | 2 +-
 include/linux/rhashtable.h   | 2 +-
 include/linux/sched/signal.h | 1 +
 4 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e9bc9292bd3a..e8ffba1052d3 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -26,7 +26,7 @@
 #include <linux/msi.h>
 #include <linux/irqreturn.h>
 #include <linux/rwsem.h>
-#include <linux/rcupdate.h>
+#include <linux/rculist.h>
 
 struct acpi_dmar_header;
 
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 298ead5512e5..4d179316e431 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_PID_H
 #define _LINUX_PID_H
 
-#include <linux/rcupdate.h>
+#include <linux/rculist.h>
 
 enum pid_type
 {
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index f2e12a845910..092292b6675e 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -25,7 +25,7 @@
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
-#include <linux/rcupdate.h>
+#include <linux/rculist.h>
 
 /*
  * The end of the chain is marked with a special nulls marks which has
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 320eca0446cd..c6958a53fef3 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_SCHED_SIGNAL_H
 #define _LINUX_SCHED_SIGNAL_H
 
+#include <linux/rculist.h>
 #include <linux/signal.h>
 #include <linux/cred.h>
 #include <linux/sched.h>
-- 
cgit v1.2.3


From f719ff9bcee2a422647790f12d53d3755f47c727 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 6 Feb 2017 10:57:33 +0100
Subject: sched/headers: Prepare to move the task_lock()/unlock() APIs to
 <linux/sched/task.h>

But first update the code that uses these facilities with the
new header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuset.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index c608c39cb161..611fce58d670 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -10,6 +10,7 @@
 
 #include <linux/sched.h>
 #include <linux/sched/topology.h>
+#include <linux/sched/task.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/mm.h>
-- 
cgit v1.2.3


From 32ef5517c298042ed58408545f475df43afe1f24 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 11:48:36 +0100
Subject: sched/headers: Prepare to move cputime functionality from
 <linux/sched.h> into <linux/sched/cputime.h>

Introduce a trivial, mostly empty <linux/sched/cputime.h> header
to prepare for the moving of cputime functionality out of sched.h.

Update all code that relies on these facilities.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/cputime.h | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 include/linux/sched/cputime.h

(limited to 'include/linux')

diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
new file mode 100644
index 000000000000..6ed4fe43de28
--- /dev/null
+++ b/include/linux/sched/cputime.h
@@ -0,0 +1,7 @@
+#ifndef _LINUX_SCHED_CPUTIME_H
+#define _LINUX_SCHED_CPUTIME_H
+
+#include <linux/sched/signal.h>
+#include <linux/cputime.h>
+
+#endif /* _LINUX_SCHED_CPUTIME_H */
-- 
cgit v1.2.3


From 1777e4635507265ba53d8dc4cd248e7d7c306fa0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 14:47:12 +0100
Subject: sched/headers: Prepare to move _init() prototypes from
 <linux/sched.h> to <linux/sched/init.h>

But first introduce a trivial header and update usage sites.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpu.h        | 2 ++
 include/linux/sched/init.h | 6 ++++++
 2 files changed, 8 insertions(+)
 create mode 100644 include/linux/sched/init.h

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 21f9c74496e7..f92081234afd 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -30,6 +30,8 @@ struct cpu {
 
 extern void boot_cpu_init(void);
 extern void boot_cpu_state_init(void);
+extern void cpu_init(void);
+extern void trap_init(void);
 
 extern int register_cpu(struct cpu *cpu, int num);
 extern struct device *get_cpu_device(unsigned cpu);
diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h
new file mode 100644
index 000000000000..15e46313e55e
--- /dev/null
+++ b/include/linux/sched/init.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_INIT_H
+#define _LINUX_SCHED_INIT_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_INIT_H */
-- 
cgit v1.2.3


From 50d34394cee68dd12c5e01fff073d1167700bfce Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 16:03:58 +0100
Subject: sched/headers: Prepare to remove the <linux/magic.h> include from
 <linux/sched/task_stack.h>

Update files that depend on the magic.h inclusion.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/task_stack.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index 933cd49824c2..d2d578e85f7d 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -2,5 +2,6 @@
 #define _LINUX_SCHED_TASK_STACK_H
 
 #include <linux/sched.h>
+#include <linux/magic.h>
 
 #endif /* _LINUX_SCHED_TASK_STACK_H */
-- 
cgit v1.2.3


From b69339ba109549beeaf45c45c52ac7025bfcd954 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 16:15:03 +0100
Subject: sched/headers: Prepare to remove spurious <linux/sched.h> inclusion
 dependencies

In the following patches we are going to remove various headers
from sched.h and other headers that sched.h includes.

To make those patches build cleanly prepare the scene by adding
dependencies to various files that learned to rely on those
to-be-removed dependencies.

These changes all make sense standalone: they add a header for
a data type that a particular .c or .h file is using.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea05116bc3c2..b5c6d602dfe9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -18,6 +18,7 @@ struct sched_param {
 #include <linux/types.h>
 #include <linux/timex.h>
 #include <linux/jiffies.h>
+#include <linux/mutex.h>
 #include <linux/plist.h>
 #include <linux/rbtree.h>
 #include <linux/thread_info.h>
-- 
cgit v1.2.3


From a60b9eda67beac2318fa159545ba7d022f780736 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:36:40 +0100
Subject: sched/headers: Move scheduler topology interfaces to
 <linux/sched/topology.h>

The vast majority of sched.h users does not require the topology types and
interfaces, so split them out into <linux/sched/topology.h>.

This reduces the size of linux/sched.h by ~6%.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h          | 212 ----------------------------------------
 include/linux/sched/topology.h | 213 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 213 insertions(+), 212 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b5c6d602dfe9..55480782ce7f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -960,12 +960,6 @@ enum cpu_idle_type {
 # define SCHED_FIXEDPOINT_SHIFT	10
 # define SCHED_FIXEDPOINT_SCALE	(1L << SCHED_FIXEDPOINT_SHIFT)
 
-/*
- * Increase resolution of cpu_capacity calculations
- */
-#define SCHED_CAPACITY_SHIFT	SCHED_FIXEDPOINT_SHIFT
-#define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
-
 /*
  * Wake-queues are lists of tasks with a pending wakeup, whose
  * callers have already marked the task as woken internally,
@@ -1016,214 +1010,8 @@ extern void wake_q_add(struct wake_q_head *head,
 		       struct task_struct *task);
 extern void wake_up_q(struct wake_q_head *head);
 
-/*
- * sched-domains (multiprocessor balancing) declarations:
- */
-#ifdef CONFIG_SMP
-#define SD_LOAD_BALANCE		0x0001	/* Do load balancing on this domain. */
-#define SD_BALANCE_NEWIDLE	0x0002	/* Balance when about to become idle */
-#define SD_BALANCE_EXEC		0x0004	/* Balance on exec */
-#define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
-#define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
-#define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_ASYM_CPUCAPACITY	0x0040  /* Groups have different max cpu capacities */
-#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
-#define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
-#define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
-#define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
-#define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
-#define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
-#define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
-#define SD_NUMA			0x4000	/* cross-node balancing */
-
-#ifdef CONFIG_SCHED_SMT
-static inline int cpu_smt_flags(void)
-{
-	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
-}
-#endif
-
-#ifdef CONFIG_SCHED_MC
-static inline int cpu_core_flags(void)
-{
-	return SD_SHARE_PKG_RESOURCES;
-}
-#endif
-
-#ifdef CONFIG_NUMA
-static inline int cpu_numa_flags(void)
-{
-	return SD_NUMA;
-}
-#endif
-
-extern int arch_asym_cpu_priority(int cpu);
-
-struct sched_domain_attr {
-	int relax_domain_level;
-};
-
-#define SD_ATTR_INIT	(struct sched_domain_attr) {	\
-	.relax_domain_level = -1,			\
-}
-
-extern int sched_domain_level_max;
-
-struct sched_group;
-
-struct sched_domain_shared {
-	atomic_t	ref;
-	atomic_t	nr_busy_cpus;
-	int		has_idle_cores;
-};
-
-struct sched_domain {
-	/* These fields must be setup */
-	struct sched_domain *parent;	/* top domain must be null terminated */
-	struct sched_domain *child;	/* bottom domain must be null terminated */
-	struct sched_group *groups;	/* the balancing groups of the domain */
-	unsigned long min_interval;	/* Minimum balance interval ms */
-	unsigned long max_interval;	/* Maximum balance interval ms */
-	unsigned int busy_factor;	/* less balancing by factor if busy */
-	unsigned int imbalance_pct;	/* No balance until over watermark */
-	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */
-	unsigned int busy_idx;
-	unsigned int idle_idx;
-	unsigned int newidle_idx;
-	unsigned int wake_idx;
-	unsigned int forkexec_idx;
-	unsigned int smt_gain;
-
-	int nohz_idle;			/* NOHZ IDLE status */
-	int flags;			/* See SD_* */
-	int level;
-
-	/* Runtime fields. */
-	unsigned long last_balance;	/* init to jiffies. units in jiffies */
-	unsigned int balance_interval;	/* initialise to 1. units in ms. */
-	unsigned int nr_balance_failed; /* initialise to 0 */
-
-	/* idle_balance() stats */
-	u64 max_newidle_lb_cost;
-	unsigned long next_decay_max_lb_cost;
-
-	u64 avg_scan_cost;		/* select_idle_sibling */
-
-#ifdef CONFIG_SCHEDSTATS
-	/* load_balance() stats */
-	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
-	unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
-	unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
-	unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
-	unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
-	unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
-	unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
-	unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
-
-	/* Active load balancing */
-	unsigned int alb_count;
-	unsigned int alb_failed;
-	unsigned int alb_pushed;
-
-	/* SD_BALANCE_EXEC stats */
-	unsigned int sbe_count;
-	unsigned int sbe_balanced;
-	unsigned int sbe_pushed;
-
-	/* SD_BALANCE_FORK stats */
-	unsigned int sbf_count;
-	unsigned int sbf_balanced;
-	unsigned int sbf_pushed;
-
-	/* try_to_wake_up() stats */
-	unsigned int ttwu_wake_remote;
-	unsigned int ttwu_move_affine;
-	unsigned int ttwu_move_balance;
-#endif
-#ifdef CONFIG_SCHED_DEBUG
-	char *name;
-#endif
-	union {
-		void *private;		/* used during construction */
-		struct rcu_head rcu;	/* used during destruction */
-	};
-	struct sched_domain_shared *shared;
-
-	unsigned int span_weight;
-	/*
-	 * Span of all CPUs in this domain.
-	 *
-	 * NOTE: this field is variable length. (Allocated dynamically
-	 * by attaching extra space to the end of the structure,
-	 * depending on how many CPUs the kernel has booted up with)
-	 */
-	unsigned long span[0];
-};
-
-static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
-{
-	return to_cpumask(sd->span);
-}
-
-extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
-				    struct sched_domain_attr *dattr_new);
-
-/* Allocate an array of sched domains, for partition_sched_domains(). */
-cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
-void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
-
-bool cpus_share_cache(int this_cpu, int that_cpu);
-
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
-typedef int (*sched_domain_flags_f)(void);
-
-#define SDTL_OVERLAP	0x01
-
-struct sd_data {
-	struct sched_domain **__percpu sd;
-	struct sched_domain_shared **__percpu sds;
-	struct sched_group **__percpu sg;
-	struct sched_group_capacity **__percpu sgc;
-};
-
-struct sched_domain_topology_level {
-	sched_domain_mask_f mask;
-	sched_domain_flags_f sd_flags;
-	int		    flags;
-	int		    numa_level;
-	struct sd_data      data;
-#ifdef CONFIG_SCHED_DEBUG
-	char                *name;
-#endif
-};
-
-extern void set_sched_topology(struct sched_domain_topology_level *tl);
 extern void wake_up_if_idle(int cpu);
 
-#ifdef CONFIG_SCHED_DEBUG
-# define SD_INIT_NAME(type)		.name = #type
-#else
-# define SD_INIT_NAME(type)
-#endif
-
-#else /* CONFIG_SMP */
-
-struct sched_domain_attr;
-
-static inline void
-partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
-			struct sched_domain_attr *dattr_new)
-{
-}
-
-static inline bool cpus_share_cache(int this_cpu, int that_cpu)
-{
-	return true;
-}
-
-#endif	/* !CONFIG_SMP */
-
-
 struct io_context;			/* See blkdev.h */
 
 
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 184b56b8aa64..b3550b36e65d 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -5,4 +5,217 @@
 
 #include <linux/sched/idle.h>
 
+/*
+ * sched-domains (multiprocessor balancing) declarations:
+ */
+#ifdef CONFIG_SMP
+
+#define SD_LOAD_BALANCE		0x0001	/* Do load balancing on this domain. */
+#define SD_BALANCE_NEWIDLE	0x0002	/* Balance when about to become idle */
+#define SD_BALANCE_EXEC		0x0004	/* Balance on exec */
+#define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
+#define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
+#define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
+#define SD_ASYM_CPUCAPACITY	0x0040  /* Groups have different max cpu capacities */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
+#define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
+#define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
+#define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
+#define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
+#define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
+#define SD_NUMA			0x4000	/* cross-node balancing */
+
+/*
+ * Increase resolution of cpu_capacity calculations
+ */
+#define SCHED_CAPACITY_SHIFT	SCHED_FIXEDPOINT_SHIFT
+#define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
+
+#ifdef CONFIG_SCHED_SMT
+static inline int cpu_smt_flags(void)
+{
+	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_SCHED_MC
+static inline int cpu_core_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_NUMA
+static inline int cpu_numa_flags(void)
+{
+	return SD_NUMA;
+}
+#endif
+
+extern int arch_asym_cpu_priority(int cpu);
+
+struct sched_domain_attr {
+	int relax_domain_level;
+};
+
+#define SD_ATTR_INIT	(struct sched_domain_attr) {	\
+	.relax_domain_level = -1,			\
+}
+
+extern int sched_domain_level_max;
+
+struct sched_group;
+
+struct sched_domain_shared {
+	atomic_t	ref;
+	atomic_t	nr_busy_cpus;
+	int		has_idle_cores;
+};
+
+struct sched_domain {
+	/* These fields must be setup */
+	struct sched_domain *parent;	/* top domain must be null terminated */
+	struct sched_domain *child;	/* bottom domain must be null terminated */
+	struct sched_group *groups;	/* the balancing groups of the domain */
+	unsigned long min_interval;	/* Minimum balance interval ms */
+	unsigned long max_interval;	/* Maximum balance interval ms */
+	unsigned int busy_factor;	/* less balancing by factor if busy */
+	unsigned int imbalance_pct;	/* No balance until over watermark */
+	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */
+	unsigned int busy_idx;
+	unsigned int idle_idx;
+	unsigned int newidle_idx;
+	unsigned int wake_idx;
+	unsigned int forkexec_idx;
+	unsigned int smt_gain;
+
+	int nohz_idle;			/* NOHZ IDLE status */
+	int flags;			/* See SD_* */
+	int level;
+
+	/* Runtime fields. */
+	unsigned long last_balance;	/* init to jiffies. units in jiffies */
+	unsigned int balance_interval;	/* initialise to 1. units in ms. */
+	unsigned int nr_balance_failed; /* initialise to 0 */
+
+	/* idle_balance() stats */
+	u64 max_newidle_lb_cost;
+	unsigned long next_decay_max_lb_cost;
+
+	u64 avg_scan_cost;		/* select_idle_sibling */
+
+#ifdef CONFIG_SCHEDSTATS
+	/* load_balance() stats */
+	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
+
+	/* Active load balancing */
+	unsigned int alb_count;
+	unsigned int alb_failed;
+	unsigned int alb_pushed;
+
+	/* SD_BALANCE_EXEC stats */
+	unsigned int sbe_count;
+	unsigned int sbe_balanced;
+	unsigned int sbe_pushed;
+
+	/* SD_BALANCE_FORK stats */
+	unsigned int sbf_count;
+	unsigned int sbf_balanced;
+	unsigned int sbf_pushed;
+
+	/* try_to_wake_up() stats */
+	unsigned int ttwu_wake_remote;
+	unsigned int ttwu_move_affine;
+	unsigned int ttwu_move_balance;
+#endif
+#ifdef CONFIG_SCHED_DEBUG
+	char *name;
+#endif
+	union {
+		void *private;		/* used during construction */
+		struct rcu_head rcu;	/* used during destruction */
+	};
+	struct sched_domain_shared *shared;
+
+	unsigned int span_weight;
+	/*
+	 * Span of all CPUs in this domain.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 */
+	unsigned long span[0];
+};
+
+static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
+{
+	return to_cpumask(sd->span);
+}
+
+extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+				    struct sched_domain_attr *dattr_new);
+
+/* Allocate an array of sched domains, for partition_sched_domains(). */
+cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
+void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
+
+bool cpus_share_cache(int this_cpu, int that_cpu);
+
+typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+typedef int (*sched_domain_flags_f)(void);
+
+#define SDTL_OVERLAP	0x01
+
+struct sd_data {
+	struct sched_domain **__percpu sd;
+	struct sched_domain_shared **__percpu sds;
+	struct sched_group **__percpu sg;
+	struct sched_group_capacity **__percpu sgc;
+};
+
+struct sched_domain_topology_level {
+	sched_domain_mask_f mask;
+	sched_domain_flags_f sd_flags;
+	int		    flags;
+	int		    numa_level;
+	struct sd_data      data;
+#ifdef CONFIG_SCHED_DEBUG
+	char                *name;
+#endif
+};
+
+extern void set_sched_topology(struct sched_domain_topology_level *tl);
+
+#ifdef CONFIG_SCHED_DEBUG
+# define SD_INIT_NAME(type)		.name = #type
+#else
+# define SD_INIT_NAME(type)
+#endif
+
+#else /* CONFIG_SMP */
+
+struct sched_domain_attr;
+
+static inline void
+partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+			struct sched_domain_attr *dattr_new)
+{
+}
+
+static inline bool cpus_share_cache(int this_cpu, int that_cpu)
+{
+	return true;
+}
+
+#endif	/* !CONFIG_SMP */
+
 #endif /* _LINUX_SCHED_TOPOLOGY_H */
-- 
cgit v1.2.3


From b768917d2c08edc4b5616c86a569e524d190434b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:51:00 +0100
Subject: sched/headers: Move the 'cpu_idle_type' enum from <linux/sched.h> to
 <linux/sched/idle.h>

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 7 -------
 include/linux/sched/idle.h | 7 +++++++
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55480782ce7f..8a8252e9e0c7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -943,13 +943,6 @@ static inline int sched_info_on(void)
 void force_schedstat_enabled(void);
 #endif
 
-enum cpu_idle_type {
-	CPU_IDLE,
-	CPU_NOT_IDLE,
-	CPU_NEWLY_IDLE,
-	CPU_MAX_IDLE_TYPES
-};
-
 /*
  * Integer metrics need fixed point arithmetic, e.g., sched/fair
  * has a few: load, load_avg, util_avg, freq, and capacity.
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index a3cf79b86986..6aabc9968cba 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -3,4 +3,11 @@
 
 #include <linux/sched.h>
 
+enum cpu_idle_type {
+	CPU_IDLE,
+	CPU_NOT_IDLE,
+	CPU_NEWLY_IDLE,
+	CPU_MAX_IDLE_TYPES
+};
+
 #endif /* _LINUX_SCHED_IDLE_H */
-- 
cgit v1.2.3


From 4437722b0486c36dc90a1adf584fca4cea6cf1de Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:58:52 +0100
Subject: sched/headers: Move the wake_up_if_idle() prototype to
 <linux/sched/idle.h>

No need to clutter <linux/sched.h> with this rarely used prototype.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 2 --
 include/linux/sched/idle.h | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8a8252e9e0c7..c30705c3e553 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1003,8 +1003,6 @@ extern void wake_q_add(struct wake_q_head *head,
 		       struct task_struct *task);
 extern void wake_up_q(struct wake_q_head *head);
 
-extern void wake_up_if_idle(int cpu);
-
 struct io_context;			/* See blkdev.h */
 
 
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index 6aabc9968cba..72b6b6ab6354 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -10,4 +10,6 @@ enum cpu_idle_type {
 	CPU_MAX_IDLE_TYPES
 };
 
+extern void wake_up_if_idle(int cpu);
+
 #endif /* _LINUX_SCHED_IDLE_H */
-- 
cgit v1.2.3


From 5dbe91de599423d243738a205367506444ebb4a4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 18:47:28 +0100
Subject: sched/headers: Move idle polling methods to <linux/sched/idle.h>

Further reduce the size of <linux/sched.h> by moving these APIs:

	tsk_is_polling()
	__current_set_polling()
	current_set_polling_and_test()
	__current_clr_polling()
	current_clr_polling_and_test()
	current_clr_polling()

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 76 ----------------------------------------------
 include/linux/sched/idle.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c30705c3e553..4f512e337584 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3176,82 +3176,6 @@ static inline int spin_needbreak(spinlock_t *lock)
 #endif
 }
 
-/*
- * Idle thread specific functions to determine the need_resched
- * polling state.
- */
-#ifdef TIF_POLLING_NRFLAG
-static inline int tsk_is_polling(struct task_struct *p)
-{
-	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
-}
-
-static inline void __current_set_polling(void)
-{
-	set_thread_flag(TIF_POLLING_NRFLAG);
-}
-
-static inline bool __must_check current_set_polling_and_test(void)
-{
-	__current_set_polling();
-
-	/*
-	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_curr()
-	 */
-	smp_mb__after_atomic();
-
-	return unlikely(tif_need_resched());
-}
-
-static inline void __current_clr_polling(void)
-{
-	clear_thread_flag(TIF_POLLING_NRFLAG);
-}
-
-static inline bool __must_check current_clr_polling_and_test(void)
-{
-	__current_clr_polling();
-
-	/*
-	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_curr()
-	 */
-	smp_mb__after_atomic();
-
-	return unlikely(tif_need_resched());
-}
-
-#else
-static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void __current_set_polling(void) { }
-static inline void __current_clr_polling(void) { }
-
-static inline bool __must_check current_set_polling_and_test(void)
-{
-	return unlikely(tif_need_resched());
-}
-static inline bool __must_check current_clr_polling_and_test(void)
-{
-	return unlikely(tif_need_resched());
-}
-#endif
-
-static inline void current_clr_polling(void)
-{
-	__current_clr_polling();
-
-	/*
-	 * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
-	 * Once the bit is cleared, we'll get IPIs with every new
-	 * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
-	 * fold.
-	 */
-	smp_mb(); /* paired with resched_curr() */
-
-	preempt_fold_need_resched();
-}
-
 static __always_inline bool need_resched(void)
 {
 	return unlikely(tif_need_resched());
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index 72b6b6ab6354..66ee32f87f0b 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -12,4 +12,80 @@ enum cpu_idle_type {
 
 extern void wake_up_if_idle(int cpu);
 
+/*
+ * Idle thread specific functions to determine the need_resched
+ * polling state.
+ */
+#ifdef TIF_POLLING_NRFLAG
+static inline int tsk_is_polling(struct task_struct *p)
+{
+	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
+}
+
+static inline void __current_set_polling(void)
+{
+	set_thread_flag(TIF_POLLING_NRFLAG);
+}
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_curr()
+	 */
+	smp_mb__after_atomic();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
+{
+	clear_thread_flag(TIF_POLLING_NRFLAG);
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_curr()
+	 */
+	smp_mb__after_atomic();
+
+	return unlikely(tif_need_resched());
+}
+
+#else
+static inline int tsk_is_polling(struct task_struct *p) { return 0; }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
+#endif
+
+static inline void current_clr_polling(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
+	 * Once the bit is cleared, we'll get IPIs with every new
+	 * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
+	 * fold.
+	 */
+	smp_mb(); /* paired with resched_curr() */
+
+	preempt_fold_need_resched();
+}
+
 #endif /* _LINUX_SCHED_IDLE_H */
-- 
cgit v1.2.3


From eb61baf69871b9836783a81bc451189edb0d9de2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 17:09:06 +0100
Subject: sched/headers: Move the wake-queue types and interfaces from sched.h
 into <linux/sched/wake_q.h>

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h        | 54 ++++----------------------------------------
 include/linux/sched/wake_q.h | 47 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4f512e337584..5cda7cf09505 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -953,56 +953,6 @@ void force_schedstat_enabled(void);
 # define SCHED_FIXEDPOINT_SHIFT	10
 # define SCHED_FIXEDPOINT_SCALE	(1L << SCHED_FIXEDPOINT_SHIFT)
 
-/*
- * Wake-queues are lists of tasks with a pending wakeup, whose
- * callers have already marked the task as woken internally,
- * and can thus carry on. A common use case is being able to
- * do the wakeups once the corresponding user lock as been
- * released.
- *
- * We hold reference to each task in the list across the wakeup,
- * thus guaranteeing that the memory is still valid by the time
- * the actual wakeups are performed in wake_up_q().
- *
- * One per task suffices, because there's never a need for a task to be
- * in two wake queues simultaneously; it is forbidden to abandon a task
- * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
- * already in a wake queue, the wakeup will happen soon and the second
- * waker can just skip it.
- *
- * The DEFINE_WAKE_Q macro declares and initializes the list head.
- * wake_up_q() does NOT reinitialize the list; it's expected to be
- * called near the end of a function. Otherwise, the list can be
- * re-initialized for later re-use by wake_q_init().
- *
- * Note that this can cause spurious wakeups. schedule() callers
- * must ensure the call is done inside a loop, confirming that the
- * wakeup condition has in fact occurred.
- */
-struct wake_q_node {
-	struct wake_q_node *next;
-};
-
-struct wake_q_head {
-	struct wake_q_node *first;
-	struct wake_q_node **lastp;
-};
-
-#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
-
-#define DEFINE_WAKE_Q(name)				\
-	struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
-
-static inline void wake_q_init(struct wake_q_head *head)
-{
-	head->first = WAKE_Q_TAIL;
-	head->lastp = &head->first;
-}
-
-extern void wake_q_add(struct wake_q_head *head,
-		       struct task_struct *task);
-extern void wake_up_q(struct wake_q_head *head);
-
 struct io_context;			/* See blkdev.h */
 
 
@@ -1234,6 +1184,10 @@ enum perf_event_task_context {
 	perf_nr_task_contexts,
 };
 
+struct wake_q_node {
+	struct wake_q_node *next;
+};
+
 /* Track pages that require TLB flushes */
 struct tlbflush_unmap_batch {
 	/*
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index 6383b35e4eba..d03d8a9047dc 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -1,6 +1,53 @@
 #ifndef _LINUX_SCHED_WAKE_Q_H
 #define _LINUX_SCHED_WAKE_Q_H
 
+/*
+ * Wake-queues are lists of tasks with a pending wakeup, whose
+ * callers have already marked the task as woken internally,
+ * and can thus carry on. A common use case is being able to
+ * do the wakeups once the corresponding user lock as been
+ * released.
+ *
+ * We hold reference to each task in the list across the wakeup,
+ * thus guaranteeing that the memory is still valid by the time
+ * the actual wakeups are performed in wake_up_q().
+ *
+ * One per task suffices, because there's never a need for a task to be
+ * in two wake queues simultaneously; it is forbidden to abandon a task
+ * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
+ * already in a wake queue, the wakeup will happen soon and the second
+ * waker can just skip it.
+ *
+ * The DEFINE_WAKE_Q macro declares and initializes the list head.
+ * wake_up_q() does NOT reinitialize the list; it's expected to be
+ * called near the end of a function. Otherwise, the list can be
+ * re-initialized for later re-use by wake_q_init().
+ *
+ * Note that this can cause spurious wakeups. schedule() callers
+ * must ensure the call is done inside a loop, confirming that the
+ * wakeup condition has in fact occurred.
+ */
+
 #include <linux/sched.h>
 
+struct wake_q_head {
+	struct wake_q_node *first;
+	struct wake_q_node **lastp;
+};
+
+#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
+
+#define DEFINE_WAKE_Q(name)				\
+	struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+
+static inline void wake_q_init(struct wake_q_head *head)
+{
+	head->first = WAKE_Q_TAIL;
+	head->lastp = &head->first;
+}
+
+extern void wake_q_add(struct wake_q_head *head,
+		       struct task_struct *task);
+extern void wake_up_q(struct wake_q_head *head);
+
 #endif /* _LINUX_SCHED_WAKE_Q_H */
-- 
cgit v1.2.3


From 5689810360c2e88ce1619e8bcfa859852f9a1d1a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 7 Feb 2017 12:07:18 +0100
Subject: sched/headers: Move scheduler clock interfaces to
 <linux/sched/clock.h>

Move the sched_clock interfaces into a separate header file, to reduce
the size of sched.h.

Include <linux/sched/clock.h> in all files that made use of one of the
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h       | 100 --------------------------------------------
 include/linux/sched/clock.h |  98 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5cda7cf09505..b42d2f9d673a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2190,103 +2190,6 @@ static inline void calc_load_exit_idle(void) { }
 #define cpu_relax_yield() cpu_relax()
 #endif
 
-/*
- * Do not use outside of architecture code which knows its limitations.
- *
- * sched_clock() has no promise of monotonicity or bounded drift between
- * CPUs, use (which you should not) requires disabling IRQs.
- *
- * Please use one of the three interfaces below.
- */
-extern unsigned long long notrace sched_clock(void);
-/*
- * See the comment in kernel/sched/clock.c
- */
-extern u64 running_clock(void);
-extern u64 sched_clock_cpu(int cpu);
-
-
-extern void sched_clock_init(void);
-
-#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-static inline void sched_clock_init_late(void)
-{
-}
-
-static inline void sched_clock_tick(void)
-{
-}
-
-static inline void clear_sched_clock_stable(void)
-{
-}
-
-static inline void sched_clock_idle_sleep_event(void)
-{
-}
-
-static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
-{
-}
-
-static inline u64 cpu_clock(int cpu)
-{
-	return sched_clock();
-}
-
-static inline u64 local_clock(void)
-{
-	return sched_clock();
-}
-#else
-extern void sched_clock_init_late(void);
-/*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
- */
-extern int sched_clock_stable(void);
-extern void clear_sched_clock_stable(void);
-
-extern void sched_clock_tick(void);
-extern void sched_clock_idle_sleep_event(void);
-extern void sched_clock_idle_wakeup_event(u64 delta_ns);
-
-/*
- * As outlined in clock.c, provides a fast, high resolution, nanosecond
- * time source that is monotonic per cpu argument and has bounded drift
- * between cpus.
- *
- * ######################### BIG FAT WARNING ##########################
- * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
- * # go backwards !!                                                  #
- * ####################################################################
- */
-static inline u64 cpu_clock(int cpu)
-{
-	return sched_clock_cpu(cpu);
-}
-
-static inline u64 local_clock(void)
-{
-	return sched_clock_cpu(raw_smp_processor_id());
-}
-#endif
-
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
- * The reason for this explicit opt-in is not to have perf penalty with
- * slow sched_clocks.
- */
-extern void enable_sched_clock_irqtime(void);
-extern void disable_sched_clock_irqtime(void);
-#else
-static inline void enable_sched_clock_irqtime(void) {}
-static inline void disable_sched_clock_irqtime(void) {}
-#endif
-
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
 
@@ -2297,9 +2200,6 @@ extern void sched_exec(void);
 #define sched_exec()   {}
 #endif
 
-extern void sched_clock_idle_sleep_event(void);
-extern void sched_clock_idle_wakeup_event(u64 delta_ns);
-
 #ifdef CONFIG_HOTPLUG_CPU
 extern void idle_task_exit(void);
 #else
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 7cb7d79b2cf9..ac12f71d359c 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -3,4 +3,102 @@
 
 #include <linux/sched.h>
 
+/*
+ * Do not use outside of architecture code which knows its limitations.
+ *
+ * sched_clock() has no promise of monotonicity or bounded drift between
+ * CPUs, use (which you should not) requires disabling IRQs.
+ *
+ * Please use one of the three interfaces below.
+ */
+extern unsigned long long notrace sched_clock(void);
+
+/*
+ * See the comment in kernel/sched/clock.c
+ */
+extern u64 running_clock(void);
+extern u64 sched_clock_cpu(int cpu);
+
+
+extern void sched_clock_init(void);
+
+#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline void sched_clock_init_late(void)
+{
+}
+
+static inline void sched_clock_tick(void)
+{
+}
+
+static inline void clear_sched_clock_stable(void)
+{
+}
+
+static inline void sched_clock_idle_sleep_event(void)
+{
+}
+
+static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+}
+
+static inline u64 cpu_clock(int cpu)
+{
+	return sched_clock();
+}
+
+static inline u64 local_clock(void)
+{
+	return sched_clock();
+}
+#else
+extern void sched_clock_init_late(void);
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+extern int sched_clock_stable(void);
+extern void clear_sched_clock_stable(void);
+
+extern void sched_clock_tick(void);
+extern void sched_clock_idle_sleep_event(void);
+extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+/*
+ * As outlined in clock.c, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+static inline u64 cpu_clock(int cpu)
+{
+	return sched_clock_cpu(cpu);
+}
+
+static inline u64 local_clock(void)
+{
+	return sched_clock_cpu(raw_smp_processor_id());
+}
+#endif
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+/*
+ * An i/f to runtime opt-in for irq time accounting based off of sched_clock.
+ * The reason for this explicit opt-in is not to have perf penalty with
+ * slow sched_clocks.
+ */
+extern void enable_sched_clock_irqtime(void);
+extern void disable_sched_clock_irqtime(void);
+#else
+static inline void enable_sched_clock_irqtime(void) {}
+static inline void disable_sched_clock_irqtime(void) {}
+#endif
+
 #endif /* _LINUX_SCHED_CLOCK_H */
-- 
cgit v1.2.3


From 47913d4ebd99c827c82c4f29eb282a119c3f2aeb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 18:00:26 +0100
Subject: sched/headers, delayacct: Move the 'struct task_delay_info'
 definition from <linux/sched.h> to <linux/delayacct.h>

The 'struct task_delay_info' definition does not have to be in sched.h,
because task_struct only has a pointer to it.

So move it to <linux/delayacct.h> to reduce the size of <linux/sched.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/delayacct.h | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/sched.h     | 39 ++++-----------------------------------
 2 files changed, 40 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 00e60f79a9cc..6b769cbd1000 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -30,7 +30,43 @@
 #define DELAYACCT_PF_BLKIO	0x00000002	/* I am waiting on IO */
 
 #ifdef CONFIG_TASK_DELAY_ACCT
+struct task_delay_info {
+	spinlock_t	lock;
+	unsigned int	flags;	/* Private per-task flags */
+
+	/* For each stat XXX, add following, aligned appropriately
+	 *
+	 * struct timespec XXX_start, XXX_end;
+	 * u64 XXX_delay;
+	 * u32 XXX_count;
+	 *
+	 * Atomicity of updates to XXX_delay, XXX_count protected by
+	 * single lock above (split into XXX_lock if contention is an issue).
+	 */
+
+	/*
+	 * XXX_count is incremented on every XXX operation, the delay
+	 * associated with the operation is added to XXX_delay.
+	 * XXX_delay contains the accumulated delay time in nanoseconds.
+	 */
+	u64 blkio_start;	/* Shared by blkio, swapin */
+	u64 blkio_delay;	/* wait for sync block io completion */
+	u64 swapin_delay;	/* wait for swapin block io completion */
+	u32 blkio_count;	/* total count of the number of sync block */
+				/* io operations performed */
+	u32 swapin_count;	/* total count of the number of swapin block */
+				/* io operations performed */
+
+	u64 freepages_start;
+	u64 freepages_delay;	/* wait for memory reclaim */
+	u32 freepages_count;	/* total count of memory reclaim */
+};
+#endif
 
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_TASK_DELAY_ACCT
 extern int delayacct_on;	/* Delay accounting turned on/off */
 extern struct kmem_cache *delayacct_cache;
 extern void delayacct_init(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b42d2f9d673a..7d6998858fa3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -893,39 +893,7 @@ struct sched_info {
 };
 #endif /* CONFIG_SCHED_INFO */
 
-#ifdef CONFIG_TASK_DELAY_ACCT
-struct task_delay_info {
-	spinlock_t	lock;
-	unsigned int	flags;	/* Private per-task flags */
-
-	/* For each stat XXX, add following, aligned appropriately
-	 *
-	 * struct timespec XXX_start, XXX_end;
-	 * u64 XXX_delay;
-	 * u32 XXX_count;
-	 *
-	 * Atomicity of updates to XXX_delay, XXX_count protected by
-	 * single lock above (split into XXX_lock if contention is an issue).
-	 */
-
-	/*
-	 * XXX_count is incremented on every XXX operation, the delay
-	 * associated with the operation is added to XXX_delay.
-	 * XXX_delay contains the accumulated delay time in nanoseconds.
-	 */
-	u64 blkio_start;	/* Shared by blkio, swapin */
-	u64 blkio_delay;	/* wait for sync block io completion */
-	u64 swapin_delay;	/* wait for swapin block io completion */
-	u32 blkio_count;	/* total count of the number of sync block */
-				/* io operations performed */
-	u32 swapin_count;	/* total count of the number of swapin block */
-				/* io operations performed */
-
-	u64 freepages_start;
-	u64 freepages_delay;	/* wait for memory reclaim */
-	u32 freepages_count;	/* total count of memory reclaim */
-};
-#endif	/* CONFIG_TASK_DELAY_ACCT */
+struct task_delay_info;
 
 static inline int sched_info_on(void)
 {
@@ -1612,9 +1580,10 @@ struct task_struct {
 
 	struct page_frag task_frag;
 
-#ifdef	CONFIG_TASK_DELAY_ACCT
-	struct task_delay_info *delays;
+#ifdef CONFIG_TASK_DELAY_ACCT
+	struct task_delay_info		*delays;
 #endif
+
 #ifdef CONFIG_FAULT_INJECTION
 	int make_it_fail;
 #endif
-- 
cgit v1.2.3


From e2d1e2aec572a2138dea74d53be54a1406d419c0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 18:07:51 +0100
Subject: sched/headers: Move various ABI definitions to
 <uapi/linux/sched/types.h>

Move scheduler ABI types (struct sched_attr, struct sched_param, etc.) into
the new UAPI header.

This further reduces the size and complexity of <linux/sched.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 70 ++-------------------------------------------------
 1 file changed, 2 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d6998858fa3..5c481906e835 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -5,11 +5,6 @@
 
 #include <linux/sched/prio.h>
 
-
-struct sched_param {
-	int sched_priority;
-};
-
 #include <asm/param.h>	/* for HZ */
 
 #include <linux/capability.h>
@@ -64,69 +59,8 @@ struct sched_param {
 
 #include <asm/processor.h>
 
-#define SCHED_ATTR_SIZE_VER0	48	/* sizeof first published struct */
-
-/*
- * Extended scheduling parameters data structure.
- *
- * This is needed because the original struct sched_param can not be
- * altered without introducing ABI issues with legacy applications
- * (e.g., in sched_getparam()).
- *
- * However, the possibility of specifying more than just a priority for
- * the tasks may be useful for a wide variety of application fields, e.g.,
- * multimedia, streaming, automation and control, and many others.
- *
- * This variant (sched_attr) is meant at describing a so-called
- * sporadic time-constrained task. In such model a task is specified by:
- *  - the activation period or minimum instance inter-arrival time;
- *  - the maximum (or average, depending on the actual scheduling
- *    discipline) computation time of all instances, a.k.a. runtime;
- *  - the deadline (relative to the actual activation time) of each
- *    instance.
- * Very briefly, a periodic (sporadic) task asks for the execution of
- * some specific computation --which is typically called an instance--
- * (at most) every period. Moreover, each instance typically lasts no more
- * than the runtime and must be completed by time instant t equal to
- * the instance activation time + the deadline.
- *
- * This is reflected by the actual fields of the sched_attr structure:
- *
- *  @size		size of the structure, for fwd/bwd compat.
- *
- *  @sched_policy	task's scheduling policy
- *  @sched_flags	for customizing the scheduler behaviour
- *  @sched_nice		task's nice value      (SCHED_NORMAL/BATCH)
- *  @sched_priority	task's static priority (SCHED_FIFO/RR)
- *  @sched_deadline	representative of the task's deadline
- *  @sched_runtime	representative of the task's runtime
- *  @sched_period	representative of the task's period
- *
- * Given this task model, there are a multiplicity of scheduling algorithms
- * and policies, that can be used to ensure all the tasks will make their
- * timing constraints.
- *
- * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
- * only user of this new interface. More information about the algorithm
- * available in the scheduling class file or in Documentation/.
- */
-struct sched_attr {
-	u32 size;
-
-	u32 sched_policy;
-	u64 sched_flags;
-
-	/* SCHED_NORMAL, SCHED_BATCH */
-	s32 sched_nice;
-
-	/* SCHED_FIFO, SCHED_RR */
-	u32 sched_priority;
-
-	/* SCHED_DEADLINE */
-	u64 sched_runtime;
-	u64 sched_deadline;
-	u64 sched_period;
-};
+struct sched_attr;
+struct sched_param;
 
 struct futex_pi_state;
 struct robust_list_head;
-- 
cgit v1.2.3


From dea38c74cb9205341f52b8d8ae18f61247a43ea8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 18:25:37 +0100
Subject: sched/headers: Move loadavg related definitions from <linux/sched.h>
 to <linux/sched/loadavg.h>

Move these bits to <linux/sched/loadavg.h>, to reduce the size and
complexity of <linux/sched.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h         | 27 ---------------------------
 include/linux/sched/loadavg.h | 27 +++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c481906e835..78adf7a0cac1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,31 +71,6 @@ struct blk_plug;
 struct filename;
 struct nameidata;
 
-/*
- * These are the constant used to fake the fixed-point load-average
- * counting. Some notes:
- *  - 11 bit fractions expand to 22 bits by the multiplies: this gives
- *    a load-average precision of 10 bits integer + 11 bits fractional
- *  - if you want to count load-averages more often, you need more
- *    precision, or rounding will get you. With 2-second counting freq,
- *    the EXP_n values would be 1981, 2034 and 2043 if still using only
- *    11 bit fractions.
- */
-extern unsigned long avenrun[];		/* Load averages */
-extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
-
-#define FSHIFT		11		/* nr of bits of precision */
-#define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
-#define LOAD_FREQ	(5*HZ+1)	/* 5 sec intervals */
-#define EXP_1		1884		/* 1/exp(5sec/1min) as fixed-point */
-#define EXP_5		2014		/* 1/exp(5sec/5min) */
-#define EXP_15		2037		/* 1/exp(5sec/15min) */
-
-#define CALC_LOAD(load,exp,n) \
-	load *= exp; \
-	load += n*(FIXED_1-exp); \
-	load >>= FSHIFT;
-
 extern unsigned long total_forks;
 extern int nr_threads;
 DECLARE_PER_CPU(unsigned long, process_counts);
@@ -106,8 +81,6 @@ extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 
-extern void calc_global_load(unsigned long ticks);
-
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void cpu_load_update_nohz_start(void);
 extern void cpu_load_update_nohz_stop(void);
diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h
index 3ae74be327e8..c392e28ce0ac 100644
--- a/include/linux/sched/loadavg.h
+++ b/include/linux/sched/loadavg.h
@@ -3,4 +3,31 @@
 
 #include <linux/sched.h>
 
+/*
+ * These are the constant used to fake the fixed-point load-average
+ * counting. Some notes:
+ *  - 11 bit fractions expand to 22 bits by the multiplies: this gives
+ *    a load-average precision of 10 bits integer + 11 bits fractional
+ *  - if you want to count load-averages more often, you need more
+ *    precision, or rounding will get you. With 2-second counting freq,
+ *    the EXP_n values would be 1981, 2034 and 2043 if still using only
+ *    11 bit fractions.
+ */
+extern unsigned long avenrun[];		/* Load averages */
+extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
+
+#define FSHIFT		11		/* nr of bits of precision */
+#define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
+#define LOAD_FREQ	(5*HZ+1)	/* 5 sec intervals */
+#define EXP_1		1884		/* 1/exp(5sec/1min) as fixed-point */
+#define EXP_5		2014		/* 1/exp(5sec/5min) */
+#define EXP_15		2037		/* 1/exp(5sec/15min) */
+
+#define CALC_LOAD(load,exp,n) \
+	load *= exp; \
+	load += n*(FIXED_1-exp); \
+	load >>= FSHIFT;
+
+extern void calc_global_load(unsigned long ticks);
+
 #endif /* _LINUX_SCHED_LOADAVG_H */
-- 
cgit v1.2.3


From de8f1c77313d8c908b2897f268d466c13df161d4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 18:41:24 +0100
Subject: sched/headers: Move autogroup APIs into <linux/sched/autogroup.h>

Further reduce the size of sched.h.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h           | 18 ------------------
 include/linux/sched/autogroup.h | 21 +++++++++++++++++++++
 2 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78adf7a0cac1..042620729230 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2092,24 +2092,6 @@ static inline void wake_up_nohz_cpu(int cpu) { }
 extern u64 scheduler_tick_max_deferment(void);
 #endif
 
-#ifdef CONFIG_SCHED_AUTOGROUP
-extern void sched_autogroup_create_attach(struct task_struct *p);
-extern void sched_autogroup_detach(struct task_struct *p);
-extern void sched_autogroup_fork(struct signal_struct *sig);
-extern void sched_autogroup_exit(struct signal_struct *sig);
-extern void sched_autogroup_exit_task(struct task_struct *p);
-#ifdef CONFIG_PROC_FS
-extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
-extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
-#endif
-#else
-static inline void sched_autogroup_create_attach(struct task_struct *p) { }
-static inline void sched_autogroup_detach(struct task_struct *p) { }
-static inline void sched_autogroup_fork(struct signal_struct *sig) { }
-static inline void sched_autogroup_exit(struct signal_struct *sig) { }
-static inline void sched_autogroup_exit_task(struct task_struct *p) { }
-#endif
-
 extern int yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h
index d745f22e57dc..586bdf37330d 100644
--- a/include/linux/sched/autogroup.h
+++ b/include/linux/sched/autogroup.h
@@ -3,4 +3,25 @@
 
 #include <linux/sched.h>
 
+struct signal_struct;
+struct seq_file;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern void sched_autogroup_create_attach(struct task_struct *p);
+extern void sched_autogroup_detach(struct task_struct *p);
+extern void sched_autogroup_fork(struct signal_struct *sig);
+extern void sched_autogroup_exit(struct signal_struct *sig);
+extern void sched_autogroup_exit_task(struct task_struct *p);
+#ifdef CONFIG_PROC_FS
+extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
+#endif
+#else
+static inline void sched_autogroup_create_attach(struct task_struct *p) { }
+static inline void sched_autogroup_detach(struct task_struct *p) { }
+static inline void sched_autogroup_fork(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit_task(struct task_struct *p) { }
+#endif
+
 #endif /* _LINUX_SCHED_AUTOGROUP_H */
-- 
cgit v1.2.3


From 27ddb689909cd0bab30524a5f720ae3a3e55acac Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 1 Feb 2017 09:53:15 -0800
Subject: PCI: add an API to get node from vector

Next patch will use the API to get the node from vector for nvme device

Signed-off-by: Shaohua Li <shli@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/pci.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 282ed32244ce..eb3da1a04e6c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1323,6 +1323,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 void pci_free_irq_vectors(struct pci_dev *dev);
 int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
 const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
+int pci_irq_get_node(struct pci_dev *pdev, int vec);
 
 #else
 static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
@@ -1370,6 +1371,11 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
 {
 	return cpu_possible_mask;
 }
+
+static inline int pci_irq_get_node(struct pci_dev *pdev, int vec)
+{
+	return first_online_node;
+}
 #endif
 
 static inline int
-- 
cgit v1.2.3


From 6bae363ee3057a14eec93440826813603559273a Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 1 Mar 2017 14:22:10 -0500
Subject: blk-mq: Export blk_mq_freeze_queue_wait

Drivers can start a freeze, so this provides a way to wait for frozen.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-mq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 001d30d727c5..8dacf680c851 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -245,6 +245,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_mq_freeze_queue_start(struct request_queue *q);
+void blk_mq_freeze_queue_wait(struct request_queue *q);
 int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
 
 int blk_mq_map_queues(struct blk_mq_tag_set *set);
-- 
cgit v1.2.3


From f91328c40a559362b6e7b7bfee01ca17fda87592 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 1 Mar 2017 14:22:11 -0500
Subject: blk-mq: Provide freeze queue timeout

A driver may wish to take corrective action if queued requests do not
complete within a set time.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-mq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8dacf680c851..b296a9006117 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -246,6 +246,8 @@ void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_mq_freeze_queue_start(struct request_queue *q);
 void blk_mq_freeze_queue_wait(struct request_queue *q);
+int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
+				     unsigned long timeout);
 int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
 
 int blk_mq_map_queues(struct blk_mq_tag_set *set);
-- 
cgit v1.2.3


From 474c90156c8dcc2fa815e6716cc9394d7930cb9c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 2 Mar 2017 12:17:22 -0800
Subject: give up on gcc ilog2() constant optimizations

gcc-7 has an "optimization" pass that completely screws up, and
generates the code expansion for the (impossible) case of calling
ilog2() with a zero constant, even when the code gcc compiles does not
actually have a zero constant.

And we try to generate a compile-time error for anybody doing ilog2() on
a constant where that doesn't make sense (be it zero or negative).  So
now gcc7 will fail the build due to our sanity checking, because it
created that constant-zero case that didn't actually exist in the source
code.

There's a whole long discussion on the kernel mailing about how to work
around this gcc bug.  The gcc people themselevs have discussed their
"feature" in

   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72785

but it's all water under the bridge, because while it looked at one
point like it would be solved by the time gcc7 was released, that was
not to be.

So now we have to deal with this compiler braindamage.

And the only simple approach seems to be to just delete the code that
tries to warn about bad uses of ilog2().

So now "ilog2()" will just return 0 not just for the value 1, but for
any non-positive value too.

It's not like I can recall anybody having ever actually tried to use
this function on any invalid value, but maybe the sanity check just
meant that such code never made it out in public.

Reported-by: Laura Abbott <labbott@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>,
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/log2.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/log2.h b/include/linux/log2.h
index ef3d4f67118c..c373295f359f 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -15,12 +15,6 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 
-/*
- * deal with unrepresentable constant logarithms
- */
-extern __attribute__((const, noreturn))
-int ____ilog2_NaN(void);
-
 /*
  * non-constant log of base 2 calculators
  * - the arch may override these in asm/bitops.h if they can be implemented
@@ -85,7 +79,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 #define ilog2(n)				\
 (						\
 	__builtin_constant_p(n) ? (		\
-		(n) < 1 ? ____ilog2_NaN() :	\
+		(n) < 2 ? 0 :			\
 		(n) & (1ULL << 63) ? 63 :	\
 		(n) & (1ULL << 62) ? 62 :	\
 		(n) & (1ULL << 61) ? 61 :	\
@@ -148,10 +142,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 		(n) & (1ULL <<  4) ?  4 :	\
 		(n) & (1ULL <<  3) ?  3 :	\
 		(n) & (1ULL <<  2) ?  2 :	\
-		(n) & (1ULL <<  1) ?  1 :	\
-		(n) & (1ULL <<  0) ?  0 :	\
-		____ilog2_NaN()			\
-				   ) :		\
+		1 ) :				\
 	(sizeof(n) <= 4) ?			\
 	__ilog2_u32(n) :			\
 	__ilog2_u64(n)				\
-- 
cgit v1.2.3


From 68e21be2916b359fd8afb536c1911dc014cfd03e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 19:08:20 +0100
Subject: sched/headers: Move task->mm handling methods to <linux/sched/mm.h>

Move the following task->mm helper APIs into a new header file,
<linux/sched/mm.h>, to further reduce the size and complexity
of <linux/sched.h>.

Here are how the APIs are used in various kernel files:

  # mm_alloc():
  arch/arm/mach-rpc/ecard.c
  fs/exec.c
  include/linux/sched/mm.h
  kernel/fork.c

  # __mmdrop():
  arch/arc/include/asm/mmu_context.h
  include/linux/sched/mm.h
  kernel/fork.c

  # mmdrop():
  arch/arm/mach-rpc/ecard.c
  arch/m68k/sun3/mmu_emu.c
  arch/x86/mm/tlb.c
  drivers/gpu/drm/amd/amdkfd/kfd_process.c
  drivers/gpu/drm/i915/i915_gem_userptr.c
  drivers/infiniband/hw/hfi1/file_ops.c
  drivers/vfio/vfio_iommu_spapr_tce.c
  fs/exec.c
  fs/proc/base.c
  fs/proc/task_mmu.c
  fs/proc/task_nommu.c
  fs/userfaultfd.c
  include/linux/mmu_notifier.h
  include/linux/sched/mm.h
  kernel/fork.c
  kernel/futex.c
  kernel/sched/core.c
  mm/khugepaged.c
  mm/ksm.c
  mm/mmu_context.c
  mm/mmu_notifier.c
  mm/oom_kill.c
  virt/kvm/kvm_main.c

  # mmdrop_async_fn():
  include/linux/sched/mm.h

  # mmdrop_async():
  include/linux/sched/mm.h
  kernel/fork.c

  # mmget_not_zero():
  fs/userfaultfd.c
  include/linux/sched/mm.h
  mm/oom_kill.c

  # mmput():
  arch/arc/include/asm/mmu_context.h
  arch/arc/kernel/troubleshoot.c
  arch/frv/mm/mmu-context.c
  arch/powerpc/platforms/cell/spufs/context.c
  arch/sparc/include/asm/mmu_context_32.h
  drivers/android/binder.c
  drivers/gpu/drm/etnaviv/etnaviv_gem.c
  drivers/gpu/drm/i915/i915_gem_userptr.c
  drivers/infiniband/core/umem.c
  drivers/infiniband/core/umem_odp.c
  drivers/infiniband/core/uverbs_main.c
  drivers/infiniband/hw/mlx4/main.c
  drivers/infiniband/hw/mlx5/main.c
  drivers/infiniband/hw/usnic/usnic_uiom.c
  drivers/iommu/amd_iommu_v2.c
  drivers/iommu/intel-svm.c
  drivers/lguest/lguest_user.c
  drivers/misc/cxl/fault.c
  drivers/misc/mic/scif/scif_rma.c
  drivers/oprofile/buffer_sync.c
  drivers/vfio/vfio_iommu_type1.c
  drivers/vhost/vhost.c
  drivers/xen/gntdev.c
  fs/exec.c
  fs/proc/array.c
  fs/proc/base.c
  fs/proc/task_mmu.c
  fs/proc/task_nommu.c
  fs/userfaultfd.c
  include/linux/sched/mm.h
  kernel/cpuset.c
  kernel/events/core.c
  kernel/events/uprobes.c
  kernel/exit.c
  kernel/fork.c
  kernel/ptrace.c
  kernel/sys.c
  kernel/trace/trace_output.c
  kernel/tsacct.c
  mm/memcontrol.c
  mm/memory.c
  mm/mempolicy.c
  mm/migrate.c
  mm/mmu_notifier.c
  mm/nommu.c
  mm/oom_kill.c
  mm/process_vm_access.c
  mm/rmap.c
  mm/swapfile.c
  mm/util.c
  virt/kvm/async_pf.c

  # mmput_async():
  include/linux/sched/mm.h
  kernel/fork.c
  mm/oom_kill.c

  # get_task_mm():
  arch/arc/kernel/troubleshoot.c
  arch/powerpc/platforms/cell/spufs/context.c
  drivers/android/binder.c
  drivers/gpu/drm/etnaviv/etnaviv_gem.c
  drivers/infiniband/core/umem.c
  drivers/infiniband/core/umem_odp.c
  drivers/infiniband/hw/mlx4/main.c
  drivers/infiniband/hw/mlx5/main.c
  drivers/infiniband/hw/usnic/usnic_uiom.c
  drivers/iommu/amd_iommu_v2.c
  drivers/iommu/intel-svm.c
  drivers/lguest/lguest_user.c
  drivers/misc/cxl/fault.c
  drivers/misc/mic/scif/scif_rma.c
  drivers/oprofile/buffer_sync.c
  drivers/vfio/vfio_iommu_type1.c
  drivers/vhost/vhost.c
  drivers/xen/gntdev.c
  fs/proc/array.c
  fs/proc/base.c
  fs/proc/task_mmu.c
  include/linux/sched/mm.h
  kernel/cpuset.c
  kernel/events/core.c
  kernel/exit.c
  kernel/fork.c
  kernel/ptrace.c
  kernel/sys.c
  kernel/trace/trace_output.c
  kernel/tsacct.c
  mm/memcontrol.c
  mm/memory.c
  mm/mempolicy.c
  mm/migrate.c
  mm/mmu_notifier.c
  mm/nommu.c
  mm/util.c

  # mm_access():
  fs/proc/base.c
  include/linux/sched/mm.h
  kernel/fork.c
  mm/process_vm_access.c

  # mm_release():
  arch/arc/include/asm/mmu_context.h
  fs/exec.c
  include/linux/sched/mm.h
  include/uapi/linux/sched.h
  kernel/exit.c
  kernel/fork.c

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h    | 95 ------------------------------------------------
 include/linux/sched/mm.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 042620729230..d29bbe0ee41f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2379,101 +2379,6 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
 	return sp;
 }
 
-/*
- * Routines for handling mm_structs
- */
-extern struct mm_struct * mm_alloc(void);
-
-/**
- * mmgrab() - Pin a &struct mm_struct.
- * @mm: The &struct mm_struct to pin.
- *
- * Make sure that @mm will not get freed even after the owning task
- * exits. This doesn't guarantee that the associated address space
- * will still exist later on and mmget_not_zero() has to be used before
- * accessing it.
- *
- * This is a preferred way to to pin @mm for a longer/unbounded amount
- * of time.
- *
- * Use mmdrop() to release the reference acquired by mmgrab().
- *
- * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
- * of &mm_struct.mm_count vs &mm_struct.mm_users.
- */
-static inline void mmgrab(struct mm_struct *mm)
-{
-	atomic_inc(&mm->mm_count);
-}
-
-/* mmdrop drops the mm and the page tables */
-extern void __mmdrop(struct mm_struct *);
-static inline void mmdrop(struct mm_struct *mm)
-{
-	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
-		__mmdrop(mm);
-}
-
-static inline void mmdrop_async_fn(struct work_struct *work)
-{
-	struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
-	__mmdrop(mm);
-}
-
-static inline void mmdrop_async(struct mm_struct *mm)
-{
-	if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
-		INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
-		schedule_work(&mm->async_put_work);
-	}
-}
-
-/**
- * mmget() - Pin the address space associated with a &struct mm_struct.
- * @mm: The address space to pin.
- *
- * Make sure that the address space of the given &struct mm_struct doesn't
- * go away. This does not protect against parts of the address space being
- * modified or freed, however.
- *
- * Never use this function to pin this address space for an
- * unbounded/indefinite amount of time.
- *
- * Use mmput() to release the reference acquired by mmget().
- *
- * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
- * of &mm_struct.mm_count vs &mm_struct.mm_users.
- */
-static inline void mmget(struct mm_struct *mm)
-{
-	atomic_inc(&mm->mm_users);
-}
-
-static inline bool mmget_not_zero(struct mm_struct *mm)
-{
-	return atomic_inc_not_zero(&mm->mm_users);
-}
-
-/* mmput gets rid of the mappings and all user-space */
-extern void mmput(struct mm_struct *);
-#ifdef CONFIG_MMU
-/* same as above but performs the slow path from the async context. Can
- * be called from the atomic context as well
- */
-extern void mmput_async(struct mm_struct *);
-#endif
-
-/* Grab a reference to a task's mm, if it is not already going away */
-extern struct mm_struct *get_task_mm(struct task_struct *task);
-/*
- * Grab a reference to a task's mm, if it is not already going away
- * and ptrace_may_access with the mode parameter passed to it
- * succeeds.
- */
-extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
-/* Remove the current tasks stale references to the old mm_struct */
-extern void mm_release(struct task_struct *, struct mm_struct *);
-
 #ifdef CONFIG_HAVE_COPY_THREAD_TLS
 extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
 			struct task_struct *, unsigned long);
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index d32e3932b2e3..be1ae55f5ab9 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -5,4 +5,99 @@
 #include <linux/mm_types.h>
 #include <linux/gfp.h>
 
+/*
+ * Routines for handling mm_structs
+ */
+extern struct mm_struct * mm_alloc(void);
+
+/**
+ * mmgrab() - Pin a &struct mm_struct.
+ * @mm: The &struct mm_struct to pin.
+ *
+ * Make sure that @mm will not get freed even after the owning task
+ * exits. This doesn't guarantee that the associated address space
+ * will still exist later on and mmget_not_zero() has to be used before
+ * accessing it.
+ *
+ * This is a preferred way to to pin @mm for a longer/unbounded amount
+ * of time.
+ *
+ * Use mmdrop() to release the reference acquired by mmgrab().
+ *
+ * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * of &mm_struct.mm_count vs &mm_struct.mm_users.
+ */
+static inline void mmgrab(struct mm_struct *mm)
+{
+	atomic_inc(&mm->mm_count);
+}
+
+/* mmdrop drops the mm and the page tables */
+extern void __mmdrop(struct mm_struct *);
+static inline void mmdrop(struct mm_struct *mm)
+{
+	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
+		__mmdrop(mm);
+}
+
+static inline void mmdrop_async_fn(struct work_struct *work)
+{
+	struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
+	__mmdrop(mm);
+}
+
+static inline void mmdrop_async(struct mm_struct *mm)
+{
+	if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
+		INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
+		schedule_work(&mm->async_put_work);
+	}
+}
+
+/**
+ * mmget() - Pin the address space associated with a &struct mm_struct.
+ * @mm: The address space to pin.
+ *
+ * Make sure that the address space of the given &struct mm_struct doesn't
+ * go away. This does not protect against parts of the address space being
+ * modified or freed, however.
+ *
+ * Never use this function to pin this address space for an
+ * unbounded/indefinite amount of time.
+ *
+ * Use mmput() to release the reference acquired by mmget().
+ *
+ * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * of &mm_struct.mm_count vs &mm_struct.mm_users.
+ */
+static inline void mmget(struct mm_struct *mm)
+{
+	atomic_inc(&mm->mm_users);
+}
+
+static inline bool mmget_not_zero(struct mm_struct *mm)
+{
+	return atomic_inc_not_zero(&mm->mm_users);
+}
+
+/* mmput gets rid of the mappings and all user-space */
+extern void mmput(struct mm_struct *);
+#ifdef CONFIG_MMU
+/* same as above but performs the slow path from the async context. Can
+ * be called from the atomic context as well
+ */
+extern void mmput_async(struct mm_struct *);
+#endif
+
+/* Grab a reference to a task's mm, if it is not already going away */
+extern struct mm_struct *get_task_mm(struct task_struct *task);
+/*
+ * Grab a reference to a task's mm, if it is not already going away
+ * and ptrace_may_access with the mode parameter passed to it
+ * succeeds.
+ */
+extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
+/* Remove the current tasks stale references to the old mm_struct */
+extern void mm_release(struct task_struct *, struct mm_struct *);
+
 #endif /* _LINUX_SCHED_MM_H */
-- 
cgit v1.2.3


From 11701c6768367294c5086738d49196192aaf3d60 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 19:21:47 +0100
Subject: sched/headers: Move task->mm coredumping related defines and methods
 from <linux/sched.h> to <linux/sched/coredump.h>

This further reduces the size and complexity of <linux/sched.h>.

These are the definitions and APIs that are moved:

  # MMF_*:
  fs/binfmt_elf.c
  fs/binfmt_elf_fdpic.c
  fs/exec.c
  fs/proc/base.c
  include/linux/khugepaged.h
  include/linux/ksm.h
  include/linux/sched/coredump.h
  kernel/events/uprobes.c
  kernel/fork.c
  mm/huge_memory.c
  mm/khugepaged.c
  mm/ksm.c
  mm/memory.c
  mm/oom_kill.c

  # SUID_DUMP_*:
  arch/ia64/include/asm/processor.h
  fs/coredump.c
  fs/exec.c
  fs/proc/internal.h
  include/linux/sched/coredump.h
  kernel/ptrace.c
  kernel/sys.c
  kernel/sysctl.c

  # get_dumpable():
  arch/ia64/include/asm/processor.h
  fs/coredump.c
  fs/exec.c
  fs/proc/internal.h
  include/linux/sched/coredump.h
  kernel/ptrace.c
  kernel/sys.c

  # set_dumpable():
  fs/exec.c
  include/linux/sched/coredump.h
  kernel/cred.c
  kernel/sys.c

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h          | 68 -----------------------------------------
 include/linux/sched/coredump.h | 69 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d29bbe0ee41f..7934cd0acbc7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -361,74 +361,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 #endif
 
-#define SUID_DUMP_DISABLE	0	/* No setuid dumping */
-#define SUID_DUMP_USER		1	/* Dump as user of process */
-#define SUID_DUMP_ROOT		2	/* Dump as root */
-
-/* mm flags */
-
-/* for SUID_DUMP_* above */
-#define MMF_DUMPABLE_BITS 2
-#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
-
-extern void set_dumpable(struct mm_struct *mm, int value);
-/*
- * This returns the actual value of the suid_dumpable flag. For things
- * that are using this for checking for privilege transitions, it must
- * test against SUID_DUMP_USER rather than treating it as a boolean
- * value.
- */
-static inline int __get_dumpable(unsigned long mm_flags)
-{
-	return mm_flags & MMF_DUMPABLE_MASK;
-}
-
-static inline int get_dumpable(struct mm_struct *mm)
-{
-	return __get_dumpable(mm->flags);
-}
-
-/* coredump filter bits */
-#define MMF_DUMP_ANON_PRIVATE	2
-#define MMF_DUMP_ANON_SHARED	3
-#define MMF_DUMP_MAPPED_PRIVATE	4
-#define MMF_DUMP_MAPPED_SHARED	5
-#define MMF_DUMP_ELF_HEADERS	6
-#define MMF_DUMP_HUGETLB_PRIVATE 7
-#define MMF_DUMP_HUGETLB_SHARED  8
-#define MMF_DUMP_DAX_PRIVATE	9
-#define MMF_DUMP_DAX_SHARED	10
-
-#define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS	9
-#define MMF_DUMP_FILTER_MASK \
-	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
-#define MMF_DUMP_FILTER_DEFAULT \
-	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED) |\
-	 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
-
-#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
-# define MMF_DUMP_MASK_DEFAULT_ELF	(1 << MMF_DUMP_ELF_HEADERS)
-#else
-# define MMF_DUMP_MASK_DEFAULT_ELF	0
-#endif
-					/* leave room for more dump flags */
-#define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
-#define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
-/*
- * This one-shot flag is dropped due to necessity of changing exe once again
- * on NFS restore
- */
-//#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
-
-#define MMF_HAS_UPROBES		19	/* has uprobes */
-#define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
-#define MMF_OOM_SKIP		21	/* mm is of no interest for the OOM killer */
-#define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
-#define MMF_HUGE_ZERO_PAGE	23      /* mm has ever used the global huge zero page */
-
-#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
-
 struct sighand_struct {
 	atomic_t		count;
 	struct k_sigaction	action[_NSIG];
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index ab81e564e076..f46912aa4f4c 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -2,5 +2,74 @@
 #define _LINUX_SCHED_COREDUMP_H
 
 #include <linux/sched.h>
+#include <linux/mm_types.h>
+
+#define SUID_DUMP_DISABLE	0	/* No setuid dumping */
+#define SUID_DUMP_USER		1	/* Dump as user of process */
+#define SUID_DUMP_ROOT		2	/* Dump as root */
+
+/* mm flags */
+
+/* for SUID_DUMP_* above */
+#define MMF_DUMPABLE_BITS 2
+#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
+
+extern void set_dumpable(struct mm_struct *mm, int value);
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
+static inline int __get_dumpable(unsigned long mm_flags)
+{
+	return mm_flags & MMF_DUMPABLE_MASK;
+}
+
+static inline int get_dumpable(struct mm_struct *mm)
+{
+	return __get_dumpable(mm->flags);
+}
+
+/* coredump filter bits */
+#define MMF_DUMP_ANON_PRIVATE	2
+#define MMF_DUMP_ANON_SHARED	3
+#define MMF_DUMP_MAPPED_PRIVATE	4
+#define MMF_DUMP_MAPPED_SHARED	5
+#define MMF_DUMP_ELF_HEADERS	6
+#define MMF_DUMP_HUGETLB_PRIVATE 7
+#define MMF_DUMP_HUGETLB_SHARED  8
+#define MMF_DUMP_DAX_PRIVATE	9
+#define MMF_DUMP_DAX_SHARED	10
+
+#define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
+#define MMF_DUMP_FILTER_BITS	9
+#define MMF_DUMP_FILTER_MASK \
+	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
+#define MMF_DUMP_FILTER_DEFAULT \
+	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED) |\
+	 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+
+#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+# define MMF_DUMP_MASK_DEFAULT_ELF	(1 << MMF_DUMP_ELF_HEADERS)
+#else
+# define MMF_DUMP_MASK_DEFAULT_ELF	0
+#endif
+					/* leave room for more dump flags */
+#define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
+#define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
+/*
+ * This one-shot flag is dropped due to necessity of changing exe once again
+ * on NFS restore
+ */
+//#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
+
+#define MMF_HAS_UPROBES		19	/* has uprobes */
+#define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
+#define MMF_OOM_SKIP		21	/* mm is of no interest for the OOM killer */
+#define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
+#define MMF_HUGE_ZERO_PAGE	23      /* mm has ever used the global huge zero page */
+
+#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
 #endif /* _LINUX_SCHED_COREDUMP_H */
-- 
cgit v1.2.3


From c3edc4010e9d102eb7b8f17d15c2ebc425fed63c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 08:35:14 +0100
Subject: sched/headers: Move task_struct::signal and task_struct::sighand
 types and accessors into <linux/sched/signal.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

task_struct::signal and task_struct::sighand are pointers, which would normally make it
straightforward to not define those types in sched.h.

That is not so, because the types are accompanied by a myriad of APIs (macros and inline
functions) that dereference them.

Split the types and the APIs out of sched.h and move them into a new header, <linux/sched/signal.h>.

With this change sched.h does not know about 'struct signal' and 'struct sighand' anymore,
trying to put accessors into sched.h as a test fails the following way:

  ./include/linux/sched.h: In function ‘test_signal_types’:
  ./include/linux/sched.h:2461:18: error: dereferencing pointer to incomplete type ‘struct signal_struct’
                    ^

This reduces the size and complexity of sched.h significantly.

Update all headers and .c code that relied on getting the signal handling
functionality from <linux/sched.h> to include <linux/sched/signal.h>.

The list of affected files in the preparatory patch was partly generated by
grepping for the APIs, and partly by doing coverage build testing, both
all[yes|mod|def|no]config builds on 64-bit and 32-bit x86, and an array of
cross-architecture builds.

Nevertheless some (trivial) build breakage is still expected related to rare
Kconfig combinations and in-flight patches to various kernel code, but most
of it should be handled by this patch.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h        | 499 +-----------------------------------------
 include/linux/sched/signal.h | 502 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 505 insertions(+), 496 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7934cd0acbc7..c1586104d4c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,6 +71,9 @@ struct blk_plug;
 struct filename;
 struct nameidata;
 
+struct signal_struct;
+struct sighand_struct;
+
 extern unsigned long total_forks;
 extern int nr_threads;
 DECLARE_PER_CPU(unsigned long, process_counts);
@@ -361,13 +364,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 #endif
 
-struct sighand_struct {
-	atomic_t		count;
-	struct k_sigaction	action[_NSIG];
-	spinlock_t		siglock;
-	wait_queue_head_t	signalfd_wqh;
-};
-
 struct pacct_struct {
 	int			ac_flag;
 	long			ac_exitcode;
@@ -485,195 +481,6 @@ struct thread_group_cputimer {
 #include <linux/rwsem.h>
 struct autogroup;
 
-/*
- * NOTE! "signal_struct" does not have its own
- * locking, because a shared signal_struct always
- * implies a shared sighand_struct, so locking
- * sighand_struct is always a proper superset of
- * the locking of signal_struct.
- */
-struct signal_struct {
-	atomic_t		sigcnt;
-	atomic_t		live;
-	int			nr_threads;
-	struct list_head	thread_head;
-
-	wait_queue_head_t	wait_chldexit;	/* for wait4() */
-
-	/* current thread group signal load-balancing target: */
-	struct task_struct	*curr_target;
-
-	/* shared signal handling: */
-	struct sigpending	shared_pending;
-
-	/* thread group exit support */
-	int			group_exit_code;
-	/* overloaded:
-	 * - notify group_exit_task when ->count is equal to notify_count
-	 * - everyone except group_exit_task is stopped during signal delivery
-	 *   of fatal signals, group_exit_task processes the signal.
-	 */
-	int			notify_count;
-	struct task_struct	*group_exit_task;
-
-	/* thread group stop support, overloads group_exit_code too */
-	int			group_stop_count;
-	unsigned int		flags; /* see SIGNAL_* flags below */
-
-	/*
-	 * PR_SET_CHILD_SUBREAPER marks a process, like a service
-	 * manager, to re-parent orphan (double-forking) child processes
-	 * to this process instead of 'init'. The service manager is
-	 * able to receive SIGCHLD signals and is able to investigate
-	 * the process until it calls wait(). All children of this
-	 * process will inherit a flag if they should look for a
-	 * child_subreaper process at exit.
-	 */
-	unsigned int		is_child_subreaper:1;
-	unsigned int		has_child_subreaper:1;
-
-#ifdef CONFIG_POSIX_TIMERS
-
-	/* POSIX.1b Interval Timers */
-	int			posix_timer_id;
-	struct list_head	posix_timers;
-
-	/* ITIMER_REAL timer for the process */
-	struct hrtimer real_timer;
-	ktime_t it_real_incr;
-
-	/*
-	 * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use
-	 * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these
-	 * values are defined to 0 and 1 respectively
-	 */
-	struct cpu_itimer it[2];
-
-	/*
-	 * Thread group totals for process CPU timers.
-	 * See thread_group_cputimer(), et al, for details.
-	 */
-	struct thread_group_cputimer cputimer;
-
-	/* Earliest-expiration cache. */
-	struct task_cputime cputime_expires;
-
-	struct list_head cpu_timers[3];
-
-#endif
-
-	struct pid *leader_pid;
-
-#ifdef CONFIG_NO_HZ_FULL
-	atomic_t tick_dep_mask;
-#endif
-
-	struct pid *tty_old_pgrp;
-
-	/* boolean value for session group leader */
-	int leader;
-
-	struct tty_struct *tty; /* NULL if no tty */
-
-#ifdef CONFIG_SCHED_AUTOGROUP
-	struct autogroup *autogroup;
-#endif
-	/*
-	 * Cumulative resource counters for dead threads in the group,
-	 * and for reaped dead child processes forked by this group.
-	 * Live threads maintain their own counters and add to these
-	 * in __exit_signal, except for the group leader.
-	 */
-	seqlock_t stats_lock;
-	u64 utime, stime, cutime, cstime;
-	u64 gtime;
-	u64 cgtime;
-	struct prev_cputime prev_cputime;
-	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
-	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
-	unsigned long inblock, oublock, cinblock, coublock;
-	unsigned long maxrss, cmaxrss;
-	struct task_io_accounting ioac;
-
-	/*
-	 * Cumulative ns of schedule CPU time fo dead threads in the
-	 * group, not including a zombie group leader, (This only differs
-	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
-	 * other than jiffies.)
-	 */
-	unsigned long long sum_sched_runtime;
-
-	/*
-	 * We don't bother to synchronize most readers of this at all,
-	 * because there is no reader checking a limit that actually needs
-	 * to get both rlim_cur and rlim_max atomically, and either one
-	 * alone is a single word that can safely be read normally.
-	 * getrlimit/setrlimit use task_lock(current->group_leader) to
-	 * protect this instead of the siglock, because they really
-	 * have no need to disable irqs.
-	 */
-	struct rlimit rlim[RLIM_NLIMITS];
-
-#ifdef CONFIG_BSD_PROCESS_ACCT
-	struct pacct_struct pacct;	/* per-process accounting information */
-#endif
-#ifdef CONFIG_TASKSTATS
-	struct taskstats *stats;
-#endif
-#ifdef CONFIG_AUDIT
-	unsigned audit_tty;
-	struct tty_audit_buf *tty_audit_buf;
-#endif
-
-	/*
-	 * Thread is the potential origin of an oom condition; kill first on
-	 * oom
-	 */
-	bool oom_flag_origin;
-	short oom_score_adj;		/* OOM kill score adjustment */
-	short oom_score_adj_min;	/* OOM kill score adjustment min value.
-					 * Only settable by CAP_SYS_RESOURCE. */
-	struct mm_struct *oom_mm;	/* recorded mm when the thread group got
-					 * killed by the oom killer */
-
-	struct mutex cred_guard_mutex;	/* guard against foreign influences on
-					 * credential calculations
-					 * (notably. ptrace) */
-};
-
-/*
- * Bits in flags field of signal_struct.
- */
-#define SIGNAL_STOP_STOPPED	0x00000001 /* job control stop in effect */
-#define SIGNAL_STOP_CONTINUED	0x00000002 /* SIGCONT since WCONTINUED reap */
-#define SIGNAL_GROUP_EXIT	0x00000004 /* group exit in progress */
-#define SIGNAL_GROUP_COREDUMP	0x00000008 /* coredump in progress */
-/*
- * Pending notifications to parent.
- */
-#define SIGNAL_CLD_STOPPED	0x00000010
-#define SIGNAL_CLD_CONTINUED	0x00000020
-#define SIGNAL_CLD_MASK		(SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED)
-
-#define SIGNAL_UNKILLABLE	0x00000040 /* for init: ignore fatal signals */
-
-#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \
-			  SIGNAL_STOP_CONTINUED)
-
-static inline void signal_set_stop_flags(struct signal_struct *sig,
-					 unsigned int flags)
-{
-	WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
-	sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
-}
-
-/* If true, all threads except ->group_exit_task have pending SIGKILL */
-static inline int signal_group_exit(const struct signal_struct *sig)
-{
-	return	(sig->flags & SIGNAL_GROUP_EXIT) ||
-		(sig->group_exit_task != NULL);
-}
-
 /*
  * Some day this will be a full-fledged user tracking system..
  */
@@ -2126,190 +1933,8 @@ extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
-extern void flush_signals(struct task_struct *);
-extern void ignore_signals(struct task_struct *);
-extern void flush_signal_handlers(struct task_struct *, int force_default);
-extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
-
-static inline int kernel_dequeue_signal(siginfo_t *info)
-{
-	struct task_struct *tsk = current;
-	siginfo_t __info;
-	int ret;
-
-	spin_lock_irq(&tsk->sighand->siglock);
-	ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info);
-	spin_unlock_irq(&tsk->sighand->siglock);
-
-	return ret;
-}
-
-static inline void kernel_signal_stop(void)
-{
-	spin_lock_irq(&current->sighand->siglock);
-	if (current->jobctl & JOBCTL_STOP_DEQUEUED)
-		__set_current_state(TASK_STOPPED);
-	spin_unlock_irq(&current->sighand->siglock);
-
-	schedule();
-}
 
 extern void release_task(struct task_struct * p);
-extern int send_sig_info(int, struct siginfo *, struct task_struct *);
-extern int force_sigsegv(int, struct task_struct *);
-extern int force_sig_info(int, struct siginfo *, struct task_struct *);
-extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
-extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
-extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *,
-				const struct cred *, u32);
-extern int kill_pgrp(struct pid *pid, int sig, int priv);
-extern int kill_pid(struct pid *pid, int sig, int priv);
-extern int kill_proc_info(int, struct siginfo *, pid_t);
-extern __must_check bool do_notify_parent(struct task_struct *, int);
-extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
-extern void force_sig(int, struct task_struct *);
-extern int send_sig(int, struct task_struct *, int);
-extern int zap_other_threads(struct task_struct *p);
-extern struct sigqueue *sigqueue_alloc(void);
-extern void sigqueue_free(struct sigqueue *);
-extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
-extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
-
-#ifdef TIF_RESTORE_SIGMASK
-/*
- * Legacy restore_sigmask accessors.  These are inefficient on
- * SMP architectures because they require atomic operations.
- */
-
-/**
- * set_restore_sigmask() - make sure saved_sigmask processing gets done
- *
- * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code
- * will run before returning to user mode, to process the flag.  For
- * all callers, TIF_SIGPENDING is already set or it's no harm to set
- * it.  TIF_RESTORE_SIGMASK need not be in the set of bits that the
- * arch code will notice on return to user mode, in case those bits
- * are scarce.  We set TIF_SIGPENDING here to ensure that the arch
- * signal code always gets run when TIF_RESTORE_SIGMASK is set.
- */
-static inline void set_restore_sigmask(void)
-{
-	set_thread_flag(TIF_RESTORE_SIGMASK);
-	WARN_ON(!test_thread_flag(TIF_SIGPENDING));
-}
-static inline void clear_restore_sigmask(void)
-{
-	clear_thread_flag(TIF_RESTORE_SIGMASK);
-}
-static inline bool test_restore_sigmask(void)
-{
-	return test_thread_flag(TIF_RESTORE_SIGMASK);
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK);
-}
-
-#else	/* TIF_RESTORE_SIGMASK */
-
-/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */
-static inline void set_restore_sigmask(void)
-{
-	current->restore_sigmask = true;
-	WARN_ON(!test_thread_flag(TIF_SIGPENDING));
-}
-static inline void clear_restore_sigmask(void)
-{
-	current->restore_sigmask = false;
-}
-static inline bool test_restore_sigmask(void)
-{
-	return current->restore_sigmask;
-}
-static inline bool test_and_clear_restore_sigmask(void)
-{
-	if (!current->restore_sigmask)
-		return false;
-	current->restore_sigmask = false;
-	return true;
-}
-#endif
-
-static inline void restore_saved_sigmask(void)
-{
-	if (test_and_clear_restore_sigmask())
-		__set_current_blocked(&current->saved_sigmask);
-}
-
-static inline sigset_t *sigmask_to_save(void)
-{
-	sigset_t *res = &current->blocked;
-	if (unlikely(test_restore_sigmask()))
-		res = &current->saved_sigmask;
-	return res;
-}
-
-static inline int kill_cad_pid(int sig, int priv)
-{
-	return kill_pid(cad_pid, sig, priv);
-}
-
-/* These can be the second arg to send_sig_info/send_group_sig_info.  */
-#define SEND_SIG_NOINFO ((struct siginfo *) 0)
-#define SEND_SIG_PRIV	((struct siginfo *) 1)
-#define SEND_SIG_FORCED	((struct siginfo *) 2)
-
-/*
- * True if we are on the alternate signal stack.
- */
-static inline int on_sig_stack(unsigned long sp)
-{
-	/*
-	 * If the signal stack is SS_AUTODISARM then, by construction, we
-	 * can't be on the signal stack unless user code deliberately set
-	 * SS_AUTODISARM when we were already on it.
-	 *
-	 * This improves reliability: if user state gets corrupted such that
-	 * the stack pointer points very close to the end of the signal stack,
-	 * then this check will enable the signal to be handled anyway.
-	 */
-	if (current->sas_ss_flags & SS_AUTODISARM)
-		return 0;
-
-#ifdef CONFIG_STACK_GROWSUP
-	return sp >= current->sas_ss_sp &&
-		sp - current->sas_ss_sp < current->sas_ss_size;
-#else
-	return sp > current->sas_ss_sp &&
-		sp - current->sas_ss_sp <= current->sas_ss_size;
-#endif
-}
-
-static inline int sas_ss_flags(unsigned long sp)
-{
-	if (!current->sas_ss_size)
-		return SS_DISABLE;
-
-	return on_sig_stack(sp) ? SS_ONSTACK : 0;
-}
-
-static inline void sas_ss_reset(struct task_struct *p)
-{
-	p->sas_ss_sp = 0;
-	p->sas_ss_size = 0;
-	p->sas_ss_flags = SS_DISABLE;
-}
-
-static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
-{
-	if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp))
-#ifdef CONFIG_STACK_GROWSUP
-		return current->sas_ss_sp;
-#else
-		return current->sas_ss_sp + current->sas_ss_size;
-#endif
-	return sp;
-}
 
 #ifdef CONFIG_HAVE_COPY_THREAD_TLS
 extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
@@ -2338,10 +1963,8 @@ static inline void exit_thread(struct task_struct *tsk)
 #endif
 
 extern void exit_files(struct task_struct *);
-extern void __cleanup_sighand(struct sighand_struct *);
 
 extern void exit_itimers(struct signal_struct *);
-extern void flush_itimer_signals(void);
 
 extern void do_group_exit(int);
 
@@ -2376,81 +1999,6 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 }
 #endif
 
-#define tasklist_empty() \
-	list_empty(&init_task.tasks)
-
-#define next_task(p) \
-	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
-
-#define for_each_process(p) \
-	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
-
-extern bool current_is_single_threaded(void);
-
-/*
- * Careful: do_each_thread/while_each_thread is a double loop so
- *          'break' will not work as expected - use goto instead.
- */
-#define do_each_thread(g, t) \
-	for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
-
-#define while_each_thread(g, t) \
-	while ((t = next_thread(t)) != g)
-
-#define __for_each_thread(signal, t)	\
-	list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
-
-#define for_each_thread(p, t)		\
-	__for_each_thread((p)->signal, t)
-
-/* Careful: this is a double loop, 'break' won't work as expected. */
-#define for_each_process_thread(p, t)	\
-	for_each_process(p) for_each_thread(p, t)
-
-typedef int (*proc_visitor)(struct task_struct *p, void *data);
-void walk_process_tree(struct task_struct *top, proc_visitor, void *);
-
-static inline int get_nr_threads(struct task_struct *tsk)
-{
-	return tsk->signal->nr_threads;
-}
-
-static inline bool thread_group_leader(struct task_struct *p)
-{
-	return p->exit_signal >= 0;
-}
-
-/* Do to the insanities of de_thread it is possible for a process
- * to have the pid of the thread group leader without actually being
- * the thread group leader.  For iteration through the pids in proc
- * all we care about is that we have a task with the appropriate
- * pid, we don't actually care if we have the right task.
- */
-static inline bool has_group_leader_pid(struct task_struct *p)
-{
-	return task_pid(p) == p->signal->leader_pid;
-}
-
-static inline
-bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
-{
-	return p1->signal == p2->signal;
-}
-
-static inline struct task_struct *next_thread(const struct task_struct *p)
-{
-	return list_entry_rcu(p->thread_group.next,
-			      struct task_struct, thread_group);
-}
-
-static inline int thread_group_empty(struct task_struct *p)
-{
-	return list_empty(&p->thread_group);
-}
-
-#define delay_group_leader(p) \
-		(thread_group_leader(p) && !thread_group_empty(p))
-
 /*
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
@@ -2471,25 +2019,6 @@ static inline void task_unlock(struct task_struct *p)
 	spin_unlock(&p->alloc_lock);
 }
 
-extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
-							unsigned long *flags);
-
-static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
-						       unsigned long *flags)
-{
-	struct sighand_struct *ret;
-
-	ret = __lock_task_sighand(tsk, flags);
-	(void)__cond_lock(&tsk->sighand->siglock, ret);
-	return ret;
-}
-
-static inline void unlock_task_sighand(struct task_struct *tsk,
-						unsigned long *flags)
-{
-	spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
-}
-
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 
 static inline struct thread_info *task_thread_info(struct task_struct *task)
@@ -2862,28 +2391,6 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 }
 #endif /* CONFIG_MEMCG */
 
-static inline unsigned long task_rlimit(const struct task_struct *tsk,
-		unsigned int limit)
-{
-	return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
-}
-
-static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
-		unsigned int limit)
-{
-	return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
-}
-
-static inline unsigned long rlimit(unsigned int limit)
-{
-	return task_rlimit(current, limit);
-}
-
-static inline unsigned long rlimit_max(unsigned int limit)
-{
-	return task_rlimit_max(current, limit);
-}
-
 #define SCHED_CPUFREQ_RT	(1U << 0)
 #define SCHED_CPUFREQ_DL	(1U << 1)
 #define SCHED_CPUFREQ_IOWAIT	(1U << 2)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index c6958a53fef3..53fe5450f431 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -8,4 +8,506 @@
 #include <linux/sched/jobctl.h>
 #include <linux/sched/task.h>
 
+/*
+ * Types defining task->signal and task->sighand and APIs using them:
+ */
+
+struct sighand_struct {
+	atomic_t		count;
+	struct k_sigaction	action[_NSIG];
+	spinlock_t		siglock;
+	wait_queue_head_t	signalfd_wqh;
+};
+
+/*
+ * NOTE! "signal_struct" does not have its own
+ * locking, because a shared signal_struct always
+ * implies a shared sighand_struct, so locking
+ * sighand_struct is always a proper superset of
+ * the locking of signal_struct.
+ */
+struct signal_struct {
+	atomic_t		sigcnt;
+	atomic_t		live;
+	int			nr_threads;
+	struct list_head	thread_head;
+
+	wait_queue_head_t	wait_chldexit;	/* for wait4() */
+
+	/* current thread group signal load-balancing target: */
+	struct task_struct	*curr_target;
+
+	/* shared signal handling: */
+	struct sigpending	shared_pending;
+
+	/* thread group exit support */
+	int			group_exit_code;
+	/* overloaded:
+	 * - notify group_exit_task when ->count is equal to notify_count
+	 * - everyone except group_exit_task is stopped during signal delivery
+	 *   of fatal signals, group_exit_task processes the signal.
+	 */
+	int			notify_count;
+	struct task_struct	*group_exit_task;
+
+	/* thread group stop support, overloads group_exit_code too */
+	int			group_stop_count;
+	unsigned int		flags; /* see SIGNAL_* flags below */
+
+	/*
+	 * PR_SET_CHILD_SUBREAPER marks a process, like a service
+	 * manager, to re-parent orphan (double-forking) child processes
+	 * to this process instead of 'init'. The service manager is
+	 * able to receive SIGCHLD signals and is able to investigate
+	 * the process until it calls wait(). All children of this
+	 * process will inherit a flag if they should look for a
+	 * child_subreaper process at exit.
+	 */
+	unsigned int		is_child_subreaper:1;
+	unsigned int		has_child_subreaper:1;
+
+#ifdef CONFIG_POSIX_TIMERS
+
+	/* POSIX.1b Interval Timers */
+	int			posix_timer_id;
+	struct list_head	posix_timers;
+
+	/* ITIMER_REAL timer for the process */
+	struct hrtimer real_timer;
+	ktime_t it_real_incr;
+
+	/*
+	 * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use
+	 * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these
+	 * values are defined to 0 and 1 respectively
+	 */
+	struct cpu_itimer it[2];
+
+	/*
+	 * Thread group totals for process CPU timers.
+	 * See thread_group_cputimer(), et al, for details.
+	 */
+	struct thread_group_cputimer cputimer;
+
+	/* Earliest-expiration cache. */
+	struct task_cputime cputime_expires;
+
+	struct list_head cpu_timers[3];
+
+#endif
+
+	struct pid *leader_pid;
+
+#ifdef CONFIG_NO_HZ_FULL
+	atomic_t tick_dep_mask;
+#endif
+
+	struct pid *tty_old_pgrp;
+
+	/* boolean value for session group leader */
+	int leader;
+
+	struct tty_struct *tty; /* NULL if no tty */
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+	struct autogroup *autogroup;
+#endif
+	/*
+	 * Cumulative resource counters for dead threads in the group,
+	 * and for reaped dead child processes forked by this group.
+	 * Live threads maintain their own counters and add to these
+	 * in __exit_signal, except for the group leader.
+	 */
+	seqlock_t stats_lock;
+	u64 utime, stime, cutime, cstime;
+	u64 gtime;
+	u64 cgtime;
+	struct prev_cputime prev_cputime;
+	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
+	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
+	unsigned long inblock, oublock, cinblock, coublock;
+	unsigned long maxrss, cmaxrss;
+	struct task_io_accounting ioac;
+
+	/*
+	 * Cumulative ns of schedule CPU time fo dead threads in the
+	 * group, not including a zombie group leader, (This only differs
+	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
+	 * other than jiffies.)
+	 */
+	unsigned long long sum_sched_runtime;
+
+	/*
+	 * We don't bother to synchronize most readers of this at all,
+	 * because there is no reader checking a limit that actually needs
+	 * to get both rlim_cur and rlim_max atomically, and either one
+	 * alone is a single word that can safely be read normally.
+	 * getrlimit/setrlimit use task_lock(current->group_leader) to
+	 * protect this instead of the siglock, because they really
+	 * have no need to disable irqs.
+	 */
+	struct rlimit rlim[RLIM_NLIMITS];
+
+#ifdef CONFIG_BSD_PROCESS_ACCT
+	struct pacct_struct pacct;	/* per-process accounting information */
+#endif
+#ifdef CONFIG_TASKSTATS
+	struct taskstats *stats;
+#endif
+#ifdef CONFIG_AUDIT
+	unsigned audit_tty;
+	struct tty_audit_buf *tty_audit_buf;
+#endif
+
+	/*
+	 * Thread is the potential origin of an oom condition; kill first on
+	 * oom
+	 */
+	bool oom_flag_origin;
+	short oom_score_adj;		/* OOM kill score adjustment */
+	short oom_score_adj_min;	/* OOM kill score adjustment min value.
+					 * Only settable by CAP_SYS_RESOURCE. */
+	struct mm_struct *oom_mm;	/* recorded mm when the thread group got
+					 * killed by the oom killer */
+
+	struct mutex cred_guard_mutex;	/* guard against foreign influences on
+					 * credential calculations
+					 * (notably. ptrace) */
+};
+
+/*
+ * Bits in flags field of signal_struct.
+ */
+#define SIGNAL_STOP_STOPPED	0x00000001 /* job control stop in effect */
+#define SIGNAL_STOP_CONTINUED	0x00000002 /* SIGCONT since WCONTINUED reap */
+#define SIGNAL_GROUP_EXIT	0x00000004 /* group exit in progress */
+#define SIGNAL_GROUP_COREDUMP	0x00000008 /* coredump in progress */
+/*
+ * Pending notifications to parent.
+ */
+#define SIGNAL_CLD_STOPPED	0x00000010
+#define SIGNAL_CLD_CONTINUED	0x00000020
+#define SIGNAL_CLD_MASK		(SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED)
+
+#define SIGNAL_UNKILLABLE	0x00000040 /* for init: ignore fatal signals */
+
+#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \
+			  SIGNAL_STOP_CONTINUED)
+
+static inline void signal_set_stop_flags(struct signal_struct *sig,
+					 unsigned int flags)
+{
+	WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
+	sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
+}
+
+/* If true, all threads except ->group_exit_task have pending SIGKILL */
+static inline int signal_group_exit(const struct signal_struct *sig)
+{
+	return	(sig->flags & SIGNAL_GROUP_EXIT) ||
+		(sig->group_exit_task != NULL);
+}
+
+extern void flush_signals(struct task_struct *);
+extern void ignore_signals(struct task_struct *);
+extern void flush_signal_handlers(struct task_struct *, int force_default);
+extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
+
+static inline int kernel_dequeue_signal(siginfo_t *info)
+{
+	struct task_struct *tsk = current;
+	siginfo_t __info;
+	int ret;
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info);
+	spin_unlock_irq(&tsk->sighand->siglock);
+
+	return ret;
+}
+
+static inline void kernel_signal_stop(void)
+{
+	spin_lock_irq(&current->sighand->siglock);
+	if (current->jobctl & JOBCTL_STOP_DEQUEUED)
+		__set_current_state(TASK_STOPPED);
+	spin_unlock_irq(&current->sighand->siglock);
+
+	schedule();
+}
+extern int send_sig_info(int, struct siginfo *, struct task_struct *);
+extern int force_sigsegv(int, struct task_struct *);
+extern int force_sig_info(int, struct siginfo *, struct task_struct *);
+extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
+extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
+extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *,
+				const struct cred *, u32);
+extern int kill_pgrp(struct pid *pid, int sig, int priv);
+extern int kill_pid(struct pid *pid, int sig, int priv);
+extern int kill_proc_info(int, struct siginfo *, pid_t);
+extern __must_check bool do_notify_parent(struct task_struct *, int);
+extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
+extern void force_sig(int, struct task_struct *);
+extern int send_sig(int, struct task_struct *, int);
+extern int zap_other_threads(struct task_struct *p);
+extern struct sigqueue *sigqueue_alloc(void);
+extern void sigqueue_free(struct sigqueue *);
+extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
+extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
+
+#ifdef TIF_RESTORE_SIGMASK
+/*
+ * Legacy restore_sigmask accessors.  These are inefficient on
+ * SMP architectures because they require atomic operations.
+ */
+
+/**
+ * set_restore_sigmask() - make sure saved_sigmask processing gets done
+ *
+ * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code
+ * will run before returning to user mode, to process the flag.  For
+ * all callers, TIF_SIGPENDING is already set or it's no harm to set
+ * it.  TIF_RESTORE_SIGMASK need not be in the set of bits that the
+ * arch code will notice on return to user mode, in case those bits
+ * are scarce.  We set TIF_SIGPENDING here to ensure that the arch
+ * signal code always gets run when TIF_RESTORE_SIGMASK is set.
+ */
+static inline void set_restore_sigmask(void)
+{
+	set_thread_flag(TIF_RESTORE_SIGMASK);
+	WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+}
+static inline void clear_restore_sigmask(void)
+{
+	clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_restore_sigmask(void)
+{
+	return test_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
+
+#else	/* TIF_RESTORE_SIGMASK */
+
+/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */
+static inline void set_restore_sigmask(void)
+{
+	current->restore_sigmask = true;
+	WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+}
+static inline void clear_restore_sigmask(void)
+{
+	current->restore_sigmask = false;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current->restore_sigmask;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	if (!current->restore_sigmask)
+		return false;
+	current->restore_sigmask = false;
+	return true;
+}
+#endif
+
+static inline void restore_saved_sigmask(void)
+{
+	if (test_and_clear_restore_sigmask())
+		__set_current_blocked(&current->saved_sigmask);
+}
+
+static inline sigset_t *sigmask_to_save(void)
+{
+	sigset_t *res = &current->blocked;
+	if (unlikely(test_restore_sigmask()))
+		res = &current->saved_sigmask;
+	return res;
+}
+
+static inline int kill_cad_pid(int sig, int priv)
+{
+	return kill_pid(cad_pid, sig, priv);
+}
+
+/* These can be the second arg to send_sig_info/send_group_sig_info.  */
+#define SEND_SIG_NOINFO ((struct siginfo *) 0)
+#define SEND_SIG_PRIV	((struct siginfo *) 1)
+#define SEND_SIG_FORCED	((struct siginfo *) 2)
+
+/*
+ * True if we are on the alternate signal stack.
+ */
+static inline int on_sig_stack(unsigned long sp)
+{
+	/*
+	 * If the signal stack is SS_AUTODISARM then, by construction, we
+	 * can't be on the signal stack unless user code deliberately set
+	 * SS_AUTODISARM when we were already on it.
+	 *
+	 * This improves reliability: if user state gets corrupted such that
+	 * the stack pointer points very close to the end of the signal stack,
+	 * then this check will enable the signal to be handled anyway.
+	 */
+	if (current->sas_ss_flags & SS_AUTODISARM)
+		return 0;
+
+#ifdef CONFIG_STACK_GROWSUP
+	return sp >= current->sas_ss_sp &&
+		sp - current->sas_ss_sp < current->sas_ss_size;
+#else
+	return sp > current->sas_ss_sp &&
+		sp - current->sas_ss_sp <= current->sas_ss_size;
+#endif
+}
+
+static inline int sas_ss_flags(unsigned long sp)
+{
+	if (!current->sas_ss_size)
+		return SS_DISABLE;
+
+	return on_sig_stack(sp) ? SS_ONSTACK : 0;
+}
+
+static inline void sas_ss_reset(struct task_struct *p)
+{
+	p->sas_ss_sp = 0;
+	p->sas_ss_size = 0;
+	p->sas_ss_flags = SS_DISABLE;
+}
+
+static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
+{
+	if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp))
+#ifdef CONFIG_STACK_GROWSUP
+		return current->sas_ss_sp;
+#else
+		return current->sas_ss_sp + current->sas_ss_size;
+#endif
+	return sp;
+}
+
+extern void __cleanup_sighand(struct sighand_struct *);
+extern void flush_itimer_signals(void);
+
+#define tasklist_empty() \
+	list_empty(&init_task.tasks)
+
+#define next_task(p) \
+	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
+
+#define for_each_process(p) \
+	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+
+extern bool current_is_single_threaded(void);
+
+/*
+ * Careful: do_each_thread/while_each_thread is a double loop so
+ *          'break' will not work as expected - use goto instead.
+ */
+#define do_each_thread(g, t) \
+	for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
+
+#define while_each_thread(g, t) \
+	while ((t = next_thread(t)) != g)
+
+#define __for_each_thread(signal, t)	\
+	list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t)		\
+	__for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t)	\
+	for_each_process(p) for_each_thread(p, t)
+
+typedef int (*proc_visitor)(struct task_struct *p, void *data);
+void walk_process_tree(struct task_struct *top, proc_visitor, void *);
+
+static inline int get_nr_threads(struct task_struct *tsk)
+{
+	return tsk->signal->nr_threads;
+}
+
+static inline bool thread_group_leader(struct task_struct *p)
+{
+	return p->exit_signal >= 0;
+}
+
+/* Do to the insanities of de_thread it is possible for a process
+ * to have the pid of the thread group leader without actually being
+ * the thread group leader.  For iteration through the pids in proc
+ * all we care about is that we have a task with the appropriate
+ * pid, we don't actually care if we have the right task.
+ */
+static inline bool has_group_leader_pid(struct task_struct *p)
+{
+	return task_pid(p) == p->signal->leader_pid;
+}
+
+static inline
+bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
+{
+	return p1->signal == p2->signal;
+}
+
+static inline struct task_struct *next_thread(const struct task_struct *p)
+{
+	return list_entry_rcu(p->thread_group.next,
+			      struct task_struct, thread_group);
+}
+
+static inline int thread_group_empty(struct task_struct *p)
+{
+	return list_empty(&p->thread_group);
+}
+
+#define delay_group_leader(p) \
+		(thread_group_leader(p) && !thread_group_empty(p))
+
+extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
+							unsigned long *flags);
+
+static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
+						       unsigned long *flags)
+{
+	struct sighand_struct *ret;
+
+	ret = __lock_task_sighand(tsk, flags);
+	(void)__cond_lock(&tsk->sighand->siglock, ret);
+	return ret;
+}
+
+static inline void unlock_task_sighand(struct task_struct *tsk,
+						unsigned long *flags)
+{
+	spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
+}
+
+static inline unsigned long task_rlimit(const struct task_struct *tsk,
+		unsigned int limit)
+{
+	return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
+}
+
+static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
+		unsigned int limit)
+{
+	return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
+}
+
+static inline unsigned long rlimit(unsigned int limit)
+{
+	return task_rlimit(current, limit);
+}
+
+static inline unsigned long rlimit_max(unsigned int limit)
+{
+	return task_rlimit_max(current, limit);
+}
+
 #endif /* _LINUX_SCHED_SIGNAL_H */
-- 
cgit v1.2.3


From bcbb6a5bf7df6e37ba652d1f426eab042ec4f56b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 10:22:42 +0100
Subject: sched/headers: Move 'struct user_struct' definition and APIs to the
 new <linux/sched/user.h> header

'struct user_struct' was added to sched.h historically, but it's actually
entirely independent of task_struct and of scheduler details, so move
it to its own header.

Fix up .c files using those facilities.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 53 ---------------------------------------------
 include/linux/sched/user.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1586104d4c0..71efbcafaa31 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -349,7 +349,6 @@ extern void io_schedule(void);
 void __noreturn do_task_dead(void);
 
 struct nsproxy;
-struct user_namespace;
 
 #ifdef CONFIG_MMU
 extern void arch_pick_mmap_layout(struct mm_struct *mm);
@@ -481,49 +480,6 @@ struct thread_group_cputimer {
 #include <linux/rwsem.h>
 struct autogroup;
 
-/*
- * Some day this will be a full-fledged user tracking system..
- */
-struct user_struct {
-	atomic_t __count;	/* reference count */
-	atomic_t processes;	/* How many processes does this user have? */
-	atomic_t sigpending;	/* How many pending signals does this user have? */
-#ifdef CONFIG_FANOTIFY
-	atomic_t fanotify_listeners;
-#endif
-#ifdef CONFIG_EPOLL
-	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
-#endif
-#ifdef CONFIG_POSIX_MQUEUE
-	/* protected by mq_lock	*/
-	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
-#endif
-	unsigned long locked_shm; /* How many pages of mlocked shm ? */
-	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
-	atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
-
-#ifdef CONFIG_KEYS
-	struct key *uid_keyring;	/* UID specific keyring */
-	struct key *session_keyring;	/* UID's default session keyring */
-#endif
-
-	/* Hash table maintenance information */
-	struct hlist_node uidhash_node;
-	kuid_t uid;
-
-#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
-	atomic_long_t locked_vm;
-#endif
-};
-
-extern int uids_sysfs_init(void);
-
-extern struct user_struct *find_user(kuid_t);
-
-extern struct user_struct root_user;
-#define INIT_USER (&root_user)
-
-
 struct backing_dev_info;
 struct reclaim_state;
 
@@ -1908,15 +1864,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr);
 extern struct task_struct *find_task_by_pid_ns(pid_t nr,
 		struct pid_namespace *ns);
 
-/* per-UID process charging. */
-extern struct user_struct * alloc_uid(kuid_t);
-static inline struct user_struct *get_uid(struct user_struct *u)
-{
-	atomic_inc(&u->__count);
-	return u;
-}
-extern void free_uid(struct user_struct *);
-
 #include <asm/current.h>
 
 extern void xtime_update(unsigned long ticks);
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 83238e5ddadd..40c6363da5ef 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -3,4 +3,58 @@
 
 #include <linux/sched.h>
 
+struct key;
+
+/*
+ * Some day this will be a full-fledged user tracking system..
+ */
+struct user_struct {
+	atomic_t __count;	/* reference count */
+	atomic_t processes;	/* How many processes does this user have? */
+	atomic_t sigpending;	/* How many pending signals does this user have? */
+#ifdef CONFIG_FANOTIFY
+	atomic_t fanotify_listeners;
+#endif
+#ifdef CONFIG_EPOLL
+	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
+#endif
+#ifdef CONFIG_POSIX_MQUEUE
+	/* protected by mq_lock	*/
+	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
+#endif
+	unsigned long locked_shm; /* How many pages of mlocked shm ? */
+	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
+	atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
+
+#ifdef CONFIG_KEYS
+	struct key *uid_keyring;	/* UID specific keyring */
+	struct key *session_keyring;	/* UID's default session keyring */
+#endif
+
+	/* Hash table maintenance information */
+	struct hlist_node uidhash_node;
+	kuid_t uid;
+
+#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
+	atomic_long_t locked_vm;
+#endif
+};
+
+extern int uids_sysfs_init(void);
+
+extern struct user_struct *find_user(kuid_t);
+
+extern struct user_struct root_user;
+#define INIT_USER (&root_user)
+
+
+/* per-UID process charging. */
+extern struct user_struct * alloc_uid(kuid_t);
+static inline struct user_struct *get_uid(struct user_struct *u)
+{
+	atomic_inc(&u->__count);
+	return u;
+}
+extern void free_uid(struct user_struct *);
+
 #endif /* _LINUX_SCHED_USER_H */
-- 
cgit v1.2.3


From d151b27d3f86589308cd8c891fdfd2db5f8e80d6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 11:17:23 +0100
Subject: sched/headers: Move softlockup detector watchdog methods to
 <linux/nmi.h>

These methods don't belong into <linux/sched.h>, they are neither directly
related to task_struct or are scheduler functionality.

Put them next to the other watchdog methods in <linux/nmi.h>.

( Arguably that header's name is a misnomer, and this patch makes it
  more so - but it should be renamed in another patch. )

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h   | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/sched.h | 37 -------------------------------------
 2 files changed, 37 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 0a3fadc32693..aa3cd0878270 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -7,6 +7,43 @@
 #include <linux/sched.h>
 #include <asm/irq.h>
 
+#ifdef CONFIG_LOCKUP_DETECTOR
+extern void touch_softlockup_watchdog_sched(void);
+extern void touch_softlockup_watchdog(void);
+extern void touch_softlockup_watchdog_sync(void);
+extern void touch_all_softlockup_watchdogs(void);
+extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
+				  void __user *buffer,
+				  size_t *lenp, loff_t *ppos);
+extern unsigned int  softlockup_panic;
+extern unsigned int  hardlockup_panic;
+void lockup_detector_init(void);
+#else
+static inline void touch_softlockup_watchdog_sched(void)
+{
+}
+static inline void touch_softlockup_watchdog(void)
+{
+}
+static inline void touch_softlockup_watchdog_sync(void)
+{
+}
+static inline void touch_all_softlockup_watchdogs(void)
+{
+}
+static inline void lockup_detector_init(void)
+{
+}
+#endif
+
+#ifdef CONFIG_DETECT_HUNG_TASK
+void reset_hung_task_detector(void);
+#else
+static inline void reset_hung_task_detector(void)
+{
+}
+#endif
+
 /*
  * The run state of the lockup detectors is controlled by the content of the
  * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 71efbcafaa31..8a1d296c53a0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -286,43 +286,6 @@ extern int sched_cpu_dying(unsigned int cpu);
 
 extern void sched_show_task(struct task_struct *p);
 
-#ifdef CONFIG_LOCKUP_DETECTOR
-extern void touch_softlockup_watchdog_sched(void);
-extern void touch_softlockup_watchdog(void);
-extern void touch_softlockup_watchdog_sync(void);
-extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
-				  void __user *buffer,
-				  size_t *lenp, loff_t *ppos);
-extern unsigned int  softlockup_panic;
-extern unsigned int  hardlockup_panic;
-void lockup_detector_init(void);
-#else
-static inline void touch_softlockup_watchdog_sched(void)
-{
-}
-static inline void touch_softlockup_watchdog(void)
-{
-}
-static inline void touch_softlockup_watchdog_sync(void)
-{
-}
-static inline void touch_all_softlockup_watchdogs(void)
-{
-}
-static inline void lockup_detector_init(void)
-{
-}
-#endif
-
-#ifdef CONFIG_DETECT_HUNG_TASK
-void reset_hung_task_detector(void);
-#else
-static inline void reset_hung_task_detector(void)
-{
-}
-#endif
-
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched		__attribute__((__section__(".sched.text")))
 
-- 
cgit v1.2.3


From 8d88460edc05224b8d7ae3372173153876a02825 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 12:06:10 +0100
Subject: sched/headers: Move 'struct pacct_struct' and 'struct cpu_itimer'
 form <linux/sched.h> to <linux/sched/signal.h>

These structures are actually part of 'struct signal', so move them to <linux/sched/signal.h>
where they belong.

This further decreases the size and complexity of <linux/sched.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h        | 13 -------------
 include/linux/sched/signal.h | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8a1d296c53a0..8bf111efe98e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -326,19 +326,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 #endif
 
-struct pacct_struct {
-	int			ac_flag;
-	long			ac_exitcode;
-	unsigned long		ac_mem;
-	u64			ac_utime, ac_stime;
-	unsigned long		ac_minflt, ac_majflt;
-};
-
-struct cpu_itimer {
-	u64 expires;
-	u64 incr;
-};
-
 /**
  * struct prev_cputime - snaphsot of system and user cputime
  * @utime: time spent in user mode
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 53fe5450f431..30e7ceed3ef6 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -19,6 +19,22 @@ struct sighand_struct {
 	wait_queue_head_t	signalfd_wqh;
 };
 
+/*
+ * Per-process accounting stats:
+ */
+struct pacct_struct {
+	int			ac_flag;
+	long			ac_exitcode;
+	unsigned long		ac_mem;
+	u64			ac_utime, ac_stime;
+	unsigned long		ac_minflt, ac_majflt;
+};
+
+struct cpu_itimer {
+	u64 expires;
+	u64 incr;
+};
+
 /*
  * NOTE! "signal_struct" does not have its own
  * locking, because a shared signal_struct always
-- 
cgit v1.2.3


From 7284c6d419b5a6ae9806927f1fd4f0cfd19a16f5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 12:14:19 +0100
Subject: sched/headers: Move the cpufreq interfaces to <linux/sched/cpufreq.h>

No need to have this in the generic <linux/sched.h> header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h         | 17 -----------------
 include/linux/sched/cpufreq.h | 21 +++++++++++++++++++++
 2 files changed, 21 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8bf111efe98e..aae79706ec4f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2288,21 +2288,4 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 }
 #endif /* CONFIG_MEMCG */
 
-#define SCHED_CPUFREQ_RT	(1U << 0)
-#define SCHED_CPUFREQ_DL	(1U << 1)
-#define SCHED_CPUFREQ_IOWAIT	(1U << 2)
-
-#define SCHED_CPUFREQ_RT_DL	(SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
-
-#ifdef CONFIG_CPU_FREQ
-struct update_util_data {
-       void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
-};
-
-void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
-                       void (*func)(struct update_util_data *data, u64 time,
-				    unsigned int flags));
-void cpufreq_remove_update_util_hook(int cpu);
-#endif /* CONFIG_CPU_FREQ */
-
 #endif
diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index 07ed9664fa20..2bdcfbfc4c30 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -3,4 +3,25 @@
 
 #include <linux/sched.h>
 
+/*
+ * Interface between cpufreq drivers and the scheduler:
+ */
+
+#define SCHED_CPUFREQ_RT	(1U << 0)
+#define SCHED_CPUFREQ_DL	(1U << 1)
+#define SCHED_CPUFREQ_IOWAIT	(1U << 2)
+
+#define SCHED_CPUFREQ_RT_DL	(SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
+
+#ifdef CONFIG_CPU_FREQ
+struct update_util_data {
+       void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
+};
+
+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
+                       void (*func)(struct update_util_data *data, u64 time,
+				    unsigned int flags));
+void cpufreq_remove_update_util_hook(int cpu);
+#endif /* CONFIG_CPU_FREQ */
+
 #endif /* _LINUX_SCHED_CPUFREQ_H */
-- 
cgit v1.2.3


From 4240c8bf877f1145571106a2934c5cea0b51b178 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 12:18:24 +0100
Subject: sched/headers: Move more mm_struct related functionality from
 <linux/sched.h> to <linux/sched/mm.h>

Neither the mmap_layout nor the mm_update_next_owner() methods need to be
in <linux/sched.h> - move them to the more appropriate <linux/sched/mm.h> header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h    | 21 ---------------------
 include/linux/sched/mm.h | 21 +++++++++++++++++++++
 2 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index aae79706ec4f..1ddb82be6b50 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -313,19 +313,6 @@ void __noreturn do_task_dead(void);
 
 struct nsproxy;
 
-#ifdef CONFIG_MMU
-extern void arch_pick_mmap_layout(struct mm_struct *mm);
-extern unsigned long
-arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
-		       unsigned long, unsigned long);
-extern unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
-			  unsigned long len, unsigned long pgoff,
-			  unsigned long flags);
-#else
-static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
-#endif
-
 /**
  * struct prev_cputime - snaphsot of system and user cputime
  * @utime: time spent in user mode
@@ -2280,12 +2267,4 @@ static inline void inc_syscw(struct task_struct *tsk)
 #define TASK_SIZE_OF(tsk)	TASK_SIZE
 #endif
 
-#ifdef CONFIG_MEMCG
-extern void mm_update_next_owner(struct mm_struct *mm);
-#else
-static inline void mm_update_next_owner(struct mm_struct *mm)
-{
-}
-#endif /* CONFIG_MEMCG */
-
 #endif
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index be1ae55f5ab9..93fad9f0f466 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -100,4 +100,25 @@ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
 
+#ifdef CONFIG_MEMCG
+extern void mm_update_next_owner(struct mm_struct *mm);
+#else
+static inline void mm_update_next_owner(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_MEMCG */
+
+#ifdef CONFIG_MMU
+extern void arch_pick_mmap_layout(struct mm_struct *mm);
+extern unsigned long
+arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
+		       unsigned long, unsigned long);
+extern unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+			  unsigned long len, unsigned long pgoff,
+			  unsigned long flags);
+#else
+static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
+#endif
+
 #endif /* _LINUX_SCHED_MM_H */
-- 
cgit v1.2.3


From abe722a1c59728c6c0ea4e4d5efcfe397c8abebc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 12:27:56 +0100
Subject: sched/headers: Move the 'init_mm' declaration from <linux/sched.h> to
 <linux/mm_types.h>

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm_types.h | 2 ++
 include/linux/sched.h    | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6daaf0a51968..28bc710d3467 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -546,6 +546,8 @@ struct mm_struct {
 	struct work_struct async_put_work;
 };
 
+extern struct mm_struct init_mm;
+
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1ddb82be6b50..253d8ab6256c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1782,8 +1782,6 @@ static inline int kstack_end(void *addr)
 extern union thread_union init_thread_union;
 extern struct task_struct init_task;
 
-extern struct   mm_struct init_mm;
-
 extern struct pid_namespace init_pid_ns;
 
 /*
-- 
cgit v1.2.3


From d026ce796cbca3c49678a68bb4a39fb4b9cf8192 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 12:32:21 +0100
Subject: sched/headers: Move in_vfork() from <linux/sched.h> to
 <linux/sched/mm.h>

The in_vfork() function deals with task->mm, so it better belongs
into <linux/sched/mm.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h    | 26 --------------------------
 include/linux/sched/mm.h | 26 ++++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 253d8ab6256c..aa60812b4b7a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1242,32 +1242,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
 #define TNF_FAULT_LOCAL	0x08
 #define TNF_MIGRATE_FAIL 0x10
 
-static inline bool in_vfork(struct task_struct *tsk)
-{
-	bool ret;
-
-	/*
-	 * need RCU to access ->real_parent if CLONE_VM was used along with
-	 * CLONE_PARENT.
-	 *
-	 * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
-	 * imply CLONE_VM
-	 *
-	 * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
-	 * ->real_parent is not necessarily the task doing vfork(), so in
-	 * theory we can't rely on task_lock() if we want to dereference it.
-	 *
-	 * And in this case we can't trust the real_parent->mm == tsk->mm
-	 * check, it can be false negative. But we do not care, if init or
-	 * another oom-unkillable task does this it should blame itself.
-	 */
-	rcu_read_lock();
-	ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
-	rcu_read_unlock();
-
-	return ret;
-}
-
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 93fad9f0f466..64d83fa8d93a 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -121,4 +121,30 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 #endif
 
+static inline bool in_vfork(struct task_struct *tsk)
+{
+	bool ret;
+
+	/*
+	 * need RCU to access ->real_parent if CLONE_VM was used along with
+	 * CLONE_PARENT.
+	 *
+	 * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
+	 * imply CLONE_VM
+	 *
+	 * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
+	 * ->real_parent is not necessarily the task doing vfork(), so in
+	 * theory we can't rely on task_lock() if we want to dereference it.
+	 *
+	 * And in this case we can't trust the real_parent->mm == tsk->mm
+	 * check, it can be false negative. But we do not care, if init or
+	 * another oom-unkillable task does this it should blame itself.
+	 */
+	rcu_read_lock();
+	ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
+	rcu_read_unlock();
+
+	return ret;
+}
+
 #endif /* _LINUX_SCHED_MM_H */
-- 
cgit v1.2.3


From 5647028d550c959577527cec9c0f29fbcea029ea Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 12:39:17 +0100
Subject: sched/headers: Move the NUMA balancing interfaces from
 <linux/sched.h> to <linux/sched/numa_balancing.h>

Split out the interface between the scheduler and the MM which
deals with page fault driven NUMA balancing, into the new
<linux/sched/numa_balancing.h> header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h                | 35 -------------------------------
 include/linux/sched/numa_balancing.h | 40 ++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index aa60812b4b7a..fcaea1e7b08a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1236,41 +1236,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
 }
 #endif
 
-#define TNF_MIGRATED	0x01
-#define TNF_NO_GROUP	0x02
-#define TNF_SHARED	0x04
-#define TNF_FAULT_LOCAL	0x08
-#define TNF_MIGRATE_FAIL 0x10
-
-#ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int last_node, int node, int pages, int flags);
-extern pid_t task_numa_group_id(struct task_struct *p);
-extern void set_numabalancing_state(bool enabled);
-extern void task_numa_free(struct task_struct *p);
-extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
-					int src_nid, int dst_cpu);
-#else
-static inline void task_numa_fault(int last_node, int node, int pages,
-				   int flags)
-{
-}
-static inline pid_t task_numa_group_id(struct task_struct *p)
-{
-	return 0;
-}
-static inline void set_numabalancing_state(bool enabled)
-{
-}
-static inline void task_numa_free(struct task_struct *p)
-{
-}
-static inline bool should_numa_migrate_memory(struct task_struct *p,
-				struct page *page, int src_nid, int dst_cpu)
-{
-	return true;
-}
-#endif
-
 static inline struct pid *task_pid(struct task_struct *task)
 {
 	return task->pids[PIDTYPE_PID].pid;
diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 999182279f78..35d5fc77b4be 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -1,6 +1,46 @@
 #ifndef _LINUX_SCHED_NUMA_BALANCING_H
 #define _LINUX_SCHED_NUMA_BALANCING_H
 
+/*
+ * This is the interface between the scheduler and the MM that
+ * implements memory access pattern based NUMA-balancing:
+ */
+
 #include <linux/sched.h>
 
+#define TNF_MIGRATED	0x01
+#define TNF_NO_GROUP	0x02
+#define TNF_SHARED	0x04
+#define TNF_FAULT_LOCAL	0x08
+#define TNF_MIGRATE_FAIL 0x10
+
+#ifdef CONFIG_NUMA_BALANCING
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
+extern pid_t task_numa_group_id(struct task_struct *p);
+extern void set_numabalancing_state(bool enabled);
+extern void task_numa_free(struct task_struct *p);
+extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
+					int src_nid, int dst_cpu);
+#else
+static inline void task_numa_fault(int last_node, int node, int pages,
+				   int flags)
+{
+}
+static inline pid_t task_numa_group_id(struct task_struct *p)
+{
+	return 0;
+}
+static inline void set_numabalancing_state(bool enabled)
+{
+}
+static inline void task_numa_free(struct task_struct *p)
+{
+}
+static inline bool should_numa_migrate_memory(struct task_struct *p,
+				struct page *page, int src_nid, int dst_cpu)
+{
+	return true;
+}
+#endif
+
 #endif /* _LINUX_SCHED_NUMA_BALANCING_H */
-- 
cgit v1.2.3


From c41cfc6c5ba46050b416c0b0b2621cbe68c4669c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 12:45:58 +0100
Subject: sched/headers: Move the JOBCTL_ defines and methods from
 <linux/sched.h> to <linux/sched/jobctl.h>

Only a small fraction of sched.h users actually utilizes these defines,
and they are not scheduler functionality in any case, so move them
into their separate header.

(Also make <linux/sched/jobctl.h> a self-contained header.)

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h        | 30 ------------------------------
 include/linux/sched/jobctl.h | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fcaea1e7b08a..46ba8f1b5f1f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1549,36 +1549,6 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
 TASK_PFA_TEST(LMK_WAITING, lmk_waiting)
 TASK_PFA_SET(LMK_WAITING, lmk_waiting)
 
-/*
- * task->jobctl flags
- */
-#define JOBCTL_STOP_SIGMASK	0xffff	/* signr of the last group stop */
-
-#define JOBCTL_STOP_DEQUEUED_BIT 16	/* stop signal dequeued */
-#define JOBCTL_STOP_PENDING_BIT	17	/* task should stop for group stop */
-#define JOBCTL_STOP_CONSUME_BIT	18	/* consume group stop count */
-#define JOBCTL_TRAP_STOP_BIT	19	/* trap for STOP */
-#define JOBCTL_TRAP_NOTIFY_BIT	20	/* trap for NOTIFY */
-#define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
-#define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
-
-#define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
-#define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
-#define JOBCTL_STOP_CONSUME	(1UL << JOBCTL_STOP_CONSUME_BIT)
-#define JOBCTL_TRAP_STOP	(1UL << JOBCTL_TRAP_STOP_BIT)
-#define JOBCTL_TRAP_NOTIFY	(1UL << JOBCTL_TRAP_NOTIFY_BIT)
-#define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
-#define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
-
-#define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
-#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
-
-extern bool task_set_jobctl_pending(struct task_struct *task,
-				    unsigned long mask);
-extern void task_clear_jobctl_trapping(struct task_struct *task);
-extern void task_clear_jobctl_pending(struct task_struct *task,
-				      unsigned long mask);
-
 static inline void rcu_copy_process(struct task_struct *p)
 {
 #ifdef CONFIG_PREEMPT_RCU
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
index 228e4644937b..016afa0fb3bb 100644
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -1,4 +1,36 @@
 #ifndef _LINUX_SCHED_JOBCTL_H
 #define _LINUX_SCHED_JOBCTL_H
 
+#include <linux/types.h>
+
+struct task_struct;
+
+/*
+ * task->jobctl flags
+ */
+#define JOBCTL_STOP_SIGMASK	0xffff	/* signr of the last group stop */
+
+#define JOBCTL_STOP_DEQUEUED_BIT 16	/* stop signal dequeued */
+#define JOBCTL_STOP_PENDING_BIT	17	/* task should stop for group stop */
+#define JOBCTL_STOP_CONSUME_BIT	18	/* consume group stop count */
+#define JOBCTL_TRAP_STOP_BIT	19	/* trap for STOP */
+#define JOBCTL_TRAP_NOTIFY_BIT	20	/* trap for NOTIFY */
+#define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
+#define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
+
+#define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
+#define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
+#define JOBCTL_STOP_CONSUME	(1UL << JOBCTL_STOP_CONSUME_BIT)
+#define JOBCTL_TRAP_STOP	(1UL << JOBCTL_TRAP_STOP_BIT)
+#define JOBCTL_TRAP_NOTIFY	(1UL << JOBCTL_TRAP_NOTIFY_BIT)
+#define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
+#define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
+
+#define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
+#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
+
+extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask);
+extern void task_clear_jobctl_trapping(struct task_struct *task);
+extern void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask);
+
 #endif /* _LINUX_SCHED_JOBCTL_H */
-- 
cgit v1.2.3


From 30a1baab81189f01c427c4939610b2dde2dbec37 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 10:06:45 +0100
Subject: sched/headers: Remove various unrelated headers from <linux/sched.h>

Remove the following header inclusions from <linux/sched.h>:

	#include <asm/param.h>
	#include <linux/threads.h>
	#include <linux/kernel.h>
	#include <linux/types.h>
	#include <linux/timex.h>
	#include <linux/jiffies.h>
	#include <linux/rbtree.h>
	#include <linux/thread_info.h>
	#include <linux/cpumask.h>
	#include <linux/errno.h>
	#include <linux/nodemask.h>
	#include <linux/preempt.h>
	#include <asm/page.h>
	#include <linux/smp.h>
	#include <linux/compiler.h>
	#include <linux/completion.h>
	#include <linux/percpu.h>
	#include <linux/topology.h>
	#include <linux/rcupdate.h>
	#include <linux/time.h>
	#include <linux/timer.h>
	#include <linux/llist.h>
	#include <linux/uidgid.h>
	#include <asm/processor.h>

because they are either not required, or are already included
naturally as part of the remaining headers.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 28 ----------------------------
 1 file changed, 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 46ba8f1b5f1f..7752025679c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -5,60 +5,32 @@
 
 #include <linux/sched/prio.h>
 
-#include <asm/param.h>	/* for HZ */
-
 #include <linux/capability.h>
-#include <linux/threads.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/timex.h>
-#include <linux/jiffies.h>
 #include <linux/mutex.h>
 #include <linux/plist.h>
-#include <linux/rbtree.h>
-#include <linux/thread_info.h>
-#include <linux/cpumask.h>
-#include <linux/errno.h>
-#include <linux/nodemask.h>
 #include <linux/mm_types.h>
-#include <linux/preempt.h>
-
-#include <asm/page.h>
 #include <asm/ptrace.h>
 
-#include <linux/smp.h>
 #include <linux/sem.h>
 #include <linux/shm.h>
 #include <linux/signal.h>
-#include <linux/compiler.h>
-#include <linux/completion.h>
 #include <linux/signal_types.h>
 #include <linux/pid.h>
-#include <linux/percpu.h>
-#include <linux/topology.h>
 #include <linux/seccomp.h>
-#include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/rtmutex.h>
 
-#include <linux/time.h>
-#include <linux/param.h>
 #include <linux/resource.h>
-#include <linux/timer.h>
 #include <linux/hrtimer.h>
 #include <linux/kcov.h>
 #include <linux/task_io_accounting.h>
 #include <linux/latencytop.h>
 #include <linux/cred.h>
-#include <linux/llist.h>
-#include <linux/uidgid.h>
 #include <linux/gfp.h>
 #include <linux/topology.h>
 #include <linux/magic.h>
 #include <linux/cgroup-defs.h>
 
-#include <asm/processor.h>
-
 struct sched_attr;
 struct sched_param;
 
-- 
cgit v1.2.3


From 9a07000400c853c57477c2a5138ae9f556c40897 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 19:10:05 +0100
Subject: sched/headers: Move CONFIG_TASK_XACCT bits from <linux/sched.h> to
 <linux/sched/xacct.h>

The CONFIG_TASK_XACCT=y accounting inline functions are only used by
fs/read_write.c, so move them into their separate header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h       | 38 --------------------------------------
 include/linux/sched/xacct.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7752025679c0..1201312e111e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2104,44 +2104,6 @@ extern struct task_group root_task_group;
 extern int task_can_switch_user(struct user_struct *up,
 					struct task_struct *tsk);
 
-#ifdef CONFIG_TASK_XACCT
-static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
-{
-	tsk->ioac.rchar += amt;
-}
-
-static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
-{
-	tsk->ioac.wchar += amt;
-}
-
-static inline void inc_syscr(struct task_struct *tsk)
-{
-	tsk->ioac.syscr++;
-}
-
-static inline void inc_syscw(struct task_struct *tsk)
-{
-	tsk->ioac.syscw++;
-}
-#else
-static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
-{
-}
-
-static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
-{
-}
-
-static inline void inc_syscr(struct task_struct *tsk)
-{
-}
-
-static inline void inc_syscw(struct task_struct *tsk)
-{
-}
-#endif
-
 #ifndef TASK_SIZE_OF
 #define TASK_SIZE_OF(tsk)	TASK_SIZE
 #endif
diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h
index 890f7ce5cd27..a28156a0d34a 100644
--- a/include/linux/sched/xacct.h
+++ b/include/linux/sched/xacct.h
@@ -1,6 +1,48 @@
 #ifndef _LINUX_SCHED_XACCT_H
 #define _LINUX_SCHED_XACCT_H
 
+/*
+ * Extended task accounting methods:
+ */
+
 #include <linux/sched.h>
 
+#ifdef CONFIG_TASK_XACCT
+static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
+{
+	tsk->ioac.rchar += amt;
+}
+
+static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
+{
+	tsk->ioac.wchar += amt;
+}
+
+static inline void inc_syscr(struct task_struct *tsk)
+{
+	tsk->ioac.syscr++;
+}
+
+static inline void inc_syscw(struct task_struct *tsk)
+{
+	tsk->ioac.syscw++;
+}
+#else
+static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
+{
+}
+
+static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
+{
+}
+
+static inline void inc_syscr(struct task_struct *tsk)
+{
+}
+
+static inline void inc_syscw(struct task_struct *tsk)
+{
+}
+#endif
+
 #endif /* _LINUX_SCHED_XACCT_H */
-- 
cgit v1.2.3


From 2a1f062a4acf0be50516ceece92a7182a173d55a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 19:15:33 +0100
Subject: sched/headers: Move signal wakeup & sigpending methods from
 <linux/sched.h> into <linux/sched/signal.h>

This reduces the size of <linux/sched.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h        | 51 ------------------------------------------
 include/linux/sched/signal.h | 53 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1201312e111e..1e0400069e95 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1931,37 +1931,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
-static inline int restart_syscall(void)
-{
-	set_tsk_thread_flag(current, TIF_SIGPENDING);
-	return -ERESTARTNOINTR;
-}
-
-static inline int signal_pending(struct task_struct *p)
-{
-	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
-}
-
-static inline int __fatal_signal_pending(struct task_struct *p)
-{
-	return unlikely(sigismember(&p->pending.signal, SIGKILL));
-}
-
-static inline int fatal_signal_pending(struct task_struct *p)
-{
-	return signal_pending(p) && __fatal_signal_pending(p);
-}
-
-static inline int signal_pending_state(long state, struct task_struct *p)
-{
-	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
-		return 0;
-	if (!signal_pending(p))
-		return 0;
-
-	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
-}
-
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
@@ -2028,26 +1997,6 @@ static __always_inline bool need_resched(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
-/*
- * Reevaluate whether the task has signals pending delivery.
- * Wake the task if so.
- * This is required every time the blocked sigset_t changes.
- * callers must hold sighand->siglock.
- */
-extern void recalc_sigpending_and_wake(struct task_struct *t);
-extern void recalc_sigpending(void);
-
-extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
-
-static inline void signal_wake_up(struct task_struct *t, bool resume)
-{
-	signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
-}
-static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
-{
-	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
-}
-
 /*
  * Wrappers for p->thread_info->cpu access. No-op on UP.
  */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 30e7ceed3ef6..b3545fb4333a 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -3,10 +3,10 @@
 
 #include <linux/rculist.h>
 #include <linux/signal.h>
-#include <linux/cred.h>
 #include <linux/sched.h>
 #include <linux/sched/jobctl.h>
 #include <linux/sched/task.h>
+#include <linux/cred.h>
 
 /*
  * Types defining task->signal and task->sighand and APIs using them:
@@ -271,6 +271,57 @@ extern void sigqueue_free(struct sigqueue *);
 extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 
+static inline int restart_syscall(void)
+{
+	set_tsk_thread_flag(current, TIF_SIGPENDING);
+	return -ERESTARTNOINTR;
+}
+
+static inline int signal_pending(struct task_struct *p)
+{
+	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
+}
+
+static inline int __fatal_signal_pending(struct task_struct *p)
+{
+	return unlikely(sigismember(&p->pending.signal, SIGKILL));
+}
+
+static inline int fatal_signal_pending(struct task_struct *p)
+{
+	return signal_pending(p) && __fatal_signal_pending(p);
+}
+
+static inline int signal_pending_state(long state, struct task_struct *p)
+{
+	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
+		return 0;
+	if (!signal_pending(p))
+		return 0;
+
+	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
+}
+
+/*
+ * Reevaluate whether the task has signals pending delivery.
+ * Wake the task if so.
+ * This is required every time the blocked sigset_t changes.
+ * callers must hold sighand->siglock.
+ */
+extern void recalc_sigpending_and_wake(struct task_struct *t);
+extern void recalc_sigpending(void);
+
+extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
+
+static inline void signal_wake_up(struct task_struct *t, bool resume)
+{
+	signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
+}
+static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
+{
+	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
+}
+
 #ifdef TIF_RESTORE_SIGMASK
 /*
  * Legacy restore_sigmask accessors.  These are inefficient on
-- 
cgit v1.2.3


From 74444edaa0b2ef946e846d5ddf1ba90efcd7c200 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 20:43:54 +0100
Subject: sched/headers: Move the memalloc_noio_*() APIs to <linux/sched/mm.h>

In preparation to remove the <linux/gfp.h> include from <linux/sched.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h    | 22 ----------------------
 include/linux/sched/mm.h | 22 ++++++++++++++++++++++
 2 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e0400069e95..dbc3ff04750a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1468,28 +1468,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *s
 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
 #define used_math() tsk_used_math(current)
 
-/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags
- * __GFP_FS is also cleared as it implies __GFP_IO.
- */
-static inline gfp_t memalloc_noio_flags(gfp_t flags)
-{
-	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
-		flags &= ~(__GFP_IO | __GFP_FS);
-	return flags;
-}
-
-static inline unsigned int memalloc_noio_save(void)
-{
-	unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
-	current->flags |= PF_MEMALLOC_NOIO;
-	return flags;
-}
-
-static inline void memalloc_noio_restore(unsigned int flags)
-{
-	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
-}
-
 /* Per-process atomic flags. */
 #define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
 #define PFA_SPREAD_PAGE  1      /* Spread page cache over cpuset */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 64d83fa8d93a..62dca4e0b4e0 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -147,4 +147,26 @@ static inline bool in_vfork(struct task_struct *tsk)
 	return ret;
 }
 
+/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags
+ * __GFP_FS is also cleared as it implies __GFP_IO.
+ */
+static inline gfp_t memalloc_noio_flags(gfp_t flags)
+{
+	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
+		flags &= ~(__GFP_IO | __GFP_FS);
+	return flags;
+}
+
+static inline unsigned int memalloc_noio_save(void)
+{
+	unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
+	current->flags |= PF_MEMALLOC_NOIO;
+	return flags;
+}
+
+static inline void memalloc_noio_restore(unsigned int flags)
+{
+	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
+}
+
 #endif /* _LINUX_SCHED_MM_H */
-- 
cgit v1.2.3


From 3605df49556ebb7641d1f8ec2e7eeecf4dd734bf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 11:03:12 +0100
Subject: sched/headers: Move task statistics APIs from <linux/sched.h> to
 <linux/sched/stat.h>

There are a number of task statistics related variables and methods exported
via sched.h - collect them into <linux/sched/stat.h> and include it from
their usage sites.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 10 ----------
 include/linux/sched/stat.h | 18 ++++++++++++++++++
 2 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dbc3ff04750a..415baf253517 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -46,16 +46,6 @@ struct nameidata;
 struct signal_struct;
 struct sighand_struct;
 
-extern unsigned long total_forks;
-extern int nr_threads;
-DECLARE_PER_CPU(unsigned long, process_counts);
-extern int nr_processes(void);
-extern unsigned long nr_running(void);
-extern bool single_task_running(void);
-extern unsigned long nr_iowait(void);
-extern unsigned long nr_iowait_cpu(int cpu);
-extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
-
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void cpu_load_update_nohz_start(void);
 extern void cpu_load_update_nohz_stop(void);
diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 30fe012aecb0..0d52ceb6d3ae 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -3,4 +3,22 @@
 
 #include <linux/sched.h>
 
+/*
+ * Various counters maintained by the scheduler and fork(),
+ * exposed via /proc, sys.c or used by drivers via these APIs.
+ *
+ * ( Note that all these values are aquired without locking,
+ *   so they can only be relied on in narrow circumstances. )
+ */
+
+extern unsigned long total_forks;
+extern int nr_threads;
+DECLARE_PER_CPU(unsigned long, process_counts);
+extern int nr_processes(void);
+extern unsigned long nr_running(void);
+extern bool single_task_running(void);
+extern unsigned long nr_iowait(void);
+extern unsigned long nr_iowait_cpu(int cpu);
+extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
+
 #endif /* _LINUX_SCHED_STAT_H */
-- 
cgit v1.2.3


From 752b3ca734cbc17c0456b846ae886c0ed39c5703 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 11:12:59 +0100
Subject: sched/headers: Move the NOHZ APIs from <linux/sched.h> to
 <linux/sched/nohz.h>

There's a number of NOHZ/dyntics related functionality in <linux/sched.h>,
but only a handful of timer files are making use of them.

Move them into their own header. This better documents these APIs
and unclutters <linux/sched.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 35 -----------------------------------
 include/linux/sched/nohz.h | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 415baf253517..5f9bfc603d19 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -46,14 +46,6 @@ struct nameidata;
 struct signal_struct;
 struct sighand_struct;
 
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void cpu_load_update_nohz_start(void);
-extern void cpu_load_update_nohz_stop(void);
-#else
-static inline void cpu_load_update_nohz_start(void) { }
-static inline void cpu_load_update_nohz_stop(void) { }
-#endif
-
 extern void dump_cpu_task(int cpu);
 
 struct seq_file;
@@ -204,15 +196,6 @@ extern cpumask_var_t cpu_isolated_map;
 
 extern int runqueue_is_locked(int cpu);
 
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void nohz_balance_enter_idle(int cpu);
-extern void set_cpu_sd_state_idle(void);
-extern int get_nohz_timer_target(void);
-#else
-static inline void nohz_balance_enter_idle(int cpu) { }
-static inline void set_cpu_sd_state_idle(void) { }
-#endif
-
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
@@ -1535,14 +1518,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
 }
 #endif
 
-#ifdef CONFIG_NO_HZ_COMMON
-void calc_load_enter_idle(void);
-void calc_load_exit_idle(void);
-#else
-static inline void calc_load_enter_idle(void) { }
-static inline void calc_load_exit_idle(void) { }
-#endif /* CONFIG_NO_HZ_COMMON */
-
 #ifndef cpu_relax_yield
 #define cpu_relax_yield() cpu_relax()
 #endif
@@ -1563,16 +1538,6 @@ extern void idle_task_exit(void);
 static inline void idle_task_exit(void) {}
 #endif
 
-#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
-extern void wake_up_nohz_cpu(int cpu);
-#else
-static inline void wake_up_nohz_cpu(int cpu) { }
-#endif
-
-#ifdef CONFIG_NO_HZ_FULL
-extern u64 scheduler_tick_max_deferment(void);
-#endif
-
 extern int yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index fe6caf0dab10..9471f0736a3a 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -3,4 +3,43 @@
 
 #include <linux/sched.h>
 
+/*
+ * This is the interface between the scheduler and nohz/dyntics:
+ */
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+extern void cpu_load_update_nohz_start(void);
+extern void cpu_load_update_nohz_stop(void);
+#else
+static inline void cpu_load_update_nohz_start(void) { }
+static inline void cpu_load_update_nohz_stop(void) { }
+#endif
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+extern void nohz_balance_enter_idle(int cpu);
+extern void set_cpu_sd_state_idle(void);
+extern int get_nohz_timer_target(void);
+#else
+static inline void nohz_balance_enter_idle(int cpu) { }
+static inline void set_cpu_sd_state_idle(void) { }
+#endif
+
+#ifdef CONFIG_NO_HZ_COMMON
+void calc_load_enter_idle(void);
+void calc_load_exit_idle(void);
+#else
+static inline void calc_load_enter_idle(void) { }
+static inline void calc_load_exit_idle(void) { }
+#endif /* CONFIG_NO_HZ_COMMON */
+
+#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
+extern void wake_up_nohz_cpu(int cpu);
+#else
+static inline void wake_up_nohz_cpu(int cpu) { }
+#endif
+
+#ifdef CONFIG_NO_HZ_FULL
+extern u64 scheduler_tick_max_deferment(void);
+#endif
+
 #endif /* _LINUX_SCHED_NOHZ_H */
-- 
cgit v1.2.3


From d3d1e320d43a7bad9603acf0214406a1e8795c63 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 11:16:09 +0100
Subject: sched/headers: Move debugging functions from <linux/sched.h> to
 <linux/sched/debug.h>

Collect the various scheduler and task state debugging APIs scattered
around <linux/sched.h> into the new <linux/sched/debug.h> header.

In particular the show_regs() and show_stack() prototype affects many files,
update them.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h       | 36 -----------------------------------
 include/linux/sched/debug.h | 46 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5f9bfc603d19..76a2e522be29 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -46,15 +46,9 @@ struct nameidata;
 struct signal_struct;
 struct sighand_struct;
 
-extern void dump_cpu_task(int cpu);
-
 struct seq_file;
 struct cfs_rq;
 struct task_group;
-#ifdef CONFIG_SCHED_DEBUG
-extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
-extern void proc_sched_set_task(struct task_struct *p);
-#endif
 
 /*
  * Task state bitmask. NOTE! These bits are also
@@ -196,25 +190,6 @@ extern cpumask_var_t cpu_isolated_map;
 
 extern int runqueue_is_locked(int cpu);
 
-/*
- * Only dump TASK_* tasks. (0 for all tasks)
- */
-extern void show_state_filter(unsigned long state_filter);
-
-static inline void show_state(void)
-{
-	show_state_filter(0);
-}
-
-extern void show_regs(struct pt_regs *);
-
-/*
- * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
- * task), SP is the stack pointer of the first frame that should be shown in the back
- * trace (or NULL if the entire call-chain of the task should be shown).
- */
-extern void show_stack(struct task_struct *task, unsigned long *sp);
-
 extern void cpu_init (void);
 extern void trap_init(void);
 extern void update_process_times(int user);
@@ -229,17 +204,6 @@ extern int sched_cpu_dying(unsigned int cpu);
 # define sched_cpu_dying	NULL
 #endif
 
-extern void sched_show_task(struct task_struct *p);
-
-/* Attach to any functions which should be ignored in wchan output. */
-#define __sched		__attribute__((__section__(".sched.text")))
-
-/* Linker adds these: start and end of __sched functions */
-extern char __sched_text_start[], __sched_text_end[];
-
-/* Is this address in the __sched functions? */
-extern int in_sched_functions(unsigned long addr);
-
 #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
 extern signed long schedule_timeout(signed long timeout);
 extern signed long schedule_timeout_interruptible(signed long timeout);
diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
index 55bc0cd35fd5..853bbef0b47b 100644
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -3,4 +3,50 @@
 
 #include <linux/sched.h>
 
+/*
+ * Various scheduler/task debugging interfaces:
+ */
+
+struct task_struct;
+
+extern void dump_cpu_task(int cpu);
+
+/*
+ * Only dump TASK_* tasks. (0 for all tasks)
+ */
+extern void show_state_filter(unsigned long state_filter);
+
+static inline void show_state(void)
+{
+	show_state_filter(0);
+}
+
+struct pt_regs;
+
+extern void show_regs(struct pt_regs *);
+
+/*
+ * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
+ * task), SP is the stack pointer of the first frame that should be shown in the back
+ * trace (or NULL if the entire call-chain of the task should be shown).
+ */
+extern void show_stack(struct task_struct *task, unsigned long *sp);
+
+extern void sched_show_task(struct task_struct *p);
+
+#ifdef CONFIG_SCHED_DEBUG
+struct seq_file;
+extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
+extern void proc_sched_set_task(struct task_struct *p);
+#endif
+
+/* Attach to any functions which should be ignored in wchan output. */
+#define __sched		__attribute__((__section__(".sched.text")))
+
+/* Linker adds these: start and end of __sched functions */
+extern char __sched_text_start[], __sched_text_end[];
+
+/* Is this address in the __sched functions? */
+extern int in_sched_functions(unsigned long addr);
+
 #endif /* _LINUX_SCHED_DEBUG_H */
-- 
cgit v1.2.3


From 63cc9d6fca8c220c2406f1376100a9acb55197af Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 12:04:39 +0100
Subject: sched/headers, time/timekeeping: Move the xtime_update() prototype
 from <linux/sched.h> to <linux/time.h>

This was in <linux/sched.h> only for hysterical raisins.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 --
 include/linux/time.h  | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 76a2e522be29..5a4fbc76feb4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1579,8 +1579,6 @@ extern struct task_struct *find_task_by_pid_ns(pid_t nr,
 
 #include <asm/current.h>
 
-extern void xtime_update(unsigned long ticks);
-
 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
 extern int wake_up_process(struct task_struct *tsk);
 extern void wake_up_new_task(struct task_struct *tsk);
diff --git a/include/linux/time.h b/include/linux/time.h
index 23f0f5ce3090..4f4ab65b6e61 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -167,6 +167,8 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts)
 extern u32 (*arch_gettimeoffset)(void);
 #endif
 
+extern void xtime_update(unsigned long ticks);
+
 struct itimerval;
 extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);
-- 
cgit v1.2.3


From 70b8157e61d0143fb44ae9482557d7aca365da3d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 12:11:00 +0100
Subject: sched/headers: Move <asm/current.h> include from the middle of
 <linux/sched.h> to the header portion

Linux-0.01 already defined 'current' in the middle of sched.h, so this
is an ancient historical precedent - but still in a modern kernel it
looks a bit weird that we have:

	#include <asm/current.h>

in the middle of the header.

Move it further up. If this was done for some obscure dependency
reasons then we'll trigger and document it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a4fbc76feb4..ac0fed4c3130 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -31,6 +31,8 @@
 #include <linux/magic.h>
 #include <linux/cgroup-defs.h>
 
+#include <asm/current.h>
+
 struct sched_attr;
 struct sched_param;
 
@@ -1577,8 +1579,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr);
 extern struct task_struct *find_task_by_pid_ns(pid_t nr,
 		struct pid_namespace *ns);
 
-#include <asm/current.h>
-
 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
 extern int wake_up_process(struct task_struct *tsk);
 extern void wake_up_new_task(struct task_struct *tsk);
-- 
cgit v1.2.3


From 0ca0156973a47e689f3bc817e26e15fff3f84eec Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 14:52:01 +0100
Subject: sched/headers: Split hotplug CPU interfaces out of <linux/sched.h>
 into <linux/sched/hotplug.h>

Split the CPU hotplug scheduler APIs out of the common header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h         | 15 ---------------
 include/linux/sched/hotplug.h | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ac0fed4c3130..25cc0adb3e08 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -196,15 +196,6 @@ extern void cpu_init (void);
 extern void trap_init(void);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
-extern int sched_cpu_starting(unsigned int cpu);
-extern int sched_cpu_activate(unsigned int cpu);
-extern int sched_cpu_deactivate(unsigned int cpu);
-
-#ifdef CONFIG_HOTPLUG_CPU
-extern int sched_cpu_dying(unsigned int cpu);
-#else
-# define sched_cpu_dying	NULL
-#endif
 
 #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
 extern signed long schedule_timeout(signed long timeout);
@@ -1498,12 +1489,6 @@ extern void sched_exec(void);
 #define sched_exec()   {}
 #endif
 
-#ifdef CONFIG_HOTPLUG_CPU
-extern void idle_task_exit(void);
-#else
-static inline void idle_task_exit(void) {}
-#endif
-
 extern int yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h
index 34670ed24894..c608d3c1ddb8 100644
--- a/include/linux/sched/hotplug.h
+++ b/include/linux/sched/hotplug.h
@@ -3,4 +3,24 @@
 
 #include <linux/sched.h>
 
+/*
+ * Scheduler interfaces for hotplug CPU support:
+ */
+
+extern int sched_cpu_starting(unsigned int cpu);
+extern int sched_cpu_activate(unsigned int cpu);
+extern int sched_cpu_deactivate(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_dying(unsigned int cpu);
+#else
+# define sched_cpu_dying	NULL
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void idle_task_exit(void);
+#else
+static inline void idle_task_exit(void) {}
+#endif
+
 #endif /* _LINUX_SCHED_HOTPLUG_H */
-- 
cgit v1.2.3


From 901b14bd946a8b7ea211105b6207e082ddd36846 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 15:24:12 +0100
Subject: sched/headers: Move task lifetime APIs from <linux/sched.h> to
 <linux/sched/task.h>

There's a fair amount of task lifetime management (a.k.a fork()/exit())
related APIs in <linux/sched.h>, but only a small fraction of
the users of the generic sched.h header make use of them.

Move these functions to the <linux/sched/task.h> header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 91 -------------------------------------------
 include/linux/sched/task.h | 97 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+), 91 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 25cc0adb3e08..b1677c8db03f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -165,28 +165,10 @@ struct task_group;
 /* Task command name length */
 #define TASK_COMM_LEN 16
 
-#include <linux/spinlock.h>
-
-/*
- * This serializes "schedule()" and also protects
- * the run-queue from deletions/modifications (but
- * _adding_ to the beginning of the run-queue has
- * a separate lock).
- */
-extern rwlock_t tasklist_lock;
-extern spinlock_t mmlist_lock;
-
 struct task_struct;
 
-#ifdef CONFIG_PROVE_RCU
-extern int lockdep_tasklist_lock_is_held(void);
-#endif /* #ifdef CONFIG_PROVE_RCU */
-
 extern void sched_init(void);
 extern void sched_init_smp(void);
-extern asmlinkage void schedule_tail(struct task_struct *prev);
-extern void init_idle(struct task_struct *idle, int cpu);
-extern void init_idle_bootup_task(struct task_struct *idle);
 
 extern cpumask_var_t cpu_isolated_map;
 
@@ -211,8 +193,6 @@ extern void io_schedule_finish(int token);
 extern long io_schedule_timeout(long timeout);
 extern void io_schedule(void);
 
-void __noreturn do_task_dead(void);
-
 struct nsproxy;
 
 /**
@@ -1120,24 +1100,6 @@ struct task_struct {
  */
 };
 
-#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
-extern int arch_task_struct_size __read_mostly;
-#else
-# define arch_task_struct_size (sizeof(struct task_struct))
-#endif
-
-#ifdef CONFIG_VMAP_STACK
-static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
-{
-	return t->stack_vm_area;
-}
-#else
-static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
-{
-	return NULL;
-}
-#endif
-
 static inline struct pid *task_pid(struct task_struct *task)
 {
 	return task->pids[PIDTYPE_PID].pid;
@@ -1429,21 +1391,6 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
 TASK_PFA_TEST(LMK_WAITING, lmk_waiting)
 TASK_PFA_SET(LMK_WAITING, lmk_waiting)
 
-static inline void rcu_copy_process(struct task_struct *p)
-{
-#ifdef CONFIG_PREEMPT_RCU
-	p->rcu_read_lock_nesting = 0;
-	p->rcu_read_unlock_special.s = 0;
-	p->rcu_blocked_node = NULL;
-	INIT_LIST_HEAD(&p->rcu_node_entry);
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-#ifdef CONFIG_TASKS_RCU
-	p->rcu_tasks_holdout = false;
-	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
-	p->rcu_tasks_idle_cpu = -1;
-#endif /* #ifdef CONFIG_TASKS_RCU */
-}
-
 static inline void tsk_restore_flags(struct task_struct *task,
 				unsigned long orig_flags, unsigned long flags)
 {
@@ -1572,45 +1519,11 @@ extern void wake_up_new_task(struct task_struct *tsk);
 #else
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
-extern void sched_dead(struct task_struct *p);
-
-extern void proc_caches_init(void);
-
-extern void release_task(struct task_struct * p);
-
-#ifdef CONFIG_HAVE_COPY_THREAD_TLS
-extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
-			struct task_struct *, unsigned long);
-#else
-extern int copy_thread(unsigned long, unsigned long, unsigned long,
-			struct task_struct *);
-
-/* Architectures that haven't opted into copy_thread_tls get the tls argument
- * via pt_regs, so ignore the tls argument passed via C. */
-static inline int copy_thread_tls(
-		unsigned long clone_flags, unsigned long sp, unsigned long arg,
-		struct task_struct *p, unsigned long tls)
-{
-	return copy_thread(clone_flags, sp, arg, p);
-}
-#endif
-extern void flush_thread(void);
-
-#ifdef CONFIG_HAVE_EXIT_THREAD
-extern void exit_thread(struct task_struct *tsk);
-#else
-static inline void exit_thread(struct task_struct *tsk)
-{
-}
-#endif
 
 extern void exit_files(struct task_struct *);
 
 extern void exit_itimers(struct signal_struct *);
 
-extern void do_group_exit(int);
-
 extern int do_execve(struct filename *,
 		     const char __user * const __user *,
 		     const char __user * const __user *);
@@ -1618,10 +1531,6 @@ extern int do_execveat(int, struct filename *,
 		       const char __user * const __user *,
 		       const char __user * const __user *,
 		       int);
-extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
-extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
-struct task_struct *fork_idle(int);
-extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
 extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
 static inline void set_task_comm(struct task_struct *tsk, const char *from)
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 0023c91ff821..e93638a03515 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -1,6 +1,103 @@
 #ifndef _LINUX_SCHED_TASK_H
 #define _LINUX_SCHED_TASK_H
 
+/*
+ * Interface between the scheduler and various task lifetime (fork()/exit())
+ * functionality:
+ */
+
 #include <linux/sched.h>
 
+/*
+ * This serializes "schedule()" and also protects
+ * the run-queue from deletions/modifications (but
+ * _adding_ to the beginning of the run-queue has
+ * a separate lock).
+ */
+extern rwlock_t tasklist_lock;
+extern spinlock_t mmlist_lock;
+
+#ifdef CONFIG_PROVE_RCU
+extern int lockdep_tasklist_lock_is_held(void);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
+extern asmlinkage void schedule_tail(struct task_struct *prev);
+extern void init_idle(struct task_struct *idle, int cpu);
+extern void init_idle_bootup_task(struct task_struct *idle);
+
+static inline void rcu_copy_process(struct task_struct *p)
+{
+#ifdef CONFIG_PREEMPT_RCU
+	p->rcu_read_lock_nesting = 0;
+	p->rcu_read_unlock_special.s = 0;
+	p->rcu_blocked_node = NULL;
+	INIT_LIST_HEAD(&p->rcu_node_entry);
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
+#ifdef CONFIG_TASKS_RCU
+	p->rcu_tasks_holdout = false;
+	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
+	p->rcu_tasks_idle_cpu = -1;
+#endif /* #ifdef CONFIG_TASKS_RCU */
+}
+
+extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
+extern void sched_dead(struct task_struct *p);
+
+void __noreturn do_task_dead(void);
+
+extern void proc_caches_init(void);
+
+extern void release_task(struct task_struct * p);
+
+#ifdef CONFIG_HAVE_COPY_THREAD_TLS
+extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
+			struct task_struct *, unsigned long);
+#else
+extern int copy_thread(unsigned long, unsigned long, unsigned long,
+			struct task_struct *);
+
+/* Architectures that haven't opted into copy_thread_tls get the tls argument
+ * via pt_regs, so ignore the tls argument passed via C. */
+static inline int copy_thread_tls(
+		unsigned long clone_flags, unsigned long sp, unsigned long arg,
+		struct task_struct *p, unsigned long tls)
+{
+	return copy_thread(clone_flags, sp, arg, p);
+}
+#endif
+extern void flush_thread(void);
+
+#ifdef CONFIG_HAVE_EXIT_THREAD
+extern void exit_thread(struct task_struct *tsk);
+#else
+static inline void exit_thread(struct task_struct *tsk)
+{
+}
+#endif
+extern void do_group_exit(int);
+
+extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
+extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
+struct task_struct *fork_idle(int);
+extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+
+
+#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
+extern int arch_task_struct_size __read_mostly;
+#else
+# define arch_task_struct_size (sizeof(struct task_struct))
+#endif
+
+#ifdef CONFIG_VMAP_STACK
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+	return t->stack_vm_area;
+}
+#else
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+	return NULL;
+}
+#endif
+
 #endif /* _LINUX_SCHED_TASK_H */
-- 
cgit v1.2.3


From 6bfbaa51ed47774492d83d182a86068cc35aa4c6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 21:37:49 +0100
Subject: sched/headers, RCU: Move rcu_copy_process() from <linux/sched/task.h>
 to kernel/fork.c

Move rcu_copy_process() into kernel/fork.c, which is the only
user of this inline function.

This simplifies <linux/sched/task.h> to the level that <linux/sched.h>
does not have to be included in it anymore - which change is done
in a subsequent patch.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/task.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index e93638a03515..20ed9108f261 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -25,21 +25,6 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 extern void init_idle_bootup_task(struct task_struct *idle);
 
-static inline void rcu_copy_process(struct task_struct *p)
-{
-#ifdef CONFIG_PREEMPT_RCU
-	p->rcu_read_lock_nesting = 0;
-	p->rcu_read_unlock_special.s = 0;
-	p->rcu_blocked_node = NULL;
-	INIT_LIST_HEAD(&p->rcu_node_entry);
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-#ifdef CONFIG_TASKS_RCU
-	p->rcu_tasks_holdout = false;
-	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
-	p->rcu_tasks_idle_cpu = -1;
-#endif /* #ifdef CONFIG_TASKS_RCU */
-}
-
 extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
-- 
cgit v1.2.3


From c7af7877eeacfeaaf6a1b6f54c481292ef116837 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 22:01:58 +0100
Subject: sched/core: Move, sort and clean up <linux/sched.h> structure
 predeclarations

Most of the structure predeclarations were at the head of sched.h, but not
all of them - there were a number of lines spread around sched.h, in
random places.

Move them to the head, and also sort them alphabetically.

Remove unused entries.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 56 +++++++++++++++++++++------------------------------
 1 file changed, 23 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b1677c8db03f..5398356e33c7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -33,24 +33,35 @@
 
 #include <asm/current.h>
 
-struct sched_attr;
-struct sched_param;
-
-struct futex_pi_state;
-struct robust_list_head;
+/* task_struct member predeclarations: */
+struct audit_context;
+struct autogroup;
+struct backing_dev_info;
 struct bio_list;
-struct fs_struct;
-struct perf_event_context;
 struct blk_plug;
+struct cfs_rq;
 struct filename;
+struct fs_struct;
+struct futex_pi_state;
+struct io_context;
+struct mempolicy;
 struct nameidata;
-
-struct signal_struct;
-struct sighand_struct;
-
+struct nsproxy;
+struct perf_event_context;
+struct pid_namespace;
+struct pipe_inode_info;
+struct rcu_node;
+struct reclaim_state;
+struct robust_list_head;
+struct sched_attr;
+struct sched_param;
 struct seq_file;
-struct cfs_rq;
+struct sighand_struct;
+struct signal_struct;
+struct task_delay_info;
 struct task_group;
+struct task_struct;
+struct uts_namespace;
 
 /*
  * Task state bitmask. NOTE! These bits are also
@@ -165,8 +176,6 @@ struct task_group;
 /* Task command name length */
 #define TASK_COMM_LEN 16
 
-struct task_struct;
-
 extern void sched_init(void);
 extern void sched_init_smp(void);
 
@@ -193,8 +202,6 @@ extern void io_schedule_finish(int token);
 extern long io_schedule_timeout(long timeout);
 extern void io_schedule(void);
 
-struct nsproxy;
-
 /**
  * struct prev_cputime - snaphsot of system and user cputime
  * @utime: time spent in user mode
@@ -297,10 +304,6 @@ struct thread_group_cputimer {
 };
 
 #include <linux/rwsem.h>
-struct autogroup;
-
-struct backing_dev_info;
-struct reclaim_state;
 
 #ifdef CONFIG_SCHED_INFO
 struct sched_info {
@@ -314,8 +317,6 @@ struct sched_info {
 };
 #endif /* CONFIG_SCHED_INFO */
 
-struct task_delay_info;
-
 static inline int sched_info_on(void)
 {
 #ifdef CONFIG_SCHEDSTATS
@@ -342,20 +343,12 @@ void force_schedstat_enabled(void);
 # define SCHED_FIXEDPOINT_SHIFT	10
 # define SCHED_FIXEDPOINT_SCALE	(1L << SCHED_FIXEDPOINT_SHIFT)
 
-struct io_context;			/* See blkdev.h */
-
-
 #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
 extern void prefetch_stack(struct task_struct *t);
 #else
 static inline void prefetch_stack(struct task_struct *t) { }
 #endif
 
-struct audit_context;		/* See audit.c */
-struct mempolicy;
-struct pipe_inode_info;
-struct uts_namespace;
-
 struct load_weight {
 	unsigned long weight;
 	u32 inv_weight;
@@ -564,7 +557,6 @@ union rcu_special {
 	} b; /* Bits. */
 	u32 s; /* Set of bits. */
 };
-struct rcu_node;
 
 enum perf_event_task_context {
 	perf_invalid_context = -1,
@@ -1125,8 +1117,6 @@ static inline struct pid *task_session(struct task_struct *task)
 	return task->group_leader->pids[PIDTYPE_SID].pid;
 }
 
-struct pid_namespace;
-
 /*
  * the helpers to get the task's different pids as they are seen
  * from various namespaces
-- 
cgit v1.2.3


From d04b0ad37e4b6ac39a56c823ae76ab37cd044dc7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 22:07:57 +0100
Subject: sched/headers: Move the PREEMPT_COUNT defines from <linux/sched.h> to
 <linux/preempt.h>

These defines are not really part of the scheduler's driver API, but are
related to the preempt count - so move them to <linux/preempt.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 21 +++++++++++++++++++++
 include/linux/sched.h   | 21 ---------------------
 2 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 7eeceac52dea..cae461224948 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -55,6 +55,27 @@
 /* We use the MSB mostly because its available */
 #define PREEMPT_NEED_RESCHED	0x80000000
 
+#define PREEMPT_DISABLED	(PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
+
+/*
+ * Disable preemption until the scheduler is running -- use an unconditional
+ * value so that it also works on !PREEMPT_COUNT kernels.
+ *
+ * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count().
+ */
+#define INIT_PREEMPT_COUNT	PREEMPT_OFFSET
+
+/*
+ * Initial preempt_count value; reflects the preempt_count schedule invariant
+ * which states that during context switches:
+ *
+ *    preempt_count() == 2*PREEMPT_DISABLE_OFFSET
+ *
+ * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
+ * Note: See finish_task_switch().
+ */
+#define FORK_PREEMPT_COUNT	(2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
+
 /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */
 #include <asm/preempt.h>
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5398356e33c7..3b3e31da416e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -265,27 +265,6 @@ struct task_cputime_atomic {
 		.sum_exec_runtime = ATOMIC64_INIT(0),		\
 	}
 
-#define PREEMPT_DISABLED	(PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
-
-/*
- * Disable preemption until the scheduler is running -- use an unconditional
- * value so that it also works on !PREEMPT_COUNT kernels.
- *
- * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count().
- */
-#define INIT_PREEMPT_COUNT	PREEMPT_OFFSET
-
-/*
- * Initial preempt_count value; reflects the preempt_count schedule invariant
- * which states that during context switches:
- *
- *    preempt_count() == 2*PREEMPT_DISABLE_OFFSET
- *
- * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
- * Note: See finish_task_switch().
- */
-#define FORK_PREEMPT_COUNT	(2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
-
 /**
  * struct thread_group_cputimer - thread group interval timer counts
  * @cputime_atomic:	atomic thread group interval timers.
-- 
cgit v1.2.3


From f3ac60671954c8d413532627b1be13a76f394c49 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 22:59:33 +0100
Subject: sched/headers: Move task-stack related APIs from <linux/sched.h> to
 <linux/sched/task_stack.h>

Split out the task->stack related functionality, which is not really
part of the core scheduler APIs.

Only keep task_thread_info() because it's used by sched.h.

Update the code that uses those facilities.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h            | 115 +++------------------------------------
 include/linux/sched/task_stack.h | 104 +++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 106 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3b3e31da416e..af9590c8bfb0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1450,6 +1450,15 @@ union thread_union {
 	unsigned long stack[THREAD_SIZE/sizeof(long)];
 };
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+static inline struct thread_info *task_thread_info(struct task_struct *task)
+{
+	return &task->thread_info;
+}
+#elif !defined(__HAVE_THREAD_FUNCTIONS)
+# define task_thread_info(task)	((struct thread_info *)(task)->stack)
+#endif
+
 #ifndef __HAVE_ARCH_KSTACK_END
 static inline int kstack_end(void *addr)
 {
@@ -1540,112 +1549,6 @@ static inline void task_unlock(struct task_struct *p)
 	spin_unlock(&p->alloc_lock);
 }
 
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-
-static inline struct thread_info *task_thread_info(struct task_struct *task)
-{
-	return &task->thread_info;
-}
-
-/*
- * When accessing the stack of a non-current task that might exit, use
- * try_get_task_stack() instead.  task_stack_page will return a pointer
- * that could get freed out from under you.
- */
-static inline void *task_stack_page(const struct task_struct *task)
-{
-	return task->stack;
-}
-
-#define setup_thread_stack(new,old)	do { } while(0)
-
-static inline unsigned long *end_of_stack(const struct task_struct *task)
-{
-	return task->stack;
-}
-
-#elif !defined(__HAVE_THREAD_FUNCTIONS)
-
-#define task_thread_info(task)	((struct thread_info *)(task)->stack)
-#define task_stack_page(task)	((void *)(task)->stack)
-
-static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
-{
-	*task_thread_info(p) = *task_thread_info(org);
-	task_thread_info(p)->task = p;
-}
-
-/*
- * Return the address of the last usable long on the stack.
- *
- * When the stack grows down, this is just above the thread
- * info struct. Going any lower will corrupt the threadinfo.
- *
- * When the stack grows up, this is the highest address.
- * Beyond that position, we corrupt data on the next page.
- */
-static inline unsigned long *end_of_stack(struct task_struct *p)
-{
-#ifdef CONFIG_STACK_GROWSUP
-	return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1;
-#else
-	return (unsigned long *)(task_thread_info(p) + 1);
-#endif
-}
-
-#endif
-
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-static inline void *try_get_task_stack(struct task_struct *tsk)
-{
-	return atomic_inc_not_zero(&tsk->stack_refcount) ?
-		task_stack_page(tsk) : NULL;
-}
-
-extern void put_task_stack(struct task_struct *tsk);
-#else
-static inline void *try_get_task_stack(struct task_struct *tsk)
-{
-	return task_stack_page(tsk);
-}
-
-static inline void put_task_stack(struct task_struct *tsk) {}
-#endif
-
-#define task_stack_end_corrupted(task) \
-		(*(end_of_stack(task)) != STACK_END_MAGIC)
-
-static inline int object_is_on_stack(void *obj)
-{
-	void *stack = task_stack_page(current);
-
-	return (obj >= stack) && (obj < (stack + THREAD_SIZE));
-}
-
-extern void thread_stack_cache_init(void);
-
-#ifdef CONFIG_DEBUG_STACK_USAGE
-static inline unsigned long stack_not_used(struct task_struct *p)
-{
-	unsigned long *n = end_of_stack(p);
-
-	do { 	/* Skip over canary */
-# ifdef CONFIG_STACK_GROWSUP
-		n--;
-# else
-		n++;
-# endif
-	} while (!*n);
-
-# ifdef CONFIG_STACK_GROWSUP
-	return (unsigned long)end_of_stack(p) - (unsigned long)n;
-# else
-	return (unsigned long)n - (unsigned long)end_of_stack(p);
-# endif
-}
-#endif
-extern void set_task_stack_end_magic(struct task_struct *tsk);
-
 /* set thread flags in other task's structures
  * - see asm/thread_info.h for TIF_xxxx flags available
  */
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index d2d578e85f7d..aaa5c2a6a0e9 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -1,7 +1,111 @@
 #ifndef _LINUX_SCHED_TASK_STACK_H
 #define _LINUX_SCHED_TASK_STACK_H
 
+/*
+ * task->stack (kernel stack) handling interfaces:
+ */
+
 #include <linux/sched.h>
 #include <linux/magic.h>
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+
+/*
+ * When accessing the stack of a non-current task that might exit, use
+ * try_get_task_stack() instead.  task_stack_page will return a pointer
+ * that could get freed out from under you.
+ */
+static inline void *task_stack_page(const struct task_struct *task)
+{
+	return task->stack;
+}
+
+#define setup_thread_stack(new,old)	do { } while(0)
+
+static inline unsigned long *end_of_stack(const struct task_struct *task)
+{
+	return task->stack;
+}
+
+#elif !defined(__HAVE_THREAD_FUNCTIONS)
+
+#define task_stack_page(task)	((void *)(task)->stack)
+
+static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
+{
+	*task_thread_info(p) = *task_thread_info(org);
+	task_thread_info(p)->task = p;
+}
+
+/*
+ * Return the address of the last usable long on the stack.
+ *
+ * When the stack grows down, this is just above the thread
+ * info struct. Going any lower will corrupt the threadinfo.
+ *
+ * When the stack grows up, this is the highest address.
+ * Beyond that position, we corrupt data on the next page.
+ */
+static inline unsigned long *end_of_stack(struct task_struct *p)
+{
+#ifdef CONFIG_STACK_GROWSUP
+	return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1;
+#else
+	return (unsigned long *)(task_thread_info(p) + 1);
+#endif
+}
+
+#endif
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+	return atomic_inc_not_zero(&tsk->stack_refcount) ?
+		task_stack_page(tsk) : NULL;
+}
+
+extern void put_task_stack(struct task_struct *tsk);
+#else
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+	return task_stack_page(tsk);
+}
+
+static inline void put_task_stack(struct task_struct *tsk) {}
+#endif
+
+#define task_stack_end_corrupted(task) \
+		(*(end_of_stack(task)) != STACK_END_MAGIC)
+
+static inline int object_is_on_stack(void *obj)
+{
+	void *stack = task_stack_page(current);
+
+	return (obj >= stack) && (obj < (stack + THREAD_SIZE));
+}
+
+extern void thread_stack_cache_init(void);
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long stack_not_used(struct task_struct *p)
+{
+	unsigned long *n = end_of_stack(p);
+
+	do { 	/* Skip over canary */
+# ifdef CONFIG_STACK_GROWSUP
+		n--;
+# else
+		n++;
+# endif
+	} while (!*n);
+
+# ifdef CONFIG_STACK_GROWSUP
+	return (unsigned long)end_of_stack(p) - (unsigned long)n;
+# else
+	return (unsigned long)n - (unsigned long)end_of_stack(p);
+# endif
+}
+#endif
+extern void set_task_stack_end_magic(struct task_struct *tsk);
+
 #endif /* _LINUX_SCHED_TASK_STACK_H */
-- 
cgit v1.2.3


From 70806b21e4d64f01193a2b64a053b75ff53a2b10 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 23:15:21 +0100
Subject: sched/headers: Move the 'root_task_group' declaration to
 <linux/sched/autogroup.h>

Also remove the duplicate declaration from <linux/init_task.h>.

( That declaration was originally duplicated for dependency hell reasons,
  but there's no problem including the much smaller <linux/sched/autogroup.h>
  header now, to pick up the right prototype. )

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h       | 2 --
 include/linux/sched.h           | 4 ----
 include/linux/sched/autogroup.h | 5 +++++
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f6841c19c913..91d9049f0039 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -151,8 +151,6 @@ extern struct group_info init_groups;
 
 extern struct cred init_cred;
 
-extern struct task_group root_task_group;
-
 #ifdef CONFIG_CGROUP_SCHED
 # define INIT_CGROUP_SCHED(tsk)						\
 	.sched_task_group = &root_task_group,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index af9590c8bfb0..4934733bd6cb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1707,10 +1707,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
-#ifdef CONFIG_CGROUP_SCHED
-extern struct task_group root_task_group;
-#endif /* CONFIG_CGROUP_SCHED */
-
 extern int task_can_switch_user(struct user_struct *up,
 					struct task_struct *tsk);
 
diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h
index 586bdf37330d..fd6855548d0c 100644
--- a/include/linux/sched/autogroup.h
+++ b/include/linux/sched/autogroup.h
@@ -4,6 +4,7 @@
 #include <linux/sched.h>
 
 struct signal_struct;
+struct task_group;
 struct seq_file;
 
 #ifdef CONFIG_SCHED_AUTOGROUP
@@ -24,4 +25,8 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit_task(struct task_struct *p) { }
 #endif
 
+#ifdef CONFIG_CGROUP_SCHED
+extern struct task_group root_task_group;
+#endif /* CONFIG_CGROUP_SCHED */
+
 #endif /* _LINUX_SCHED_AUTOGROUP_H */
-- 
cgit v1.2.3


From 76960dbf9b235b957d9d730be2c6c2a70f709026 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 23:43:50 +0100
Subject: signals: Move signal data types from <linux/signal.h> to
 <linux/signal_types.h>

Separate out just the pure data types - sched.h will be able to use
this reduced size header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/signal.h       | 54 ----------------------------------------
 include/linux/signal_types.h | 59 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index d1c2b05a7a55..94ad6eea9550 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -8,24 +8,6 @@ struct task_struct;
 
 /* for sysctl */
 extern int print_fatal_signals;
-/*
- * Real Time signals may be queued.
- */
-
-struct sigqueue {
-	struct list_head list;
-	int flags;
-	siginfo_t info;
-	struct user_struct *user;
-};
-
-/* flags values. */
-#define SIGQUEUE_PREALLOC	1
-
-struct sigpending {
-	struct list_head list;
-	sigset_t signal;
-};
 
 #ifndef HAVE_ARCH_COPY_SIGINFO
 
@@ -271,42 +253,6 @@ extern void set_current_blocked(sigset_t *);
 extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 
-struct sigaction {
-#ifndef __ARCH_HAS_IRIX_SIGACTION
-	__sighandler_t	sa_handler;
-	unsigned long	sa_flags;
-#else
-	unsigned int	sa_flags;
-	__sighandler_t	sa_handler;
-#endif
-#ifdef __ARCH_HAS_SA_RESTORER
-	__sigrestore_t sa_restorer;
-#endif
-	sigset_t	sa_mask;	/* mask last for extensibility */
-};
-
-struct k_sigaction {
-	struct sigaction sa;
-#ifdef __ARCH_HAS_KA_RESTORER
-	__sigrestore_t ka_restorer;
-#endif
-};
- 
-#ifdef CONFIG_OLD_SIGACTION
-struct old_sigaction {
-	__sighandler_t sa_handler;
-	old_sigset_t sa_mask;
-	unsigned long sa_flags;
-	__sigrestore_t sa_restorer;
-};
-#endif
-
-struct ksignal {
-	struct k_sigaction ka;
-	siginfo_t info;
-	int sig;
-};
-
 extern int get_signal(struct ksignal *ksig);
 extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
 extern void exit_signals(struct task_struct *tsk);
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index 28799c88f490..16d862a3d8f3 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -1,7 +1,66 @@
 #ifndef _LINUX_SIGNAL_TYPES_H
 #define _LINUX_SIGNAL_TYPES_H
 
+/*
+ * Basic signal handling related data type definitions:
+ */
+
 #include <linux/list.h>
 #include <uapi/linux/signal.h>
 
+/*
+ * Real Time signals may be queued.
+ */
+
+struct sigqueue {
+	struct list_head list;
+	int flags;
+	siginfo_t info;
+	struct user_struct *user;
+};
+
+/* flags values. */
+#define SIGQUEUE_PREALLOC	1
+
+struct sigpending {
+	struct list_head list;
+	sigset_t signal;
+};
+
+struct sigaction {
+#ifndef __ARCH_HAS_IRIX_SIGACTION
+	__sighandler_t	sa_handler;
+	unsigned long	sa_flags;
+#else
+	unsigned int	sa_flags;
+	__sighandler_t	sa_handler;
+#endif
+#ifdef __ARCH_HAS_SA_RESTORER
+	__sigrestore_t sa_restorer;
+#endif
+	sigset_t	sa_mask;	/* mask last for extensibility */
+};
+
+struct k_sigaction {
+	struct sigaction sa;
+#ifdef __ARCH_HAS_KA_RESTORER
+	__sigrestore_t ka_restorer;
+#endif
+};
+
+#ifdef CONFIG_OLD_SIGACTION
+struct old_sigaction {
+	__sighandler_t sa_handler;
+	old_sigset_t sa_mask;
+	unsigned long sa_flags;
+	__sigrestore_t sa_restorer;
+};
+#endif
+
+struct ksignal {
+	struct k_sigaction ka;
+	siginfo_t info;
+	int sig;
+};
+
 #endif /* _LINUX_SIGNAL_TYPES_H */
-- 
cgit v1.2.3


From 9e7d2e44dd88ba7e29c165b6fca428e384afa5a8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 4 Feb 2017 00:12:19 +0100
Subject: mm/headers, sched/headers: Move task related MM types from
 <linux/mm_types.> to <linux/mm_types_task.h>

Separate all the MM types that are embedded directly in 'struct task_struct'
into the <linux/mm_types_task.h> header.

The goal is to include this header in <linux/sched.h>, not the full <linux/mm_types.h>
header, to reduce the size, complexity and coupling of <linux/sched.h>.

(This patch does not change <linux/sched.h> yet.)

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm_types.h      | 49 --------------------------------------
 include/linux/mm_types_task.h | 55 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 28bc710d3467..f60f45fe226f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -24,11 +24,6 @@
 struct address_space;
 struct mem_cgroup;
 
-#define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
-#define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
-		IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
-#define ALLOC_SPLIT_PTLOCKS	(SPINLOCK_SIZE > BITS_PER_LONG/8)
-
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -231,17 +226,6 @@ struct page {
 #endif
 ;
 
-struct page_frag {
-	struct page *page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
-	__u32 offset;
-	__u32 size;
-#else
-	__u16 offset;
-	__u16 size;
-#endif
-};
-
 #define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 
@@ -360,18 +344,6 @@ struct vm_area_struct {
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
 };
 
-/*
- * The per task VMA cache array:
- */
-#define VMACACHE_BITS 2
-#define VMACACHE_SIZE (1U << VMACACHE_BITS)
-#define VMACACHE_MASK (VMACACHE_SIZE - 1)
-
-struct vmacache {
-	u32 seqnum;
-	struct vm_area_struct *vmas[VMACACHE_SIZE];
-};
-
 struct core_thread {
 	struct task_struct *task;
 	struct core_thread *next;
@@ -383,27 +355,6 @@ struct core_state {
 	struct completion startup;
 };
 
-enum {
-	MM_FILEPAGES,	/* Resident file mapping pages */
-	MM_ANONPAGES,	/* Resident anonymous pages */
-	MM_SWAPENTS,	/* Anonymous swap entries */
-	MM_SHMEMPAGES,	/* Resident shared memory pages */
-	NR_MM_COUNTERS
-};
-
-#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU)
-#define SPLIT_RSS_COUNTING
-/* per-thread cached information, */
-struct task_rss_stat {
-	int events;	/* for synchronization threshold */
-	int count[NR_MM_COUNTERS];
-};
-#endif /* USE_SPLIT_PTE_PTLOCKS */
-
-struct mm_rss_stat {
-	atomic_long_t count[NR_MM_COUNTERS];
-};
-
 struct kioctx_table;
 struct mm_struct {
 	struct vm_area_struct *mmap;		/* list of VMAs */
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 2419d302f03c..9526d8b9fe0e 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -1,10 +1,65 @@
 #ifndef _LINUX_MM_TYPES_TASK_H
 #define _LINUX_MM_TYPES_TASK_H
 
+/*
+ * Here are the definitions of the MM data types that are embedded in 'struct task_struct'.
+ *
+ * (These are defined separately to decouple sched.h from mm_types.h as much as possible.)
+ */
+
 #include <linux/types.h>
 #include <linux/threads.h>
 #include <linux/atomic.h>
 
 #include <asm/page.h>
 
+#define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
+#define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
+		IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
+#define ALLOC_SPLIT_PTLOCKS	(SPINLOCK_SIZE > BITS_PER_LONG/8)
+
+/*
+ * The per task VMA cache array:
+ */
+#define VMACACHE_BITS 2
+#define VMACACHE_SIZE (1U << VMACACHE_BITS)
+#define VMACACHE_MASK (VMACACHE_SIZE - 1)
+
+struct vmacache {
+	u32 seqnum;
+	struct vm_area_struct *vmas[VMACACHE_SIZE];
+};
+
+enum {
+	MM_FILEPAGES,	/* Resident file mapping pages */
+	MM_ANONPAGES,	/* Resident anonymous pages */
+	MM_SWAPENTS,	/* Anonymous swap entries */
+	MM_SHMEMPAGES,	/* Resident shared memory pages */
+	NR_MM_COUNTERS
+};
+
+#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU)
+#define SPLIT_RSS_COUNTING
+/* per-thread cached information, */
+struct task_rss_stat {
+	int events;	/* for synchronization threshold */
+	int count[NR_MM_COUNTERS];
+};
+#endif /* USE_SPLIT_PTE_PTLOCKS */
+
+struct mm_rss_stat {
+	atomic_long_t count[NR_MM_COUNTERS];
+};
+
+struct page_frag {
+	struct page *page;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+	__u32 offset;
+	__u32 size;
+#else
+	__u16 offset;
+	__u16 size;
+#endif
+};
+
 #endif /* _LINUX_MM_TYPES_TASK_H */
-- 
cgit v1.2.3


From 77ba809e8b39b4e384df0433e2bd3dd0907dad29 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 4 Feb 2017 00:16:44 +0100
Subject: sched/headers: Remove the <linux/mm_types.h> dependency from
 <linux/sched.h>

Use the freshly introduced, reduced size <linux/mm_types_task.h> header instead.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4934733bd6cb..3eb284741790 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -8,7 +8,7 @@
 #include <linux/capability.h>
 #include <linux/mutex.h>
 #include <linux/plist.h>
-#include <linux/mm_types.h>
+#include <linux/mm_types_task.h>
 #include <asm/ptrace.h>
 
 #include <linux/sem.h>
-- 
cgit v1.2.3


From cdc75e9f7b14f29efcf4b162a3c673733e96db79 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 4 Feb 2017 01:20:53 +0100
Subject: sched/headers: Move 'init_task' and 'init_thread_union' from
 <linux/sched.h> to <linux/sched/task.h>

'init_task' is really not part of core scheduler APIs but part of
the fork() interface between the scheduler and process management.

So move the declarations.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 3 ---
 include/linux/sched/task.h | 6 ++++++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3eb284741790..4ab105a2a8f9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1469,9 +1469,6 @@ static inline int kstack_end(void *addr)
 }
 #endif
 
-extern union thread_union init_thread_union;
-extern struct task_struct init_task;
-
 extern struct pid_namespace init_pid_ns;
 
 /*
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 20ed9108f261..1be049a18d1b 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -8,6 +8,9 @@
 
 #include <linux/sched.h>
 
+struct task_struct;
+union thread_union;
+
 /*
  * This serializes "schedule()" and also protects
  * the run-queue from deletions/modifications (but
@@ -17,6 +20,9 @@
 extern rwlock_t tasklist_lock;
 extern spinlock_t mmlist_lock;
 
+extern union thread_union init_thread_union;
+extern struct task_struct init_task;
+
 #ifdef CONFIG_PROVE_RCU
 extern int lockdep_tasklist_lock_is_held(void);
 #endif /* #ifdef CONFIG_PROVE_RCU */
-- 
cgit v1.2.3


From 56cd697366b6d6f67acb6c58ac7f3b185d11ef07 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 6 Feb 2017 10:57:33 +0100
Subject: sched/headers: Move the task_lock()/unlock() APIs to
 <linux/sched/task.h>

The task_lock()/task_unlock() APIs are not realated to core scheduling,
they are task lifetime APIs, i.e. they belong into <linux/sched/task.h>.

Move them.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 20 --------------------
 include/linux/sched/task.h | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ab105a2a8f9..d481c129a822 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1526,26 +1526,6 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 }
 #endif
 
-/*
- * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
- * subscriptions and synchronises with wait4().  Also used in procfs.  Also
- * pins the final release of task.io_context.  Also protects ->cpuset and
- * ->cgroup.subsys[]. And ->vfork_done.
- *
- * Nests both inside and outside of read_lock(&tasklist_lock).
- * It must not be nested with write_lock_irq(&tasklist_lock),
- * neither inside nor outside.
- */
-static inline void task_lock(struct task_struct *p)
-{
-	spin_lock(&p->alloc_lock);
-}
-
-static inline void task_unlock(struct task_struct *p)
-{
-	spin_unlock(&p->alloc_lock);
-}
-
 /* set thread flags in other task's structures
  * - see asm/thread_info.h for TIF_xxxx flags available
  */
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 1be049a18d1b..2be9fde588a7 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -91,4 +91,24 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
 }
 #endif
 
+/*
+ * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
+ * subscriptions and synchronises with wait4().  Also used in procfs.  Also
+ * pins the final release of task.io_context.  Also protects ->cpuset and
+ * ->cgroup.subsys[]. And ->vfork_done.
+ *
+ * Nests both inside and outside of read_lock(&tasklist_lock).
+ * It must not be nested with write_lock_irq(&tasklist_lock),
+ * neither inside nor outside.
+ */
+static inline void task_lock(struct task_struct *p)
+{
+	spin_lock(&p->alloc_lock);
+}
+
+static inline void task_unlock(struct task_struct *p)
+{
+	spin_unlock(&p->alloc_lock);
+}
+
 #endif /* _LINUX_SCHED_TASK_H */
-- 
cgit v1.2.3


From 1050b27c52f615bc0e772b3119881e5304ccde6b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 11:48:36 +0100
Subject: sched/headers: Move cputime functionality from <linux/sched.h> and
 <linux/cputime.h> into <linux/sched/cputime.h>

Move cputime related functionality out of <linux/sched.h>, as most code
that includes <linux/sched.h> does not use that functionality.

Move data types that are not included in task_struct directly to
the signal definitions, into <linux/sched/signal.h>.

Also merge the (small) existing <linux/cputime.h> header into <linux/sched/cputime.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cputime.h       |  13 ---
 include/linux/sched.h         |  89 ---------------------
 include/linux/sched/cputime.h | 182 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/sched/signal.h  |  33 ++++++++
 4 files changed, 214 insertions(+), 103 deletions(-)
 delete mode 100644 include/linux/cputime.h

(limited to 'include/linux')

diff --git a/include/linux/cputime.h b/include/linux/cputime.h
deleted file mode 100644
index a691dc4ddc13..000000000000
--- a/include/linux/cputime.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __LINUX_CPUTIME_H
-#define __LINUX_CPUTIME_H
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-#include <asm/cputime.h>
-
-#ifndef cputime_to_nsecs
-# define cputime_to_nsecs(__ct)	\
-	(cputime_to_usecs(__ct) * NSEC_PER_USEC)
-#endif
-
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-#endif /* __LINUX_CPUTIME_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d481c129a822..2b43f55e4956 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -219,14 +219,6 @@ struct prev_cputime {
 #endif
 };
 
-static inline void prev_cputime_init(struct prev_cputime *prev)
-{
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-	prev->utime = prev->stime = 0;
-	raw_spin_lock_init(&prev->lock);
-#endif
-}
-
 /**
  * struct task_cputime - collected CPU time counts
  * @utime:		time spent in user mode, in nanoseconds
@@ -248,40 +240,6 @@ struct task_cputime {
 #define prof_exp	stime
 #define sched_exp	sum_exec_runtime
 
-/*
- * This is the atomic variant of task_cputime, which can be used for
- * storing and updating task_cputime statistics without locking.
- */
-struct task_cputime_atomic {
-	atomic64_t utime;
-	atomic64_t stime;
-	atomic64_t sum_exec_runtime;
-};
-
-#define INIT_CPUTIME_ATOMIC \
-	(struct task_cputime_atomic) {				\
-		.utime = ATOMIC64_INIT(0),			\
-		.stime = ATOMIC64_INIT(0),			\
-		.sum_exec_runtime = ATOMIC64_INIT(0),		\
-	}
-
-/**
- * struct thread_group_cputimer - thread group interval timer counts
- * @cputime_atomic:	atomic thread group interval timers.
- * @running:		true when there are timers running and
- *			@cputime_atomic receives updates.
- * @checking_timer:	true when a thread in the group is in the
- *			process of checking for thread group timers.
- *
- * This structure contains the version of task_cputime, above, that is
- * used for thread group CPU timer calculations.
- */
-struct thread_group_cputimer {
-	struct task_cputime_atomic cputime_atomic;
-	bool running;
-	bool checking_timer;
-};
-
 #include <linux/rwsem.h>
 
 #ifdef CONFIG_SCHED_INFO
@@ -1234,44 +1192,6 @@ static inline void put_task_struct(struct task_struct *t)
 struct task_struct *task_rcu_dereference(struct task_struct **ptask);
 struct task_struct *try_get_task_struct(struct task_struct **ptask);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void task_cputime(struct task_struct *t,
-			 u64 *utime, u64 *stime);
-extern u64 task_gtime(struct task_struct *t);
-#else
-static inline void task_cputime(struct task_struct *t,
-				u64 *utime, u64 *stime)
-{
-	*utime = t->utime;
-	*stime = t->stime;
-}
-
-static inline u64 task_gtime(struct task_struct *t)
-{
-	return t->gtime;
-}
-#endif
-
-#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
-static inline void task_cputime_scaled(struct task_struct *t,
-				       u64 *utimescaled,
-				       u64 *stimescaled)
-{
-	*utimescaled = t->utimescaled;
-	*stimescaled = t->stimescaled;
-}
-#else
-static inline void task_cputime_scaled(struct task_struct *t,
-				       u64 *utimescaled,
-				       u64 *stimescaled)
-{
-	task_cputime(t, utimescaled, stimescaled);
-}
-#endif
-
-extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
-extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
-
 /*
  * Per process flags
  */
@@ -1395,9 +1315,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
 #define cpu_relax_yield() cpu_relax()
 #endif
 
-extern unsigned long long
-task_sched_runtime(struct task_struct *task);
-
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
 extern void sched_exec(void);
@@ -1629,12 +1546,6 @@ static __always_inline bool need_resched(void)
 	return unlikely(tif_need_resched());
 }
 
-/*
- * Thread group CPU time accounting.
- */
-void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
-
 /*
  * Wrappers for p->thread_info->cpu access. No-op on UP.
  */
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 6ed4fe43de28..4c5b9735c1ae 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -2,6 +2,186 @@
 #define _LINUX_SCHED_CPUTIME_H
 
 #include <linux/sched/signal.h>
-#include <linux/cputime.h>
+
+/*
+ * cputime accounting APIs:
+ */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#include <asm/cputime.h>
+
+#ifndef cputime_to_nsecs
+# define cputime_to_nsecs(__ct)	\
+	(cputime_to_usecs(__ct) * NSEC_PER_USEC)
+#endif
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void task_cputime(struct task_struct *t,
+			 u64 *utime, u64 *stime);
+extern u64 task_gtime(struct task_struct *t);
+#else
+static inline void task_cputime(struct task_struct *t,
+				u64 *utime, u64 *stime)
+{
+	*utime = t->utime;
+	*stime = t->stime;
+}
+
+static inline u64 task_gtime(struct task_struct *t)
+{
+	return t->gtime;
+}
+#endif
+
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+static inline void task_cputime_scaled(struct task_struct *t,
+				       u64 *utimescaled,
+				       u64 *stimescaled)
+{
+	*utimescaled = t->utimescaled;
+	*stimescaled = t->stimescaled;
+}
+#else
+static inline void task_cputime_scaled(struct task_struct *t,
+				       u64 *utimescaled,
+				       u64 *stimescaled)
+{
+	task_cputime(t, utimescaled, stimescaled);
+}
+#endif
+
+extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
+extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
+
+
+/*
+ * Thread group CPU time accounting.
+ */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
+
+
+/*
+ * The following are functions that support scheduler-internal time accounting.
+ * These functions are generally called at the timer tick.  None of this depends
+ * on CONFIG_SCHEDSTATS.
+ */
+
+/**
+ * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running
+ *
+ * @tsk:	Pointer to target task.
+ */
+#ifdef CONFIG_POSIX_TIMERS
+static inline
+struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk)
+{
+	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+
+	/* Check if cputimer isn't running. This is accessed without locking. */
+	if (!READ_ONCE(cputimer->running))
+		return NULL;
+
+	/*
+	 * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime
+	 * in __exit_signal(), we won't account to the signal struct further
+	 * cputime consumed by that task, even though the task can still be
+	 * ticking after __exit_signal().
+	 *
+	 * In order to keep a consistent behaviour between thread group cputime
+	 * and thread group cputimer accounting, lets also ignore the cputime
+	 * elapsing after __exit_signal() in any thread group timer running.
+	 *
+	 * This makes sure that POSIX CPU clocks and timers are synchronized, so
+	 * that a POSIX CPU timer won't expire while the corresponding POSIX CPU
+	 * clock delta is behind the expiring timer value.
+	 */
+	if (unlikely(!tsk->sighand))
+		return NULL;
+
+	return cputimer;
+}
+#else
+static inline
+struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk)
+{
+	return NULL;
+}
+#endif
+
+/**
+ * account_group_user_time - Maintain utime for a thread group.
+ *
+ * @tsk:	Pointer to task structure.
+ * @cputime:	Time value by which to increment the utime field of the
+ *		thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void account_group_user_time(struct task_struct *tsk,
+					   u64 cputime)
+{
+	struct thread_group_cputimer *cputimer = get_running_cputimer(tsk);
+
+	if (!cputimer)
+		return;
+
+	atomic64_add(cputime, &cputimer->cputime_atomic.utime);
+}
+
+/**
+ * account_group_system_time - Maintain stime for a thread group.
+ *
+ * @tsk:	Pointer to task structure.
+ * @cputime:	Time value by which to increment the stime field of the
+ *		thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void account_group_system_time(struct task_struct *tsk,
+					     u64 cputime)
+{
+	struct thread_group_cputimer *cputimer = get_running_cputimer(tsk);
+
+	if (!cputimer)
+		return;
+
+	atomic64_add(cputime, &cputimer->cputime_atomic.stime);
+}
+
+/**
+ * account_group_exec_runtime - Maintain exec runtime for a thread group.
+ *
+ * @tsk:	Pointer to task structure.
+ * @ns:		Time value by which to increment the sum_exec_runtime field
+ *		of the thread_group_cputime structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					      unsigned long long ns)
+{
+	struct thread_group_cputimer *cputimer = get_running_cputimer(tsk);
+
+	if (!cputimer)
+		return;
+
+	atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime);
+}
+
+static inline void prev_cputime_init(struct prev_cputime *prev)
+{
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	prev->utime = prev->stime = 0;
+	raw_spin_lock_init(&prev->lock);
+#endif
+}
+
+extern unsigned long long
+task_sched_runtime(struct task_struct *task);
 
 #endif /* _LINUX_SCHED_CPUTIME_H */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b3545fb4333a..2cf446704cd4 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -35,6 +35,39 @@ struct cpu_itimer {
 	u64 incr;
 };
 
+/*
+ * This is the atomic variant of task_cputime, which can be used for
+ * storing and updating task_cputime statistics without locking.
+ */
+struct task_cputime_atomic {
+	atomic64_t utime;
+	atomic64_t stime;
+	atomic64_t sum_exec_runtime;
+};
+
+#define INIT_CPUTIME_ATOMIC \
+	(struct task_cputime_atomic) {				\
+		.utime = ATOMIC64_INIT(0),			\
+		.stime = ATOMIC64_INIT(0),			\
+		.sum_exec_runtime = ATOMIC64_INIT(0),		\
+	}
+/**
+ * struct thread_group_cputimer - thread group interval timer counts
+ * @cputime_atomic:	atomic thread group interval timers.
+ * @running:		true when there are timers running and
+ *			@cputime_atomic receives updates.
+ * @checking_timer:	true when a thread in the group is in the
+ *			process of checking for thread group timers.
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU timer calculations.
+ */
+struct thread_group_cputimer {
+	struct task_cputime_atomic cputime_atomic;
+	bool running;
+	bool checking_timer;
+};
+
 /*
  * NOTE! "signal_struct" does not have its own
  * locking, because a shared signal_struct always
-- 
cgit v1.2.3


From a2d7a7463c38dd14b845721aac3583a26a97c66d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 12:07:04 +0100
Subject: sched/headers: Move sched_info_on() and force_schedstat_enabled()
 from <linux/sched.h> to <linux/sched/stat.h>

These APIs are not core scheduler but scheduler statistics related.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 16 ----------------
 include/linux/sched/stat.h | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b43f55e4956..707eee099332 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -254,22 +254,6 @@ struct sched_info {
 };
 #endif /* CONFIG_SCHED_INFO */
 
-static inline int sched_info_on(void)
-{
-#ifdef CONFIG_SCHEDSTATS
-	return 1;
-#elif defined(CONFIG_TASK_DELAY_ACCT)
-	extern int delayacct_on;
-	return delayacct_on;
-#else
-	return 0;
-#endif
-}
-
-#ifdef CONFIG_SCHEDSTATS
-void force_schedstat_enabled(void);
-#endif
-
 /*
  * Integer metrics need fixed point arithmetic, e.g., sched/fair
  * has a few: load, load_avg, util_avg, freq, and capacity.
diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 0d52ceb6d3ae..d8cedf27083e 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -21,4 +21,20 @@ extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 
+static inline int sched_info_on(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+	return 1;
+#elif defined(CONFIG_TASK_DELAY_ACCT)
+	extern int delayacct_on;
+	return delayacct_on;
+#else
+	return 0;
+#endif
+}
+
+#ifdef CONFIG_SCHEDSTATS
+void force_schedstat_enabled(void);
+#endif
+
 #endif /* _LINUX_SCHED_STAT_H */
-- 
cgit v1.2.3


From 28851755990c832d7050d5e1e2c91ed9d9e6fe59 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 14:24:31 +0100
Subject: sched/headers, vfs/execve: Move the do_execve*() prototypes from
 <linux/sched.h> to <linux/binfmts.h>

These are not scheduler related.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/binfmts.h | 10 ++++++++++
 include/linux/sched.h   |  8 --------
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 1303b570b18c..05488da3aee9 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -6,6 +6,8 @@
 #include <asm/exec.h>
 #include <uapi/linux/binfmts.h>
 
+struct filename;
+
 #define CORENAME_MAX_SIZE 128
 
 /*
@@ -123,4 +125,12 @@ extern void install_exec_creds(struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
 
+extern int do_execve(struct filename *,
+		     const char __user * const __user *,
+		     const char __user * const __user *);
+extern int do_execveat(int, struct filename *,
+		       const char __user * const __user *,
+		       const char __user * const __user *,
+		       int);
+
 #endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 707eee099332..5f2267e41fde 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,14 +1400,6 @@ extern void exit_files(struct task_struct *);
 
 extern void exit_itimers(struct signal_struct *);
 
-extern int do_execve(struct filename *,
-		     const char __user * const __user *,
-		     const char __user * const __user *);
-extern int do_execveat(int, struct filename *,
-		       const char __user * const __user *,
-		       const char __user * const __user *,
-		       int);
-
 extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
 static inline void set_task_comm(struct task_struct *tsk, const char *from)
 {
-- 
cgit v1.2.3


From 9049863a32fad5d7158318e45f0a41a0f2192c0c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 14:31:22 +0100
Subject: sched/headers: Move kstack_end() from <linux/sched.h> to
 <linux/sched/task_stack.h>

This is a task-stack related API, not core scheduler functionality.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h            | 10 ----------
 include/linux/sched/task_stack.h | 10 ++++++++++
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5f2267e41fde..309eb76b7eab 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1360,16 +1360,6 @@ static inline struct thread_info *task_thread_info(struct task_struct *task)
 # define task_thread_info(task)	((struct thread_info *)(task)->stack)
 #endif
 
-#ifndef __HAVE_ARCH_KSTACK_END
-static inline int kstack_end(void *addr)
-{
-	/* Reliable end of stack detection:
-	 * Some APM bios versions misalign the stack
-	 */
-	return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
-}
-#endif
-
 extern struct pid_namespace init_pid_ns;
 
 /*
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index aaa5c2a6a0e9..df6ea6665b31 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -108,4 +108,14 @@ static inline unsigned long stack_not_used(struct task_struct *p)
 #endif
 extern void set_task_stack_end_magic(struct task_struct *tsk);
 
+#ifndef __HAVE_ARCH_KSTACK_END
+static inline int kstack_end(void *addr)
+{
+	/* Reliable end of stack detection:
+	 * Some APM bios versions misalign the stack
+	 */
+	return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
+}
+#endif
+
 #endif /* _LINUX_SCHED_TASK_STACK_H */
-- 
cgit v1.2.3


From 42011db0ed5a9c92b1281e1300eb3d026f3764a8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 14:35:41 +0100
Subject: sched/headers: Move exit_files() and exit_itimers() from
 <linux/sched.h> to <linux/sched/task.h>

These two functions are task management related, not core scheduler APIs.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 4 ----
 include/linux/sched/task.h | 3 +++
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 309eb76b7eab..4a03c49e3bcc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1386,10 +1386,6 @@ extern void wake_up_new_task(struct task_struct *tsk);
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
 
-extern void exit_files(struct task_struct *);
-
-extern void exit_itimers(struct signal_struct *);
-
 extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
 static inline void set_task_comm(struct task_struct *tsk, const char *from)
 {
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 2be9fde588a7..a33a8121da9a 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -67,6 +67,9 @@ static inline void exit_thread(struct task_struct *tsk)
 #endif
 extern void do_group_exit(int);
 
+extern void exit_files(struct task_struct *);
+extern void exit_itimers(struct signal_struct *);
+
 extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
-- 
cgit v1.2.3


From c5a21921d55a2aee9d1e031a78cef6cda3eab78b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 14:47:12 +0100
Subject: sched/headers: Move _init() prototypes from <linux/sched.h> to
 <linux/sched/init.h>

trap_init() and cpu_init() belong into <linux/cpu.h>, sched_init*() into <linux/sched/init.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 5 -----
 include/linux/sched/init.h | 7 +++++++
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4a03c49e3bcc..0fb56ae0abc7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -176,15 +176,10 @@ struct uts_namespace;
 /* Task command name length */
 #define TASK_COMM_LEN 16
 
-extern void sched_init(void);
-extern void sched_init_smp(void);
-
 extern cpumask_var_t cpu_isolated_map;
 
 extern int runqueue_is_locked(int cpu);
 
-extern void cpu_init (void);
-extern void trap_init(void);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
 
diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h
index 15e46313e55e..f31d16075857 100644
--- a/include/linux/sched/init.h
+++ b/include/linux/sched/init.h
@@ -3,4 +3,11 @@
 
 #include <linux/sched.h>
 
+/*
+ * Scheduler init related prototypes:
+ */
+
+extern void sched_init(void);
+extern void sched_init_smp(void);
+
 #endif /* _LINUX_SCHED_INIT_H */
-- 
cgit v1.2.3


From 93b5a9a7051e51ce50109046af0235268a261ba0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 14:53:36 +0100
Subject: sched/headers, timekeeping: Move the timer tick function prototypes
 to <linux/timekeeping.h>

Move the update_process_times() and xtime_update() prototypes to <linux/timekeeping.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h       | 1 -
 include/linux/time.h        | 2 --
 include/linux/timekeeping.h | 4 ++++
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0fb56ae0abc7..dc8d94dc140f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -180,7 +180,6 @@ extern cpumask_var_t cpu_isolated_map;
 
 extern int runqueue_is_locked(int cpu);
 
-extern void update_process_times(int user);
 extern void scheduler_tick(void);
 
 #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
diff --git a/include/linux/time.h b/include/linux/time.h
index 4f4ab65b6e61..23f0f5ce3090 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -167,8 +167,6 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts)
 extern u32 (*arch_gettimeoffset)(void);
 #endif
 
-extern void xtime_update(unsigned long ticks);
-
 struct itimerval;
 extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index d2e804e15c3e..b598cbc7b576 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -8,6 +8,10 @@
 void timekeeping_init(void);
 extern int timekeeping_suspended;
 
+/* Architecture timer tick functions: */
+extern void update_process_times(int user);
+extern void xtime_update(unsigned long ticks);
+
 /*
  * Get and set timeofday
  */
-- 
cgit v1.2.3


From dcc2dc45f7cf267d37843059ff75b70260596c69 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 15:05:49 +0100
Subject: sched/headers, mm: Move 'struct tlbflush_unmap_batch' from
 <linux/sched.h> to <linux/mm_types_task.h>

Unclutter <linux/sched.h> some more.

Also move the CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH condition inside the
structure body definition, to remove a pair of #ifdefs from sched.h.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm_types_task.h | 22 ++++++++++++++++++++++
 include/linux/sched.h         | 21 ---------------------
 2 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 9526d8b9fe0e..136dfdf63ba1 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -10,6 +10,7 @@
 #include <linux/types.h>
 #include <linux/threads.h>
 #include <linux/atomic.h>
+#include <linux/cpumask.h>
 
 #include <asm/page.h>
 
@@ -62,4 +63,25 @@ struct page_frag {
 #endif
 };
 
+/* Track pages that require TLB flushes */
+struct tlbflush_unmap_batch {
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	/*
+	 * Each bit set is a CPU that potentially has a TLB entry for one of
+	 * the PFNs being flushed. See set_tlb_ubc_flush_pending().
+	 */
+	struct cpumask cpumask;
+
+	/* True if any bit in cpumask is set */
+	bool flush_required;
+
+	/*
+	 * If true then the PTE was dirty when unmapped. The entry must be
+	 * flushed before IO is initiated or a stale TLB entry potentially
+	 * allows an update without redirtying the page.
+	 */
+	bool writable;
+#endif
+};
+
 #endif /* _LINUX_MM_TYPES_TASK_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dc8d94dc140f..8d6b7aa7f3ac 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -484,25 +484,6 @@ struct wake_q_node {
 	struct wake_q_node *next;
 };
 
-/* Track pages that require TLB flushes */
-struct tlbflush_unmap_batch {
-	/*
-	 * Each bit set is a CPU that potentially has a TLB entry for one of
-	 * the PFNs being flushed. See set_tlb_ubc_flush_pending().
-	 */
-	struct cpumask cpumask;
-
-	/* True if any bit in cpumask is set */
-	bool flush_required;
-
-	/*
-	 * If true then the PTE was dirty when unmapped. The entry must be
-	 * flushed before IO is initiated or a stale TLB entry potentially
-	 * allows an update without redirtying the page.
-	 */
-	bool writable;
-};
-
 struct task_struct {
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	/*
@@ -895,9 +876,7 @@ struct task_struct {
 	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
-#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	struct tlbflush_unmap_batch tlb_ubc;
-#endif
 
 	struct rcu_head rcu;
 
-- 
cgit v1.2.3


From cda66725c1444db67127115d611c982b62b45d8c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 15:30:50 +0100
Subject: sched/headers: Move the get_task_struct()/put_task_struct() and
 related APIs from <linux/sched.h> to <linux/sched/task.h>

These belong into the task lifetime API header.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 14 --------------
 include/linux/sched/task.h | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d6b7aa7f3ac..b68358c60560 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1135,20 +1135,6 @@ static inline int is_global_init(struct task_struct *tsk)
 
 extern struct pid *cad_pid;
 
-extern void free_task(struct task_struct *tsk);
-#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
-
-extern void __put_task_struct(struct task_struct *t);
-
-static inline void put_task_struct(struct task_struct *t)
-{
-	if (atomic_dec_and_test(&t->usage))
-		__put_task_struct(t);
-}
-
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
-struct task_struct *try_get_task_struct(struct task_struct **ptask);
-
 /*
  * Per process flags
  */
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index a33a8121da9a..3886ae64148f 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -75,6 +75,21 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
+extern void free_task(struct task_struct *tsk);
+
+#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
+
+extern void __put_task_struct(struct task_struct *t);
+
+static inline void put_task_struct(struct task_struct *t)
+{
+	if (atomic_dec_and_test(&t->usage))
+		__put_task_struct(t);
+}
+
+struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+struct task_struct *try_get_task_struct(struct task_struct **ptask);
+
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;
-- 
cgit v1.2.3


From 6f175fc9536355d8aa5c2d4854848a97c244a031 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 6 Feb 2017 11:12:45 +0100
Subject: sched/headers: Move the sched_exec() prototype to
 <linux/sched/task.h>

sched_exec() better fits into the task lifetime APIs than into the core scheduler
APIs.

This reduces the size of <linux/sched.h> a bit.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 7 -------
 include/linux/sched/task.h | 7 +++++++
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b68358c60560..bd89fc17767c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1258,13 +1258,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
 #define cpu_relax_yield() cpu_relax()
 #endif
 
-/* sched_exec is called by processes performing an exec */
-#ifdef CONFIG_SMP
-extern void sched_exec(void);
-#else
-#define sched_exec()   {}
-#endif
-
 extern int yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 3886ae64148f..a978d7189cfd 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -77,6 +77,13 @@ extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
 extern void free_task(struct task_struct *tsk);
 
+/* sched_exec is called by processes performing an exec */
+#ifdef CONFIG_SMP
+extern void sched_exec(void);
+#else
+#define sched_exec()   {}
+#endif
+
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
 
 extern void __put_task_struct(struct task_struct *t);
-- 
cgit v1.2.3


From c03cb28e7c44055cd00d11b8581b9fa46a3e3764 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:36:40 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/topology.h>

The <linux/sched/topology.h> file is a self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/topology.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index b3550b36e65d..0d6fceff37bb 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_SCHED_TOPOLOGY_H
 #define _LINUX_SCHED_TOPOLOGY_H
 
-#include <linux/sched.h>
-
 #include <linux/sched/idle.h>
 
 /*
-- 
cgit v1.2.3


From f411229e1dd623a3dee623824d094546789977e0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 18:49:05 +0100
Subject: sched/headers: Remove tsk_is_polling()

It's not used by anything.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/idle.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index 66ee32f87f0b..5ca63ebad6b4 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -17,10 +17,6 @@ extern void wake_up_if_idle(int cpu);
  * polling state.
  */
 #ifdef TIF_POLLING_NRFLAG
-static inline int tsk_is_polling(struct task_struct *p)
-{
-	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
-}
 
 static inline void __current_set_polling(void)
 {
@@ -59,7 +55,6 @@ static inline bool __must_check current_clr_polling_and_test(void)
 }
 
 #else
-static inline int tsk_is_polling(struct task_struct *p) { return 0; }
 static inline void __current_set_polling(void) { }
 static inline void __current_clr_polling(void) { }
 
-- 
cgit v1.2.3


From ea94763950e49d1aa622c59218ee2fc3b434ba6b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:36:40 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/clock.h>

The <linux/sched/clock.h> file is a largely self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/clock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index ac12f71d359c..4a68c6791207 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_SCHED_CLOCK_H
 #define _LINUX_SCHED_CLOCK_H
 
-#include <linux/sched.h>
+#include <linux/smp.h>
 
 /*
  * Do not use outside of architecture code which knows its limitations.
-- 
cgit v1.2.3


From cc689c5b352d4a7510b11c975c5c94d4785b37e7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 1 Feb 2017 16:36:40 +0100
Subject: sched/headers: Remove <linux/sched.h> and <linux/slab.h> from
 <linux/delayacct.h>

The <linux/delayacct.h> file is a self-contained header and users of
it either don't need <linux/sched.h> and <linux/slab.h> - or have
already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/delayacct.h    | 2 --
 include/linux/fault-inject.h | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 6b769cbd1000..4178d2493547 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -18,8 +18,6 @@
 #define _LINUX_DELAYACCT_H
 
 #include <uapi/linux/taskstats.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
 
 /*
  * Per-task flags relevant to delay accounting
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 9f4956d8601c..728d4e0292aa 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -61,6 +61,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
 
 #endif /* CONFIG_FAULT_INJECTION */
 
+struct kmem_cache;
+
 #ifdef CONFIG_FAILSLAB
 extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 #else
-- 
cgit v1.2.3


From 5a2d6880f461faa416c0d329d46a128cf342c1eb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:54 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/loadavg.h>

The <linux/sched/loadavg.h> file is a self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/loadavg.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h
index c392e28ce0ac..4264bc6b2c27 100644
--- a/include/linux/sched/loadavg.h
+++ b/include/linux/sched/loadavg.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_SCHED_LOADAVG_H
 #define _LINUX_SCHED_LOADAVG_H
 
-#include <linux/sched.h>
-
 /*
  * These are the constant used to fake the fixed-point load-average
  * counting. Some notes:
-- 
cgit v1.2.3


From 5fd73157bdfb57370b69be7c0067f08e610e7daa Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:54 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/autogroup.h>

The <linux/sched/autogroup.h> file is a largely self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

Add a 'task_struct' predeclaration to make it build standalone.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/autogroup.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h
index fd6855548d0c..55cd496df884 100644
--- a/include/linux/sched/autogroup.h
+++ b/include/linux/sched/autogroup.h
@@ -1,9 +1,8 @@
 #ifndef _LINUX_SCHED_AUTOGROUP_H
 #define _LINUX_SCHED_AUTOGROUP_H
 
-#include <linux/sched.h>
-
 struct signal_struct;
+struct task_struct;
 struct task_group;
 struct seq_file;
 
-- 
cgit v1.2.3


From b8d6d80b37a98e3910f1b4e799f8ebea88e94bfa Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:54 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/mm.h>

The <linux/sched/mm.h> file is a self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

Include kernel.h and atomic.h.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 62dca4e0b4e0..830953ebb391 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_SCHED_MM_H
 #define _LINUX_SCHED_MM_H
 
+#include <linux/kernel.h>
+#include <linux/atomic.h>
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/gfp.h>
-- 
cgit v1.2.3


From ae1cc8823204bb938d039afa43c28a84591ada9f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:54 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/coredump.h>

The <linux/sched/coredump.h> file is a self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

Include <linux/mm_types.h>.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/coredump.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index f46912aa4f4c..69eedcef8f03 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_SCHED_COREDUMP_H
 #define _LINUX_SCHED_COREDUMP_H
 
-#include <linux/sched.h>
 #include <linux/mm_types.h>
 
 #define SUID_DUMP_DISABLE	0	/* No setuid dumping */
-- 
cgit v1.2.3


From 556725839e543bc2b45bd73121c0b1c1d6c23714 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 10:31:24 +0100
Subject: sched/headers: Remove unused 'task_can_switch_user()' prototype

The function does not exist anymore.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index bd89fc17767c..43ed34ccb53a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1509,9 +1509,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
-extern int task_can_switch_user(struct user_struct *up,
-					struct task_struct *tsk);
-
 #ifndef TASK_SIZE_OF
 #define TASK_SIZE_OF(tsk)	TASK_SIZE
 #endif
-- 
cgit v1.2.3


From de8deb0ad79f8a49cea5110c006c8676a11611f7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:55 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/user.h>

If we add <linux/uidgid.h> then <linux/sched/user.h> becomes a
self-contained header and users of it either don't need <linux/sched.h>
or have already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/user.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 40c6363da5ef..5d5415e129d4 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -1,7 +1,8 @@
 #ifndef _LINUX_SCHED_USER_H
 #define _LINUX_SCHED_USER_H
 
-#include <linux/sched.h>
+#include <linux/uidgid.h>
+#include <linux/atomic.h>
 
 struct key;
 
-- 
cgit v1.2.3


From ac4e620967bb72f86371154935aa8b67cf54e225 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 11:44:31 +0100
Subject: sched/headers: Remove #include <linux/capability.h> from
 <linux/sched.h>

The <linux/sched.h> header does not actually make use of any
types or APIs defined in <linux/capability.h>, so remove its inclusion.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43ed34ccb53a..54eefb2ad603 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -5,7 +5,6 @@
 
 #include <linux/sched/prio.h>
 
-#include <linux/capability.h>
 #include <linux/mutex.h>
 #include <linux/plist.h>
 #include <linux/mm_types_task.h>
-- 
cgit v1.2.3


From aa829c7679a13fca667f772db1f0ea03adc29122 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 11:54:05 +0100
Subject: sched/headers: Remove <linux/cgroup-defs.h> from <linux/sched.h>

It's not used by anything in <linux/sched.h> anymore.

This reduces the preprocessed size of <linux/sched.h> and
speeds up the build a bit.

Also fix code that implicitly relied on headers included by <linux/cgroup-defs.h>.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 54eefb2ad603..8991e4d7cd0f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -28,7 +28,6 @@
 #include <linux/gfp.h>
 #include <linux/topology.h>
 #include <linux/magic.h>
-#include <linux/cgroup-defs.h>
 
 #include <asm/current.h>
 
-- 
cgit v1.2.3


From ed53742d793bd92ed32114081370b199dc94467d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:55 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/cpufreq.h>

Make the <linux/sched/cpufreq.h> file a self-contained header and
remove the <linux/sched.h> dependency: users of it either don't
need <linux/sched.h> - or have already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/cpufreq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index 2bdcfbfc4c30..d2be2ccbb372 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_SCHED_CPUFREQ_H
 #define _LINUX_SCHED_CPUFREQ_H
 
-#include <linux/sched.h>
+#include <linux/types.h>
 
 /*
  * Interface between cpufreq drivers and the scheduler:
-- 
cgit v1.2.3


From 71af2ed5eeea639339e3a1497a0196bab7de4b57 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 09:57:00 +0100
Subject: kasan, sched/headers: Remove <linux/sched.h> from <linux/kasan.h>

<linux/kasan.h> is a low level header that is included early
in affected kernel headers. But it includes <linux/sched.h>
which complicates the cleanup of sched.h dependencies.

Remove it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kasan.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 7793036eac80..ceb3fe78a0d3 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_KASAN_H
 #define _LINUX_KASAN_H
 
-#include <linux/sched.h>
 #include <linux/types.h>
 
 struct kmem_cache;
-- 
cgit v1.2.3


From e26512fea5bcd6602dbf02a551ed073cd4529449 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 17:54:15 +0100
Subject: sched/headers: Remove <linux/cred.h> inclusion from <linux/sched.h>

This reduces header dependencies and speeds up the build.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8991e4d7cd0f..1be69735cef3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -24,7 +24,6 @@
 #include <linux/kcov.h>
 #include <linux/task_io_accounting.h>
 #include <linux/latencytop.h>
-#include <linux/cred.h>
 #include <linux/gfp.h>
 #include <linux/topology.h>
 #include <linux/magic.h>
-- 
cgit v1.2.3


From f780d89a0e820a529cf91fb78b52565e1b37b774 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 10:03:42 +0100
Subject: sched/headers: Remove <asm/ptrace.h> from <linux/sched.h>

This reduces header dependencies.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1be69735cef3..9dfa9c113570 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -8,7 +8,6 @@
 #include <linux/mutex.h>
 #include <linux/plist.h>
 #include <linux/mm_types_task.h>
-#include <asm/ptrace.h>
 
 #include <linux/sem.h>
 #include <linux/shm.h>
-- 
cgit v1.2.3


From 9c6da18109d603a99915b257929c0370c9d3ae40 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 10:08:30 +0100
Subject: sched/headers: Remove <linux/rtmutex.h> from <linux/sched.h>

This reduces header dependencies.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9dfa9c113570..cc5c99a27e21 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -16,7 +16,6 @@
 #include <linux/pid.h>
 #include <linux/seccomp.h>
 #include <linux/rculist.h>
-#include <linux/rtmutex.h>
 
 #include <linux/resource.h>
 #include <linux/hrtimer.h>
-- 
cgit v1.2.3


From f0a0eb699967f4f51567b563818bafe4017bef73 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Feb 2017 20:56:33 +0100
Subject: sched/headers: Remove the <linux/gfp.h> include from <linux/sched.h>

This reduces sched.h header dependencies.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index cc5c99a27e21..ab80596dddfd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,7 +22,6 @@
 #include <linux/kcov.h>
 #include <linux/task_io_accounting.h>
 #include <linux/latencytop.h>
-#include <linux/gfp.h>
 #include <linux/topology.h>
 #include <linux/magic.h>
 
-- 
cgit v1.2.3


From dc1995392d79b0eeee147a792482b991f74c1494 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:57 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/stat.h>

The <linux/sched/stat.h> file is a largely self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

( Keep the <linux/percpu.h> dependency.)

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/stat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index d8cedf27083e..141b74c53fad 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_SCHED_STAT_H
 #define _LINUX_SCHED_STAT_H
 
-#include <linux/sched.h>
+#include <linux/percpu.h>
 
 /*
  * Various counters maintained by the scheduler and fork(),
-- 
cgit v1.2.3


From b0145d9b332a5117bd0abbb980ab89586a1d5f6c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:57 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/nohz.h>

The <linux/sched/nohz.h> file is a self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/nohz.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 9471f0736a3a..4995b717500b 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_SCHED_NOHZ_H
 #define _LINUX_SCHED_NOHZ_H
 
-#include <linux/sched.h>
-
 /*
  * This is the interface between the scheduler and nohz/dyntics:
  */
-- 
cgit v1.2.3


From 4f079e98a0db5f067c0981a526ff8938e21c52e2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:57 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/debug.h>

The <linux/sched/debug.h> file is a self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/debug.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
index 853bbef0b47b..e0eaee54c5a4 100644
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_SCHED_DEBUG_H
 #define _LINUX_SCHED_DEBUG_H
 
-#include <linux/sched.h>
-
 /*
  * Various scheduler/task debugging interfaces:
  */
-- 
cgit v1.2.3


From a906086ef35129d522047a296e7c4237af75fdcb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:57 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/hotplug.h>

The <linux/sched/hotplug.h> file is a self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/hotplug.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h
index c608d3c1ddb8..752ac7e628d7 100644
--- a/include/linux/sched/hotplug.h
+++ b/include/linux/sched/hotplug.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_SCHED_HOTPLUG_H
 #define _LINUX_SCHED_HOTPLUG_H
 
-#include <linux/sched.h>
-
 /*
  * Scheduler interfaces for hotplug CPU support:
  */
-- 
cgit v1.2.3


From fae3c30c2d1ae3ff93877f64e5d55e2443d8f82f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 12:00:08 +0100
Subject: sched/headers: Remove the runqueue_is_locked() prototype

Remove the runqueue_is_locked() prototype, the function does not exist anymore.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ab80596dddfd..ac98255d00fb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -172,8 +172,6 @@ struct uts_namespace;
 
 extern cpumask_var_t cpu_isolated_map;
 
-extern int runqueue_is_locked(int cpu);
-
 extern void scheduler_tick(void);
 
 #define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
-- 
cgit v1.2.3


From cd9c513be34ceaae8bf211474b91b6897574efdd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:58 +0100
Subject: sched/headers: Remove <linux/rwsem.h> from <linux/sched.h>

This is a stray header that is not needed by anything in sched.h,
so remove it.

Update files that relied on the stray inclusion.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h          | 2 --
 include/linux/user_namespace.h | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ac98255d00fb..b361f881fe44 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -226,8 +226,6 @@ struct task_cputime {
 #define prof_exp	stime
 #define sched_exp	sum_exec_runtime
 
-#include <linux/rwsem.h>
-
 #ifdef CONFIG_SCHED_INFO
 struct sched_info {
 	/* cumulative counters */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 08264641b502..faa9bfb827da 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -5,6 +5,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ns_common.h>
 #include <linux/sched.h>
+#include <linux/rwsem.h>
 #include <linux/sysctl.h>
 #include <linux/err.h>
 
-- 
cgit v1.2.3


From 192c9414516e7178a44f9ed48d47501c4c19771c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 3 Feb 2017 23:47:37 +0100
Subject: sched/headers: Remove <linux/signal.h> from <linux/sched.h>

Instead of including the full <linux/signal.h>, only include the types-only
<linux/signal_types.h> header in <linux/sched.h>, to further decouple the
scheduler header from the signal headers.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b361f881fe44..905d4f148d65 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -11,7 +11,6 @@
 
 #include <linux/sem.h>
 #include <linux/shm.h>
-#include <linux/signal.h>
 #include <linux/signal_types.h>
 #include <linux/pid.h>
 #include <linux/seccomp.h>
-- 
cgit v1.2.3


From 1d1954e038435765a623884b626731784cde768f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 4 Feb 2017 01:16:15 +0100
Subject: sched/headers: Remove the 'init_pid_ns' prototype from
 <linux/sched.h>

pid.h already defines it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 905d4f148d65..057b2a85ecc7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1301,8 +1301,6 @@ static inline struct thread_info *task_thread_info(struct task_struct *task)
 # define task_thread_info(task)	((struct thread_info *)(task)->stack)
 #endif
 
-extern struct pid_namespace init_pid_ns;
-
 /*
  * find a task by one of its numerical ids
  *
-- 
cgit v1.2.3


From b68070e146b9c2b4ece8d869a4fab9a4f14bbfb4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 4 Feb 2017 01:27:20 +0100
Subject: sched/headers: Remove <linux/rculist.h> from <linux/sched.h>

We don't actually need the full rculist.h header anymore, include
the smaller rcupdate.h header instead.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 057b2a85ecc7..5720d11f3224 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -14,7 +14,7 @@
 #include <linux/signal_types.h>
 #include <linux/pid.h>
 #include <linux/seccomp.h>
-#include <linux/rculist.h>
+#include <linux/rcupdate.h>
 
 #include <linux/resource.h>
 #include <linux/hrtimer.h>
-- 
cgit v1.2.3


From 2c873d55cd838deef8218b6d5fe9bd336cb3113a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 12:11:03 +0100
Subject: sched/core: Remove unused prefetch_stack()

prefetch_stack() is defined by IA64, but not actually used anywhere anymore.

Remove it.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5720d11f3224..bd0111a06aa2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -247,12 +247,6 @@ struct sched_info {
 # define SCHED_FIXEDPOINT_SHIFT	10
 # define SCHED_FIXEDPOINT_SCALE	(1L << SCHED_FIXEDPOINT_SHIFT)
 
-#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
-extern void prefetch_stack(struct task_struct *t);
-#else
-static inline void prefetch_stack(struct task_struct *t) { }
-#endif
-
 struct load_weight {
 	unsigned long weight;
 	u32 inv_weight;
-- 
cgit v1.2.3


From 5c0d0f36414f9f8a292b42e797f9284b127d79c2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 8 Feb 2017 18:51:59 +0100
Subject: sched/headers: Remove <linux/sched.h> from <linux/sched/init.h>

The <linux/sched/init.h> file is a self-contained header and users of
it either don't need <linux/sched.h> - or have already included it.

This reduces the size of the header dependency graph.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/init.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h
index f31d16075857..127215045285 100644
--- a/include/linux/sched/init.h
+++ b/include/linux/sched/init.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_SCHED_INIT_H
 #define _LINUX_SCHED_INIT_H
 
-#include <linux/sched.h>
-
 /*
  * Scheduler init related prototypes:
  */
-- 
cgit v1.2.3


From 50ff9d130014f7b19541dbf607a615a72b75b778 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 5 Feb 2017 16:03:58 +0100
Subject: sched/headers: Remove <linux/magic.h> from <linux/sched/task_stack.h>

It's not used by any of the scheduler methods, but <linux/sched/task_stack.h>
needs it to pick up STACK_END_MAGIC.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index bd0111a06aa2..559be4f1aee5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,7 +22,6 @@
 #include <linux/task_io_accounting.h>
 #include <linux/latencytop.h>
 #include <linux/topology.h>
-#include <linux/magic.h>
 
 #include <asm/current.h>
 
-- 
cgit v1.2.3


From b2d5bfea2d00a0000da18f7667c2b0e2c2f168d9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 6 Feb 2017 09:56:40 +0100
Subject: sched/headers, timers: Remove the <linux/sysctl.h> include from
 <linux/timer.h>

So we want to simplify <linux/sched.h>'s header dependencies, but one
roadblock of that is <linux/timer.h>'s inclusion of sysctl.h,
which brings in other, problematic headers.

Note that timer.h's inclusion of sysctl.h can be avoided if we
pre-declare ctl_table - so do that.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/timer.h          | 2 +-
 include/linux/user_namespace.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index c7bdf895179c..e6789b8757d5 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -212,7 +212,7 @@ struct hrtimer;
 extern enum hrtimer_restart it_real_fn(struct hrtimer *);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-#include <linux/sysctl.h>
+struct ctl_table;
 
 extern unsigned int sysctl_timer_migration;
 int timer_migration_handler(struct ctl_table *table, int write,
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index faa9bfb827da..be765234c0a2 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -5,6 +5,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ns_common.h>
 #include <linux/sched.h>
+#include <linux/workqueue.h>
 #include <linux/rwsem.h>
 #include <linux/sysctl.h>
 #include <linux/err.h>
-- 
cgit v1.2.3


From 283cb90305cf1686594ed537c7a8cb188eba1a4d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 6 Feb 2017 09:59:47 +0100
Subject: sched/headers, hrtimer: Remove the <linux/wait.h> include from
 <linux/hrtimer.h>

In our quest to simplify <linux/sched.h>'s header dependencies, remove
the <linux/wait.h> inclusion from <linux/hrtimer.h> - which does
not appear to be necessary, as hrtimer.h does not use waitqueues.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/hrtimer.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index e52b427223ba..249e579ecd4c 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -19,7 +19,6 @@
 #include <linux/ktime.h>
 #include <linux/init.h>
 #include <linux/list.h>
-#include <linux/wait.h>
 #include <linux/percpu.h>
 #include <linux/timer.h>
 #include <linux/timerqueue.h>
-- 
cgit v1.2.3


From ee6a3d19f15b9b10075481088b8d4537f286d7b4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 6 Feb 2017 10:01:09 +0100
Subject: sched/headers: Remove the <linux/topology.h> include from
 <linux/sched.h>

It's used only by a single (rarely used) inline function (task_node(p)),
which we can move to <linux/sched/topology.h>.

( Add <linux/nodemask.h>, because we rely on that. )

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h          | 7 +------
 include/linux/sched/topology.h | 7 +++++++
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 559be4f1aee5..f9dc9cfaf079 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -4,6 +4,7 @@
 #include <uapi/linux/sched.h>
 
 #include <linux/sched/prio.h>
+#include <linux/nodemask.h>
 
 #include <linux/mutex.h>
 #include <linux/plist.h>
@@ -21,7 +22,6 @@
 #include <linux/kcov.h>
 #include <linux/task_io_accounting.h>
 #include <linux/latencytop.h>
-#include <linux/topology.h>
 
 #include <asm/current.h>
 
@@ -1454,11 +1454,6 @@ static inline unsigned int task_cpu(const struct task_struct *p)
 #endif
 }
 
-static inline int task_node(const struct task_struct *p)
-{
-	return cpu_to_node(task_cpu(p));
-}
-
 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
 
 #else
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 0d6fceff37bb..7d065abc7a47 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_SCHED_TOPOLOGY_H
 #define _LINUX_SCHED_TOPOLOGY_H
 
+#include <linux/topology.h>
+
 #include <linux/sched/idle.h>
 
 /*
@@ -216,4 +218,9 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
 
 #endif	/* !CONFIG_SMP */
 
+static inline int task_node(const struct task_struct *p)
+{
+	return cpu_to_node(task_cpu(p));
+}
+
 #endif /* _LINUX_SCHED_TOPOLOGY_H */
-- 
cgit v1.2.3


From 7f5f8e8d97d77edf33f2836259d1f19c6f4d94f5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 6 Feb 2017 11:44:12 +0100
Subject: sched/headers: Remove #ifdefs from <linux/sched.h>

We can remove two pairs of #ifdefs by defining structures in a smarter way.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f9dc9cfaf079..341f5792fbe0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -224,8 +224,8 @@ struct task_cputime {
 #define prof_exp	stime
 #define sched_exp	sum_exec_runtime
 
-#ifdef CONFIG_SCHED_INFO
 struct sched_info {
+#ifdef CONFIG_SCHED_INFO
 	/* cumulative counters */
 	unsigned long pcount;	      /* # of times run on this cpu */
 	unsigned long long run_delay; /* time spent waiting on a runqueue */
@@ -233,8 +233,8 @@ struct sched_info {
 	/* timestamps */
 	unsigned long long last_arrival,/* when we last ran on a cpu */
 			   last_queued;	/* when we were last queued to run */
-};
 #endif /* CONFIG_SCHED_INFO */
+};
 
 /*
  * Integer metrics need fixed point arithmetic, e.g., sched/fair
@@ -309,8 +309,8 @@ struct sched_avg {
 	unsigned long load_avg, util_avg;
 };
 
-#ifdef CONFIG_SCHEDSTATS
 struct sched_statistics {
+#ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
 	u64			wait_max;
 	u64			wait_count;
@@ -342,8 +342,8 @@ struct sched_statistics {
 	u64			nr_wakeups_affine_attempts;
 	u64			nr_wakeups_passive;
 	u64			nr_wakeups_idle;
-};
 #endif
+};
 
 struct sched_entity {
 	struct load_weight	load;		/* for load-balancing */
@@ -358,9 +358,7 @@ struct sched_entity {
 
 	u64			nr_migrations;
 
-#ifdef CONFIG_SCHEDSTATS
 	struct sched_statistics statistics;
-#endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	int			depth;
@@ -530,9 +528,7 @@ struct task_struct {
 	int rcu_tasks_idle_cpu;
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
-#ifdef CONFIG_SCHED_INFO
 	struct sched_info sched_info;
-#endif
 
 	struct list_head tasks;
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From 5eca1c10cbaa9c366c18ca79f81f21c731e3dcc7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 6 Feb 2017 22:06:35 +0100
Subject: sched/headers: Clean up <linux/sched.h>

Now that <linux/sched.h> dependencies have been sorted out,
do various trivial cleanups:

 - remove unnecessary structure predeclarations
 - fix various typos
 - update comments where necessary
 - remove pointless comments
 - use consistent types
 - tabulate consistently
 - use a consistent comment style
 - clean up the header section a bit
 - use a consistent style of a single field per line
 - remove line-breaks where they make the code look worse
 - etc ...

No change in functionality.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1222 ++++++++++++++++++++++++++-----------------------
 1 file changed, 651 insertions(+), 571 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 341f5792fbe0..d67eee84fd43 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1,38 +1,38 @@
 #ifndef _LINUX_SCHED_H
 #define _LINUX_SCHED_H
 
-#include <uapi/linux/sched.h>
+/*
+ * Define 'struct task_struct' and provide the main scheduler
+ * APIs (schedule(), wakeup variants, etc.)
+ */
 
-#include <linux/sched/prio.h>
-#include <linux/nodemask.h>
+#include <uapi/linux/sched.h>
 
-#include <linux/mutex.h>
-#include <linux/plist.h>
-#include <linux/mm_types_task.h>
+#include <asm/current.h>
 
+#include <linux/pid.h>
 #include <linux/sem.h>
 #include <linux/shm.h>
-#include <linux/signal_types.h>
-#include <linux/pid.h>
+#include <linux/kcov.h>
+#include <linux/mutex.h>
+#include <linux/plist.h>
+#include <linux/hrtimer.h>
 #include <linux/seccomp.h>
+#include <linux/nodemask.h>
 #include <linux/rcupdate.h>
-
 #include <linux/resource.h>
-#include <linux/hrtimer.h>
-#include <linux/kcov.h>
-#include <linux/task_io_accounting.h>
 #include <linux/latencytop.h>
+#include <linux/sched/prio.h>
+#include <linux/signal_types.h>
+#include <linux/mm_types_task.h>
+#include <linux/task_io_accounting.h>
 
-#include <asm/current.h>
-
-/* task_struct member predeclarations: */
+/* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
-struct autogroup;
 struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
 struct cfs_rq;
-struct filename;
 struct fs_struct;
 struct futex_pi_state;
 struct io_context;
@@ -52,8 +52,6 @@ struct sighand_struct;
 struct signal_struct;
 struct task_delay_info;
 struct task_group;
-struct task_struct;
-struct uts_namespace;
 
 /*
  * Task state bitmask. NOTE! These bits are also
@@ -65,50 +63,53 @@ struct uts_namespace;
  * modifying one set can't modify the other one by
  * mistake.
  */
-#define TASK_RUNNING		0
-#define TASK_INTERRUPTIBLE	1
-#define TASK_UNINTERRUPTIBLE	2
-#define __TASK_STOPPED		4
-#define __TASK_TRACED		8
-/* in tsk->exit_state */
-#define EXIT_DEAD		16
-#define EXIT_ZOMBIE		32
-#define EXIT_TRACE		(EXIT_ZOMBIE | EXIT_DEAD)
-/* in tsk->state again */
-#define TASK_DEAD		64
-#define TASK_WAKEKILL		128
-#define TASK_WAKING		256
-#define TASK_PARKED		512
-#define TASK_NOLOAD		1024
-#define TASK_NEW		2048
-#define TASK_STATE_MAX		4096
-
-#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"
-
-/* Convenience macros for the sake of set_current_state */
-#define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
-#define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
-#define TASK_TRACED		(TASK_WAKEKILL | __TASK_TRACED)
-
-#define TASK_IDLE		(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
-
-/* Convenience macros for the sake of wake_up */
-#define TASK_NORMAL		(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
-#define TASK_ALL		(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
-
-/* get_task_state() */
-#define TASK_REPORT		(TASK_RUNNING | TASK_INTERRUPTIBLE | \
-				 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
-				 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
-
-#define task_is_traced(task)	((task->state & __TASK_TRACED) != 0)
-#define task_is_stopped(task)	((task->state & __TASK_STOPPED) != 0)
-#define task_is_stopped_or_traced(task)	\
-			((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
-#define task_contributes_to_load(task)	\
-				((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-				 (task->flags & PF_FROZEN) == 0 && \
-				 (task->state & TASK_NOLOAD) == 0)
+
+/* Used in tsk->state: */
+#define TASK_RUNNING			0
+#define TASK_INTERRUPTIBLE		1
+#define TASK_UNINTERRUPTIBLE		2
+#define __TASK_STOPPED			4
+#define __TASK_TRACED			8
+/* Used in tsk->exit_state: */
+#define EXIT_DEAD			16
+#define EXIT_ZOMBIE			32
+#define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
+/* Used in tsk->state again: */
+#define TASK_DEAD			64
+#define TASK_WAKEKILL			128
+#define TASK_WAKING			256
+#define TASK_PARKED			512
+#define TASK_NOLOAD			1024
+#define TASK_NEW			2048
+#define TASK_STATE_MAX			4096
+
+#define TASK_STATE_TO_CHAR_STR		"RSDTtXZxKWPNn"
+
+/* Convenience macros for the sake of set_current_state: */
+#define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
+#define TASK_STOPPED			(TASK_WAKEKILL | __TASK_STOPPED)
+#define TASK_TRACED			(TASK_WAKEKILL | __TASK_TRACED)
+
+#define TASK_IDLE			(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
+
+/* Convenience macros for the sake of wake_up(): */
+#define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
+#define TASK_ALL			(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
+
+/* get_task_state(): */
+#define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
+					 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
+					 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
+
+#define task_is_traced(task)		((task->state & __TASK_TRACED) != 0)
+
+#define task_is_stopped(task)		((task->state & __TASK_STOPPED) != 0)
+
+#define task_is_stopped_or_traced(task)	((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+
+#define task_contributes_to_load(task)	((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
+					 (task->flags & PF_FROZEN) == 0 && \
+					 (task->state & TASK_NOLOAD) == 0)
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
@@ -158,26 +159,24 @@ struct uts_namespace;
  *
  * Also see the comments of try_to_wake_up().
  */
-#define __set_current_state(state_value)		\
-	do { current->state = (state_value); } while (0)
-#define set_current_state(state_value)			\
-	smp_store_mb(current->state, (state_value))
-
+#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
+#define set_current_state(state_value)	 smp_store_mb(current->state, (state_value))
 #endif
 
-/* Task command name length */
-#define TASK_COMM_LEN 16
+/* Task command name length: */
+#define TASK_COMM_LEN			16
 
-extern cpumask_var_t cpu_isolated_map;
+extern cpumask_var_t			cpu_isolated_map;
 
 extern void scheduler_tick(void);
 
-#define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
-extern signed long schedule_timeout(signed long timeout);
-extern signed long schedule_timeout_interruptible(signed long timeout);
-extern signed long schedule_timeout_killable(signed long timeout);
-extern signed long schedule_timeout_uninterruptible(signed long timeout);
-extern signed long schedule_timeout_idle(signed long timeout);
+#define	MAX_SCHEDULE_TIMEOUT		LONG_MAX
+
+extern long schedule_timeout(long timeout);
+extern long schedule_timeout_interruptible(long timeout);
+extern long schedule_timeout_killable(long timeout);
+extern long schedule_timeout_uninterruptible(long timeout);
+extern long schedule_timeout_idle(long timeout);
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
 
@@ -197,9 +196,9 @@ extern void io_schedule(void);
  */
 struct prev_cputime {
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-	u64 utime;
-	u64 stime;
-	raw_spinlock_t lock;
+	u64				utime;
+	u64				stime;
+	raw_spinlock_t			lock;
 #endif
 };
 
@@ -214,25 +213,34 @@ struct prev_cputime {
  * these counts together and treat all three of them in parallel.
  */
 struct task_cputime {
-	u64 utime;
-	u64 stime;
-	unsigned long long sum_exec_runtime;
+	u64				utime;
+	u64				stime;
+	unsigned long long		sum_exec_runtime;
 };
 
-/* Alternate field names when used to cache expirations. */
-#define virt_exp	utime
-#define prof_exp	stime
-#define sched_exp	sum_exec_runtime
+/* Alternate field names when used on cache expirations: */
+#define virt_exp			utime
+#define prof_exp			stime
+#define sched_exp			sum_exec_runtime
 
 struct sched_info {
 #ifdef CONFIG_SCHED_INFO
-	/* cumulative counters */
-	unsigned long pcount;	      /* # of times run on this cpu */
-	unsigned long long run_delay; /* time spent waiting on a runqueue */
+	/* Cumulative counters: */
+
+	/* # of times we have run on this CPU: */
+	unsigned long			pcount;
+
+	/* Time spent waiting on a runqueue: */
+	unsigned long long		run_delay;
+
+	/* Timestamps: */
+
+	/* When did we last run on a CPU? */
+	unsigned long long		last_arrival;
+
+	/* When were we last queued to run? */
+	unsigned long long		last_queued;
 
-	/* timestamps */
-	unsigned long long last_arrival,/* when we last ran on a cpu */
-			   last_queued;	/* when we were last queued to run */
 #endif /* CONFIG_SCHED_INFO */
 };
 
@@ -243,12 +251,12 @@ struct sched_info {
  * We define a basic fixed point arithmetic range, and then formalize
  * all these metrics based on that basic range.
  */
-# define SCHED_FIXEDPOINT_SHIFT	10
-# define SCHED_FIXEDPOINT_SCALE	(1L << SCHED_FIXEDPOINT_SHIFT)
+# define SCHED_FIXEDPOINT_SHIFT		10
+# define SCHED_FIXEDPOINT_SCALE		(1L << SCHED_FIXEDPOINT_SHIFT)
 
 struct load_weight {
-	unsigned long weight;
-	u32 inv_weight;
+	unsigned long			weight;
+	u32				inv_weight;
 };
 
 /*
@@ -304,69 +312,73 @@ struct load_weight {
  * issues.
  */
 struct sched_avg {
-	u64 last_update_time, load_sum;
-	u32 util_sum, period_contrib;
-	unsigned long load_avg, util_avg;
+	u64				last_update_time;
+	u64				load_sum;
+	u32				util_sum;
+	u32				period_contrib;
+	unsigned long			load_avg;
+	unsigned long			util_avg;
 };
 
 struct sched_statistics {
 #ifdef CONFIG_SCHEDSTATS
-	u64			wait_start;
-	u64			wait_max;
-	u64			wait_count;
-	u64			wait_sum;
-	u64			iowait_count;
-	u64			iowait_sum;
-
-	u64			sleep_start;
-	u64			sleep_max;
-	s64			sum_sleep_runtime;
-
-	u64			block_start;
-	u64			block_max;
-	u64			exec_max;
-	u64			slice_max;
-
-	u64			nr_migrations_cold;
-	u64			nr_failed_migrations_affine;
-	u64			nr_failed_migrations_running;
-	u64			nr_failed_migrations_hot;
-	u64			nr_forced_migrations;
-
-	u64			nr_wakeups;
-	u64			nr_wakeups_sync;
-	u64			nr_wakeups_migrate;
-	u64			nr_wakeups_local;
-	u64			nr_wakeups_remote;
-	u64			nr_wakeups_affine;
-	u64			nr_wakeups_affine_attempts;
-	u64			nr_wakeups_passive;
-	u64			nr_wakeups_idle;
+	u64				wait_start;
+	u64				wait_max;
+	u64				wait_count;
+	u64				wait_sum;
+	u64				iowait_count;
+	u64				iowait_sum;
+
+	u64				sleep_start;
+	u64				sleep_max;
+	s64				sum_sleep_runtime;
+
+	u64				block_start;
+	u64				block_max;
+	u64				exec_max;
+	u64				slice_max;
+
+	u64				nr_migrations_cold;
+	u64				nr_failed_migrations_affine;
+	u64				nr_failed_migrations_running;
+	u64				nr_failed_migrations_hot;
+	u64				nr_forced_migrations;
+
+	u64				nr_wakeups;
+	u64				nr_wakeups_sync;
+	u64				nr_wakeups_migrate;
+	u64				nr_wakeups_local;
+	u64				nr_wakeups_remote;
+	u64				nr_wakeups_affine;
+	u64				nr_wakeups_affine_attempts;
+	u64				nr_wakeups_passive;
+	u64				nr_wakeups_idle;
 #endif
 };
 
 struct sched_entity {
-	struct load_weight	load;		/* for load-balancing */
-	struct rb_node		run_node;
-	struct list_head	group_node;
-	unsigned int		on_rq;
+	/* For load-balancing: */
+	struct load_weight		load;
+	struct rb_node			run_node;
+	struct list_head		group_node;
+	unsigned int			on_rq;
 
-	u64			exec_start;
-	u64			sum_exec_runtime;
-	u64			vruntime;
-	u64			prev_sum_exec_runtime;
+	u64				exec_start;
+	u64				sum_exec_runtime;
+	u64				vruntime;
+	u64				prev_sum_exec_runtime;
 
-	u64			nr_migrations;
+	u64				nr_migrations;
 
-	struct sched_statistics statistics;
+	struct sched_statistics		statistics;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	int			depth;
-	struct sched_entity	*parent;
+	int				depth;
+	struct sched_entity		*parent;
 	/* rq on which this entity is (to be) queued: */
-	struct cfs_rq		*cfs_rq;
+	struct cfs_rq			*cfs_rq;
 	/* rq "owned" by this entity/group: */
-	struct cfs_rq		*my_q;
+	struct cfs_rq			*my_q;
 #endif
 
 #ifdef CONFIG_SMP
@@ -376,49 +388,49 @@ struct sched_entity {
 	 * Put into separate cache line so it does not
 	 * collide with read-mostly values above.
 	 */
-	struct sched_avg	avg ____cacheline_aligned_in_smp;
+	struct sched_avg		avg ____cacheline_aligned_in_smp;
 #endif
 };
 
 struct sched_rt_entity {
-	struct list_head run_list;
-	unsigned long timeout;
-	unsigned long watchdog_stamp;
-	unsigned int time_slice;
-	unsigned short on_rq;
-	unsigned short on_list;
-
-	struct sched_rt_entity *back;
+	struct list_head		run_list;
+	unsigned long			timeout;
+	unsigned long			watchdog_stamp;
+	unsigned int			time_slice;
+	unsigned short			on_rq;
+	unsigned short			on_list;
+
+	struct sched_rt_entity		*back;
 #ifdef CONFIG_RT_GROUP_SCHED
-	struct sched_rt_entity	*parent;
+	struct sched_rt_entity		*parent;
 	/* rq on which this entity is (to be) queued: */
-	struct rt_rq		*rt_rq;
+	struct rt_rq			*rt_rq;
 	/* rq "owned" by this entity/group: */
-	struct rt_rq		*my_q;
+	struct rt_rq			*my_q;
 #endif
 };
 
 struct sched_dl_entity {
-	struct rb_node	rb_node;
+	struct rb_node			rb_node;
 
 	/*
 	 * Original scheduling parameters. Copied here from sched_attr
 	 * during sched_setattr(), they will remain the same until
 	 * the next sched_setattr().
 	 */
-	u64 dl_runtime;		/* maximum runtime for each instance	*/
-	u64 dl_deadline;	/* relative deadline of each instance	*/
-	u64 dl_period;		/* separation of two instances (period) */
-	u64 dl_bw;		/* dl_runtime / dl_deadline		*/
+	u64				dl_runtime;	/* Maximum runtime for each instance	*/
+	u64				dl_deadline;	/* Relative deadline of each instance	*/
+	u64				dl_period;	/* Separation of two instances (period) */
+	u64				dl_bw;		/* dl_runtime / dl_deadline		*/
 
 	/*
 	 * Actual scheduling parameters. Initialized with the values above,
 	 * they are continously updated during task execution. Note that
 	 * the remaining runtime could be < 0 in case we are in overrun.
 	 */
-	s64 runtime;		/* remaining runtime for this instance	*/
-	u64 deadline;		/* absolute deadline for this instance	*/
-	unsigned int flags;	/* specifying the scheduler behaviour	*/
+	s64				runtime;	/* Remaining runtime for this instance	*/
+	u64				deadline;	/* Absolute deadline for this instance	*/
+	unsigned int			flags;		/* Specifying the scheduler behaviour	*/
 
 	/*
 	 * Some bool flags:
@@ -431,24 +443,28 @@ struct sched_dl_entity {
 	 * outside bandwidth enforcement mechanism (but only until we
 	 * exit the critical section);
 	 *
-	 * @dl_yielded tells if task gave up the cpu before consuming
+	 * @dl_yielded tells if task gave up the CPU before consuming
 	 * all its available runtime during the last job.
 	 */
-	int dl_throttled, dl_boosted, dl_yielded;
+	int				dl_throttled;
+	int				dl_boosted;
+	int				dl_yielded;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
 	 * own bandwidth to be enforced, thus we need one timer per task.
 	 */
-	struct hrtimer dl_timer;
+	struct hrtimer			dl_timer;
 };
 
 union rcu_special {
 	struct {
-		u8 blocked;
-		u8 need_qs;
-		u8 exp_need_qs;
-		u8 pad;	/* Otherwise the compiler can store garbage here. */
+		u8			blocked;
+		u8			need_qs;
+		u8			exp_need_qs;
+
+		/* Otherwise the compiler can store garbage here: */
+		u8			pad;
 	} b; /* Bits. */
 	u32 s; /* Set of bits. */
 };
@@ -470,361 +486,417 @@ struct task_struct {
 	 * For reasons of header soup (see current_thread_info()), this
 	 * must be the first element of task_struct.
 	 */
-	struct thread_info thread_info;
+	struct thread_info		thread_info;
 #endif
-	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
-	void *stack;
-	atomic_t usage;
-	unsigned int flags;	/* per process flags, defined below */
-	unsigned int ptrace;
+	/* -1 unrunnable, 0 runnable, >0 stopped: */
+	volatile long			state;
+	void				*stack;
+	atomic_t			usage;
+	/* Per task flags (PF_*), defined further below: */
+	unsigned int			flags;
+	unsigned int			ptrace;
 
 #ifdef CONFIG_SMP
-	struct llist_node wake_entry;
-	int on_cpu;
+	struct llist_node		wake_entry;
+	int				on_cpu;
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-	unsigned int cpu;	/* current CPU */
+	/* Current CPU: */
+	unsigned int			cpu;
 #endif
-	unsigned int wakee_flips;
-	unsigned long wakee_flip_decay_ts;
-	struct task_struct *last_wakee;
+	unsigned int			wakee_flips;
+	unsigned long			wakee_flip_decay_ts;
+	struct task_struct		*last_wakee;
 
-	int wake_cpu;
+	int				wake_cpu;
 #endif
-	int on_rq;
+	int				on_rq;
+
+	int				prio;
+	int				static_prio;
+	int				normal_prio;
+	unsigned int			rt_priority;
 
-	int prio, static_prio, normal_prio;
-	unsigned int rt_priority;
-	const struct sched_class *sched_class;
-	struct sched_entity se;
-	struct sched_rt_entity rt;
+	const struct sched_class	*sched_class;
+	struct sched_entity		se;
+	struct sched_rt_entity		rt;
 #ifdef CONFIG_CGROUP_SCHED
-	struct task_group *sched_task_group;
+	struct task_group		*sched_task_group;
 #endif
-	struct sched_dl_entity dl;
+	struct sched_dl_entity		dl;
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
-	/* list of struct preempt_notifier: */
-	struct hlist_head preempt_notifiers;
+	/* List of struct preempt_notifier: */
+	struct hlist_head		preempt_notifiers;
 #endif
 
 #ifdef CONFIG_BLK_DEV_IO_TRACE
-	unsigned int btrace_seq;
+	unsigned int			btrace_seq;
 #endif
 
-	unsigned int policy;
-	int nr_cpus_allowed;
-	cpumask_t cpus_allowed;
+	unsigned int			policy;
+	int				nr_cpus_allowed;
+	cpumask_t			cpus_allowed;
 
 #ifdef CONFIG_PREEMPT_RCU
-	int rcu_read_lock_nesting;
-	union rcu_special rcu_read_unlock_special;
-	struct list_head rcu_node_entry;
-	struct rcu_node *rcu_blocked_node;
+	int				rcu_read_lock_nesting;
+	union rcu_special		rcu_read_unlock_special;
+	struct list_head		rcu_node_entry;
+	struct rcu_node			*rcu_blocked_node;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
+
 #ifdef CONFIG_TASKS_RCU
-	unsigned long rcu_tasks_nvcsw;
-	bool rcu_tasks_holdout;
-	struct list_head rcu_tasks_holdout_list;
-	int rcu_tasks_idle_cpu;
+	unsigned long			rcu_tasks_nvcsw;
+	bool				rcu_tasks_holdout;
+	struct list_head		rcu_tasks_holdout_list;
+	int				rcu_tasks_idle_cpu;
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
-	struct sched_info sched_info;
+	struct sched_info		sched_info;
 
-	struct list_head tasks;
+	struct list_head		tasks;
 #ifdef CONFIG_SMP
-	struct plist_node pushable_tasks;
-	struct rb_node pushable_dl_tasks;
+	struct plist_node		pushable_tasks;
+	struct rb_node			pushable_dl_tasks;
 #endif
 
-	struct mm_struct *mm, *active_mm;
+	struct mm_struct		*mm;
+	struct mm_struct		*active_mm;
 
 	/* Per-thread vma caching: */
-	struct vmacache vmacache;
-
-#if defined(SPLIT_RSS_COUNTING)
-	struct task_rss_stat	rss_stat;
-#endif
-/* task state */
-	int exit_state;
-	int exit_code, exit_signal;
-	int pdeath_signal;  /*  The signal sent when the parent dies  */
-	unsigned long jobctl;	/* JOBCTL_*, siglock protected */
-
-	/* Used for emulating ABI behavior of previous Linux versions */
-	unsigned int personality;
-
-	/* scheduler bits, serialized by scheduler locks */
-	unsigned sched_reset_on_fork:1;
-	unsigned sched_contributes_to_load:1;
-	unsigned sched_migrated:1;
-	unsigned sched_remote_wakeup:1;
-	unsigned :0; /* force alignment to the next boundary */
-
-	/* unserialized, strictly 'current' */
-	unsigned in_execve:1; /* bit to tell LSMs we're in execve */
-	unsigned in_iowait:1;
-#if !defined(TIF_RESTORE_SIGMASK)
-	unsigned restore_sigmask:1;
+	struct vmacache			vmacache;
+
+#ifdef SPLIT_RSS_COUNTING
+	struct task_rss_stat		rss_stat;
+#endif
+	int				exit_state;
+	int				exit_code;
+	int				exit_signal;
+	/* The signal sent when the parent dies: */
+	int				pdeath_signal;
+	/* JOBCTL_*, siglock protected: */
+	unsigned long			jobctl;
+
+	/* Used for emulating ABI behavior of previous Linux versions: */
+	unsigned int			personality;
+
+	/* Scheduler bits, serialized by scheduler locks: */
+	unsigned			sched_reset_on_fork:1;
+	unsigned			sched_contributes_to_load:1;
+	unsigned			sched_migrated:1;
+	unsigned			sched_remote_wakeup:1;
+	/* Force alignment to the next boundary: */
+	unsigned			:0;
+
+	/* Unserialized, strictly 'current' */
+
+	/* Bit to tell LSMs we're in execve(): */
+	unsigned			in_execve:1;
+	unsigned			in_iowait:1;
+#ifndef TIF_RESTORE_SIGMASK
+	unsigned			restore_sigmask:1;
 #endif
 #ifdef CONFIG_MEMCG
-	unsigned memcg_may_oom:1;
+	unsigned			memcg_may_oom:1;
 #ifndef CONFIG_SLOB
-	unsigned memcg_kmem_skip_account:1;
+	unsigned			memcg_kmem_skip_account:1;
 #endif
 #endif
 #ifdef CONFIG_COMPAT_BRK
-	unsigned brk_randomized:1;
+	unsigned			brk_randomized:1;
 #endif
 
-	unsigned long atomic_flags; /* Flags needing atomic access. */
+	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
-	struct restart_block restart_block;
+	struct restart_block		restart_block;
 
-	pid_t pid;
-	pid_t tgid;
+	pid_t				pid;
+	pid_t				tgid;
 
 #ifdef CONFIG_CC_STACKPROTECTOR
-	/* Canary value for the -fstack-protector gcc feature */
-	unsigned long stack_canary;
+	/* Canary value for the -fstack-protector GCC feature: */
+	unsigned long			stack_canary;
 #endif
 	/*
-	 * pointers to (original) parent process, youngest child, younger sibling,
+	 * Pointers to the (original) parent process, youngest child, younger sibling,
 	 * older sibling, respectively.  (p->father can be replaced with
 	 * p->real_parent->pid)
 	 */
-	struct task_struct __rcu *real_parent; /* real parent process */
-	struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
+
+	/* Real parent process: */
+	struct task_struct __rcu	*real_parent;
+
+	/* Recipient of SIGCHLD, wait4() reports: */
+	struct task_struct __rcu	*parent;
+
 	/*
-	 * children/sibling forms the list of my natural children
+	 * Children/sibling form the list of natural children:
 	 */
-	struct list_head children;	/* list of my children */
-	struct list_head sibling;	/* linkage in my parent's children list */
-	struct task_struct *group_leader;	/* threadgroup leader */
+	struct list_head		children;
+	struct list_head		sibling;
+	struct task_struct		*group_leader;
 
 	/*
-	 * ptraced is the list of tasks this task is using ptrace on.
+	 * 'ptraced' is the list of tasks this task is using ptrace() on.
+	 *
 	 * This includes both natural children and PTRACE_ATTACH targets.
-	 * p->ptrace_entry is p's link on the p->parent->ptraced list.
+	 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
 	 */
-	struct list_head ptraced;
-	struct list_head ptrace_entry;
+	struct list_head		ptraced;
+	struct list_head		ptrace_entry;
 
 	/* PID/PID hash table linkage. */
-	struct pid_link pids[PIDTYPE_MAX];
-	struct list_head thread_group;
-	struct list_head thread_node;
+	struct pid_link			pids[PIDTYPE_MAX];
+	struct list_head		thread_group;
+	struct list_head		thread_node;
+
+	struct completion		*vfork_done;
 
-	struct completion *vfork_done;		/* for vfork() */
-	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
-	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
+	/* CLONE_CHILD_SETTID: */
+	int __user			*set_child_tid;
 
-	u64 utime, stime;
+	/* CLONE_CHILD_CLEARTID: */
+	int __user			*clear_child_tid;
+
+	u64				utime;
+	u64				stime;
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
-	u64 utimescaled, stimescaled;
+	u64				utimescaled;
+	u64				stimescaled;
 #endif
-	u64 gtime;
-	struct prev_cputime prev_cputime;
+	u64				gtime;
+	struct prev_cputime		prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-	seqcount_t vtime_seqcount;
-	unsigned long long vtime_snap;
+	seqcount_t			vtime_seqcount;
+	unsigned long long		vtime_snap;
 	enum {
-		/* Task is sleeping or running in a CPU with VTIME inactive */
+		/* Task is sleeping or running in a CPU with VTIME inactive: */
 		VTIME_INACTIVE = 0,
-		/* Task runs in userspace in a CPU with VTIME active */
+		/* Task runs in userspace in a CPU with VTIME active: */
 		VTIME_USER,
-		/* Task runs in kernelspace in a CPU with VTIME active */
+		/* Task runs in kernelspace in a CPU with VTIME active: */
 		VTIME_SYS,
 	} vtime_snap_whence;
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-	atomic_t tick_dep_mask;
+	atomic_t			tick_dep_mask;
 #endif
-	unsigned long nvcsw, nivcsw; /* context switch counts */
-	u64 start_time;		/* monotonic time in nsec */
-	u64 real_start_time;	/* boot based time in nsec */
-/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
-	unsigned long min_flt, maj_flt;
+	/* Context switch counts: */
+	unsigned long			nvcsw;
+	unsigned long			nivcsw;
+
+	/* Monotonic time in nsecs: */
+	u64				start_time;
+
+	/* Boot based time in nsecs: */
+	u64				real_start_time;
+
+	/* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
+	unsigned long			min_flt;
+	unsigned long			maj_flt;
 
 #ifdef CONFIG_POSIX_TIMERS
-	struct task_cputime cputime_expires;
-	struct list_head cpu_timers[3];
-#endif
-
-/* process credentials */
-	const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
-	const struct cred __rcu *real_cred; /* objective and real subjective task
-					 * credentials (COW) */
-	const struct cred __rcu *cred;	/* effective (overridable) subjective task
-					 * credentials (COW) */
-	char comm[TASK_COMM_LEN]; /* executable name excluding path
-				     - access with [gs]et_task_comm (which lock
-				       it with task_lock())
-				     - initialized normally by setup_new_exec */
-/* file system info */
-	struct nameidata *nameidata;
+	struct task_cputime		cputime_expires;
+	struct list_head		cpu_timers[3];
+#endif
+
+	/* Process credentials: */
+
+	/* Tracer's credentials at attach: */
+	const struct cred __rcu		*ptracer_cred;
+
+	/* Objective and real subjective task credentials (COW): */
+	const struct cred __rcu		*real_cred;
+
+	/* Effective (overridable) subjective task credentials (COW): */
+	const struct cred __rcu		*cred;
+
+	/*
+	 * executable name, excluding path.
+	 *
+	 * - normally initialized setup_new_exec()
+	 * - access it with [gs]et_task_comm()
+	 * - lock it with task_lock()
+	 */
+	char				comm[TASK_COMM_LEN];
+
+	struct nameidata		*nameidata;
+
 #ifdef CONFIG_SYSVIPC
-/* ipc stuff */
-	struct sysv_sem sysvsem;
-	struct sysv_shm sysvshm;
+	struct sysv_sem			sysvsem;
+	struct sysv_shm			sysvshm;
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
-/* hung task detection */
-	unsigned long last_switch_count;
-#endif
-/* filesystem information */
-	struct fs_struct *fs;
-/* open file information */
-	struct files_struct *files;
-/* namespaces */
-	struct nsproxy *nsproxy;
-/* signal handlers */
-	struct signal_struct *signal;
-	struct sighand_struct *sighand;
-
-	sigset_t blocked, real_blocked;
-	sigset_t saved_sigmask;	/* restored if set_restore_sigmask() was used */
-	struct sigpending pending;
-
-	unsigned long sas_ss_sp;
-	size_t sas_ss_size;
-	unsigned sas_ss_flags;
-
-	struct callback_head *task_works;
-
-	struct audit_context *audit_context;
+	unsigned long			last_switch_count;
+#endif
+	/* Filesystem information: */
+	struct fs_struct		*fs;
+
+	/* Open file information: */
+	struct files_struct		*files;
+
+	/* Namespaces: */
+	struct nsproxy			*nsproxy;
+
+	/* Signal handlers: */
+	struct signal_struct		*signal;
+	struct sighand_struct		*sighand;
+	sigset_t			blocked;
+	sigset_t			real_blocked;
+	/* Restored if set_restore_sigmask() was used: */
+	sigset_t			saved_sigmask;
+	struct sigpending		pending;
+	unsigned long			sas_ss_sp;
+	size_t				sas_ss_size;
+	unsigned int			sas_ss_flags;
+
+	struct callback_head		*task_works;
+
+	struct audit_context		*audit_context;
 #ifdef CONFIG_AUDITSYSCALL
-	kuid_t loginuid;
-	unsigned int sessionid;
+	kuid_t				loginuid;
+	unsigned int			sessionid;
 #endif
-	struct seccomp seccomp;
+	struct seccomp			seccomp;
 
-/* Thread group tracking */
-   	u32 parent_exec_id;
-   	u32 self_exec_id;
-/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
- * mempolicy */
-	spinlock_t alloc_lock;
+	/* Thread group tracking: */
+	u32				parent_exec_id;
+	u32				self_exec_id;
+
+	/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
+	spinlock_t			alloc_lock;
 
 	/* Protection of the PI data structures: */
-	raw_spinlock_t pi_lock;
+	raw_spinlock_t			pi_lock;
 
-	struct wake_q_node wake_q;
+	struct wake_q_node		wake_q;
 
 #ifdef CONFIG_RT_MUTEXES
-	/* PI waiters blocked on a rt_mutex held by this task */
-	struct rb_root pi_waiters;
-	struct rb_node *pi_waiters_leftmost;
-	/* Deadlock detection and priority inheritance handling */
-	struct rt_mutex_waiter *pi_blocked_on;
+	/* PI waiters blocked on a rt_mutex held by this task: */
+	struct rb_root			pi_waiters;
+	struct rb_node			*pi_waiters_leftmost;
+	/* Deadlock detection and priority inheritance handling: */
+	struct rt_mutex_waiter		*pi_blocked_on;
 #endif
 
 #ifdef CONFIG_DEBUG_MUTEXES
-	/* mutex deadlock detection */
-	struct mutex_waiter *blocked_on;
+	/* Mutex deadlock detection: */
+	struct mutex_waiter		*blocked_on;
 #endif
+
 #ifdef CONFIG_TRACE_IRQFLAGS
-	unsigned int irq_events;
-	unsigned long hardirq_enable_ip;
-	unsigned long hardirq_disable_ip;
-	unsigned int hardirq_enable_event;
-	unsigned int hardirq_disable_event;
-	int hardirqs_enabled;
-	int hardirq_context;
-	unsigned long softirq_disable_ip;
-	unsigned long softirq_enable_ip;
-	unsigned int softirq_disable_event;
-	unsigned int softirq_enable_event;
-	int softirqs_enabled;
-	int softirq_context;
+	unsigned int			irq_events;
+	unsigned long			hardirq_enable_ip;
+	unsigned long			hardirq_disable_ip;
+	unsigned int			hardirq_enable_event;
+	unsigned int			hardirq_disable_event;
+	int				hardirqs_enabled;
+	int				hardirq_context;
+	unsigned long			softirq_disable_ip;
+	unsigned long			softirq_enable_ip;
+	unsigned int			softirq_disable_event;
+	unsigned int			softirq_enable_event;
+	int				softirqs_enabled;
+	int				softirq_context;
 #endif
+
 #ifdef CONFIG_LOCKDEP
-# define MAX_LOCK_DEPTH 48UL
-	u64 curr_chain_key;
-	int lockdep_depth;
-	unsigned int lockdep_recursion;
-	struct held_lock held_locks[MAX_LOCK_DEPTH];
-	gfp_t lockdep_reclaim_gfp;
+# define MAX_LOCK_DEPTH			48UL
+	u64				curr_chain_key;
+	int				lockdep_depth;
+	unsigned int			lockdep_recursion;
+	struct held_lock		held_locks[MAX_LOCK_DEPTH];
+	gfp_t				lockdep_reclaim_gfp;
 #endif
+
 #ifdef CONFIG_UBSAN
-	unsigned int in_ubsan;
+	unsigned int			in_ubsan;
 #endif
 
-/* journalling filesystem info */
-	void *journal_info;
+	/* Journalling filesystem info: */
+	void				*journal_info;
 
-/* stacked block device info */
-	struct bio_list *bio_list;
+	/* Stacked block device info: */
+	struct bio_list			*bio_list;
 
 #ifdef CONFIG_BLOCK
-/* stack plugging */
-	struct blk_plug *plug;
+	/* Stack plugging: */
+	struct blk_plug			*plug;
 #endif
 
-/* VM state */
-	struct reclaim_state *reclaim_state;
+	/* VM state: */
+	struct reclaim_state		*reclaim_state;
+
+	struct backing_dev_info		*backing_dev_info;
 
-	struct backing_dev_info *backing_dev_info;
+	struct io_context		*io_context;
 
-	struct io_context *io_context;
+	/* Ptrace state: */
+	unsigned long			ptrace_message;
+	siginfo_t			*last_siginfo;
 
-	unsigned long ptrace_message;
-	siginfo_t *last_siginfo; /* For ptrace use.  */
-	struct task_io_accounting ioac;
-#if defined(CONFIG_TASK_XACCT)
-	u64 acct_rss_mem1;	/* accumulated rss usage */
-	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
-	u64 acct_timexpd;	/* stime + utime since last update */
+	struct task_io_accounting	ioac;
+#ifdef CONFIG_TASK_XACCT
+	/* Accumulated RSS usage: */
+	u64				acct_rss_mem1;
+	/* Accumulated virtual memory usage: */
+	u64				acct_vm_mem1;
+	/* stime + utime since last update: */
+	u64				acct_timexpd;
 #endif
 #ifdef CONFIG_CPUSETS
-	nodemask_t mems_allowed;	/* Protected by alloc_lock */
-	seqcount_t mems_allowed_seq;	/* Seqence no to catch updates */
-	int cpuset_mem_spread_rotor;
-	int cpuset_slab_spread_rotor;
+	/* Protected by ->alloc_lock: */
+	nodemask_t			mems_allowed;
+	/* Seqence number to catch updates: */
+	seqcount_t			mems_allowed_seq;
+	int				cpuset_mem_spread_rotor;
+	int				cpuset_slab_spread_rotor;
 #endif
 #ifdef CONFIG_CGROUPS
-	/* Control Group info protected by css_set_lock */
-	struct css_set __rcu *cgroups;
-	/* cg_list protected by css_set_lock and tsk->alloc_lock */
-	struct list_head cg_list;
+	/* Control Group info protected by css_set_lock: */
+	struct css_set __rcu		*cgroups;
+	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
+	struct list_head		cg_list;
 #endif
 #ifdef CONFIG_INTEL_RDT_A
-	int closid;
+	int				closid;
 #endif
 #ifdef CONFIG_FUTEX
-	struct robust_list_head __user *robust_list;
+	struct robust_list_head __user	*robust_list;
 #ifdef CONFIG_COMPAT
 	struct compat_robust_list_head __user *compat_robust_list;
 #endif
-	struct list_head pi_state_list;
-	struct futex_pi_state *pi_state_cache;
+	struct list_head		pi_state_list;
+	struct futex_pi_state		*pi_state_cache;
 #endif
 #ifdef CONFIG_PERF_EVENTS
-	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
-	struct mutex perf_event_mutex;
-	struct list_head perf_event_list;
+	struct perf_event_context	*perf_event_ctxp[perf_nr_task_contexts];
+	struct mutex			perf_event_mutex;
+	struct list_head		perf_event_list;
 #endif
 #ifdef CONFIG_DEBUG_PREEMPT
-	unsigned long preempt_disable_ip;
+	unsigned long			preempt_disable_ip;
 #endif
 #ifdef CONFIG_NUMA
-	struct mempolicy *mempolicy;	/* Protected by alloc_lock */
-	short il_next;
-	short pref_node_fork;
+	/* Protected by alloc_lock: */
+	struct mempolicy		*mempolicy;
+	short				il_next;
+	short				pref_node_fork;
 #endif
 #ifdef CONFIG_NUMA_BALANCING
-	int numa_scan_seq;
-	unsigned int numa_scan_period;
-	unsigned int numa_scan_period_max;
-	int numa_preferred_nid;
-	unsigned long numa_migrate_retry;
-	u64 node_stamp;			/* migration stamp  */
-	u64 last_task_numa_placement;
-	u64 last_sum_exec_runtime;
-	struct callback_head numa_work;
-
-	struct list_head numa_entry;
-	struct numa_group *numa_group;
+	int				numa_scan_seq;
+	unsigned int			numa_scan_period;
+	unsigned int			numa_scan_period_max;
+	int				numa_preferred_nid;
+	unsigned long			numa_migrate_retry;
+	/* Migration stamp: */
+	u64				node_stamp;
+	u64				last_task_numa_placement;
+	u64				last_sum_exec_runtime;
+	struct callback_head		numa_work;
+
+	struct list_head		numa_entry;
+	struct numa_group		*numa_group;
 
 	/*
 	 * numa_faults is an array split into four regions:
@@ -840,8 +912,8 @@ struct task_struct {
 	 * during the current scan window. When the scan completes, the counts
 	 * in faults_memory and faults_cpu decay and these values are copied.
 	 */
-	unsigned long *numa_faults;
-	unsigned long total_numa_faults;
+	unsigned long			*numa_faults;
+	unsigned long			total_numa_faults;
 
 	/*
 	 * numa_faults_locality tracks if faults recorded during the last
@@ -849,119 +921,132 @@ struct task_struct {
 	 * period is adapted based on the locality of the faults with different
 	 * weights depending on whether they were shared or private faults
 	 */
-	unsigned long numa_faults_locality[3];
+	unsigned long			numa_faults_locality[3];
 
-	unsigned long numa_pages_migrated;
+	unsigned long			numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
-	struct tlbflush_unmap_batch tlb_ubc;
+	struct tlbflush_unmap_batch	tlb_ubc;
 
-	struct rcu_head rcu;
+	struct rcu_head			rcu;
 
-	/*
-	 * cache last used pipe for splice
-	 */
-	struct pipe_inode_info *splice_pipe;
+	/* Cache last used pipe for splice(): */
+	struct pipe_inode_info		*splice_pipe;
 
-	struct page_frag task_frag;
+	struct page_frag		task_frag;
 
 #ifdef CONFIG_TASK_DELAY_ACCT
 	struct task_delay_info		*delays;
 #endif
 
 #ifdef CONFIG_FAULT_INJECTION
-	int make_it_fail;
+	int				make_it_fail;
 #endif
 	/*
-	 * when (nr_dirtied >= nr_dirtied_pause), it's time to call
-	 * balance_dirty_pages() for some dirty throttling pause
+	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
+	 * balance_dirty_pages() for a dirty throttling pause:
 	 */
-	int nr_dirtied;
-	int nr_dirtied_pause;
-	unsigned long dirty_paused_when; /* start of a write-and-pause period */
+	int				nr_dirtied;
+	int				nr_dirtied_pause;
+	/* Start of a write-and-pause period: */
+	unsigned long			dirty_paused_when;
 
 #ifdef CONFIG_LATENCYTOP
-	int latency_record_count;
-	struct latency_record latency_record[LT_SAVECOUNT];
+	int				latency_record_count;
+	struct latency_record		latency_record[LT_SAVECOUNT];
 #endif
 	/*
-	 * time slack values; these are used to round up poll() and
+	 * Time slack values; these are used to round up poll() and
 	 * select() etc timeout values. These are in nanoseconds.
 	 */
-	u64 timer_slack_ns;
-	u64 default_timer_slack_ns;
+	u64				timer_slack_ns;
+	u64				default_timer_slack_ns;
 
 #ifdef CONFIG_KASAN
-	unsigned int kasan_depth;
+	unsigned int			kasan_depth;
 #endif
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	/* Index of current stored address in ret_stack */
-	int curr_ret_stack;
-	/* Stack of return addresses for return function tracing */
-	struct ftrace_ret_stack	*ret_stack;
-	/* time stamp for last schedule */
-	unsigned long long ftrace_timestamp;
+	/* Index of current stored address in ret_stack: */
+	int				curr_ret_stack;
+
+	/* Stack of return addresses for return function tracing: */
+	struct ftrace_ret_stack		*ret_stack;
+
+	/* Timestamp for last schedule: */
+	unsigned long long		ftrace_timestamp;
+
 	/*
 	 * Number of functions that haven't been traced
-	 * because of depth overrun.
+	 * because of depth overrun:
 	 */
-	atomic_t trace_overrun;
-	/* Pause for the tracing */
-	atomic_t tracing_graph_pause;
+	atomic_t			trace_overrun;
+
+	/* Pause tracing: */
+	atomic_t			tracing_graph_pause;
 #endif
+
 #ifdef CONFIG_TRACING
-	/* state flags for use by tracers */
-	unsigned long trace;
-	/* bitmask and counter of trace recursion */
-	unsigned long trace_recursion;
+	/* State flags for use by tracers: */
+	unsigned long			trace;
+
+	/* Bitmask and counter of trace recursion: */
+	unsigned long			trace_recursion;
 #endif /* CONFIG_TRACING */
+
 #ifdef CONFIG_KCOV
-	/* Coverage collection mode enabled for this task (0 if disabled). */
-	enum kcov_mode kcov_mode;
-	/* Size of the kcov_area. */
-	unsigned	kcov_size;
-	/* Buffer for coverage collection. */
-	void		*kcov_area;
-	/* kcov desciptor wired with this task or NULL. */
-	struct kcov	*kcov;
+	/* Coverage collection mode enabled for this task (0 if disabled): */
+	enum kcov_mode			kcov_mode;
+
+	/* Size of the kcov_area: */
+	unsigned int			kcov_size;
+
+	/* Buffer for coverage collection: */
+	void				*kcov_area;
+
+	/* KCOV descriptor wired with this task or NULL: */
+	struct kcov			*kcov;
 #endif
+
 #ifdef CONFIG_MEMCG
-	struct mem_cgroup *memcg_in_oom;
-	gfp_t memcg_oom_gfp_mask;
-	int memcg_oom_order;
+	struct mem_cgroup		*memcg_in_oom;
+	gfp_t				memcg_oom_gfp_mask;
+	int				memcg_oom_order;
 
-	/* number of pages to reclaim on returning to userland */
-	unsigned int memcg_nr_pages_over_high;
+	/* Number of pages to reclaim on returning to userland: */
+	unsigned int			memcg_nr_pages_over_high;
 #endif
+
 #ifdef CONFIG_UPROBES
-	struct uprobe_task *utask;
+	struct uprobe_task		*utask;
 #endif
 #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
-	unsigned int	sequential_io;
-	unsigned int	sequential_io_avg;
+	unsigned int			sequential_io;
+	unsigned int			sequential_io_avg;
 #endif
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-	unsigned long	task_state_change;
+	unsigned long			task_state_change;
 #endif
-	int pagefault_disabled;
+	int				pagefault_disabled;
 #ifdef CONFIG_MMU
-	struct task_struct *oom_reaper_list;
+	struct task_struct		*oom_reaper_list;
 #endif
 #ifdef CONFIG_VMAP_STACK
-	struct vm_struct *stack_vm_area;
+	struct vm_struct		*stack_vm_area;
 #endif
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-	/* A live task holds one reference. */
-	atomic_t stack_refcount;
+	/* A live task holds one reference: */
+	atomic_t			stack_refcount;
 #endif
-/* CPU-specific state of this task */
-	struct thread_struct thread;
-/*
- * WARNING: on x86, 'thread_struct' contains a variable-sized
- * structure.  It *MUST* be at the end of 'task_struct'.
- *
- * Do not put anything below here!
- */
+	/* CPU-specific state of this task: */
+	struct thread_struct		thread;
+
+	/*
+	 * WARNING: on x86, 'thread_struct' contains a variable-sized
+	 * structure.  It *MUST* be at the end of 'task_struct'.
+	 *
+	 * Do not put anything below here!
+	 */
 };
 
 static inline struct pid *task_pid(struct task_struct *task)
@@ -975,7 +1060,7 @@ static inline struct pid *task_tgid(struct task_struct *task)
 }
 
 /*
- * Without tasklist or rcu lock it is not safe to dereference
+ * Without tasklist or RCU lock it is not safe to dereference
  * the result of task_pgrp/task_session even if task == current,
  * we can race with another thread doing sys_setsid/sys_setpgid.
  */
@@ -1002,16 +1087,14 @@ static inline struct pid *task_session(struct task_struct *task)
  *
  * see also pid_nr() etc in include/linux/pid.h
  */
-pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
-			struct pid_namespace *ns);
+pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns);
 
 static inline pid_t task_pid_nr(struct task_struct *tsk)
 {
 	return tsk->pid;
 }
 
-static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
-					struct pid_namespace *ns)
+static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 {
 	return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
 }
@@ -1027,15 +1110,28 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk)
 	return tsk->tgid;
 }
 
-pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+extern pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
 
 static inline pid_t task_tgid_vnr(struct task_struct *tsk)
 {
 	return pid_vnr(task_tgid(tsk));
 }
 
+/**
+ * pid_alive - check that a task structure is not stale
+ * @p: Task structure to be checked.
+ *
+ * Test if a process is not yet dead (at most zombie state)
+ * If pid_alive fails, then pointers within the task structure
+ * can be stale and must not be dereferenced.
+ *
+ * Return: 1 if the process is alive. 0 otherwise.
+ */
+static inline int pid_alive(const struct task_struct *p)
+{
+	return p->pids[PIDTYPE_PID].pid != NULL;
+}
 
-static inline int pid_alive(const struct task_struct *p);
 static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
 {
 	pid_t pid = 0;
@@ -1053,8 +1149,7 @@ static inline pid_t task_ppid_nr(const struct task_struct *tsk)
 	return task_ppid_nr_ns(tsk, &init_pid_ns);
 }
 
-static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
-					struct pid_namespace *ns)
+static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 {
 	return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
 }
@@ -1065,8 +1160,7 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
 }
 
 
-static inline pid_t task_session_nr_ns(struct task_struct *tsk,
-					struct pid_namespace *ns)
+static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 {
 	return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
 }
@@ -1076,27 +1170,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk)
 	return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
 }
 
-/* obsolete, do not use */
+/* Obsolete, do not use: */
 static inline pid_t task_pgrp_nr(struct task_struct *tsk)
 {
 	return task_pgrp_nr_ns(tsk, &init_pid_ns);
 }
 
-/**
- * pid_alive - check that a task structure is not stale
- * @p: Task structure to be checked.
- *
- * Test if a process is not yet dead (at most zombie state)
- * If pid_alive fails, then pointers within the task structure
- * can be stale and must not be dereferenced.
- *
- * Return: 1 if the process is alive. 0 otherwise.
- */
-static inline int pid_alive(const struct task_struct *p)
-{
-	return p->pids[PIDTYPE_PID].pid != NULL;
-}
-
 /**
  * is_global_init - check if a task structure is init. Since init
  * is free to have sub-threads we need to check tgid.
@@ -1116,34 +1195,34 @@ extern struct pid *cad_pid;
 /*
  * Per process flags
  */
-#define PF_IDLE		0x00000002	/* I am an IDLE thread */
-#define PF_EXITING	0x00000004	/* getting shut down */
-#define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
-#define PF_VCPU		0x00000010	/* I'm a virtual CPU */
-#define PF_WQ_WORKER	0x00000020	/* I'm a workqueue worker */
-#define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
-#define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
-#define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
-#define PF_DUMPCORE	0x00000200	/* dumped core */
-#define PF_SIGNALED	0x00000400	/* killed by a signal */
-#define PF_MEMALLOC	0x00000800	/* Allocating memory */
-#define PF_NPROC_EXCEEDED 0x00001000	/* set_user noticed that RLIMIT_NPROC was exceeded */
-#define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
-#define PF_USED_ASYNC	0x00004000	/* used async_schedule*(), used by module init */
-#define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
-#define PF_FROZEN	0x00010000	/* frozen for system suspend */
-#define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
-#define PF_KSWAPD	0x00040000	/* I am kswapd */
-#define PF_MEMALLOC_NOIO 0x00080000	/* Allocating memory without IO involved */
-#define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
-#define PF_KTHREAD	0x00200000	/* I am a kernel thread */
-#define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
-#define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
-#define PF_NO_SETAFFINITY 0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
-#define PF_MCE_EARLY    0x08000000      /* Early kill for mce process policy */
-#define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
-#define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezable */
-#define PF_SUSPEND_TASK 0x80000000      /* this thread called freeze_processes and should not be frozen */
+#define PF_IDLE			0x00000002	/* I am an IDLE thread */
+#define PF_EXITING		0x00000004	/* Getting shut down */
+#define PF_EXITPIDONE		0x00000008	/* PI exit done on shut down */
+#define PF_VCPU			0x00000010	/* I'm a virtual CPU */
+#define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
+#define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
+#define PF_MCE_PROCESS		0x00000080      /* Process policy on mce errors */
+#define PF_SUPERPRIV		0x00000100	/* Used super-user privileges */
+#define PF_DUMPCORE		0x00000200	/* Dumped core */
+#define PF_SIGNALED		0x00000400	/* Killed by a signal */
+#define PF_MEMALLOC		0x00000800	/* Allocating memory */
+#define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC was exceeded */
+#define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
+#define PF_USED_ASYNC		0x00004000	/* Used async_schedule*(), used by module init */
+#define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
+#define PF_FROZEN		0x00010000	/* Frozen for system suspend */
+#define PF_FSTRANS		0x00020000	/* Inside a filesystem transaction */
+#define PF_KSWAPD		0x00040000	/* I am kswapd */
+#define PF_MEMALLOC_NOIO	0x00080000	/* Allocating memory without IO involved */
+#define PF_LESS_THROTTLE	0x00100000	/* Throttle me less: I clean memory */
+#define PF_KTHREAD		0x00200000	/* I am a kernel thread */
+#define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
+#define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
+#define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
+#define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
+#define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */
+#define PF_FREEZER_SKIP		0x40000000	/* Freezer should not count it as freezable */
+#define PF_SUSPEND_TASK		0x80000000      /* This thread called freeze_processes() and should not be frozen */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
@@ -1156,33 +1235,38 @@ extern struct pid *cad_pid;
  * child is not running and in turn not changing child->flags
  * at the same time the parent does it.
  */
-#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0)
-#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0)
-#define clear_used_math() clear_stopped_child_used_math(current)
-#define set_used_math() set_stopped_child_used_math(current)
+#define clear_stopped_child_used_math(child)	do { (child)->flags &= ~PF_USED_MATH; } while (0)
+#define set_stopped_child_used_math(child)	do { (child)->flags |= PF_USED_MATH; } while (0)
+#define clear_used_math()			clear_stopped_child_used_math(current)
+#define set_used_math()				set_stopped_child_used_math(current)
+
 #define conditional_stopped_child_used_math(condition, child) \
 	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)
-#define conditional_used_math(condition) \
-	conditional_stopped_child_used_math(condition, current)
+
+#define conditional_used_math(condition)	conditional_stopped_child_used_math(condition, current)
+
 #define copy_to_stopped_child_used_math(child) \
 	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)
+
 /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
-#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
-#define used_math() tsk_used_math(current)
+#define tsk_used_math(p)			((p)->flags & PF_USED_MATH)
+#define used_math()				tsk_used_math(current)
 
 /* Per-process atomic flags. */
-#define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
-#define PFA_SPREAD_PAGE  1      /* Spread page cache over cpuset */
-#define PFA_SPREAD_SLAB  2      /* Spread some slab caches over cpuset */
-#define PFA_LMK_WAITING  3      /* Lowmemorykiller is waiting */
+#define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
+#define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
+#define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
+#define PFA_LMK_WAITING			3	/* Lowmemorykiller is waiting */
 
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
 	{ return test_bit(PFA_##name, &p->atomic_flags); }
+
 #define TASK_PFA_SET(name, func)					\
 	static inline void task_set_##func(struct task_struct *p)	\
 	{ set_bit(PFA_##name, &p->atomic_flags); }
+
 #define TASK_PFA_CLEAR(name, func)					\
 	static inline void task_clear_##func(struct task_struct *p)	\
 	{ clear_bit(PFA_##name, &p->atomic_flags); }
@@ -1201,30 +1285,23 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
 TASK_PFA_TEST(LMK_WAITING, lmk_waiting)
 TASK_PFA_SET(LMK_WAITING, lmk_waiting)
 
-static inline void tsk_restore_flags(struct task_struct *task,
-				unsigned long orig_flags, unsigned long flags)
+static inline void
+tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags)
 {
 	task->flags &= ~flags;
 	task->flags |= orig_flags & flags;
 }
 
-extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
-				     const struct cpumask *trial);
-extern int task_can_attach(struct task_struct *p,
-			   const struct cpumask *cs_cpus_allowed);
+extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
+extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
 #ifdef CONFIG_SMP
-extern void do_set_cpus_allowed(struct task_struct *p,
-			       const struct cpumask *new_mask);
-
-extern int set_cpus_allowed_ptr(struct task_struct *p,
-				const struct cpumask *new_mask);
+extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
+extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
 #else
-static inline void do_set_cpus_allowed(struct task_struct *p,
-				      const struct cpumask *new_mask)
+static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
 }
-static inline int set_cpus_allowed_ptr(struct task_struct *p,
-				       const struct cpumask *new_mask)
+static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
 	if (!cpumask_test_cpu(0, new_mask))
 		return -EINVAL;
@@ -1239,6 +1316,7 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
 extern int yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
+
 /**
  * task_nice - return the nice value of a given task.
  * @p: the task in question.
@@ -1249,16 +1327,15 @@ static inline int task_nice(const struct task_struct *p)
 {
 	return PRIO_TO_NICE((p)->static_prio);
 }
+
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
-extern int sched_setscheduler(struct task_struct *, int,
-			      const struct sched_param *);
-extern int sched_setscheduler_nocheck(struct task_struct *, int,
-				      const struct sched_param *);
-extern int sched_setattr(struct task_struct *,
-			 const struct sched_attr *);
+extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
+extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
+extern int sched_setattr(struct task_struct *, const struct sched_attr *);
 extern struct task_struct *idle_task(int cpu);
+
 /**
  * is_idle_task - is the specified task an idle task?
  * @p: the task in question.
@@ -1269,6 +1346,7 @@ static inline bool is_idle_task(const struct task_struct *p)
 {
 	return !!(p->flags & PF_IDLE);
 }
+
 extern struct task_struct *curr_task(int cpu);
 extern void ia64_set_curr_task(int cpu, struct task_struct *p);
 
@@ -1302,23 +1380,25 @@ static inline struct thread_info *task_thread_info(struct task_struct *task)
  */
 
 extern struct task_struct *find_task_by_vpid(pid_t nr);
-extern struct task_struct *find_task_by_pid_ns(pid_t nr,
-		struct pid_namespace *ns);
+extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns);
 
 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
 extern int wake_up_process(struct task_struct *tsk);
 extern void wake_up_new_task(struct task_struct *tsk);
+
 #ifdef CONFIG_SMP
- extern void kick_process(struct task_struct *tsk);
+extern void kick_process(struct task_struct *tsk);
 #else
- static inline void kick_process(struct task_struct *tsk) { }
+static inline void kick_process(struct task_struct *tsk) { }
 #endif
 
 extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
+
 static inline void set_task_comm(struct task_struct *tsk, const char *from)
 {
 	__set_task_comm(tsk, from, false);
 }
+
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
@@ -1326,15 +1406,15 @@ void scheduler_ipi(void);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
 static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p,
-					       long match_state)
+static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 {
 	return 1;
 }
 #endif
 
-/* set thread flags in other task's structures
- * - see asm/thread_info.h for TIF_xxxx flags available
+/*
+ * Set thread flags in other task's structures.
+ * See asm/thread_info.h for TIF_xxxx flags available:
  */
 static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
 {
-- 
cgit v1.2.3


From a528d35e8bfcc521d7cb70aaf03e1bd296c8493f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 31 Jan 2017 16:46:22 +0000
Subject: statx: Add a system call to make enhanced file info available

Add a system call to make extended file information available, including
file creation and some attribute flags where available through the
underlying filesystem.

The getattr inode operation is altered to take two additional arguments: a
u32 request_mask and an unsigned int flags that indicate the
synchronisation mode.  This change is propagated to the vfs_getattr*()
function.

Functions like vfs_stat() are now inline wrappers around new functions
vfs_statx() and vfs_statx_fd() to reduce stack usage.

========
OVERVIEW
========

The idea was initially proposed as a set of xattrs that could be retrieved
with getxattr(), but the general preference proved to be for a new syscall
with an extended stat structure.

A number of requests were gathered for features to be included.  The
following have been included:

 (1) Make the fields a consistent size on all arches and make them large.

 (2) Spare space, request flags and information flags are provided for
     future expansion.

 (3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an
     __s64).

 (4) Creation time: The SMB protocol carries the creation time, which could
     be exported by Samba, which will in turn help CIFS make use of
     FS-Cache as that can be used for coherency data (stx_btime).

     This is also specified in NFSv4 as a recommended attribute and could
     be exported by NFSD [Steve French].

 (5) Lightweight stat: Ask for just those details of interest, and allow a
     netfs (such as NFS) to approximate anything not of interest, possibly
     without going to the server [Trond Myklebust, Ulrich Drepper, Andreas
     Dilger] (AT_STATX_DONT_SYNC).

 (6) Heavyweight stat: Force a netfs to go to the server, even if it thinks
     its cached attributes are up to date [Trond Myklebust]
     (AT_STATX_FORCE_SYNC).

And the following have been left out for future extension:

 (7) Data version number: Could be used by userspace NFS servers [Aneesh
     Kumar].

     Can also be used to modify fill_post_wcc() in NFSD which retrieves
     i_version directly, but has just called vfs_getattr().  It could get
     it from the kstat struct if it used vfs_xgetattr() instead.

     (There's disagreement on the exact semantics of a single field, since
     not all filesystems do this the same way).

 (8) BSD stat compatibility: Including more fields from the BSD stat such
     as creation time (st_btime) and inode generation number (st_gen)
     [Jeremy Allison, Bernd Schubert].

 (9) Inode generation number: Useful for FUSE and userspace NFS servers
     [Bernd Schubert].

     (This was asked for but later deemed unnecessary with the
     open-by-handle capability available and caused disagreement as to
     whether it's a security hole or not).

(10) Extra coherency data may be useful in making backups [Andreas Dilger].

     (No particular data were offered, but things like last backup
     timestamp, the data version number and the DOS archive bit would come
     into this category).

(11) Allow the filesystem to indicate what it can/cannot provide: A
     filesystem can now say it doesn't support a standard stat feature if
     that isn't available, so if, for instance, inode numbers or UIDs don't
     exist or are fabricated locally...

     (This requires a separate system call - I have an fsinfo() call idea
     for this).

(12) Store a 16-byte volume ID in the superblock that can be returned in
     struct xstat [Steve French].

     (Deferred to fsinfo).

(13) Include granularity fields in the time data to indicate the
     granularity of each of the times (NFSv4 time_delta) [Steve French].

     (Deferred to fsinfo).

(14) FS_IOC_GETFLAGS value.  These could be translated to BSD's st_flags.
     Note that the Linux IOC flags are a mess and filesystems such as Ext4
     define flags that aren't in linux/fs.h, so translation in the kernel
     may be a necessity (or, possibly, we provide the filesystem type too).

     (Some attributes are made available in stx_attributes, but the general
     feeling was that the IOC flags were to ext[234]-specific and shouldn't
     be exposed through statx this way).

(15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer,
     Michael Kerrisk].

     (Deferred, probably to fsinfo.  Finding out if there's an ACL or
     seclabal might require extra filesystem operations).

(16) Femtosecond-resolution timestamps [Dave Chinner].

     (A __reserved field has been left in the statx_timestamp struct for
     this - if there proves to be a need).

(17) A set multiple attributes syscall to go with this.

===============
NEW SYSTEM CALL
===============

The new system call is:

	int ret = statx(int dfd,
			const char *filename,
			unsigned int flags,
			unsigned int mask,
			struct statx *buffer);

The dfd, filename and flags parameters indicate the file to query, in a
similar way to fstatat().  There is no equivalent of lstat() as that can be
emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags.  There is
also no equivalent of fstat() as that can be emulated by passing a NULL
filename to statx() with the fd of interest in dfd.

Whether or not statx() synchronises the attributes with the backing store
can be controlled by OR'ing a value into the flags argument (this typically
only affects network filesystems):

 (1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this
     respect.

 (2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise
     its attributes with the server - which might require data writeback to
     occur to get the timestamps correct.

 (3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a
     network filesystem.  The resulting values should be considered
     approximate.

mask is a bitmask indicating the fields in struct statx that are of
interest to the caller.  The user should set this to STATX_BASIC_STATS to
get the basic set returned by stat().  It should be noted that asking for
more information may entail extra I/O operations.

buffer points to the destination for the data.  This must be 256 bytes in
size.

======================
MAIN ATTRIBUTES RECORD
======================

The following structures are defined in which to return the main attribute
set:

	struct statx_timestamp {
		__s64	tv_sec;
		__s32	tv_nsec;
		__s32	__reserved;
	};

	struct statx {
		__u32	stx_mask;
		__u32	stx_blksize;
		__u64	stx_attributes;
		__u32	stx_nlink;
		__u32	stx_uid;
		__u32	stx_gid;
		__u16	stx_mode;
		__u16	__spare0[1];
		__u64	stx_ino;
		__u64	stx_size;
		__u64	stx_blocks;
		__u64	__spare1[1];
		struct statx_timestamp	stx_atime;
		struct statx_timestamp	stx_btime;
		struct statx_timestamp	stx_ctime;
		struct statx_timestamp	stx_mtime;
		__u32	stx_rdev_major;
		__u32	stx_rdev_minor;
		__u32	stx_dev_major;
		__u32	stx_dev_minor;
		__u64	__spare2[14];
	};

The defined bits in request_mask and stx_mask are:

	STATX_TYPE		Want/got stx_mode & S_IFMT
	STATX_MODE		Want/got stx_mode & ~S_IFMT
	STATX_NLINK		Want/got stx_nlink
	STATX_UID		Want/got stx_uid
	STATX_GID		Want/got stx_gid
	STATX_ATIME		Want/got stx_atime{,_ns}
	STATX_MTIME		Want/got stx_mtime{,_ns}
	STATX_CTIME		Want/got stx_ctime{,_ns}
	STATX_INO		Want/got stx_ino
	STATX_SIZE		Want/got stx_size
	STATX_BLOCKS		Want/got stx_blocks
	STATX_BASIC_STATS	[The stuff in the normal stat struct]
	STATX_BTIME		Want/got stx_btime{,_ns}
	STATX_ALL		[All currently available stuff]

stx_btime is the file creation time, stx_mask is a bitmask indicating the
data provided and __spares*[] are where as-yet undefined fields can be
placed.

Time fields are structures with separate seconds and nanoseconds fields
plus a reserved field in case we want to add even finer resolution.  Note
that times will be negative if before 1970; in such a case, the nanosecond
fields will also be negative if not zero.

The bits defined in the stx_attributes field convey information about a
file, how it is accessed, where it is and what it does.  The following
attributes map to FS_*_FL flags and are the same numerical value:

	STATX_ATTR_COMPRESSED		File is compressed by the fs
	STATX_ATTR_IMMUTABLE		File is marked immutable
	STATX_ATTR_APPEND		File is append-only
	STATX_ATTR_NODUMP		File is not to be dumped
	STATX_ATTR_ENCRYPTED		File requires key to decrypt in fs

Within the kernel, the supported flags are listed by:

	KSTAT_ATTR_FS_IOC_FLAGS

[Are any other IOC flags of sufficient general interest to be exposed
through this interface?]

New flags include:

	STATX_ATTR_AUTOMOUNT		Object is an automount trigger

These are for the use of GUI tools that might want to mark files specially,
depending on what they are.

Fields in struct statx come in a number of classes:

 (0) stx_dev_*, stx_blksize.

     These are local system information and are always available.

 (1) stx_mode, stx_nlinks, stx_uid, stx_gid, stx_[amc]time, stx_ino,
     stx_size, stx_blocks.

     These will be returned whether the caller asks for them or not.  The
     corresponding bits in stx_mask will be set to indicate whether they
     actually have valid values.

     If the caller didn't ask for them, then they may be approximated.  For
     example, NFS won't waste any time updating them from the server,
     unless as a byproduct of updating something requested.

     If the values don't actually exist for the underlying object (such as
     UID or GID on a DOS file), then the bit won't be set in the stx_mask,
     even if the caller asked for the value.  In such a case, the returned
     value will be a fabrication.

     Note that there are instances where the type might not be valid, for
     instance Windows reparse points.

 (2) stx_rdev_*.

     This will be set only if stx_mode indicates we're looking at a
     blockdev or a chardev, otherwise will be 0.

 (3) stx_btime.

     Similar to (1), except this will be set to 0 if it doesn't exist.

=======
TESTING
=======

The following test program can be used to test the statx system call:

	samples/statx/test-statx.c

Just compile and run, passing it paths to the files you want to examine.
The file is built automatically if CONFIG_SAMPLES is enabled.

Here's some example output.  Firstly, an NFS directory that crosses to
another FSID.  Note that the AUTOMOUNT attribute is set because transiting
this directory will cause d_automount to be invoked by the VFS.

	[root@andromeda ~]# /tmp/test-statx -A /warthog/data
	statx(/warthog/data) = 0
	results=7ff
	  Size: 4096            Blocks: 8          IO Block: 1048576  directory
	Device: 00:26           Inode: 1703937     Links: 125
	Access: (3777/drwxrwxrwx)  Uid:     0   Gid:  4041
	Access: 2016-11-24 09:02:12.219699527+0000
	Modify: 2016-11-17 10:44:36.225653653+0000
	Change: 2016-11-17 10:44:36.225653653+0000
	Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------)

Secondly, the result of automounting on that directory.

	[root@andromeda ~]# /tmp/test-statx /warthog/data
	statx(/warthog/data) = 0
	results=7ff
	  Size: 4096            Blocks: 8          IO Block: 1048576  directory
	Device: 00:27           Inode: 2           Links: 125
	Access: (3777/drwxrwxrwx)  Uid:     0   Gid:  4041
	Access: 2016-11-24 09:02:12.219699527+0000
	Modify: 2016-11-17 10:44:36.225653653+0000
	Change: 2016-11-17 10:44:36.225653653+0000

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h       | 35 +++++++++++++++++++++++++++--------
 include/linux/nfs_fs.h   |  2 +-
 include/linux/stat.h     | 24 ++++++++++++++++++------
 include/linux/syscalls.h |  3 +++
 4 files changed, 49 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 52350947c670..aad3fd0ff5f8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1709,7 +1709,7 @@ struct inode_operations {
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *, unsigned int);
 	int (*setattr) (struct dentry *, struct iattr *);
-	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
+	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
@@ -2902,8 +2902,8 @@ extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_link(void *);
 extern void generic_fillattr(struct inode *, struct kstat *);
-int vfs_getattr_nosec(struct path *path, struct kstat *stat);
-extern int vfs_getattr(struct path *, struct kstat *);
+extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
+extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_add_bytes(struct inode *inode, loff_t bytes);
 void __inode_sub_bytes(struct inode *inode, loff_t bytes);
@@ -2916,10 +2916,29 @@ extern const struct inode_operations simple_symlink_inode_operations;
 
 extern int iterate_dir(struct file *, struct dir_context *);
 
-extern int vfs_stat(const char __user *, struct kstat *);
-extern int vfs_lstat(const char __user *, struct kstat *);
-extern int vfs_fstat(unsigned int, struct kstat *);
-extern int vfs_fstatat(int , const char __user *, struct kstat *, int);
+extern int vfs_statx(int, const char __user *, int, struct kstat *, u32);
+extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int);
+
+static inline int vfs_stat(const char __user *filename, struct kstat *stat)
+{
+	return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS);
+}
+static inline int vfs_lstat(const char __user *name, struct kstat *stat)
+{
+	return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW,
+			 stat, STATX_BASIC_STATS);
+}
+static inline int vfs_fstatat(int dfd, const char __user *filename,
+			      struct kstat *stat, int flags)
+{
+	return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS);
+}
+static inline int vfs_fstat(int fd, struct kstat *stat)
+{
+	return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0);
+}
+
+
 extern const char *vfs_get_link(struct dentry *, struct delayed_call *);
 extern int vfs_readlink(struct dentry *, char __user *, int);
 
@@ -2949,7 +2968,7 @@ extern int dcache_dir_close(struct inode *, struct file *);
 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
 extern int dcache_readdir(struct file *, struct dir_context *);
 extern int simple_setattr(struct dentry *, struct iattr *);
-extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int simple_getattr(const struct path *, struct kstat *, u32, unsigned int);
 extern int simple_statfs(struct dentry *, struct kstatfs *);
 extern int simple_open(struct inode *inode, struct file *file);
 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f1da8c8dd473..287f34161086 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -335,7 +335,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr);
-extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int nfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
 extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
 extern int nfs_permission(struct inode *, int);
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 075cb0c7eb2a..c76e524fb34b 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -18,20 +18,32 @@
 #include <linux/time.h>
 #include <linux/uidgid.h>
 
+#define KSTAT_QUERY_FLAGS (AT_STATX_SYNC_TYPE)
+
 struct kstat {
-	u64		ino;
-	dev_t		dev;
+	u32		result_mask;	/* What fields the user got */
 	umode_t		mode;
 	unsigned int	nlink;
+	uint32_t	blksize;	/* Preferred I/O size */
+	u64		attributes;
+#define KSTAT_ATTR_FS_IOC_FLAGS				\
+	(STATX_ATTR_COMPRESSED |			\
+	 STATX_ATTR_IMMUTABLE |				\
+	 STATX_ATTR_APPEND |				\
+	 STATX_ATTR_NODUMP |				\
+	 STATX_ATTR_ENCRYPTED				\
+	 )/* Attrs corresponding to FS_*_FL flags */
+	u64		ino;
+	dev_t		dev;
+	dev_t		rdev;
 	kuid_t		uid;
 	kgid_t		gid;
-	dev_t		rdev;
 	loff_t		size;
-	struct timespec  atime;
+	struct timespec	atime;
 	struct timespec	mtime;
 	struct timespec	ctime;
-	unsigned long	blksize;
-	unsigned long long	blocks;
+	struct timespec	btime;			/* File creation time */
+	u64		blocks;
 };
 
 #endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 91a740f6b884..980c3c9b06f8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -48,6 +48,7 @@ struct stat;
 struct stat64;
 struct statfs;
 struct statfs64;
+struct statx;
 struct __sysctl_args;
 struct sysinfo;
 struct timespec;
@@ -902,5 +903,7 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len,
 				  unsigned long prot, int pkey);
 asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val);
 asmlinkage long sys_pkey_free(int pkey);
+asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
+			  unsigned mask, struct statx __user *buffer);
 
 #endif
-- 
cgit v1.2.3


From cd8d860dcce906cd477be7d0648ba6f56a52eaa6 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Tue, 28 Feb 2017 11:32:22 -0500
Subject: jump_label: Fix anonymous union initialization

Pre-4.6 gcc do not allow direct static initialization of members of
anonymous structs/unions. After commit 3821fd35b58d ("jump_label:
Reduce the size of struct static_key") STATIC_KEY_INIT_{TRUE|FALSE}
definitions cannot be compiled with those older compilers.

Placing initializers inside curved brackets works around this problem.

Link: http://lkml.kernel.org/r/1488299542-30765-1-git-send-email-boris.ostrovsky@oracle.com

Fixes: 3821fd35b58d ("jump_label: Reduce the size of struct static_key")
Reviewed-by: Jason Baron <jbaron@akamai.com>
Compiled-by: Chris Mason <clm@fb.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/jump_label.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 680c98b2f41c..a7f90117cf7d 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -166,10 +166,10 @@ extern void static_key_disable(struct static_key *key);
  */
 #define STATIC_KEY_INIT_TRUE					\
 	{ .enabled = { 1 },					\
-	  .entries = (void *)JUMP_TYPE_TRUE }
+	  { .entries = (void *)JUMP_TYPE_TRUE } }
 #define STATIC_KEY_INIT_FALSE					\
 	{ .enabled = { 0 },					\
-	  .entries = (void *)JUMP_TYPE_FALSE }
+	  { .entries = (void *)JUMP_TYPE_FALSE } }
 
 #else  /* !HAVE_JUMP_LABEL */
 
-- 
cgit v1.2.3


From b17ef2ed624aa7c1f68ed11acd16ecbf80fe01d7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Mar 2017 17:28:45 -0500
Subject: jump_label: Add comment about initialization order for anonymous
 unions

Commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key")
broke old compilers that could not handle static initialization of anonymous
unions. Boris fixed it with a patch that added brackets around the static
initializer. But this creates a dependency between those initializers and
the structure's order of its fields. Document this dependency in case new
fields are added to struct static_key in the future.

Noted-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Suggested-by: Chris Mason <clm@fb.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/jump_label.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a7f90117cf7d..28e04a33535a 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -90,6 +90,13 @@ extern bool static_key_initialized;
 struct static_key {
 	atomic_t enabled;
 /*
+ * Note:
+ *   To make anonymous unions work with old compilers, the static
+ *   initialization of them requires brackets. This creates a dependency
+ *   on the order of the struct with the initializers. If any fields
+ *   are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
+ *   to be modified.
+ *
  * bit 0 => 1 if key is initially true
  *	    0 if initially false
  * bit 1 => 1 if points to struct static_key_mod
-- 
cgit v1.2.3


From 0878fff1f42c18e448ab5b8b4f6a3eb32365b5b6 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 5 Mar 2017 12:34:49 -0800
Subject: net: phy: Do not perform software reset for Generic PHY

The Generic PHY driver is a catch-all PHY driver and it should preserve
whatever prior initialization has been done by boot loader or firmware
agents. For specific PHY device configuration it is expected that a
specialized PHY driver would take over that role.

Resetting the generic PHY was a bad idea that has lead to several
complaints and downstream workarounds e.g: in OpenWrt/LEDE so restore
the behavior prior to 87aa9f9c61ad ("net: phy: consolidate PHY
reset in phy_init_hw()").

Reported-by: Felix Fietkau <nbd@nbd.name>
Fixes: 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 772476028a65..43a774873aa9 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev);
 int genphy_suspend(struct phy_device *phydev);
 int genphy_resume(struct phy_device *phydev);
 int genphy_soft_reset(struct phy_device *phydev);
+static inline int genphy_no_soft_reset(struct phy_device *phydev)
+{
+	return 0;
+}
 void phy_driver_unregister(struct phy_driver *drv);
 void phy_drivers_unregister(struct phy_driver *drv, int n);
 int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
-- 
cgit v1.2.3


From 4dfc050571523ac2bc02cbf948dd47621f7dd83f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 21 Feb 2017 11:32:47 +0000
Subject: KVM: arm/arm64: vgic-v3: Don't pretend to support IRQ/FIQ bypass

Our GICv3 emulation always presents ICC_SRE_EL1 with DIB/DFB set to
zero, which implies that there is a way to bypass the GIC and
inject raw IRQ/FIQ by driving the CPU pins.

Of course, we don't allow that when the GIC is configured, but
we fail to indicate that to the guest. The obvious fix is to
set these bits (and never let them being changed again).

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Christoffer Dall <cdall@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 672cfef72fc8..97cbca19430d 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -373,6 +373,8 @@
 #define ICC_IGRPEN0_EL1_MASK		(1 << ICC_IGRPEN0_EL1_SHIFT)
 #define ICC_IGRPEN1_EL1_SHIFT		0
 #define ICC_IGRPEN1_EL1_MASK		(1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB			(1U << 2)
+#define ICC_SRE_EL1_DFB			(1U << 1)
 #define ICC_SRE_EL1_SRE			(1U << 0)
 
 /*
-- 
cgit v1.2.3


From b3e228473e6cec7cf83b4025b4570c8066ab2dd8 Mon Sep 17 00:00:00 2001
From: Mian Yousaf Kaukab <yousaf.kaukab@suse.com>
Date: Thu, 2 Mar 2017 16:11:47 +0100
Subject: irqdomain: Add empty irq_domain_check_msi_remap

Fix following build error for s390:
drivers/vfio/vfio_iommu_type1.c: In function 'vfio_iommu_type1_attach_group':
drivers/vfio/vfio_iommu_type1.c:1290:25: error: implicit declaration of function 'irq_domain_check_msi_remap'

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Mian Yousaf Kaukab <yousaf.kaukab@suse.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqdomain.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 188eced6813e..9f3616085423 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode(
 {
 	return NULL;
 }
+static inline bool irq_domain_check_msi_remap(void)
+{
+	return false;
+}
 #endif /* !CONFIG_IRQ_DOMAIN */
 
 #endif /* _LINUX_IRQDOMAIN_H */
-- 
cgit v1.2.3


From 040757f738e13caaa9c5078bca79aa97e11dde88 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 5 Mar 2017 15:03:22 -0600
Subject: ucount: Remove the atomicity from ucount->count

Always increment/decrement ucount->count under the ucounts_lock.  The
increments are there already and moving the decrements there means the
locking logic of the code is simpler.  This simplification in the
locking logic fixes a race between put_ucounts and get_ucounts that
could result in a use-after-free because the count could go zero then
be found by get_ucounts and then be freed by put_ucounts.

A bug presumably this one was found by a combination of syzkaller and
KASAN.  JongWhan Kim reported the syzkaller failure and Dmitry Vyukov
spotted the race in the code.

Cc: stable@vger.kernel.org
Fixes: f6b2db1a3e8d ("userns: Make the count of user namespaces per user")
Reported-by: JongHwan Kim <zzoru007@gmail.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index be765234c0a2..32354b4b4b2b 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -72,7 +72,7 @@ struct ucounts {
 	struct hlist_node node;
 	struct user_namespace *ns;
 	kuid_t uid;
-	atomic_t count;
+	int count;
 	atomic_t ucount[UCOUNT_COUNTS];
 };
 
-- 
cgit v1.2.3


From 7cc5e38f2f0b0b58a22a4c18a56348dd99a71270 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Sun, 12 Feb 2017 17:11:07 +0100
Subject: libceph: osd_request_timeout option

osd_request_timeout specifies how many seconds to wait for a response
from OSDs before returning -ETIMEDOUT from an OSD request.  0 (default)
means no limit.

osd_request_timeout is osdkeepalive-precise -- in-flight requests are
swept through every osdkeepalive seconds.  With ack vs commit behaviour
gone, abort_request() is really simple.

This is based on a patch from Artur Molchanov <artur.molchanov@synesis.ru>.

Tested-by: Artur Molchanov <artur.molchanov@synesis.ru>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/libceph.h    | 2 ++
 include/linux/ceph/osd_client.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 1816c5e26581..88cd5dc8e238 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
 	unsigned long mount_timeout;		/* jiffies */
 	unsigned long osd_idle_ttl;		/* jiffies */
 	unsigned long osd_keepalive_timeout;	/* jiffies */
+	unsigned long osd_request_timeout;	/* jiffies */
 
 	/*
 	 * any type that can't be simply compared or doesn't need need
@@ -68,6 +69,7 @@ struct ceph_options {
 #define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
 #define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
 #define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0  /* no timeout */
 
 #define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
 #define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 2ea0c282f3dc..c125b5d9e13c 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -189,6 +189,7 @@ struct ceph_osd_request {
 
 	/* internal */
 	unsigned long r_stamp;                /* jiffies, send or check time */
+	unsigned long r_start_stamp;          /* jiffies */
 	int r_attempts;
 	struct ceph_eversion r_replay_version; /* aka reassert_version */
 	u32 r_last_force_resend;
-- 
cgit v1.2.3


From 62f8f4d9066c1c6f2474845d1ca7e2891f2ae3fd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Mar 2017 10:52:16 -0800
Subject: dccp: fix use-after-free in dccp_feat_activate_values

Dmitry reported crashes in DCCP stack [1]

Problem here is that when I got rid of listener spinlock, I missed the
fact that DCCP stores a complex state in struct dccp_request_sock,
while TCP does not.

Since multiple cpus could access it at the same time, we need to add
protection.

[1]
BUG: KASAN: use-after-free in dccp_feat_activate_values+0x967/0xab0
net/dccp/feat.c:1541 at addr ffff88003713be68
Read of size 8 by task syz-executor2/8457
CPU: 2 PID: 8457 Comm: syz-executor2 Not tainted 4.10.0-rc7+ #127
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0x292/0x398 lib/dump_stack.c:51
 kasan_object_err+0x1c/0x70 mm/kasan/report.c:162
 print_address_description mm/kasan/report.c:200 [inline]
 kasan_report_error mm/kasan/report.c:289 [inline]
 kasan_report.part.1+0x20e/0x4e0 mm/kasan/report.c:311
 kasan_report mm/kasan/report.c:332 [inline]
 __asan_report_load8_noabort+0x29/0x30 mm/kasan/report.c:332
 dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541
 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121
 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457
 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186
 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711
 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322
 dst_input include/net/dst.h:507 [inline]
 ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203
 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190
 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228
 process_backlog+0xe5/0x6c0 net/core/dev.c:4839
 napi_poll net/core/dev.c:5202 [inline]
 net_rx_action+0xe70/0x1900 net/core/dev.c:5267
 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284
 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902
 </IRQ>
 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328
 do_softirq kernel/softirq.c:176 [inline]
 __local_bh_enable_ip+0x1f2/0x200 kernel/softirq.c:181
 local_bh_enable include/linux/bottom_half.h:31 [inline]
 rcu_read_unlock_bh include/linux/rcupdate.h:971 [inline]
 ip6_finish_output2+0xbb0/0x23d0 net/ipv6/ip6_output.c:123
 ip6_finish_output+0x302/0x960 net/ipv6/ip6_output.c:148
 NF_HOOK_COND include/linux/netfilter.h:246 [inline]
 ip6_output+0x1cb/0x8d0 net/ipv6/ip6_output.c:162
 ip6_xmit+0xcdf/0x20d0 include/net/dst.h:501
 inet6_csk_xmit+0x320/0x5f0 net/ipv6/inet6_connection_sock.c:179
 dccp_transmit_skb+0xb09/0x1120 net/dccp/output.c:141
 dccp_xmit_packet+0x215/0x760 net/dccp/output.c:280
 dccp_write_xmit+0x168/0x1d0 net/dccp/output.c:362
 dccp_sendmsg+0x79c/0xb10 net/dccp/proto.c:796
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
 sock_sendmsg_nosec net/socket.c:635 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:645
 SYSC_sendto+0x660/0x810 net/socket.c:1687
 SyS_sendto+0x40/0x50 net/socket.c:1655
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x4458b9
RSP: 002b:00007f8ceb77bb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 00000000004458b9
RDX: 0000000000000023 RSI: 0000000020e60000 RDI: 0000000000000017
RBP: 00000000006e1b90 R08: 00000000200f9fe1 R09: 0000000000000020
R10: 0000000000008010 R11: 0000000000000282 R12: 00000000007080a8
R13: 0000000000000000 R14: 00007f8ceb77c9c0 R15: 00007f8ceb77c700
Object at ffff88003713be50, in cache kmalloc-64 size: 64
Allocated:
PID = 8446
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
 save_stack+0x43/0xd0 mm/kasan/kasan.c:502
 set_track mm/kasan/kasan.c:514 [inline]
 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605
 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2738
 kmalloc include/linux/slab.h:490 [inline]
 dccp_feat_entry_new+0x214/0x410 net/dccp/feat.c:467
 dccp_feat_push_change+0x38/0x220 net/dccp/feat.c:487
 __feat_register_sp+0x223/0x2f0 net/dccp/feat.c:741
 dccp_feat_propagate_ccid+0x22b/0x2b0 net/dccp/feat.c:949
 dccp_feat_server_ccid_dependencies+0x1b3/0x250 net/dccp/feat.c:1012
 dccp_make_response+0x1f1/0xc90 net/dccp/output.c:423
 dccp_v6_send_response+0x4ec/0xc20 net/dccp/ipv6.c:217
 dccp_v6_conn_request+0xaba/0x11b0 net/dccp/ipv6.c:377
 dccp_rcv_state_process+0x51e/0x1650 net/dccp/input.c:606
 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632
 sk_backlog_rcv include/net/sock.h:893 [inline]
 __sk_receive_skb+0x36f/0xcc0 net/core/sock.c:479
 dccp_v6_rcv+0xba5/0x1d00 net/dccp/ipv6.c:742
 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322
 dst_input include/net/dst.h:507 [inline]
 ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203
 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190
 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228
 process_backlog+0xe5/0x6c0 net/core/dev.c:4839
 napi_poll net/core/dev.c:5202 [inline]
 net_rx_action+0xe70/0x1900 net/core/dev.c:5267
 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284
Freed:
PID = 15
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
 save_stack+0x43/0xd0 mm/kasan/kasan.c:502
 set_track mm/kasan/kasan.c:514 [inline]
 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578
 slab_free_hook mm/slub.c:1355 [inline]
 slab_free_freelist_hook mm/slub.c:1377 [inline]
 slab_free mm/slub.c:2954 [inline]
 kfree+0xe8/0x2b0 mm/slub.c:3874
 dccp_feat_entry_destructor.part.4+0x48/0x60 net/dccp/feat.c:418
 dccp_feat_entry_destructor net/dccp/feat.c:416 [inline]
 dccp_feat_list_pop net/dccp/feat.c:541 [inline]
 dccp_feat_activate_values+0x57f/0xab0 net/dccp/feat.c:1543
 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121
 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457
 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186
 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711
 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322
 dst_input include/net/dst.h:507 [inline]
 ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69
 NF_HOOK include/linux/netfilter.h:257 [inline]
 ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203
 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190
 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228
 process_backlog+0xe5/0x6c0 net/core/dev.c:4839
 napi_poll net/core/dev.c:5202 [inline]
 net_rx_action+0xe70/0x1900 net/core/dev.c:5267
 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284
Memory state around the buggy address:
 ffff88003713bd00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff88003713bd80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88003713be00: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb
                                                          ^

Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 61d042bbbf60..68449293c4b6 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -163,6 +163,7 @@ struct dccp_request_sock {
 	__u64			 dreq_isr;
 	__u64			 dreq_gsr;
 	__be32			 dreq_service;
+	spinlock_t		 dreq_lock;
 	struct list_head	 dreq_featneg;
 	__u32			 dreq_timestamp_echo;
 	__u32			 dreq_timestamp_time;
-- 
cgit v1.2.3


From c01228db4ba965986511a5b28c478bddd7e2726e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 8 Mar 2017 17:48:34 +0100
Subject: Revert "scsi, block: fix duplicate bdi name registration crashes"

This reverts commit 0dba1314d4f81115dce711292ec7981d17231064. It causes
leaking of device numbers for SCSI when SCSI registers multiple gendisks
for one request_queue in succession. It can be easily reproduced using
Omar's script [1] on kernel with CONFIG_DEBUG_TEST_DRIVER_REMOVE.
Furthermore the protection provided by this commit is not needed anymore
as the problem it was fixing got also fixed by commit 165a5e22fafb
"block: Move bdi_unregister() to del_gendisk()".

[1]: http://marc.info/?l=linux-block&m=148554717109098&w=2

Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 1 -
 include/linux/genhd.h  | 8 --------
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 796016e63c1d..5a7da607ca04 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -435,7 +435,6 @@ struct request_queue {
 	struct delayed_work	delay_work;
 
 	struct backing_dev_info	*backing_dev_info;
-	struct disk_devt	*disk_devt;
 
 	/*
 	 * The queue owner gets to use this for whatever they like.
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index a999d281a2f1..76f39754e7b0 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -167,13 +167,6 @@ struct blk_integrity {
 };
 
 #endif	/* CONFIG_BLK_DEV_INTEGRITY */
-struct disk_devt {
-	atomic_t count;
-	void (*release)(struct disk_devt *disk_devt);
-};
-
-void put_disk_devt(struct disk_devt *disk_devt);
-void get_disk_devt(struct disk_devt *disk_devt);
 
 struct gendisk {
 	/* major, first_minor and minors are input parameters only,
@@ -183,7 +176,6 @@ struct gendisk {
 	int first_minor;
 	int minors;                     /* maximum number of minors, =1 for
                                          * disks that can't be partitioned. */
-	struct disk_devt *disk_devt;
 
 	char disk_name[DISK_NAME_LEN];	/* name of major driver */
 	char *(*devnode)(struct gendisk *gd, umode_t *mode);
-- 
cgit v1.2.3


From bd0f9b356d00aa241ced36fb075a07041c28d3b8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 7 Mar 2017 15:33:14 -0800
Subject: sched/headers: fix up header file dependency on
 <linux/sched/signal.h>

The scheduler header file split and cleanups ended up exposing a few
nasty header file dependencies, and in particular it showed how we in
<linux/wait.h> ended up depending on "signal_pending()", which now comes
from <linux/sched/signal.h>.

That's a very subtle and annoying dependency, which already caused a
semantic merge conflict (see commit e58bc927835a "Pull overlayfs updates
from Miklos Szeredi", which added that fixup in the merge commit).

It turns out that we can avoid this dependency _and_ improve code
generation by moving the guts of the fairly nasty helper #define
__wait_event_interruptible_locked() to out-of-line code.  The code that
includes the signal_pending() check is all in the slow-path where we
actually go to sleep waiting for the event anyway, so using a helper
function is the right thing to do.

Using a helper function is also what we already did for the non-locked
versions, see the "__wait_event*()" macros and the "prepare_to_wait*()"
set of helper functions.

We might want to try to unify all these macro games, we have a _lot_ of
subtly different wait-event loops.  But this is the minimal patch to fix
the annoying header dependency.

Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/wait.h | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index aacb1282d19a..db076ca7f11d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -620,30 +620,19 @@ do {									\
 	__ret;								\
 })
 
+extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *);
+extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
 
-#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
+#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \
 ({									\
-	int __ret = 0;							\
+	int __ret;							\
 	DEFINE_WAIT(__wait);						\
 	if (exclusive)							\
 		__wait.flags |= WQ_FLAG_EXCLUSIVE;			\
 	do {								\
-		if (likely(list_empty(&__wait.task_list)))		\
-			__add_wait_queue_tail(&(wq), &__wait);		\
-		set_current_state(TASK_INTERRUPTIBLE);			\
-		if (signal_pending(current)) {				\
-			__ret = -ERESTARTSYS;				\
+		__ret = fn(&(wq), &__wait);				\
+		if (__ret)						\
 			break;						\
-		}							\
-		if (irq)						\
-			spin_unlock_irq(&(wq).lock);			\
-		else							\
-			spin_unlock(&(wq).lock);			\
-		schedule();						\
-		if (irq)						\
-			spin_lock_irq(&(wq).lock);			\
-		else							\
-			spin_lock(&(wq).lock);				\
 	} while (!(condition));						\
 	__remove_wait_queue(&(wq), &__wait);				\
 	__set_current_state(TASK_RUNNING);				\
@@ -676,7 +665,7 @@ do {									\
  */
 #define wait_event_interruptible_locked(wq, condition)			\
 	((condition)							\
-	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0))
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr))
 
 /**
  * wait_event_interruptible_locked_irq - sleep until a condition gets true
@@ -703,7 +692,7 @@ do {									\
  */
 #define wait_event_interruptible_locked_irq(wq, condition)		\
 	((condition)							\
-	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1))
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq))
 
 /**
  * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true
@@ -734,7 +723,7 @@ do {									\
  */
 #define wait_event_interruptible_exclusive_locked(wq, condition)	\
 	((condition)							\
-	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0))
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr))
 
 /**
  * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true
@@ -765,7 +754,7 @@ do {									\
  */
 #define wait_event_interruptible_exclusive_locked_irq(wq, condition)	\
 	((condition)							\
-	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq))
 
 
 #define __wait_event_killable(wq, condition)				\
-- 
cgit v1.2.3


From 505a60e225606fbd3d2eadc31ff793d939ba66f1 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:03 +0300
Subject: asm-generic: introduce 5level-fixup.h

We are going to switch core MM to 5-level paging abstraction.

This is preparation step which adds <asm-generic/5level-fixup.h>
As with 4level-fixup.h, the new header allows quickly make all
architectures compatible with 5-level paging in core MM.

In long run we would like to switch architectures to properly folded p4d
level by using <asm-generic/pgtable-nop4d.h>, but it requires more
changes to arch-specific code.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0d65dd72c0f4..be1fe264eb37 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1619,11 +1619,14 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
  * Remove it when 4level-fixup.h has been removed.
  */
 #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
+
+#ifndef __ARCH_HAS_5LEVEL_HACK
 static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 {
 	return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
 		NULL: pud_offset(pgd, address);
 }
+#endif /* !__ARCH_HAS_5LEVEL_HACK */
 
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 {
-- 
cgit v1.2.3


From c2febafc67734a62196c1b9dfba926412d4077ba Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 9 Mar 2017 17:24:07 +0300
Subject: mm: convert generic code to 5-level paging

Convert all non-architecture-specific code to 5-level paging.

It's mostly mechanical adding handling one more page table level in
places where we deal with pud_t.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  5 ++++-
 include/linux/kasan.h   |  1 +
 include/linux/mm.h      | 31 +++++++++++++++++++++++++------
 3 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 503099d8aada..b857fc8cc2ec 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 				pud_t *pud, int flags);
 int pmd_huge(pmd_t pmd);
-int pud_huge(pud_t pmd);
+int pud_huge(pud_t pud);
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
@@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
 #ifndef pgd_huge
 #define pgd_huge(x)	0
 #endif
+#ifndef p4d_huge
+#define p4d_huge(x)	0
+#endif
 
 #ifndef pgd_write
 static inline int pgd_write(pgd_t pgd)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index ceb3fe78a0d3..1c823bef4c15 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
 extern pte_t kasan_zero_pte[PTRS_PER_PTE];
 extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
 extern pud_t kasan_zero_pud[PTRS_PER_PUD];
+extern p4d_t kasan_zero_p4d[PTRS_PER_P4D];
 
 void kasan_populate_zero_shadow(const void *shadow_start,
 				const void *shadow_end);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index be1fe264eb37..5f01c88f0800 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
 	return ptep;
 }
 
+#ifdef __PAGETABLE_P4D_FOLDED
+static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
+						unsigned long address)
+{
+	return 0;
+}
+#else
+int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+#endif
+
 #ifdef __PAGETABLE_PUD_FOLDED
-static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
+static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
 						unsigned long address)
 {
 	return 0;
 }
 #else
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
 #endif
 
 #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
@@ -1621,10 +1631,18 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
 
 #ifndef __ARCH_HAS_5LEVEL_HACK
-static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
+		unsigned long address)
+{
+	return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?
+		NULL : p4d_offset(pgd, address);
+}
+
+static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
+		unsigned long address)
 {
-	return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
-		NULL: pud_offset(pgd, address);
+	return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
+		NULL : pud_offset(p4d, address);
 }
 #endif /* !__ARCH_HAS_5LEVEL_HACK */
 
@@ -2388,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
 
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
-pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
+p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
+pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
 pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
 void *vmemmap_alloc_block(unsigned long size, int node);
-- 
cgit v1.2.3


From 4fe8435909fddc97b81472026aa954e06dd192a5 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 7 Mar 2017 20:00:13 -0800
Subject: bpf: convert htab map to hlist_nulls

when all map elements are pre-allocated one cpu can delete and reuse htab_elem
while another cpu is still walking the hlist. In such case the lookup may
miss the element. Convert hlist to hlist_nulls to avoid such scenario.
When bucket lock is taken there is no need to take such precautions,
so only convert map_lookup and map_get_next to nulls.
The race window is extremely small and only reproducible with explicit
udelay() inside lookup_nulls_elem_raw()

Similar to hlist add hlist_nulls_for_each_entry_safe() and
hlist_nulls_entry_safe() helpers.

Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Reported-by: Jonathan Perry <jonperry@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/list_nulls.h    |  5 +++++
 include/linux/rculist_nulls.h | 14 ++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index b01fe1009084..87ff4f58a2f0 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -29,6 +29,11 @@ struct hlist_nulls_node {
 	((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
 
 #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+	({ typeof(ptr) ____ptr = (ptr); \
+	   !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+	})
 /**
  * ptr_is_a_nulls - Test if a ptr is a nulls
  * @ptr: ptr to be tested
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 4ae95f7e8597..a23a33153180 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
 		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
 
+/**
+ * hlist_nulls_for_each_entry_safe -
+ *   iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_nulls_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_nulls_node within the struct.
+ */
+#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member)		\
+	for (({barrier();}),							\
+	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));		\
+		(!is_a_nulls(pos)) &&						\
+		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member);	\
+		   pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
 #endif
 #endif
-- 
cgit v1.2.3


From 8a1115ff6b6d90cf1066ec3a0c4e51276553eebe Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 9 Mar 2017 16:16:31 -0800
Subject: scripts/spelling.txt: add "disble(d)" pattern and fix typo instances

Fix typos and add the following to the scripts/spelling.txt:

  disble||disable
  disbled||disabled

I kept the TSL2563_INT_DISBLED in /drivers/iio/light/tsl2563.c
untouched.  The macro is not referenced at all, but this commit is
touching only comment blocks just in case.

Link: http://lkml.kernel.org/r/1481573103-11329-20-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/regulator/machine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index ad3e5158e586..c9f795e9a2ee 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -65,7 +65,7 @@ struct regulator_state {
 	int uV;	/* suspend voltage */
 	unsigned int mode; /* suspend regulator operating mode */
 	int enabled; /* is regulator enabled in this suspend state */
-	int disabled; /* is the regulator disbled in this suspend state */
+	int disabled; /* is the regulator disabled in this suspend state */
 };
 
 /**
-- 
cgit v1.2.3


From dd0db88d8094a6d9d4d1fc5fcd56ab619f54ccf8 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Mar 2017 16:16:49 -0800
Subject: userfaultfd: non-cooperative: rollback userfaultfd_exit

Patch series "userfaultfd non-cooperative further update for 4.11 merge
window".

Unfortunately I noticed one relevant bug in userfaultfd_exit while doing
more testing.  I've been doing testing before and this was also tested
by kbuild bot and exercised by the selftest, but this bug never
reproduced before.

I dropped userfaultfd_exit as result.  I dropped it because of
implementation difficulty in receiving signals in __mmput and because I
think -ENOSPC as result from the background UFFDIO_COPY should be enough
already.

Before I decided to remove userfaultfd_exit, I noticed userfaultfd_exit
wasn't exercised by the selftest and when I tried to exercise it, after
moving it to a more correct place in __mmput where it would make more
sense and where the vma list is stable, it resulted in the
event_wait_completion in D state.  So then I added the second patch to
be sure even if we call userfaultfd_event_wait_completion too late
during task exit(), we won't risk to generate tasks in D state.  The
same check exists in handle_userfault() for the same reason, except it
makes a difference there, while here is just a robustness check and it's
run under WARN_ON_ONCE.

While looking at the userfaultfd_event_wait_completion() function I
looked back at its callers too while at it and I think it's not ok to
stop executing dup_fctx on the fcs list because we relay on
userfaultfd_event_wait_completion to execute
userfaultfd_ctx_put(fctx->orig) which is paired against
userfaultfd_ctx_get(fctx->orig) in dup_userfault just before
list_add(fcs).  This change only takes care of fctx->orig but this area
also needs further review looking for similar problems in fctx->new.

The only patch that is urgent is the first because it's an use after
free during a SMP race condition that affects all processes if
CONFIG_USERFAULTFD=y.  Very hard to reproduce though and probably
impossible without SLUB poisoning enabled.

This patch (of 3):

I once reproduced this oops with the userfaultfd selftest, it's not
easily reproducible and it requires SLUB poisoning to reproduce.

    general protection fault: 0000 [#1] SMP
    Modules linked in:
    CPU: 2 PID: 18421 Comm: userfaultfd Tainted: G               ------------ T 3.10.0+ #15
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
    task: ffff8801f83b9440 ti: ffff8801f833c000 task.ti: ffff8801f833c000
    RIP: 0010:[<ffffffff81451299>]  [<ffffffff81451299>] userfaultfd_exit+0x29/0xa0
    RSP: 0018:ffff8801f833fe80  EFLAGS: 00010202
    RAX: ffff8801f833ffd8 RBX: 6b6b6b6b6b6b6b6b RCX: ffff8801f83b9440
    RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800baf18600
    RBP: ffff8801f833fee8 R08: 0000000000000000 R09: 0000000000000001
    R10: 0000000000000000 R11: ffffffff8127ceb3 R12: 0000000000000000
    R13: ffff8800baf186b0 R14: ffff8801f83b99f8 R15: 00007faed746c700
    FS:  0000000000000000(0000) GS:ffff88023fc80000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
    CR2: 00007faf0966f028 CR3: 0000000001bc6000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
    Call Trace:
      do_exit+0x297/0xd10
      SyS_exit+0x17/0x20
      tracesys+0xdd/0xe2
    Code: 00 00 66 66 66 66 90 55 48 89 e5 41 54 53 48 83 ec 58 48 8b 1f 48 85 db 75 11 eb 73 66 0f 1f 44 00 00 48 8b 5b 10 48 85 db 74 64 <4c> 8b a3 b8 00 00 00 4d 85 e4 74 eb 41 f6 84 24 2c 01 00 00 80
    RIP  [<ffffffff81451299>] userfaultfd_exit+0x29/0xa0
     RSP <ffff8801f833fe80>
    ---[ end trace 9fecd6dcb442846a ]---

In the debugger I located the "mm" pointer in the stack and walking
mm->mmap->vm_next through the end shows the vma->vm_next list is fully
consistent and it is null terminated list as expected.  So this has to
be an SMP race condition where userfaultfd_exit was running while the
vma list was being modified by another CPU.

When userfaultfd_exit() run one of the ->vm_next pointers pointed to
SLAB_POISON (RBX is the vma pointer and is 0x6b6b..).

The reason is that it's not running in __mmput but while there are still
other threads running and it's not holding the mmap_sem (it can't as it
has to wait the even to be received by the manager).  So this is an use
after free that was happening for all processes.

One more implementation problem aside from the race condition:
userfaultfd_exit has really to check a flag in mm->flags before walking
the vma or it's going to slowdown the exit() path for regular tasks.

One more implementation problem: at that point signals can't be
delivered so it would also create a task in D state if the manager
doesn't read the event.

The major design issue: it overall looks superfluous as the manager can
check for -ENOSPC in the background transfer:

	if (mmget_not_zero(ctx->mm)) {
[..]
	} else {
		return -ENOSPC;
	}

It's safer to roll it back and re-introduce it later if at all.

[rppt@linux.vnet.ibm.com: documentation fixup after removal of UFFD_EVENT_EXIT]
  Link: http://lkml.kernel.org/r/1488345437-4364-1-git-send-email-rppt@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/20170224181957.19736-2-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 0468548acebf..f2b79bf4c895 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -72,8 +72,6 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
 extern void userfaultfd_unmap_complete(struct mm_struct *mm,
 				       struct list_head *uf);
 
-extern void userfaultfd_exit(struct mm_struct *mm);
-
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
@@ -139,10 +137,6 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
 {
 }
 
-static inline void userfaultfd_exit(struct mm_struct *mm)
-{
-}
-
 #endif /* CONFIG_USERFAULTFD */
 
 #endif /* _LINUX_USERFAULTFD_K_H */
-- 
cgit v1.2.3


From cbfd0c1001bedb4b051cf4a1f5df24f1500381bc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 9 Mar 2017 16:16:57 -0800
Subject: include/linux/fs.h: fix unsigned enum warning with gcc-4.2

With arm-linux-gcc-4.2, almost every file we build in the kernel ends up
with this warning:

  include/linux/fs.h:2648: warning: comparison of unsigned expression < 0 is always false

Later versions don't have this problem, but it's easy enough to work
around.

Link: http://lkml.kernel.org/r/20161216105634.235457-12-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index aad3fd0ff5f8..7251f7bb45e8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2678,7 +2678,7 @@ static const char * const kernel_read_file_str[] = {
 
 static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id)
 {
-	if (id < 0 || id >= READING_MAX_ID)
+	if ((unsigned)id >= READING_MAX_ID)
 		return kernel_read_file_str[READING_UNKNOWN];
 
 	return kernel_read_file_str[id];
-- 
cgit v1.2.3


From ce9311cf95ad8baf044a014738d76973d93b739a Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Thu, 9 Mar 2017 16:17:00 -0800
Subject: mm/vmstats: add thp_split_pud event for clarity

We added support for PUD-sized transparent hugepages, however we count
the event "thp split pud" into thp_split_pmd event.

To separate the event count of thp split pud from pmd, add a new event
named thp_split_pud.

Link: http://lkml.kernel.org/r/1488282380-5076-1-git-send-email-xieyisheng1@huawei.com
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 6aa1b6cb5828..a80b7b59cf33 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -79,6 +79,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_SPLIT_PAGE_FAILED,
 		THP_DEFERRED_SPLIT_PAGE,
 		THP_SPLIT_PMD,
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+		THP_SPLIT_PUD,
+#endif
 		THP_ZERO_PAGE_ALLOC,
 		THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
-- 
cgit v1.2.3


From 70ccb92fdd90b35bb6f9200093d4ffd6cb38156b Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Mar 2017 16:17:11 -0800
Subject: userfaultfd: non-cooperative: userfaultfd_remove revalidate vma in
 MADV_DONTNEED

userfaultfd_remove() has to be execute before zapping the pagetables or
UFFDIO_COPY could keep filling pages after zap_page_range returned,
which would result in non zero data after a MADV_DONTNEED.

However userfaultfd_remove() may have to release the mmap_sem.  This was
handled correctly in MADV_REMOVE, but MADV_DONTNEED accessed a
potentially stale vma (the very vma passed to zap_page_range(vma, ...)).

The fix consists in revalidating the vma in case userfaultfd_remove()
had to release the mmap_sem.

This also optimizes away an unnecessary down_read/up_read in the
MADV_REMOVE case if UFFD_EVENT_FORK had to be delivered.

It all remains zero runtime cost in case CONFIG_USERFAULTFD=n as
userfaultfd_remove() will be defined as "true" at build time.

Link: http://lkml.kernel.org/r/20170302173738.18994-3-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/userfaultfd_k.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f2b79bf4c895..48a3483dccb1 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
 					unsigned long from, unsigned long to,
 					unsigned long len);
 
-extern void userfaultfd_remove(struct vm_area_struct *vma,
-			       struct vm_area_struct **prev,
+extern bool userfaultfd_remove(struct vm_area_struct *vma,
 			       unsigned long start,
 			       unsigned long end);
 
@@ -118,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
 {
 }
 
-static inline void userfaultfd_remove(struct vm_area_struct *vma,
-				      struct vm_area_struct **prev,
+static inline bool userfaultfd_remove(struct vm_area_struct *vma,
 				      unsigned long start,
 				      unsigned long end)
 {
+	return true;
 }
 
 static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
-- 
cgit v1.2.3


From cdfbabfb2f0ce983fdaa42f20e5f7842178fc01e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 9 Mar 2017 08:09:05 +0000
Subject: net: Work around lockdep limitation in sockets that use sockets

Lockdep issues a circular dependency warning when AFS issues an operation
through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem.

The theory lockdep comes up with is as follows:

 (1) If the pagefault handler decides it needs to read pages from AFS, it
     calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but
     creating a call requires the socket lock:

	mmap_sem must be taken before sk_lock-AF_RXRPC

 (2) afs_open_socket() opens an AF_RXRPC socket and binds it.  rxrpc_bind()
     binds the underlying UDP socket whilst holding its socket lock.
     inet_bind() takes its own socket lock:

	sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET

 (3) Reading from a TCP socket into a userspace buffer might cause a fault
     and thus cause the kernel to take the mmap_sem, but the TCP socket is
     locked whilst doing this:

	sk_lock-AF_INET must be taken before mmap_sem

However, lockdep's theory is wrong in this instance because it deals only
with lock classes and not individual locks.  The AF_INET lock in (2) isn't
really equivalent to the AF_INET lock in (3) as the former deals with a
socket entirely internal to the kernel that never sees userspace.  This is
a limitation in the design of lockdep.

Fix the general case by:

 (1) Double up all the locking keys used in sockets so that one set are
     used if the socket is created by userspace and the other set is used
     if the socket is created by the kernel.

 (2) Store the kern parameter passed to sk_alloc() in a variable in the
     sock struct (sk_kern_sock).  This informs sock_lock_init(),
     sock_init_data() and sk_clone_lock() as to the lock keys to be used.

     Note that the child created by sk_clone_lock() inherits the parent's
     kern setting.

 (3) Add a 'kern' parameter to ->accept() that is analogous to the one
     passed in to ->create() that distinguishes whether kernel_accept() or
     sys_accept4() was the caller and can be passed to sk_alloc().

     Note that a lot of accept functions merely dequeue an already
     allocated socket.  I haven't touched these as the new socket already
     exists before we get the parameter.

     Note also that there are a couple of places where I've made the accepted
     socket unconditionally kernel-based:

	irda_accept()
	rds_rcp_accept_one()
	tcp_accept_from_sock()

     because they follow a sock_create_kern() and accept off of that.

Whilst creating this, I noticed that lustre and ocfs don't create sockets
through sock_create_kern() and thus they aren't marked as for-kernel,
though they appear to be internal.  I wonder if these should do that so
that they use the new set of lock keys.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index cd0c8bd0a1de..0620f5e18c96 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -146,7 +146,7 @@ struct proto_ops {
 	int		(*socketpair)(struct socket *sock1,
 				      struct socket *sock2);
 	int		(*accept)    (struct socket *sock,
-				      struct socket *newsock, int flags);
+				      struct socket *newsock, int flags, bool kern);
 	int		(*getname)   (struct socket *sock,
 				      struct sockaddr *addr,
 				      int *sockaddr_len, int peer);
-- 
cgit v1.2.3


From 40c50c1fecdf012a3bf055ec813f0ef2eda2749c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Mar 2017 13:17:18 +0100
Subject: kexec, x86/purgatory: Unbreak it and clean it up

The purgatory code defines global variables which are referenced via a
symbol lookup in the kexec code (core and arch).

A recent commit addressing sparse warnings made these static and thereby
broke kexec_file.

Why did this happen? Simply because the whole machinery is undocumented and
lacks any form of forward declarations. The variable names are unspecific
and lack a prefix, so adding forward declarations creates shadow variables
in the core code. Aside of that the code relies on magic constants and
duplicate struct definitions with no way to ensure that these things stay
in sync. The section placement of the purgatory variables happened by
chance and not by design.

Unbreak kexec and cleanup the mess:

 - Add proper forward declarations and document the usage
 - Use common struct definition
 - Use the proper common defines instead of magic constants
 - Add a purgatory_ prefix to have a proper name space
 - Use ARRAY_SIZE() instead of a homebrewn reimplementation
 - Add proper sections to the purgatory variables [ From Mike ]

Fixes: 72042a8c7b01 ("x86/purgatory: Make functions and variables static")
Reported-by: Mike Galbraith <<efault@gmx.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Nicholas Mc Guire <der.herr@hofr.at>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: "Tobin C. Harding" <me@tobin.cc>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703101315140.3681@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/purgatory.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 include/linux/purgatory.h

(limited to 'include/linux')

diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h
new file mode 100644
index 000000000000..d60d4e278609
--- /dev/null
+++ b/include/linux/purgatory.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_PURGATORY_H
+#define _LINUX_PURGATORY_H
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <uapi/linux/kexec.h>
+
+struct kexec_sha_region {
+	unsigned long start;
+	unsigned long len;
+};
+
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ *    lookup in kexec works
+ */
+extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX];
+extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE];
+
+#endif
-- 
cgit v1.2.3


From c962cff17dfa11f4a8227ac16de2b28aea3312e4 Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Fri, 3 Mar 2017 16:02:23 +0800
Subject: Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when
 booting"

Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting")

The mapping of "cpuid <-> nodeid" is established at boot time via ACPI
tables to keep associations of workqueues and other node related items
consistent across cpu hotplug.

But, ACPI tables are unreliable and failures with that boot time mapping
have been reported on machines where the ACPI table and the physical
information which is retrieved at actual hotplug is inconsistent.

Revert the mapping implementation so it can be replaced with a less error
prone approach.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: guzheng1@huawei.com
Cc: izumi.taku@jp.fujitsu.com
Cc: lenb@kernel.org
Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/acpi.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 673acda012af..63a7519b00cc 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -294,11 +294,8 @@ bool acpi_processor_validate_proc_id(int proc_id);
 int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
 		 int *pcpu);
 int acpi_unmap_cpu(int cpu);
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
-void acpi_set_processor_mapping(void);
-
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
 #endif
-- 
cgit v1.2.3


From a77d6cd968497792e072b74dff45b891ba778ddb Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Fri, 3 Mar 2017 16:02:27 +0800
Subject: acpi/processor: Check for duplicate processor ids at hotplug time

The check for duplicate processor ids happens at boot time based on the
ACPI table contents, but the final sanity checks for a processor happen
at hotplug time.

At hotplug time, where the physical information is available, which might
differ from the ACPI table information, a check for duplicate processor
ids is missing.

Add it to the hotplug checks and rename the function so it better
reflects its purpose.

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: rjw@rjwysocki.net
Cc: linux-acpi@vger.kernel.org
Cc: guzheng1@huawei.com
Cc: izumi.taku@jp.fujitsu.com
Cc: lenb@kernel.org
Link: http://lkml.kernel.org/r/1488528147-2279-6-git-send-email-douly.fnst@cn.fujitsu.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 63a7519b00cc..9b05886f9773 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -287,7 +287,7 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id)
 }
 
 /* Validate the processor object's proc_id */
-bool acpi_processor_validate_proc_id(int proc_id);
+bool acpi_duplicate_processor_id(int proc_id);
 
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
-- 
cgit v1.2.3


From 65869a47f3488253f5fd88cc4f14e0a4e2601a55 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 11 Mar 2017 16:55:49 +0100
Subject: bpf: improve read-only handling

Improve bpf_{prog,jit_binary}_{un,}lock_ro() by throwing a
one-time warning in case of an error when the image couldn't
be set read-only, and also mark struct bpf_prog as locked when
bpf_prog_lock_ro() was called.

Reason for the latter is that bpf_prog_unlock_ro() is called from
various places including error paths, and we shouldn't mess with
page attributes when really not needed.

For bpf_jit_binary_unlock_ro() this is not needed as jited flag
implicitly indicates this, thus for archs with ARCH_HAS_SET_MEMORY
we're guaranteed to have a previously locked image. Overall, this
should also help us to identify any further potential issues with
set_memory_*() helpers.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0c167fdee5f7..fbf7b39e8103 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -409,6 +409,7 @@ struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	kmemcheck_bitfield_begin(meta);
 	u16			jited:1,	/* Is our filter JIT'ed? */
+				locked:1,	/* Program image locked? */
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1,	/* Do we need dst entry? */
@@ -554,22 +555,29 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
 #ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
-	set_memory_ro((unsigned long)fp, fp->pages);
+	fp->locked = 1;
+	WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
-	set_memory_rw((unsigned long)fp, fp->pages);
+	if (fp->locked) {
+		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
+		/* In case set_memory_rw() fails, we want to be the first
+		 * to crash here instead of some random place later on.
+		 */
+		fp->locked = 0;
+	}
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-	set_memory_ro((unsigned long)hdr, hdr->pages);
+	WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
-	set_memory_rw((unsigned long)hdr, hdr->pages);
+	WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
 }
 #else
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
-- 
cgit v1.2.3


From c42f8218610aa09d7d3795e5810387673c1f84b6 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 9 Mar 2017 17:20:04 +0100
Subject: iio: sw-device: Fix config group initialization

Use the IS_ENABLED() helper macro to ensure that the configfs group is
initialized either when configfs is built-in or when configfs is built as a
module. Otherwise software device creation will result in undefined
behaviour when configfs is built as a module since the configfs group for
the device not properly initialized.

Similar to commit b2f0c09664b7 ("iio: sw-trigger: Fix config group
initialization").

Fixes: 0f3a8c3f34f7 ("iio: Add support for creating IIO devices via configfs")
Reported-by: Miguel Robles <miguel.robles@farole.net>
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Daniel Baluta <daniel.baluta@gmail.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/sw_device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h
index 23ca41515527..fa7931933067 100644
--- a/include/linux/iio/sw_device.h
+++ b/include/linux/iio/sw_device.h
@@ -62,7 +62,7 @@ void iio_swd_group_init_type_name(struct iio_sw_device *d,
 				  const char *name,
 				  struct config_item_type *type)
 {
-#ifdef CONFIG_CONFIGFS_FS
+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
 	config_group_init_type_name(&d->group, name, type);
 #endif
 }
-- 
cgit v1.2.3


From 3243367b209faed5c320a4e5f9a565ee2a2ba958 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Mon, 13 Mar 2017 20:50:08 +0100
Subject: usb-core: Add LINEAR_FRAME_INTR_BINTERVAL USB quirk

Some USB 2.0 devices erroneously report millisecond values in
bInterval. The generic config code manages to catch most of them,
but in some cases it's not completely enough.

The case at stake here is a USB 2.0 braille device, which wants to
announce 10ms and thus sets bInterval to 10, but with the USB 2.0
computation that yields to 64ms.  It happens that one can type fast
enough to reach this interval and get the device buffers overflown,
leading to problematic latencies.  The generic config code does not
catch this case because the 64ms is considered a sane enough value.

This change thus adds a USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL quirk
to mark devices which actually report milliseconds in bInterval,
and marks Vario Ultra devices as needing it.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/quirks.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 1d0043dc34e4..de2a722fe3cf 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -50,4 +50,10 @@
 /* device can't handle Link Power Management */
 #define USB_QUIRK_NO_LPM			BIT(10)
 
+/*
+ * Device reports its bInterval as linear frames instead of the
+ * USB 2.0 calculation.
+ */
+#define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL	BIT(11)
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From 0043c1dfbec7b6e2427409059b05347d6f51aa9f Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Wed, 8 Feb 2017 09:24:25 +0000
Subject: serial: st-asc: Use new GPIOD API to obtain RTS pin

The commits mentioned below adapt the GPIO API to allow more information
to be passed directly through devm_get_gpiod_from_child() in the first
instance.  This facilitates the removal of subsequent calls, such as
gpiod_direction_output().  This patch firstly moves to utilise the new
API and secondly removes the now superfluous call do set the direction.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Suggested-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
[Also drop the header file dummies that only this driver was using]
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 2484b2fcc6eb..933d93656605 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
 						struct fwnode_handle *child,
 						enum gpiod_flags flags,
 						const char *label);
-/* FIXME: delete this helper when users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-			  const char *con_id, struct fwnode_handle *child)
-{
-	return devm_fwnode_get_index_gpiod_from_child(dev, con_id,
-						      0, child,
-						      GPIOD_ASIS,
-						      "?");
-}
 
 #else /* CONFIG_GPIOLIB */
 
@@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
 	return ERR_PTR(-ENOSYS);
 }
 
-/* FIXME: delete this when all users are switched over */
-static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev,
-			  const char *con_id, struct fwnode_handle *child)
-{
-	return ERR_PTR(-ENOSYS);
-}
-
 #endif /* CONFIG_GPIOLIB */
 
 static inline
-- 
cgit v1.2.3


From 94840e3c802daa1a62985957f36ac48faf8ceedd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 22 Feb 2017 13:25:14 -0800
Subject: fscrypt: eliminate ->prepare_context() operation

The only use of the ->prepare_context() fscrypt operation was to allow
ext4 to evict inline data from the inode before ->set_context().
However, there is no reason why this cannot be done as simply the first
step in ->set_context(), and in fact it makes more sense to do it that
way because then the policy modes and flags get validated before any
real work is done.  Therefore, merge ext4_prepare_context() into
ext4_set_context(), and remove ->prepare_context().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/fscrypt_common.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h
index 547f81592ba1..10c1abfbac6c 100644
--- a/include/linux/fscrypt_common.h
+++ b/include/linux/fscrypt_common.h
@@ -87,7 +87,6 @@ struct fscrypt_operations {
 	unsigned int flags;
 	const char *key_prefix;
 	int (*get_context)(struct inode *, void *, size_t);
-	int (*prepare_context)(struct inode *);
 	int (*set_context)(struct inode *, const void *, size_t, void *);
 	int (*dummy_context)(struct inode *);
 	bool (*is_encrypted)(struct inode *);
-- 
cgit v1.2.3


From 8200f2085abe7f29a016381f3122000cc7b2a760 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sat, 4 Mar 2017 18:13:57 -0700
Subject: vmbus: use rcu for per-cpu channel list

The per-cpu channel list is now referred to in the interrupt
routine. This is mostly safe since the host will not normally generate
an interrupt when channel is being deleted but if it did then there
would be a use after free problem.

To solve, this use RCU protection on ther per-cpu list.

Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet")

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 62bbf3c1aa4a..c4c7ae91f9d1 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -845,6 +845,13 @@ struct vmbus_channel {
 	 * link up channels based on their CPU affinity.
 	 */
 	struct list_head percpu_list;
+
+	/*
+	 * Defer freeing channel until after all cpu's have
+	 * gone through grace period.
+	 */
+	struct rcu_head rcu;
+
 	/*
 	 * For performance critical channels (storage, networking
 	 * etc,), Hyper-V has a mechanism to enhance the throughput
-- 
cgit v1.2.3


From dad72a1d28442b03aac86836a42de2d00a1014ab Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Sat, 4 Mar 2017 18:13:58 -0700
Subject: vmbus: remove hv_event_tasklet_disable/enable

With the recent introduction of per-channel tasklet, we need to update
the way we handle the 3 concurrency issues:

1. hv_process_channel_removal -> percpu_channel_deq vs.
   vmbus_chan_sched -> list_for_each_entry(..., percpu_list);

2. vmbus_process_offer -> percpu_channel_enq/deq vs. vmbus_chan_sched.

3. vmbus_close_internal vs. the per-channel tasklet vmbus_on_event;

The first 2 issues can be handled by Stephen's recent patch
"vmbus: use rcu for per-cpu channel list", and the third issue
can be handled by calling tasklet_disable in vmbus_close_internal here.

We don't need the original hv_event_tasklet_disable/enable since we
now use per-channel tasklet instead of the previous per-CPU tasklet,
and actually we must remove them due to the side effect now:
vmbus_process_offer -> hv_event_tasklet_enable -> tasklet_schedule will
start the per-channel callback prematurely, cauing NULL dereferencing
(the channel may haven't been properly configured to run the callback yet).

Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet")

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Tested-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index c4c7ae91f9d1..970771a5f739 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1437,9 +1437,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
 				const int *srv_version, int srv_vercnt,
 				int *nego_fw_version, int *nego_srv_version);
 
-void hv_event_tasklet_disable(struct vmbus_channel *channel);
-void hv_event_tasklet_enable(struct vmbus_channel *channel);
-
 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
 
 void vmbus_setevent(struct vmbus_channel *channel);
-- 
cgit v1.2.3


From 7c468447f40645fbf2a033dfdaa92b1957130d50 Mon Sep 17 00:00:00 2001
From: Gary R Hook <ghook@amd.com>
Date: Fri, 10 Mar 2017 12:28:18 -0600
Subject: crypto: ccp - Assign DMA commands to the channel's CCP

The CCP driver generally uses a round-robin approach when
assigning operations to available CCPs. For the DMA engine,
however, the DMA mappings of the SGs are associated with a
specific CCP. When an IOMMU is enabled, the IOMMU is
programmed based on this specific device.

If the DMA operations are not performed by that specific
CCP then addressing errors and I/O page faults will occur.

Update the CCP driver to allow a specific CCP device to be
requested for an operation and use this in the DMA engine
support.

Cc: <stable@vger.kernel.org> # 4.9.x-
Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/ccp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index c71dd8fa5764..c41b8d99dd0e 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -556,7 +556,7 @@ enum ccp_engine {
  * struct ccp_cmd - CCP operation request
  * @entry: list element (ccp driver use only)
  * @work: work element used for callbacks (ccp driver use only)
- * @ccp: CCP device to be run on (ccp driver use only)
+ * @ccp: CCP device to be run on
  * @ret: operation return code (ccp driver use only)
  * @flags: cmd processing flags
  * @engine: CCP operation to perform
-- 
cgit v1.2.3


From 5be9b730b09c45c358bbfe7f51d254e306cccc07 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 16 Mar 2017 16:40:21 -0700
Subject: kasan: add a prototype of task_struct to avoid warning

Add a prototype of task_struct to fix below warning on arm64.

  In file included from arch/arm64/kernel/probes/kprobes.c:19:0:
  include/linux/kasan.h:81:132: error: 'struct task_struct' declared inside parameter list will not be visible outside of this definition or declaration [-Werror]
   static inline void kasan_unpoison_task_stack(struct task_struct *task) {}

As same as other types (kmem_cache, page, and vm_struct) this adds a
prototype of task_struct data structure on top of kasan.h.

[arnd] A related warning was fixed before, but now appears in a
different line in the same file in v4.11-rc2.  The patch from Masami
Hiramatsu still seems appropriate, so let's take his version.

Fixes: 71af2ed5eeea ("kasan, sched/headers: Remove <linux/sched.h> from <linux/kasan.h>")
Link: https://patchwork.kernel.org/patch/9569839/
Link: http://lkml.kernel.org/r/20170313141517.3397802-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Alexander Potapenko <glider@google.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 1c823bef4c15..5734480c9590 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -6,6 +6,7 @@
 struct kmem_cache;
 struct page;
 struct vm_struct;
+struct task_struct;
 
 #ifdef CONFIG_KASAN
 
-- 
cgit v1.2.3


From 15c9e10d9ad4d41d076148bbff1de7f659f68852 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 16 Mar 2017 16:40:33 -0700
Subject: drivers core: remove assert_held_device_hotplug()

The last caller of assert_held_device_hotplug() is gone, so remove it again.

Link: http://lkml.kernel.org/r/20170314125226.16779-3-heiko.carstens@de.ibm.com
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 30c4570e928d..9ef518af5515 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev)
 extern void lock_device_hotplug(void);
 extern void unlock_device_hotplug(void);
 extern int lock_device_hotplug_sysfs(void);
-void assert_held_device_hotplug(void);
 extern int device_offline(struct device *dev);
 extern int device_online(struct device *dev);
 extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
-- 
cgit v1.2.3


From 4cbe4dac82e423ecc9a0ba46af24a860853259f4 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Mon, 13 Mar 2017 19:29:08 +0200
Subject: net/mlx4_core: Avoid delays during VF driver device shutdown

Some Hypervisors detach VFs from VMs by instantly causing an FLR event
to be generated for a VF.

In the mlx4 case, this will cause that VF's comm channel to be disabled
before the VM has an opportunity to invoke the VF device's "shutdown"
method.

For such Hypervisors, there is a race condition between the VF's
shutdown method and its internal-error detection/reset thread.

The internal-error detection/reset thread (which runs every 5 seconds) also
detects a disabled comm channel. If the internal-error detection/reset
flow wins the race, we still get delays (while that flow tries repeatedly
to detect comm-channel recovery).

The cited commit fixed the command timeout problem when the
internal-error detection/reset flow loses the race.

This commit avoids the unneeded delays when the internal-error
detection/reset flow wins.

Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reported-by: Simon Xiao <sixiao@microsoft.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 7e66e4f62858..1beb1ec2fbdf 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -476,6 +476,7 @@ enum {
 enum {
 	MLX4_INTERFACE_STATE_UP		= 1 << 0,
 	MLX4_INTERFACE_STATE_DELETION	= 1 << 1,
+	MLX4_INTERFACE_STATE_NOWAIT	= 1 << 2,
 };
 
 #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
-- 
cgit v1.2.3


From 0ca10b60ceeb5372da01798ca68c116ae45a6eb6 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Mon, 20 Mar 2017 11:25:16 +0100
Subject: reset: fix optional reset_control_get stubs to return NULL

When RESET_CONTROLLER is not enabled, the optional reset_control_get
stubs should now also return NULL.

Since it is now valid for reset_control_assert/deassert/reset/status/put
to be called unconditionally, with NULL as an argument for optional
resets, the stubs are not allowed to warn anymore.

Fixes: bb475230b8e5 ("reset: make optional functions really optional")
Reported-by: Andrzej Hajda <a.hajda@samsung.com>
Tested-by: Andrzej Hajda <a.hajda@samsung.com>
Reviewed-by: Andrzej Hajda <a.hajda@samsung.com>
Cc: Ramiro Oliveira <Ramiro.Oliveira@synopsys.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/reset.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/reset.h b/include/linux/reset.h
index 86b4ed75359e..96fb139bdd08 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -31,31 +31,26 @@ static inline int device_reset_optional(struct device *dev)
 
 static inline int reset_control_reset(struct reset_control *rstc)
 {
-	WARN_ON(1);
 	return 0;
 }
 
 static inline int reset_control_assert(struct reset_control *rstc)
 {
-	WARN_ON(1);
 	return 0;
 }
 
 static inline int reset_control_deassert(struct reset_control *rstc)
 {
-	WARN_ON(1);
 	return 0;
 }
 
 static inline int reset_control_status(struct reset_control *rstc)
 {
-	WARN_ON(1);
 	return 0;
 }
 
 static inline void reset_control_put(struct reset_control *rstc)
 {
-	WARN_ON(1);
 }
 
 static inline int __must_check device_reset(struct device *dev)
@@ -74,14 +69,14 @@ static inline struct reset_control *__of_reset_control_get(
 					const char *id, int index, bool shared,
 					bool optional)
 {
-	return ERR_PTR(-ENOTSUPP);
+	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct reset_control *__devm_reset_control_get(
 					struct device *dev, const char *id,
 					int index, bool shared, bool optional)
 {
-	return ERR_PTR(-ENOTSUPP);
+	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
 #endif /* CONFIG_RESET_CONTROLLER */
-- 
cgit v1.2.3


From 36d277bac8080202684e67162ebb157f16631581 Mon Sep 17 00:00:00 2001
From: Peng Tao <bergwolf@gmail.com>
Date: Wed, 15 Mar 2017 09:32:14 +0800
Subject: vsock: track pkt owner vsock

So that we can cancel a queued pkt later if necessary.

Signed-off-by: Peng Tao <bergwolf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_vsock.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 9638bfeb0d1f..584f9a647ad4 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -48,6 +48,8 @@ struct virtio_vsock_pkt {
 	struct virtio_vsock_hdr	hdr;
 	struct work_struct work;
 	struct list_head list;
+	/* socket refcnt not held, only use for cancellation */
+	struct vsock_sock *vsk;
 	void *buf;
 	u32 len;
 	u32 off;
@@ -56,6 +58,7 @@ struct virtio_vsock_pkt {
 
 struct virtio_vsock_pkt_info {
 	u32 remote_cid, remote_port;
+	struct vsock_sock *vsk;
 	struct msghdr *msg;
 	u32 pkt_len;
 	u16 type;
-- 
cgit v1.2.3


From 4ef1b2869447411ad3ef91ad7d4891a83c1a509a Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Sat, 18 Mar 2017 17:03:00 -0400
Subject: tcp: mark skbs with SCM_TIMESTAMPING_OPT_STATS

SOF_TIMESTAMPING_OPT_STATS can be enabled and disabled
while packets are collected on the error queue.
So, checking SOF_TIMESTAMPING_OPT_STATS in sk->sk_tsflags
is not enough to safely assume that the skb contains
OPT_STATS data.

Add a bit in sock_exterr_skb to indicate whether the
skb contains opt_stats data.

Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING")
Reported-by: JongHwan Kim <zzoru007@gmail.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/errqueue.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index 9ca23fcfb5d7..6fdfc884fdeb 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -20,6 +20,8 @@ struct sock_exterr_skb {
 	struct sock_extended_err	ee;
 	u16				addr_offset;
 	__be16				port;
+	u8				opt_stats:1,
+					unused:7;
 };
 
 #endif
-- 
cgit v1.2.3


From a5023a99393dab276069cd60dad3e61d57720fda Mon Sep 17 00:00:00 2001
From: Peter Huewe <PeterHuewe@gmx.de>
Date: Fri, 17 Mar 2017 00:28:56 +0100
Subject: hwmon: Add missing HWMON_T_ALARM

Unfortunately the HWMON_T_ALARM define was missing,
although the associated entry was present in hwmon_temp_attributes.
This is needed to convert drivers to the new interface which use channel
based alarms.

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/hwmon.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 78d59dba563e..88b673749121 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -88,6 +88,7 @@ enum hwmon_temp_attributes {
 #define HWMON_T_CRIT_HYST	BIT(hwmon_temp_crit_hyst)
 #define HWMON_T_EMERGENCY	BIT(hwmon_temp_emergency)
 #define HWMON_T_EMERGENCY_HYST	BIT(hwmon_temp_emergency_hyst)
+#define HWMON_T_ALARM		BIT(hwmon_temp_alarm)
 #define HWMON_T_MIN_ALARM	BIT(hwmon_temp_min_alarm)
 #define HWMON_T_MAX_ALARM	BIT(hwmon_temp_max_alarm)
 #define HWMON_T_CRIT_ALARM	BIT(hwmon_temp_crit_alarm)
-- 
cgit v1.2.3


From 9d3a4de4cb8db8e71730e36736272ef041836f68 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 16 Mar 2017 17:00:16 +0000
Subject: iommu: Disambiguate MSI region types

The introduction of reserved regions has left a couple of rough edges
which we could do with sorting out sooner rather than later. Since we
are not yet addressing the potential dynamic aspect of software-managed
reservations and presenting them at arbitrary fixed addresses, it is
incongruous that we end up displaying hardware vs. software-managed MSI
regions to userspace differently, especially since ARM-based systems may
actually require one or the other, or even potentially both at once,
(which iommu-dma currently has no hope of dealing with at all). Let's
resolve the former user-visible inconsistency ASAP before the ABI has
been baked into a kernel release, in a way that also lays the groundwork
for the latter shortcoming to be addressed by follow-up patches.

For clarity, rename the software-managed type to IOMMU_RESV_SW_MSI, use
IOMMU_RESV_MSI to describe the hardware type, and document everything a
little bit. Since the x86 MSI remapping hardware falls squarely under
this meaning of IOMMU_RESV_MSI, apply that type to their regions as well,
so that we tell the same story to userspace across all platforms.

Secondly, as the various region types require quite different handling,
and it really makes little sense to ever try combining them, convert the
bitfield-esque #defines to a plain enum in the process before anyone
gets the wrong impression.

Fixes: d30ddcaa7b02 ("iommu: Add a new type field in iommu_resv_region")
Reviewed-by: Eric Auger <eric.auger@redhat.com>
CC: Alex Williamson <alex.williamson@redhat.com>
CC: David Woodhouse <dwmw2@infradead.org>
CC: kvm@vger.kernel.org
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 6a6de187ddc0..2e4de0deee53 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -125,9 +125,16 @@ enum iommu_attr {
 };
 
 /* These are the possible reserved region types */
-#define IOMMU_RESV_DIRECT	(1 << 0)
-#define IOMMU_RESV_RESERVED	(1 << 1)
-#define IOMMU_RESV_MSI		(1 << 2)
+enum iommu_resv_type {
+	/* Memory regions which must be mapped 1:1 at all times */
+	IOMMU_RESV_DIRECT,
+	/* Arbitrary "never map this or give it to a device" address ranges */
+	IOMMU_RESV_RESERVED,
+	/* Hardware MSI region (untranslated) */
+	IOMMU_RESV_MSI,
+	/* Software-managed MSI translation window */
+	IOMMU_RESV_SW_MSI,
+};
 
 /**
  * struct iommu_resv_region - descriptor for a reserved memory region
@@ -142,7 +149,7 @@ struct iommu_resv_region {
 	phys_addr_t		start;
 	size_t			length;
 	int			prot;
-	int			type;
+	enum iommu_resv_type	type;
 };
 
 #ifdef CONFIG_IOMMU_API
@@ -288,7 +295,8 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
 extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
 extern struct iommu_resv_region *
-iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type);
+iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot,
+			enum iommu_resv_type type);
 extern int iommu_get_group_resv_regions(struct iommu_group *group,
 					struct list_head *head);
 
-- 
cgit v1.2.3


From 698eff6355f735d46d1b7113df8b422874cd7988 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 17 Mar 2017 12:48:18 +0100
Subject: sched/clock, x86/perf: Fix "perf test tsc"

People reported that commit:

  5680d8094ffa ("sched/clock: Provide better clock continuity")

broke "perf test tsc".

That commit added another offset to the reported clock value; so
take that into account when computing the provided offset values.

Reported-by: Adrian Hunter <adrian.hunter@intel.com>
Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Tested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/clock.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 4a68c6791207..34fe92ce1ebd 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -54,15 +54,16 @@ static inline u64 local_clock(void)
 }
 #else
 extern void sched_clock_init_late(void);
-/*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
- */
 extern int sched_clock_stable(void);
 extern void clear_sched_clock_stable(void);
 
+/*
+ * When sched_clock_stable(), __sched_clock_offset provides the offset
+ * between local_clock() and sched_clock().
+ */
+extern u64 __sched_clock_offset;
+
+
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
-- 
cgit v1.2.3


From 90db10434b163e46da413d34db8d0e77404cc645 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Thu, 23 Mar 2017 18:24:19 +0100
Subject: KVM: kvm_io_bus_unregister_dev() should never fail

No caller currently checks the return value of
kvm_io_bus_unregister_dev(). This is evil, as all callers silently go on
freeing their device. A stale reference will remain in the io_bus,
getting at least used again, when the iobus gets teared down on
kvm_destroy_vm() - leading to use after free errors.

There is nothing the callers could do, except retrying over and over
again.

So let's simply remove the bus altogether, print an error and make
sure no one can access this broken bus again (returning -ENOMEM on any
attempt to access it).

Fixes: e93f8a0f821e ("KVM: convert io_bus to SRCU")
Cc: stable@vger.kernel.org # 3.4+
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c14ad9809da..d0250744507a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 		    int len, void *val);
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 			    int len, struct kvm_io_device *dev);
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-			      struct kvm_io_device *dev);
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			       struct kvm_io_device *dev);
 struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 					 gpa_t addr);
 
-- 
cgit v1.2.3


From 5ea1320653359dd2ade7ff2ad81e37b790eb1f1f Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 3 Mar 2017 11:47:40 -0800
Subject: Input: serio - add fast reconnect option

Devices connected to serio bus are quite slow, and to improve apparent
speed of resume process, serio core resumes (reconnects) its devices
asynchronously, by posting port reconnect requests to a workqueue.
Unfortunately this means that if there is a dependent device of a given
serio port (for example SMBus part of touchpad connected via both PS/2 and
SMBus), we do not have a good way of ensuring resume order.

This change allows drivers to define "fast reconnect" handlers that would
be called in-line during system resume. Drivers need to ensure that these
handlers are truly "fast".

Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/serio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/serio.h b/include/linux/serio.h
index c733cff44e18..138a5efe863a 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -77,6 +77,7 @@ struct serio_driver {
 	irqreturn_t (*interrupt)(struct serio *, unsigned char, unsigned int);
 	int  (*connect)(struct serio *, struct serio_driver *drv);
 	int  (*reconnect)(struct serio *);
+	int  (*fast_reconnect)(struct serio *);
 	void (*disconnect)(struct serio *);
 	void (*cleanup)(struct serio *);
 
-- 
cgit v1.2.3


From 07de36b378a58f1d1426829acf0ab7cf86f651f3 Mon Sep 17 00:00:00 2001
From: Alexander Kochetkov <al.kochet@gmail.com>
Date: Wed, 22 Mar 2017 17:32:49 +0300
Subject: clockevents: Fix syntax error in clkevt-of macro

The patch fix syntax errors introduced by commit 0c8893c9095d
("clockevents: Add a clkevt-of mechanism like clksrc-of").

Fixes: 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of")
Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/linux/clockchips.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 5d3053c34fb3..6d7edc3082f9 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -229,7 +229,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { }
 
 #ifdef CONFIG_CLKEVT_PROBE
 extern int clockevent_probe(void);
-#els
+#else
 static inline int clockevent_probe(void) { return 0; }
 #endif
 
-- 
cgit v1.2.3


From aa0e26bb786b00eaa897e024769e299470815efe Mon Sep 17 00:00:00 2001
From: David Rivshin <DRivshin@allworx.com>
Date: Wed, 29 Mar 2017 00:14:16 -0700
Subject: Input: matrix_keypad - add option to drive inactive columns

The gpio-matrix-keypad driver normally sets inactive columns as inputs
while scanning. This does not work for all hardware, which may require
the inactive columns to be actively driven in order to overcome any
pull-ups/downs on the columns.

Signed-off-by: David Rivshin <drivshin@allworx.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input/matrix_keypad.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h
index 37b04a0fdea4..6174733a57eb 100644
--- a/include/linux/input/matrix_keypad.h
+++ b/include/linux/input/matrix_keypad.h
@@ -49,6 +49,8 @@ struct matrix_keymap_data {
  * @wakeup: controls whether the device should be set up as wakeup
  *	source
  * @no_autorepeat: disable key autorepeat
+ * @drive_inactive_cols: drive inactive columns during scan, rather than
+ *	making them inputs.
  *
  * This structure represents platform-specific data that use used by
  * matrix_keypad driver to perform proper initialization.
@@ -73,6 +75,7 @@ struct matrix_keypad_platform_data {
 	bool		active_low;
 	bool		wakeup;
 	bool		no_autorepeat;
+	bool		drive_inactive_cols;
 };
 
 int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data,
-- 
cgit v1.2.3


From 597b7305dd8bafdb3aef4957d97128bc90af8e9f Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 31 Mar 2017 15:11:47 -0700
Subject: mm: move mm_percpu_wq initialization earlier

Yang Li has reported that drain_all_pages triggers a WARN_ON which means
that this function is called earlier than the mm_percpu_wq is
initialized on arm64 with CMA configured:

  WARNING: CPU: 2 PID: 1 at mm/page_alloc.c:2423 drain_all_pages+0x244/0x25c
  Modules linked in:
  CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc1-next-20170310-00027-g64dfbc5 #127
  Hardware name: Freescale Layerscape 2088A RDB Board (DT)
  task: ffffffc07c4a6d00 task.stack: ffffffc07c4a8000
  PC is at drain_all_pages+0x244/0x25c
  LR is at start_isolate_page_range+0x14c/0x1f0
  [...]
   drain_all_pages+0x244/0x25c
   start_isolate_page_range+0x14c/0x1f0
   alloc_contig_range+0xec/0x354
   cma_alloc+0x100/0x1fc
   dma_alloc_from_contiguous+0x3c/0x44
   atomic_pool_init+0x7c/0x208
   arm64_dma_init+0x44/0x4c
   do_one_initcall+0x38/0x128
   kernel_init_freeable+0x1a0/0x240
   kernel_init+0x10/0xfc
   ret_from_fork+0x10/0x20

Fix this by moving the whole setup_vmstat which is an initcall right now
to init_mm_internals which will be called right after the WQ subsystem
is initialized.

Link: http://lkml.kernel.org/r/20170315164021.28532-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Yang Li <pku.leo@gmail.com>
Tested-by: Yang Li <pku.leo@gmail.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5f01c88f0800..00a8fa7e366a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -32,6 +32,8 @@ struct user_struct;
 struct writeback_control;
 struct bdi_writeback;
 
+void init_mm_internals(void);
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES	/* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
 
-- 
cgit v1.2.3


From 553af430e7c981e6e8fa5007c5b7b5773acc63dd Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 31 Mar 2017 15:11:50 -0700
Subject: mm: rmap: fix huge file mmap accounting in the memcg stats

Huge pages are accounted as single units in the memcg's "file_mapped"
counter.  Account the correct number of base pages, like we do in the
corresponding node counter.

Link: http://lkml.kernel.org/r/20170322005111.3156-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5af377303880..bb7250c45cb8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -740,6 +740,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
 	return false;
 }
 
+static inline void mem_cgroup_update_page_stat(struct page *page,
+					       enum mem_cgroup_stat_index idx,
+					       int nr)
+{
+}
+
 static inline void mem_cgroup_inc_page_stat(struct page *page,
 					    enum mem_cgroup_stat_index idx)
 {
-- 
cgit v1.2.3


From b0845ce58379d11dcad4cdb6824a6410de260216 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 31 Mar 2017 15:12:04 -0700
Subject: kasan: report only the first error by default

Disable kasan after the first report.  There are several reasons for
this:

 - Single bug quite often has multiple invalid memory accesses causing
   storm in the dmesg.

 - Write OOB access might corrupt metadata so the next report will print
   bogus alloc/free stacktraces.

 - Reports after the first easily could be not bugs by itself but just
   side effects of the first one.

Given that multiple reports usually only do harm, it makes sense to
disable kasan after the first one.  If user wants to see all the
reports, the boot-time parameter kasan_multi_shot must be used.

[aryabinin@virtuozzo.com: wrote changelog and doc, added missing include]
Link: http://lkml.kernel.org/r/20170323154416.30257-1-aryabinin@virtuozzo.com
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 5734480c9590..a5c7046f26b4 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -76,6 +76,9 @@ size_t ksize(const void *);
 static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
 size_t kasan_metadata_size(struct kmem_cache *cache);
 
+bool kasan_save_enable_multi_shot(void);
+void kasan_restore_multi_shot(bool enabled);
+
 #else /* CONFIG_KASAN */
 
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
-- 
cgit v1.2.3


From 00a06c22e9fc0a33ae0b6ca2d47938340dbcd539 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 4 Mar 2017 11:29:34 -0800
Subject: i2c: export i2c_client_type structure

i2c bus has 2 different types of device belonging to the same bus and
bus notifiers use device type to distinguish between adapters and clients.
Previously we only had i2c_adapter_type exported, which made code wanting
to work with i2c_client devices test for type not equal to adapter type.
This unfortunately is not safe if we ever add another type to the bus,
so let's export i2c_client_type as well.

Reviewed-by: Jean Delvare <jdelvare@suse.de>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Reviewed-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/i2c.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 6b183521c616..6366989d1001 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -38,6 +38,7 @@
 
 extern struct bus_type i2c_bus_type;
 extern struct device_type i2c_adapter_type;
+extern struct device_type i2c_client_type;
 
 /* --- General options ------------------------------------------------	*/
 
-- 
cgit v1.2.3


From 4124c4eba40256b65acb5016a1edfdd59a1960b6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 1 Mar 2017 11:45:51 -0800
Subject: i2c: allow attaching IRQ resources to i2c_board_info

Simple integer for interrupt number is not expressive enough, as it does
not convey interrupt trigger type that should be used. Let's allow
attaching array of resources to the board info and have i2c core parse
first IRQ resource and set up interrupt trigger as needed.

Reviewed-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/i2c.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 6366989d1001..c5bd8b8daf97 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -304,6 +304,8 @@ static inline int i2c_slave_event(struct i2c_client *client,
  * @of_node: pointer to OpenFirmware device node
  * @fwnode: device node supplied by the platform firmware
  * @properties: additional device properties for the device
+ * @resources: resources associated with the device
+ * @num_resources: number of resources in the @resources array
  * @irq: stored in i2c_client.irq
  *
  * I2C doesn't actually support hardware probing, although controllers and
@@ -326,6 +328,8 @@ struct i2c_board_info {
 	struct device_node *of_node;
 	struct fwnode_handle *fwnode;
 	const struct property_entry *properties;
+	const struct resource *resources;
+	unsigned int	num_resources;
 	int		irq;
 };
 
-- 
cgit v1.2.3


From e6eba3fac9a0eb2018a85505b91740e27c60fdba Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatja@google.com>
Date: Mon, 3 Apr 2017 11:53:41 -0700
Subject: Input: cros_ec_keyb - add an EC event for sysrq

Some form factors (detachables/tablets) may not have a keyboard and
thus user may have to resort to using a defined EC UI to send sysrq(s)
to the kernel in order to collect crash info etc. This UI typically
is in the form of user pressing volume / power buttons in some specific
sequence and for some specific time. Add a new EC event that allows EC
to communicate the sysrq to the AP.

(We're skipping event number 5 because it has been reserved for
something else)

Signed-off-by: Rajat Jain <rajatja@google.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/mfd/cros_ec_commands.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index da1c188562bc..3ceebf6b9afb 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -2040,6 +2040,9 @@ enum ec_mkbp_event {
 	/* The state of the switches have changed. */
 	EC_MKBP_EVENT_SWITCH = 4,
 
+	/* EC sent a sysrq command */
+	EC_MKBP_EVENT_SYSRQ = 6,
+
 	/* Number of MKBP events */
 	EC_MKBP_EVENT_COUNT,
 };
@@ -2052,6 +2055,7 @@ union ec_response_get_next_data {
 
 	uint32_t   buttons;
 	uint32_t   switches;
+	uint32_t   sysrq;
 } __packed;
 
 struct ec_response_get_next_event {
-- 
cgit v1.2.3


From d99caa472c0a28dc95dd9b98c30ee46f9755181f Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sun, 19 Feb 2017 17:21:56 -0800
Subject: Input: eeti_ts - switch to gpiod API

gpiod API allows standard way of specifying GPIO polarity and takes it into
account when reading or setting GPIO state. It also allows us to switch to
common way of obtaining GPIO descriptor and away form legacy platform data.

Reviewed-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input/eeti_ts.h | 10 ----------
 1 file changed, 10 deletions(-)
 delete mode 100644 include/linux/input/eeti_ts.h

(limited to 'include/linux')

diff --git a/include/linux/input/eeti_ts.h b/include/linux/input/eeti_ts.h
deleted file mode 100644
index 16625d799b6f..000000000000
--- a/include/linux/input/eeti_ts.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef LINUX_INPUT_EETI_TS_H
-#define LINUX_INPUT_EETI_TS_H
-
-struct eeti_ts_platform_data {
-	int irq_gpio;
-	unsigned int irq_active_high;
-};
-
-#endif /* LINUX_INPUT_EETI_TS_H */
-
-- 
cgit v1.2.3