From c4be8c68e6900b1811bc64f74cb13d5032a389ce Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Wed, 31 May 2017 15:58:21 +0800
Subject: clk: sunxi-ng: h3: Export PLL_PERIPH0 clock for the PRCM

The PRCM takes PLL_PERIPH0 as one of its parents for the AR100 clock.
As such we need to be able to describe this relationship in the device
tree.

Export the PLL_PERIPH0 clock so we can reference it in the PRCM node.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 include/dt-bindings/clock/sun8i-h3-ccu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h
index c2afc41d6964..e139fe5c62ec 100644
--- a/include/dt-bindings/clock/sun8i-h3-ccu.h
+++ b/include/dt-bindings/clock/sun8i-h3-ccu.h
@@ -43,6 +43,8 @@
 #ifndef _DT_BINDINGS_CLK_SUN8I_H3_H_
 #define _DT_BINDINGS_CLK_SUN8I_H3_H_
 
+#define CLK_PLL_PERIPH0		9
+
 #define CLK_CPUX		14
 
 #define CLK_BUS_CE		20
-- 
cgit v1.2.3


From d85da227c3ae43d9ca513d60f244213cb4e55485 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Wed, 31 May 2017 15:58:23 +0800
Subject: clk: sunxi-ng: a64: Export PLL_PERIPH0 clock for the PRCM

The PRCM takes PLL_PERIPH0 as one of its parents for the AR100 clock.
As such we need to be able to describe this relationship in the device
tree.

Export the PLL_PERIPH0 clock so we can reference it in the PRCM node.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 include/dt-bindings/clock/sun50i-a64-ccu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/sun50i-a64-ccu.h b/include/dt-bindings/clock/sun50i-a64-ccu.h
index 370c0a0473fc..d66432c6e675 100644
--- a/include/dt-bindings/clock/sun50i-a64-ccu.h
+++ b/include/dt-bindings/clock/sun50i-a64-ccu.h
@@ -43,6 +43,8 @@
 #ifndef _DT_BINDINGS_CLK_SUN50I_A64_H_
 #define _DT_BINDINGS_CLK_SUN50I_A64_H_
 
+#define CLK_PLL_PERIPH0		11
+
 #define CLK_BUS_MIPI_DSI	28
 #define CLK_BUS_CE		29
 #define CLK_BUS_DMA		30
-- 
cgit v1.2.3


From e918d79a5d0a1b431e2cac0e6e6ac9452fd9ab32 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 6 Jun 2017 13:30:32 -0700
Subject: drm/i915/cnl: Add Cannonlake PCI IDs for U-skus.

Platform enabling and its power-on are organized in different
skus (U x Y x S x H, etc). So instead of organizing it in
GT1 x GT2 x GT3 let's also use the platform sku.

This is also the new Spec style what makes the review much
more easy and straightforward.

v2: Really include the PCI IDs to the picidlist[];
v3: Remove PCI IDs not present in spec.
v4: Rebase.

Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Clinton Taylor <clinton.a.taylor@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-3-git-send-email-rodrigo.vivi@intel.com
---
 include/drm/i915_pciids.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 27e0dbaa6c0e..7f1bb3b0ce5b 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -334,4 +334,14 @@
 	INTEL_KBL_GT3_IDS(info), \
 	INTEL_KBL_GT4_IDS(info)
 
+/* CNL U 2+2 */
+#define INTEL_CNL_U_GT2_IDS(info) \
+	INTEL_VGA_DEVICE(0x5A52, info), \
+	INTEL_VGA_DEVICE(0x5A5A, info), \
+	INTEL_VGA_DEVICE(0x5A42, info), \
+	INTEL_VGA_DEVICE(0x5A4A, info)
+
+#define INTEL_CNL_IDS(info) \
+	INTEL_CNL_U_GT2_IDS(info)
+
 #endif /* _I915_PCIIDS_H */
-- 
cgit v1.2.3


From 95578277cbdb60e3c68cb92c843cafc1f77c4f55 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 6 Jun 2017 13:30:33 -0700
Subject: drm/i915/cnl: Add Cannonlake PCI IDs for Y-skus.

By the Spec all CNL Y skus are 2+2, i.e. GT2.

v2: Really include the PCI IDs to the picidlist[];

Reviewed-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1496781040-20888-4-git-send-email-rodrigo.vivi@intel.com
---
 include/drm/i915_pciids.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 7f1bb3b0ce5b..7d2696a6588e 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -341,7 +341,17 @@
 	INTEL_VGA_DEVICE(0x5A42, info), \
 	INTEL_VGA_DEVICE(0x5A4A, info)
 
+/* CNL Y 2+2 */
+#define INTEL_CNL_Y_GT2_IDS(info) \
+	INTEL_VGA_DEVICE(0x5A51, info), \
+	INTEL_VGA_DEVICE(0x5A59, info), \
+	INTEL_VGA_DEVICE(0x5A41, info), \
+	INTEL_VGA_DEVICE(0x5A49, info), \
+	INTEL_VGA_DEVICE(0x5A71, info), \
+	INTEL_VGA_DEVICE(0x5A79, info)
+
 #define INTEL_CNL_IDS(info) \
-	INTEL_CNL_U_GT2_IDS(info)
+	INTEL_CNL_U_GT2_IDS(info), \
+	INTEL_CNL_Y_GT2_IDS(info)
 
 #endif /* _I915_PCIIDS_H */
-- 
cgit v1.2.3


From b056f8f3d6b900e8afd19f312719160346d263b4 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 8 Jun 2017 16:41:05 -0700
Subject: drm/i915/cfl: Add Coffee Lake PCI IDs for S Skus.

Add PCI Ids for S Sku following the BSpec.

v2: Remove the unused INTEL_CFL_IDS.(Rodrigo)
v3: Add missing IDs(Rodrigo)

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1496965267-21725-1-git-send-email-anusha.srivatsa@intel.com
---
 include/drm/i915_pciids.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 7d2696a6588e..4858debd4b58 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -334,6 +334,14 @@
 	INTEL_KBL_GT3_IDS(info), \
 	INTEL_KBL_GT4_IDS(info)
 
+/* CFL S */
+#define INTEL_CFL_S_IDS(info) \
+	INTEL_VGA_DEVICE(0x3E90, info), /* SRV GT1 */ \
+	INTEL_VGA_DEVICE(0x3E93, info), /* SRV GT1 */ \
+	INTEL_VGA_DEVICE(0x3E91, info), /* SRV GT2 */ \
+	INTEL_VGA_DEVICE(0x3E92, info), /* SRV GT2 */ \
+	INTEL_VGA_DEVICE(0x3E96, info)  /* SRV GT2 */
+
 /* CNL U 2+2 */
 #define INTEL_CNL_U_GT2_IDS(info) \
 	INTEL_VGA_DEVICE(0x5A52, info), \
-- 
cgit v1.2.3


From ccfd13215fd25a0e8c28221f3acc0dcaec11cd15 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 8 Jun 2017 16:41:06 -0700
Subject: drm/i915/cfl: Add Coffee Lake PCI IDs for H Sku.

Add PCI Ids for H Sku by following the BSpec.

v2: Remove unused INTEL_CFL_IDS.(Rodrigo).
v3: Add missing IDs(Rodrigo)

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1496965267-21725-2-git-send-email-anusha.srivatsa@intel.com
---
 include/drm/i915_pciids.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 4858debd4b58..8f25316013ba 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -342,6 +342,11 @@
 	INTEL_VGA_DEVICE(0x3E92, info), /* SRV GT2 */ \
 	INTEL_VGA_DEVICE(0x3E96, info)  /* SRV GT2 */
 
+/* CFL H */
+#define INTEL_CFL_H_IDS(info) \
+	INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \
+	INTEL_VGA_DEVICE(0x3E94, info)  /* Halo GT2 */
+
 /* CNL U 2+2 */
 #define INTEL_CNL_U_GT2_IDS(info) \
 	INTEL_VGA_DEVICE(0x5A52, info), \
-- 
cgit v1.2.3


From d29fe702c9cb682df99146d24d06e5455f043101 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Thu, 8 Jun 2017 16:41:07 -0700
Subject: drm/i915/cfl: Add Coffee Lake PCI IDs for U Sku.

Add PCI Ids for U Skus of Coffeelake.

v2: Use intel_coffeelake_gt3_info, in accordance to-
Rodrigo's patch:

v3: rebased

v3: Remove unused INTEL_CFL_IDS(Rodrigo).

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1496965267-21725-3-git-send-email-anusha.srivatsa@intel.com
---
 include/drm/i915_pciids.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 8f25316013ba..34c8f5600ce0 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -347,6 +347,13 @@
 	INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \
 	INTEL_VGA_DEVICE(0x3E94, info)  /* Halo GT2 */
 
+/* CFL U */
+#define INTEL_CFL_U_IDS(info) \
+	INTEL_VGA_DEVICE(0x3EA6, info), /* ULT GT3 */ \
+	INTEL_VGA_DEVICE(0x3EA7, info), /* ULT GT3 */ \
+	INTEL_VGA_DEVICE(0x3EA8, info), /* ULT GT3 */ \
+	INTEL_VGA_DEVICE(0x3EA5, info)  /* ULT GT3 */
+
 /* CNL U 2+2 */
 #define INTEL_CNL_U_GT2_IDS(info) \
 	INTEL_VGA_DEVICE(0x5A52, info), \
-- 
cgit v1.2.3


From 68dd02d19c811ca8ea60220a9d73e13b4bdad73a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 14 Jun 2017 09:28:11 +0200
Subject: dev_ioctl: copy only the smaller struct iwreq for wext

Unfortunately, struct iwreq isn't a proper subset of struct ifreq,
but is still handled by the same code path. Robert reported that
then applications may (randomly) fault if the struct iwreq they
pass happens to land within 8 bytes of the end of a mapping (the
struct is only 32 bytes, vs. struct ifreq's 40 bytes).

To fix this, pull out the code handling wireless extension ioctls
and copy only the smaller structure in this case.

This bug goes back a long time, I tracked that it was introduced
into mainline in 2.1.15, over 20 years ago!

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=195869

Reported-by: Robert O'Callahan <robert@ocallahan.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/wext.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/wext.h b/include/net/wext.h
index 345911965dbb..454ff763eeba 100644
--- a/include/net/wext.h
+++ b/include/net/wext.h
@@ -6,7 +6,7 @@
 struct net;
 
 #ifdef CONFIG_WEXT_CORE
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
 		      void __user *arg);
 int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 			     unsigned long arg);
@@ -14,7 +14,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 struct iw_statistics *get_wireless_stats(struct net_device *dev);
 int call_commit_handler(struct net_device *dev);
 #else
-static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
 				    void __user *arg)
 {
 	return -EINVAL;
-- 
cgit v1.2.3


From 7fed555c02f754af44f8963890b5ef8e30e97391 Mon Sep 17 00:00:00 2001
From: Robert Bragg <robert@sixbynine.org>
Date: Tue, 13 Jun 2017 12:22:59 +0100
Subject: drm/i915: expose _SLICE_MASK GETPARM

Enables userspace to determine the maximum number of slices that can
be enabled on the device and also know what specific slices can be
enabled. This information is required, for example, to be able to
analyse some OA counter reports where the counter configuration
depends on the HW slice configuration.

Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 include/uapi/drm/i915_drm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f24a80d2d42e..25695c3d9a76 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -418,6 +418,9 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_CAPTURE	 45
 
+/* Query the mask of slices available for this system */
+#define I915_PARAM_SLICE_MASK		 46
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
-- 
cgit v1.2.3


From f532023381df49ac00cb2d1e70df607cf534720d Mon Sep 17 00:00:00 2001
From: Robert Bragg <robert@sixbynine.org>
Date: Tue, 13 Jun 2017 12:23:00 +0100
Subject: drm/i915: expose _SUBSLICE_MASK GETPARM

Assuming a uniform mask across all slices, this enables userspace to
determine the specific sub slices can be enabled. This information is
required, for example, to be able to analyse some OA counter reports
where the counter configuration depends on the HW sub slice
configuration.

Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 include/uapi/drm/i915_drm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 25695c3d9a76..464547d08173 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -421,6 +421,11 @@ typedef struct drm_i915_irq_wait {
 /* Query the mask of slices available for this system */
 #define I915_PARAM_SLICE_MASK		 46
 
+/* Assuming it's uniform for each slice, this queries the mask of subslices
+ * per-slice for this system.
+ */
+#define I915_PARAM_SUBSLICE_MASK	 47
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
-- 
cgit v1.2.3


From 19f81df2859eb10e92d68991cefa39f826dea013 Mon Sep 17 00:00:00 2001
From: Robert Bragg <robert@sixbynine.org>
Date: Tue, 13 Jun 2017 12:23:03 +0100
Subject: drm/i915/perf: Add OA unit support for Gen 8+

Enables access to OA unit metrics for BDW, CHV, SKL and BXT which all
share (more-or-less) the same OA unit design.

Of particular note in comparison to Haswell: some OA unit HW config
state has become per-context state and as a consequence it is somewhat
more complicated to manage synchronous state changes from the cpu while
there's no guarantee of what context (if any) is currently actively
running on the gpu.

The periodic sampling frequency which can be particularly useful for
system-wide analysis (as opposed to command stream synchronised
MI_REPORT_PERF_COUNT commands) is perhaps the most surprising state to
have become per-context save and restored (while the OABUFFER
destination is still a shared, system-wide resource).

This support for gen8+ takes care to consider a number of timing
challenges involved in synchronously updating per-context state
primarily by programming all config state from the cpu and updating all
current and saved contexts synchronously while the OA unit is still
disabled.

The driver intentionally avoids depending on command streamer
programming to update OA state considering the lack of synchronization
between the automatic loading of OACTXCONTROL state (that includes the
periodic sampling state and enable state) on context restore and the
parsing of any general purpose BB the driver can control. I.e. this
implementation is careful to avoid the possibility of a context restore
temporarily enabling any out-of-date periodic sampling state. In
addition to the risk of transiently-out-of-date state being loaded
automatically; there are also internal HW latencies involved in the
loading of MUX configurations which would be difficult to account for
from the command streamer (and we only want to enable the unit when once
the MUX configuration is complete).

Since the Gen8+ OA unit design no longer supports clock gating the unit
off for a single given context (which effectively stopped any progress
of counters while any other context was running) and instead supports
tagging OA reports with a context ID for filtering on the CPU, it means
we can no longer hide the system-wide progress of counters from a
non-privileged application only interested in metrics for its own
context. Although we could theoretically try and subtract the progress
of other contexts before forwarding reports via read() we aren't in a
position to filter reports captured via MI_REPORT_PERF_COUNT commands.
As a result, for Gen8+, we always require the
dev.i915.perf_stream_paranoid to be unset for any access to OA metrics
if not root.

v5: Drain submitted requests when enabling metric set to ensure no
    lite-restore erases the context image we just updated (Lionel)

v6: In addition to drain, switch to kernel context & update all
    context in place (Chris)

v7: Add missing mutex_unlock() if switching to kernel context fails
    (Matthew)

v8: Simplify OA period/flex-eu-counters programming by using the
    batchbuffer instead of modifying ctx-image (Lionel)

v9: Back to updating the context image (due to erroneous testing,
    batchbuffer programming the OA unit doesn't actually work)
    (Lionel)
    Pin context before updating context image (Chris)
    Drop MMIO programming now that we switch to a kernel context with
    right values in initial context image (Chris)

v10: Just pin_map the contexts we want to modify or let the
     configuration happen on first use (Chris)

v11: Update kernel context OA config through the batchbuffer rather
     than on the fly ctx-image update (Lionel)

v12: Rework OA context registers update again by swithing away from
     user contexts and reconfiguring the kernel context through the
     batchbuffer and updating all the other contexts' context image.
     Also take care to lock slice/subslice configuration when OA is
     on. (Lionel)

v13: Request rpcs updates on all engine when updating the OA config
     (Lionel)

v14: Drop any kind of rpcs management now that we monitor sseu
     configuration changes in a later patch (Lionel)
     Remove usleep after programming the NOA configs on Gen8+, this
     doesn't seem to be needed (Lionel)

v15: Respect coding style for block comments (Chris)

v16: Add missing i915_add_request() in case we fail to emit OA
     configuration (Matthew)

Signed-off-by: Robert Bragg <robert@sixbynine.org>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com> \o/
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 include/uapi/drm/i915_drm.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 464547d08173..15bc9f78ba4d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1316,13 +1316,18 @@ struct drm_i915_gem_context_param {
 };
 
 enum drm_i915_oa_format {
-	I915_OA_FORMAT_A13 = 1,
-	I915_OA_FORMAT_A29,
-	I915_OA_FORMAT_A13_B8_C8,
-	I915_OA_FORMAT_B4_C8,
-	I915_OA_FORMAT_A45_B8_C8,
-	I915_OA_FORMAT_B4_C8_A16,
-	I915_OA_FORMAT_C4_B8,
+	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
+	I915_OA_FORMAT_A29,	    /* HSW only */
+	I915_OA_FORMAT_A13_B8_C8,   /* HSW only */
+	I915_OA_FORMAT_B4_C8,	    /* HSW only */
+	I915_OA_FORMAT_A45_B8_C8,   /* HSW only */
+	I915_OA_FORMAT_B4_C8_A16,   /* HSW only */
+	I915_OA_FORMAT_C4_B8,	    /* HSW+ */
+
+	/* Gen8+ */
+	I915_OA_FORMAT_A12,
+	I915_OA_FORMAT_A12_B8_C8,
+	I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 
 	I915_OA_FORMAT_MAX	    /* non-ABI */
 };
-- 
cgit v1.2.3


From 167b606aa262270ab6aeb5700adca6b1f33da26a Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Mon, 8 May 2017 14:34:59 -0600
Subject: drm/msm: Remove DRM_MSM_NUM_IOCTLS

The ioctl array is sparsely populated but the compiler will make sure
that it is sufficiently sized for all the values that we have so we
can safely use ARRAY_SIZE() instead of having a constantly changing
#define in the uapi header.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 include/uapi/drm/msm_drm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index a4a189a240d7..a9985fe6efcd 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -261,7 +261,6 @@ struct drm_msm_gem_madvise {
 #define DRM_MSM_GEM_SUBMIT             0x06
 #define DRM_MSM_WAIT_FENCE             0x07
 #define DRM_MSM_GEM_MADVISE            0x08
-#define DRM_MSM_NUM_IOCTLS             0x09
 
 #define DRM_IOCTL_MSM_GET_PARAM        DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
 #define DRM_IOCTL_MSM_GEM_NEW          DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
-- 
cgit v1.2.3


From 49fd08baa36ac10b13ea7b23fc6bbee8b4a6fcfe Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Mon, 8 May 2017 14:35:01 -0600
Subject: drm/msm: Add hint to DRM_IOCTL_MSM_GEM_INFO to return an object IOVA

Modify the 'pad' member of struct drm_msm_gem_info to 'flags'. If the
user sets 'flags' to non-zero it means that they want a IOVA for the
GEM object instead of a mmap() offset. Return the iova in the 'offset'
member.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
[robclark: s/hint/flags in commit msg]
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 include/uapi/drm/msm_drm.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index a9985fe6efcd..26c54f6d595d 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -104,10 +104,14 @@ struct drm_msm_gem_new {
 	__u32 handle;         /* out */
 };
 
+#define MSM_INFO_IOVA	0x01
+
+#define MSM_INFO_FLAGS (MSM_INFO_IOVA)
+
 struct drm_msm_gem_info {
 	__u32 handle;         /* in */
-	__u32 pad;
-	__u64 offset;         /* out, offset to pass to mmap() */
+	__u32 flags;	      /* in - combination of MSM_INFO_* flags */
+	__u64 offset;         /* out, mmap() offset or iova */
 };
 
 #define MSM_PREP_READ        0x01
-- 
cgit v1.2.3


From 1a71cf2fa646799d4397a49b223549d8617fece0 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 16 Jun 2017 15:05:23 +0100
Subject: drm/i915: Allow execbuffer to use the first object as the batch

Currently, the last object in the execlist is the always the batch.
However, when building the batch buffer we often know the batch object
first and if we can use the first slot in the execlist we can emit
relocation instructions relative to it immediately and avoid a separate
pass to adjust the relocations to point to the last execlist slot.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 include/uapi/drm/i915_drm.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 15bc9f78ba4d..7ccbd6a2bbe0 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -418,7 +418,6 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_CAPTURE	 45
 
-/* Query the mask of slices available for this system */
 #define I915_PARAM_SLICE_MASK		 46
 
 /* Assuming it's uniform for each slice, this queries the mask of subslices
@@ -426,6 +425,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_SUBSLICE_MASK	 47
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer
+ * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST.
+ */
+#define I915_PARAM_HAS_EXEC_BATCH_FIRST	 48
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -912,7 +917,17 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_OUT		(1<<17)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
+/*
+ * Traditionally the execbuf ioctl has only considered the final element in
+ * the execobject[] to be the executable batch. Often though, the client
+ * will known the batch object prior to construction and being able to place
+ * it into the execobject[] array first can simplify the relocation tracking.
+ * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the
+ * execobject[] as the * batch instead (the default is to use the last
+ * element).
+ */
+#define I915_EXEC_BATCH_FIRST		(1<<18)
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
cgit v1.2.3


From ceea5e3771ed2378668455fa21861bead7504df5 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 8 Jun 2017 16:44:20 -0700
Subject: time: Fix clock->read(clock) race around clocksource changes

In tests, which excercise switching of clocksources, a NULL
pointer dereference can be observed on AMR64 platforms in the
clocksource read() function:

u64 clocksource_mmio_readl_down(struct clocksource *c)
{
	return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
}

This is called from the core timekeeping code via:

	cycle_now = tkr->read(tkr->clock);

tkr->read is the cached tkr->clock->read() function pointer.
When the clocksource is changed then tkr->clock and tkr->read
are updated sequentially. The code above results in a sequential
load operation of tkr->read and tkr->clock as well.

If the store to tkr->clock hits between the loads of tkr->read
and tkr->clock, then the old read() function is called with the
new clock pointer. As a consequence the read() function
dereferences a different data structure and the resulting 'reg'
pointer can point anywhere including NULL.

This problem was introduced when the timekeeping code was
switched over to use struct tk_read_base. Before that, it was
theoretically possible as well when the compiler decided to
reload clock in the code sequence:

     now = tk->clock->read(tk->clock);

Add a helper function which avoids the issue by reading
tk_read_base->clock once into a local variable clk and then issue
the read function via clk->read(clk). This guarantees that the
read() function always gets the proper clocksource pointer handed
in.

Since there is now no use for the tkr.read pointer, this patch
also removes it, and to address stopping the fast timekeeper
during suspend/resume, it introduces a dummy clocksource to use
rather then just a dummy read function.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: stable <stable@vger.kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Daniel Mentz <danielmentz@google.com>
Link: http://lkml.kernel.org/r/1496965462-20003-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timekeeper_internal.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 110f4532188c..e9834ada4d0c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,7 +29,6 @@
  */
 struct tk_read_base {
 	struct clocksource	*clock;
-	u64			(*read)(struct clocksource *cs);
 	u64			mask;
 	u64			cycle_last;
 	u32			mult;
-- 
cgit v1.2.3


From 3d88d56c5873f6eebe23e05c3da701960146b801 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 8 Jun 2017 16:44:21 -0700
Subject: time: Fix CLOCK_MONOTONIC_RAW sub-nanosecond accounting

Due to how the MONOTONIC_RAW accumulation logic was handled,
there is the potential for a 1ns discontinuity when we do
accumulations. This small discontinuity has for the most part
gone un-noticed, but since ARM64 enabled CLOCK_MONOTONIC_RAW
in their vDSO clock_gettime implementation, we've seen failures
with the inconsistency-check test in kselftest.

This patch addresses the issue by using the same sub-ns
accumulation handling that CLOCK_MONOTONIC uses, which avoids
the issue for in-kernel users.

Since the ARM64 vDSO implementation has its own clock_gettime
calculation logic, this patch reduces the frequency of errors,
but failures are still seen. The ARM64 vDSO will need to be
updated to include the sub-nanosecond xtime_nsec values in its
calculation for this issue to be completely fixed.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Tested-by: Daniel Mentz <danielmentz@google.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: "stable #4 . 8+" <stable@vger.kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Link: http://lkml.kernel.org/r/1496965462-20003-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timekeeper_internal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index e9834ada4d0c..f7043ccca81c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -57,7 +57,7 @@ struct tk_read_base {
  *			interval.
  * @xtime_remainder:	Shifted nano seconds left over when rounding
  *			@cycle_interval
- * @raw_interval:	Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:	Shifted raw nano seconds accumulated per NTP interval.
  * @ntp_error:		Difference between accumulated time and NTP time in ntp
  *			shifted nano seconds.
  * @ntp_error_shift:	Shift conversion between clock shifted nano seconds and
@@ -99,7 +99,7 @@ struct timekeeper {
 	u64			cycle_interval;
 	u64			xtime_interval;
 	s64			xtime_remainder;
-	u32			raw_interval;
+	u64			raw_interval;
 	/* The ntp_tick_length() value currently being used.
 	 * This cached copy ensures we consistently apply the tick
 	 * length for an entire tick, as ntp_tick_length may change
-- 
cgit v1.2.3


From 8e8320c9315c47a6a090188720ccff32a6a6ba18 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 20 Jun 2017 17:56:13 -0600
Subject: blk-mq: fix performance regression with shared tags

If we have shared tags enabled, then every IO completion will trigger
a full loop of every queue belonging to a tag set, and every hardware
queue for each of those queues, even if nothing needs to be done.
This causes a massive performance regression if you have a lot of
shared devices.

Instead of doing this huge full scan on every IO, add an atomic
counter to the main queue that tracks how many hardware queues have
been marked as needing a restart. With that, we can avoid looking for
restartable queues, if we don't have to.

Max reports that this restores performance. Before this patch, 4K
IOPS was limited to 22-23K IOPS. With the patch, we are running at
950-970K IOPS.

Fixes: 6d8c6c0f97ad ("blk-mq: Restart a single queue if tag sets are shared")
Reported-by: Max Gurtovoy <maxg@mellanox.com>
Tested-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Tested-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b74a3edcb3da..1ddd36bd2173 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -391,6 +391,8 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
+	atomic_t		shared_hctx_restart;
+
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;
 
-- 
cgit v1.2.3


From e4330d8bf669139a983255d1801733b64c2ae841 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 19 Jun 2017 15:53:01 +0300
Subject: ACPI / scan: Fix enumeration for special SPI and I2C devices

Commit f406270bf73d ("ACPI / scan: Set the visited flag for all
enumerated devices") caused that two group of special SPI or I2C
devices do not enumerate. SPI and I2C devices are expected to be
enumerated by the SPI and I2C subsystems but change caused that
acpi_bus_attach() marks those devices with acpi_device_set_enumerated().

First group of devices are matched using Device Tree compatible property
with special _HID "PRP0001". Those devices have matched scan handler,
acpi_scan_attach_handler() retuns 1 and acpi_bus_attach() marks them
with acpi_device_set_enumerated().

Second group of devices without valid _HID such as "LNXVIDEO" have
device->pnp.type.platform_id set to zero and change again marks them
with acpi_device_set_enumerated().

Fix this by flagging the SPI and I2C devices during struct acpi_device
object initialization time and let the code in acpi_bus_attach() to go
through the device_attach() and acpi_default_enumeration() path for all
SPI and I2C devices.

Fixes: f406270bf73d (ACPI / scan: Set the visited flag for all enumerated devices)
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: 4.11+ <stable@vger.kernel.org> # 4.11+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpi_bus.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 197f3fffc9a7..408c7820e200 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -210,7 +210,8 @@ struct acpi_device_flags {
 	u32 of_compatible_ok:1;
 	u32 coherent_dma:1;
 	u32 cca_seen:1;
-	u32 reserved:20;
+	u32 spi_i2c_slave:1;
+	u32 reserved:19;
 };
 
 /* File System */
-- 
cgit v1.2.3


From 3b7b314053d021601940c50b07f5f1423ae67e21 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 23 Jun 2017 15:08:52 -0700
Subject: slub: make sysfs file removal asynchronous

Commit bf5eb3de3847 ("slub: separate out sysfs_slab_release() from
sysfs_slab_remove()") made slub sysfs file removals synchronous to
kmem_cache shutdown.

Unfortunately, this created a possible ABBA deadlock between slab_mutex
and sysfs draining mechanism triggering the following lockdep warning.

  ======================================================
  [ INFO: possible circular locking dependency detected ]
  4.10.0-test+ #48 Not tainted
  -------------------------------------------------------
  rmmod/1211 is trying to acquire lock:
   (s_active#120){++++.+}, at: [<ffffffff81308073>] kernfs_remove+0x23/0x40

  but task is already holding lock:
   (slab_mutex){+.+.+.}, at: [<ffffffff8120f691>] kmem_cache_destroy+0x41/0x2d0

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #1 (slab_mutex){+.+.+.}:
	 lock_acquire+0xf6/0x1f0
	 __mutex_lock+0x75/0x950
	 mutex_lock_nested+0x1b/0x20
	 slab_attr_store+0x75/0xd0
	 sysfs_kf_write+0x45/0x60
	 kernfs_fop_write+0x13c/0x1c0
	 __vfs_write+0x28/0x120
	 vfs_write+0xc8/0x1e0
	 SyS_write+0x49/0xa0
	 entry_SYSCALL_64_fastpath+0x1f/0xc2

  -> #0 (s_active#120){++++.+}:
	 __lock_acquire+0x10ed/0x1260
	 lock_acquire+0xf6/0x1f0
	 __kernfs_remove+0x254/0x320
	 kernfs_remove+0x23/0x40
	 sysfs_remove_dir+0x51/0x80
	 kobject_del+0x18/0x50
	 __kmem_cache_shutdown+0x3e6/0x460
	 kmem_cache_destroy+0x1fb/0x2d0
	 kvm_exit+0x2d/0x80 [kvm]
	 vmx_exit+0x19/0xa1b [kvm_intel]
	 SyS_delete_module+0x198/0x1f0
	 entry_SYSCALL_64_fastpath+0x1f/0xc2

  other info that might help us debug this:

   Possible unsafe locking scenario:

	 CPU0                    CPU1
	 ----                    ----
    lock(slab_mutex);
				 lock(s_active#120);
				 lock(slab_mutex);
    lock(s_active#120);

   *** DEADLOCK ***

  2 locks held by rmmod/1211:
   #0:  (cpu_hotplug.dep_map){++++++}, at: [<ffffffff810a7877>] get_online_cpus+0x37/0x80
   #1:  (slab_mutex){+.+.+.}, at: [<ffffffff8120f691>] kmem_cache_destroy+0x41/0x2d0

  stack backtrace:
  CPU: 3 PID: 1211 Comm: rmmod Not tainted 4.10.0-test+ #48
  Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012
  Call Trace:
   print_circular_bug+0x1be/0x210
   __lock_acquire+0x10ed/0x1260
   lock_acquire+0xf6/0x1f0
   __kernfs_remove+0x254/0x320
   kernfs_remove+0x23/0x40
   sysfs_remove_dir+0x51/0x80
   kobject_del+0x18/0x50
   __kmem_cache_shutdown+0x3e6/0x460
   kmem_cache_destroy+0x1fb/0x2d0
   kvm_exit+0x2d/0x80 [kvm]
   vmx_exit+0x19/0xa1b [kvm_intel]
   SyS_delete_module+0x198/0x1f0
   ? SyS_delete_module+0x5/0x1f0
   entry_SYSCALL_64_fastpath+0x1f/0xc2

It'd be the cleanest to deal with the issue by removing sysfs files
without holding slab_mutex before the rest of shutdown; however, given
the current code structure, it is pretty difficult to do so.

This patch punts sysfs file removal to a work item.  Before commit
bf5eb3de3847, the removal was punted to a RCU delayed work item which is
executed after release.  Now, we're punting to a different work item on
shutdown which still maintains the goal removing the sysfs files earlier
when destroying kmem_caches.

Link: http://lkml.kernel.org/r/20170620204512.GI21326@htj.duckdns.org
Fixes: bf5eb3de3847 ("slub: separate out sysfs_slab_release() from sysfs_slab_remove()")
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 07ef550c6627..93315d6b21a8 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -84,6 +84,7 @@ struct kmem_cache {
 	int red_left_pad;	/* Left redzone padding size */
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
+	struct work_struct kobj_remove_work;
 #endif
 #ifdef CONFIG_MEMCG
 	struct memcg_cache_params memcg_params;
-- 
cgit v1.2.3