40 files changed, 1120 insertions, 280 deletions
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index f4babed2f557..27671489477d 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -386,11 +386,11 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 			return err;
 		}
 
-		if (err < size)
-			return -EPROTO;
 
 		switch (msg.reply & DP_AUX_NATIVE_REPLY_MASK) {
 		case DP_AUX_NATIVE_REPLY_ACK:
+			if (err < size)
+				return -EPROTO;
 			return err;
 
 		case DP_AUX_NATIVE_REPLY_NACK:
@@ -599,8 +599,6 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 			return err;
 		}
 
-		if (err < msg->size)
-			return -EPROTO;
 
 		switch (msg->reply & DP_AUX_NATIVE_REPLY_MASK) {
 		case DP_AUX_NATIVE_REPLY_ACK:
@@ -639,6 +637,8 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 			 * Both native ACK and I2C ACK replies received. We
 			 * can assume the transfer was successful.
 			 */
+			if (err < msg->size)
+				return -EPROTO;
 			return 0;
 
 		case DP_AUX_I2C_REPLY_NACK:
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 9795c0636da2..04d3fd3658f3 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1159,6 +1159,7 @@ struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *f
 {
 	struct drm_cmdline_mode *cmdline_mode;
 	struct drm_display_mode *mode = NULL;
+	bool prefer_non_interlace;
 
 	cmdline_mode = &fb_helper_conn->cmdline_mode;
 	if (cmdline_mode->specified == false)
@@ -1170,6 +1171,8 @@ struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *f
 	if (cmdline_mode->rb || cmdline_mode->margins)
 		goto create_mode;
 
+	prefer_non_interlace = !cmdline_mode->interlace;
+ again:
 	list_for_each_entry(mode, &fb_helper_conn->connector->modes, head) {
 		/* check width/height */
 		if (mode->hdisplay != cmdline_mode->xres ||
@@ -1184,10 +1187,18 @@ struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *f
 		if (cmdline_mode->interlace) {
 			if (!(mode->flags & DRM_MODE_FLAG_INTERLACE))
 				continue;
+		} else if (prefer_non_interlace) {
+			if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+				continue;
 		}
 		return mode;
 	}
 
+	if (prefer_non_interlace) {
+		prefer_non_interlace = false;
+		goto again;
+	}
+
 create_mode:
 	mode = drm_mode_create_from_cmdline_mode(fb_helper_conn->connector->dev,
 						 cmdline_mode);
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index 2c07cb9550ef..05c97c5350a1 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -234,8 +234,17 @@ static int drm_gem_cma_mmap_obj(struct drm_gem_cma_object *cma_obj,
 {
 	int ret;
 
-	ret = remap_pfn_range(vma, vma->vm_start, cma_obj->paddr >> PAGE_SHIFT,
-			vma->vm_end - vma->vm_start, vma->vm_page_prot);
+	/*
+	 * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
+	 * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
+	 * the whole buffer.
+	 */
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_pgoff = 0;
+
+	ret = dma_mmap_writecombine(cma_obj->base.dev->dev, vma,
+				    cma_obj->vaddr, cma_obj->paddr,
+				    vma->vm_end - vma->vm_start);
 	if (ret)
 		drm_gem_vm_close(vma);
 
@@ -273,9 +282,9 @@ void drm_gem_cma_describe(struct drm_gem_cma_object *cma_obj, struct seq_file *m
 
 	off = drm_vma_node_start(&obj->vma_node);
 
-	seq_printf(m, "%2d (%2d) %08llx %08Zx %p %d",
+	seq_printf(m, "%2d (%2d) %08llx %pad %p %d",
 			obj->name, obj->refcount.refcount.counter,
-			off, cma_obj->paddr, cma_obj->vaddr, obj->size);
+			off, &cma_obj->paddr, cma_obj->vaddr, obj->size);
 
 	seq_printf(m, "\n");
 }
@@ -323,7 +332,7 @@ drm_gem_cma_prime_import_sg_table(struct drm_device *dev, size_t size,
 	cma_obj->paddr = sg_dma_address(sgt->sgl);
 	cma_obj->sgt = sgt;
 
-	DRM_DEBUG_PRIME("dma_addr = 0x%x, size = %zu\n", cma_obj->paddr, size);
+	DRM_DEBUG_PRIME("dma_addr = %pad, size = %zu\n", &cma_obj->paddr, size);
 
 	return &cma_obj->base;
 }
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index a2d45b748f86..71e2d3fcd6ee 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -82,6 +82,10 @@
  * this to implement guard pages between incompatible caching domains in the
  * graphics TT.
  *
+ * Two behaviors are supported for searching and allocating: bottom-up and top-down.
+ * The default is bottom-up. Top-down allocation can be used if the memory area
+ * has different restrictions, or just to reduce fragmentation.
+ *
  * Finally iteration helpers to walk all nodes and all holes are provided as are
  * some basic allocator dumpers for debugging.
  */
@@ -102,7 +106,8 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
 static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 				 struct drm_mm_node *node,
 				 unsigned long size, unsigned alignment,
-				 unsigned long color)
+				 unsigned long color,
+				 enum drm_mm_allocator_flags flags)
 {
 	struct drm_mm *mm = hole_node->mm;
 	unsigned long hole_start = drm_mm_hole_node_start(hole_node);
@@ -115,12 +120,22 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 	if (mm->color_adjust)
 		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
 
+	if (flags & DRM_MM_CREATE_TOP)
+		adj_start = adj_end - size;
+
 	if (alignment) {
 		unsigned tmp = adj_start % alignment;
-		if (tmp)
-			adj_start += alignment - tmp;
+		if (tmp) {
+			if (flags & DRM_MM_CREATE_TOP)
+				adj_start -= tmp;
+			else
+				adj_start += alignment - tmp;
+		}
 	}
 
+	BUG_ON(adj_start < hole_start);
+	BUG_ON(adj_end > hole_end);
+
 	if (adj_start == hole_start) {
 		hole_node->hole_follows = 0;
 		list_del(&hole_node->hole_stack);
@@ -205,7 +220,8 @@ EXPORT_SYMBOL(drm_mm_reserve_node);
  * @size: size of the allocation
  * @alignment: alignment of the allocation
  * @color: opaque tag value to use for this node
- * @flags: flags to fine-tune the allocation
+ * @sflags: flags to fine-tune the allocation search
+ * @aflags: flags to fine-tune the allocation behavior
  *
  * The preallocated node must be cleared to 0.
  *
@@ -215,16 +231,17 @@ EXPORT_SYMBOL(drm_mm_reserve_node);
 int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
 			       unsigned long size, unsigned alignment,
 			       unsigned long color,
-			       enum drm_mm_search_flags flags)
+			       enum drm_mm_search_flags sflags,
+			       enum drm_mm_allocator_flags aflags)
 {
 	struct drm_mm_node *hole_node;
 
 	hole_node = drm_mm_search_free_generic(mm, size, alignment,
-					       color, flags);
+					       color, sflags);
 	if (!hole_node)
 		return -ENOSPC;
 
-	drm_mm_insert_helper(hole_node, node, size, alignment, color);
+	drm_mm_insert_helper(hole_node, node, size, alignment, color, aflags);
 	return 0;
 }
 EXPORT_SYMBOL(drm_mm_insert_node_generic);
@@ -233,7 +250,8 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 				       struct drm_mm_node *node,
 				       unsigned long size, unsigned alignment,
 				       unsigned long color,
-				       unsigned long start, unsigned long end)
+				       unsigned long start, unsigned long end,
+				       enum drm_mm_allocator_flags flags)
 {
 	struct drm_mm *mm = hole_node->mm;
 	unsigned long hole_start = drm_mm_hole_node_start(hole_node);
@@ -248,13 +266,20 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 	if (adj_end > end)
 		adj_end = end;
 
+	if (flags & DRM_MM_CREATE_TOP)
+		adj_start = adj_end - size;
+
 	if (mm->color_adjust)
 		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
 
 	if (alignment) {
 		unsigned tmp = adj_start % alignment;
-		if (tmp)
-			adj_start += alignment - tmp;
+		if (tmp) {
+			if (flags & DRM_MM_CREATE_TOP)
+				adj_start -= tmp;
+			else
+				adj_start += alignment - tmp;
+		}
 	}
 
 	if (adj_start == hole_start) {
@@ -271,6 +296,8 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 	INIT_LIST_HEAD(&node->hole_stack);
 	list_add(&node->node_list, &hole_node->node_list);
 
+	BUG_ON(node->start < start);
+	BUG_ON(node->start < adj_start);
 	BUG_ON(node->start + node->size > adj_end);
 	BUG_ON(node->start + node->size > end);
 
@@ -290,7 +317,8 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
  * @color: opaque tag value to use for this node
  * @start: start of the allowed range for this node
  * @end: end of the allowed range for this node
- * @flags: flags to fine-tune the allocation
+ * @sflags: flags to fine-tune the allocation search
+ * @aflags: flags to fine-tune the allocation behavior
  *
  * The preallocated node must be cleared to 0.
  *
@@ -298,21 +326,23 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
  * 0 on success, -ENOSPC if there's no suitable hole.
  */
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
-					unsigned long size, unsigned alignment, unsigned long color,
+					unsigned long size, unsigned alignment,
+					unsigned long color,
 					unsigned long start, unsigned long end,
-					enum drm_mm_search_flags flags)
+					enum drm_mm_search_flags sflags,
+					enum drm_mm_allocator_flags aflags)
 {
 	struct drm_mm_node *hole_node;
 
 	hole_node = drm_mm_search_free_in_range_generic(mm,
 							size, alignment, color,
-							start, end, flags);
+							start, end, sflags);
 	if (!hole_node)
 		return -ENOSPC;
 
 	drm_mm_insert_helper_range(hole_node, node,
 				   size, alignment, color,
-				   start, end);
+				   start, end, aflags);
 	return 0;
 }
 EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic);
@@ -391,7 +421,10 @@ static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 	best = NULL;
 	best_size = ~0UL;
 
-	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+	__drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
+			       flags & DRM_MM_SEARCH_BELOW) {
+		unsigned long hole_size = adj_end - adj_start;
+
 		if (mm->color_adjust) {
 			mm->color_adjust(entry, color, &adj_start, &adj_end);
 			if (adj_end <= adj_start)
@@ -404,9 +437,9 @@ static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 		if (!(flags & DRM_MM_SEARCH_BEST))
 			return entry;
 
-		if (entry->size < best_size) {
+		if (hole_size < best_size) {
 			best = entry;
-			best_size = entry->size;
+			best_size = hole_size;
 		}
 	}
 
@@ -432,7 +465,10 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
 	best = NULL;
 	best_size = ~0UL;
 
-	drm_mm_for_each_hole(entry, mm, adj_start, adj_end) {
+	__drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
+			       flags & DRM_MM_SEARCH_BELOW) {
+		unsigned long hole_size = adj_end - adj_start;
+
 		if (adj_start < start)
 			adj_start = start;
 		if (adj_end > end)
@@ -450,9 +486,9 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
 		if (!(flags & DRM_MM_SEARCH_BEST))
 			return entry;
 
-		if (entry->size < best_size) {
+		if (hole_size < best_size) {
 			best = entry;
-			best_size = entry->size;
+			best_size = hole_size;
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 33bbaa0d4412..404a5456bf3a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3264,7 +3264,8 @@ search_free:
 	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
 						  size, alignment,
 						  obj->cache_level, 0, gtt_max,
-						  DRM_MM_SEARCH_DEFAULT);
+						  DRM_MM_SEARCH_DEFAULT,
+						  DRM_MM_CREATE_DEFAULT);
 	if (ret) {
 		ret = i915_gem_evict_something(dev, vm, size, alignment,
 					       obj->cache_level, flags);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ee535514aa41..2b3c79923d90 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1074,7 +1074,8 @@ alloc:
 						  &ppgtt->node, GEN6_PD_SIZE,
 						  GEN6_PD_ALIGN, 0,
 						  0, dev_priv->gtt.base.total,
-						  DRM_MM_SEARCH_DEFAULT);
+						  DRM_MM_SEARCH_DEFAULT,
+						  DRM_MM_CREATE_DEFAULT);
 	if (ret == -ENOSPC && !retried) {
 		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 4f977a593bea..5e1e6b0cd8ac 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -7,6 +7,7 @@ msm-y := \
 	adreno/adreno_gpu.o \
 	adreno/a3xx_gpu.o \
 	hdmi/hdmi.o \
+	hdmi/hdmi_audio.o \
 	hdmi/hdmi_bridge.o \
 	hdmi/hdmi_connector.o \
 	hdmi/hdmi_i2c.o \
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 461df93e825e..f20fbde5dc49 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -35,7 +35,11 @@
 	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
 	 A3XX_INT0_UCHE_OOB_ACCESS)
 
-static struct platform_device *a3xx_pdev;
+
+static bool hang_debug = false;
+MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)");
+module_param_named(hang_debug, hang_debug, bool, 0600);
+static void a3xx_dump(struct msm_gpu *gpu);
 
 static void a3xx_me_init(struct msm_gpu *gpu)
 {
@@ -291,6 +295,9 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
 
 static void a3xx_recover(struct msm_gpu *gpu)
 {
+	/* dump registers before resetting gpu, if enabled: */
+	if (hang_debug)
+		a3xx_dump(gpu);
 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
@@ -311,27 +318,18 @@ static void a3xx_destroy(struct msm_gpu *gpu)
 		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
 #endif
 
-	put_device(&a3xx_gpu->pdev->dev);
 	kfree(a3xx_gpu);
 }
 
 static void a3xx_idle(struct msm_gpu *gpu)
 {
-	unsigned long t;
-
 	/* wait for ringbuffer to drain: */
 	adreno_idle(gpu);
 
-	t = jiffies + ADRENO_IDLE_TIMEOUT;
-
 	/* then wait for GPU to finish: */
-	do {
-		uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
-		if (!(rbbm_status & A3XX_RBBM_STATUS_GPU_BUSY))
-			return;
-	} while(time_before(jiffies, t));
-
-	DRM_ERROR("timeout waiting for %s to idle!\n", gpu->name);
+	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
+			A3XX_RBBM_STATUS_GPU_BUSY)))
+		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
 
 	/* TODO maybe we need to reset GPU here to recover from hang? */
 }
@@ -352,7 +350,6 @@ static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
 	return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_DEBUG_FS
 static const unsigned int a3xx_registers[] = {
 	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
 	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
@@ -392,11 +389,18 @@ static const unsigned int a3xx_registers[] = {
 	0x303c, 0x303c, 0x305e, 0x305f,
 };
 
+#ifdef CONFIG_DEBUG_FS
 static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
 {
+	struct drm_device *dev = gpu->dev;
 	int i;
 
 	adreno_show(gpu, m);
+
+	mutex_lock(&dev->struct_mutex);
+
+	gpu->funcs->pm_resume(gpu);
+
 	seq_printf(m, "status:   %08x\n",
 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
 
@@ -412,9 +416,36 @@ static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
 			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
 		}
 	}
+
+	gpu->funcs->pm_suspend(gpu);
+
+	mutex_unlock(&dev->struct_mutex);
 }
 #endif
 
+/* would be nice to not have to duplicate the _show() stuff with printk(): */
+static void a3xx_dump(struct msm_gpu *gpu)
+{
+	int i;
+
+	adreno_dump(gpu);
+	printk("status:   %08x\n",
+			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
+
+	/* dump these out in a form that can be parsed by demsm: */
+	printk("IO:region %s 00000000 00020000\n", gpu->name);
+	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
+		uint32_t start = a3xx_registers[i];
+		uint32_t end   = a3xx_registers[i+1];
+		uint32_t addr;
+
+		for (addr = start; addr <= end; addr++) {
+			uint32_t val = gpu_read(gpu, addr);
+			printk("IO:R %08x %08x\n", addr<<2, val);
+		}
+	}
+}
+
 static const struct adreno_gpu_funcs funcs = {
 	.base = {
 		.get_param = adreno_get_param,
@@ -439,7 +470,8 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 	struct a3xx_gpu *a3xx_gpu = NULL;
 	struct adreno_gpu *adreno_gpu;
 	struct msm_gpu *gpu;
-	struct platform_device *pdev = a3xx_pdev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct platform_device *pdev = priv->gpu_pdev;
 	struct adreno_platform_config *config;
 	int ret;
 
@@ -460,7 +492,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 	adreno_gpu = &a3xx_gpu->base;
 	gpu = &adreno_gpu->base;
 
-	get_device(&pdev->dev);
 	a3xx_gpu->pdev = pdev;
 
 	gpu->fast_rate = config->fast_rate;
@@ -522,17 +553,24 @@ fail:
 #  include <mach/kgsl.h>
 #endif
 
-static int a3xx_probe(struct platform_device *pdev)
+static void set_gpu_pdev(struct drm_device *dev,
+		struct platform_device *pdev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	priv->gpu_pdev = pdev;
+}
+
+static int a3xx_bind(struct device *dev, struct device *master, void *data)
 {
 	static struct adreno_platform_config config = {};
 #ifdef CONFIG_OF
-	struct device_node *child, *node = pdev->dev.of_node;
+	struct device_node *child, *node = dev->of_node;
 	u32 val;
 	int ret;
 
 	ret = of_property_read_u32(node, "qcom,chipid", &val);
 	if (ret) {
-		dev_err(&pdev->dev, "could not find chipid: %d\n", ret);
+		dev_err(dev, "could not find chipid: %d\n", ret);
 		return ret;
 	}
 
@@ -548,7 +586,7 @@ static int a3xx_probe(struct platform_device *pdev)
 			for_each_child_of_node(child, pwrlvl) {
 				ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
 				if (ret) {
-					dev_err(&pdev->dev, "could not find gpu-freq: %d\n", ret);
+					dev_err(dev, "could not find gpu-freq: %d\n", ret);
 					return ret;
 				}
 				config.fast_rate = max(config.fast_rate, val);
@@ -558,12 +596,12 @@ static int a3xx_probe(struct platform_device *pdev)
 	}
 
 	if (!config.fast_rate) {
-		dev_err(&pdev->dev, "could not find clk rates\n");
+		dev_err(dev, "could not find clk rates\n");
 		return -ENXIO;
 	}
 
 #else
-	struct kgsl_device_platform_data *pdata = pdev->dev.platform_data;
+	struct kgsl_device_platform_data *pdata = dev->platform_data;
 	uint32_t version = socinfo_get_version();
 	if (cpu_is_apq8064ab()) {
 		config.fast_rate = 450000000;
@@ -609,14 +647,30 @@ static int a3xx_probe(struct platform_device *pdev)
 	config.bus_scale_table = pdata->bus_scale_table;
 #  endif
 #endif
-	pdev->dev.platform_data = &config;
-	a3xx_pdev = pdev;
+	dev->platform_data = &config;
+	set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
 	return 0;
 }
 
+static void a3xx_unbind(struct device *dev, struct device *master,
+		void *data)
+{
+	set_gpu_pdev(dev_get_drvdata(master), NULL);
+}
+
+static const struct component_ops a3xx_ops = {
+		.bind   = a3xx_bind,
+		.unbind = a3xx_unbind,
+};
+
+static int a3xx_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &a3xx_ops);
+}
+
 static int a3xx_remove(struct platform_device *pdev)
 {
-	a3xx_pdev = NULL;
+	component_del(&pdev->dev, &a3xx_ops);
 	return 0;
 }
 
@@ -624,7 +678,6 @@ static const struct of_device_id dt_match[] = {
 	{ .compatible = "qcom,kgsl-3d0" },
 	{}
 };
-MODULE_DEVICE_TABLE(of, dt_match);
 
 static struct platform_driver a3xx_driver = {
 	.probe = a3xx_probe,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index d321099abdd4..28ca8cd8b09e 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -73,6 +73,12 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
 	case MSM_PARAM_GMEM_SIZE:
 		*value = adreno_gpu->gmem;
 		return 0;
+	case MSM_PARAM_CHIP_ID:
+		*value = adreno_gpu->rev.patchid |
+				(adreno_gpu->rev.minor << 8) |
+				(adreno_gpu->rev.major << 16) |
+				(adreno_gpu->rev.core << 24);
+		return 0;
 	default:
 		DBG("%s: invalid param: %u", gpu->name, param);
 		return -EINVAL;
@@ -225,19 +231,11 @@ void adreno_flush(struct msm_gpu *gpu)
 void adreno_idle(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	uint32_t rptr, wptr = get_wptr(gpu->rb);
-	unsigned long t;
-
-	t = jiffies + ADRENO_IDLE_TIMEOUT;
-
-	/* then wait for CP to drain ringbuffer: */
-	do {
-		rptr = adreno_gpu->memptrs->rptr;
-		if (rptr == wptr)
-			return;
-	} while(time_before(jiffies, t));
+	uint32_t wptr = get_wptr(gpu->rb);
 
-	DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
+	/* wait for CP to drain ringbuffer: */
+	if (spin_until(adreno_gpu->memptrs->rptr == wptr))
+		DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
 
 	/* TODO maybe we need to reset GPU here to recover from hang? */
 }
@@ -260,22 +258,37 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
 }
 #endif
 
-void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
+/* would be nice to not have to duplicate the _show() stuff with printk(): */
+void adreno_dump(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	uint32_t freedwords;
-	unsigned long t = jiffies + ADRENO_IDLE_TIMEOUT;
-	do {
-		uint32_t size = gpu->rb->size / 4;
-		uint32_t wptr = get_wptr(gpu->rb);
-		uint32_t rptr = adreno_gpu->memptrs->rptr;
-		freedwords = (rptr + (size - 1) - wptr) % size;
-
-		if (time_after(jiffies, t)) {
-			DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
-			break;
-		}
-	} while(freedwords < ndwords);
+
+	printk("revision: %d (%d.%d.%d.%d)\n",
+			adreno_gpu->info->revn, adreno_gpu->rev.core,
+			adreno_gpu->rev.major, adreno_gpu->rev.minor,
+			adreno_gpu->rev.patchid);
+
+	printk("fence:    %d/%d\n", adreno_gpu->memptrs->fence,
+			gpu->submitted_fence);
+	printk("rptr:     %d\n", adreno_gpu->memptrs->rptr);
+	printk("wptr:     %d\n", adreno_gpu->memptrs->wptr);
+	printk("rb wptr:  %d\n", get_wptr(gpu->rb));
+
+}
+
+static uint32_t ring_freewords(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	uint32_t size = gpu->rb->size / 4;
+	uint32_t wptr = get_wptr(gpu->rb);
+	uint32_t rptr = adreno_gpu->memptrs->rptr;
+	return (rptr + (size - 1) - wptr) % size;
+}
+
+void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
+{
+	if (spin_until(ring_freewords(gpu) >= ndwords))
+		DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
 }
 
 static const char *iommu_ports[] = {
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index ca11ea4da165..63c36ce33020 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -76,7 +76,20 @@ struct adreno_platform_config {
 #endif
 };
 
-#define ADRENO_IDLE_TIMEOUT (20 * 1000)
+#define ADRENO_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+#define spin_until(X) ({                                   \
+	int __ret = -ETIMEDOUT;                            \
+	unsigned long __t = jiffies + ADRENO_IDLE_TIMEOUT; \
+	do {                                               \
+		if (X) {                                   \
+			__ret = 0;                         \
+			break;                             \
+		}                                          \
+	} while (time_before(jiffies, __t));               \
+	__ret;                                             \
+})
+
 
 static inline bool adreno_is_a3xx(struct adreno_gpu *gpu)
 {
@@ -114,6 +127,7 @@ void adreno_idle(struct msm_gpu *gpu);
 #ifdef CONFIG_DEBUG_FS
 void adreno_show(struct msm_gpu *gpu, struct seq_file *m);
 #endif
+void adreno_dump(struct msm_gpu *gpu);
 void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords);
 
 int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 6f1588aa9071..ae750f6928c1 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -17,8 +17,6 @@
 
 #include "hdmi.h"
 
-static struct platform_device *hdmi_pdev;
-
 void hdmi_set_mode(struct hdmi *hdmi, bool power_on)
 {
 	uint32_t ctrl = 0;
@@ -67,7 +65,7 @@ void hdmi_destroy(struct kref *kref)
 	if (hdmi->i2c)
 		hdmi_i2c_destroy(hdmi->i2c);
 
-	put_device(&hdmi->pdev->dev);
+	platform_set_drvdata(hdmi->pdev, NULL);
 }
 
 /* initialize connector */
@@ -75,7 +73,7 @@ struct hdmi *hdmi_init(struct drm_device *dev, struct drm_encoder *encoder)
 {
 	struct hdmi *hdmi = NULL;
 	struct msm_drm_private *priv = dev->dev_private;
-	struct platform_device *pdev = hdmi_pdev;
+	struct platform_device *pdev = priv->hdmi_pdev;
 	struct hdmi_platform_config *config;
 	int i, ret;
 
@@ -95,13 +93,13 @@ struct hdmi *hdmi_init(struct drm_device *dev, struct drm_encoder *encoder)
 
 	kref_init(&hdmi->refcount);
 
-	get_device(&pdev->dev);
-
 	hdmi->dev = dev;
 	hdmi->pdev = pdev;
 	hdmi->config = config;
 	hdmi->encoder = encoder;
 
+	hdmi_audio_infoframe_init(&hdmi->audio.infoframe);
+
 	/* not sure about which phy maps to which msm.. probably I miss some */
 	if (config->phy_init)
 		hdmi->phy = config->phy_init(hdmi);
@@ -228,6 +226,8 @@ struct hdmi *hdmi_init(struct drm_device *dev, struct drm_encoder *encoder)
 	priv->bridges[priv->num_bridges++]       = hdmi->bridge;
 	priv->connectors[priv->num_connectors++] = hdmi->connector;
 
+	platform_set_drvdata(pdev, hdmi);
+
 	return hdmi;
 
 fail:
@@ -249,17 +249,24 @@ fail:
 
 #include <linux/of_gpio.h>
 
-static int hdmi_dev_probe(struct platform_device *pdev)
+static void set_hdmi_pdev(struct drm_device *dev,
+		struct platform_device *pdev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	priv->hdmi_pdev = pdev;
+}
+
+static int hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	static struct hdmi_platform_config config = {};
 #ifdef CONFIG_OF
-	struct device_node *of_node = pdev->dev.of_node;
+	struct device_node *of_node = dev->of_node;
 
 	int get_gpio(const char *name)
 	{
 		int gpio = of_get_named_gpio(of_node, name, 0);
 		if (gpio < 0) {
-			dev_err(&pdev->dev, "failed to get gpio: %s (%d)\n",
+			dev_err(dev, "failed to get gpio: %s (%d)\n",
 					name, gpio);
 			gpio = -1;
 		}
@@ -305,7 +312,7 @@ static int hdmi_dev_probe(struct platform_device *pdev)
 		config.ddc_data_gpio = 71;
 		config.hpd_gpio      = 72;
 		config.mux_en_gpio   = -1;
-		config.mux_sel_gpio  = 13 + NR_GPIO_IRQS;
+		config.mux_sel_gpio  = -1;
 	} else if (cpu_is_msm8960() || cpu_is_msm8960ab()) {
 		static const char *hpd_reg_names[] = {"8921_hdmi_mvs"};
 		config.phy_init      = hdmi_phy_8960_init;
@@ -336,14 +343,30 @@ static int hdmi_dev_probe(struct platform_device *pdev)
 		config.mux_sel_gpio  = -1;
 	}
 #endif
-	pdev->dev.platform_data = &config;
-	hdmi_pdev = pdev;
+	dev->platform_data = &config;
+	set_hdmi_pdev(dev_get_drvdata(master), to_platform_device(dev));
 	return 0;
 }
 
+static void hdmi_unbind(struct device *dev, struct device *master,
+		void *data)
+{
+	set_hdmi_pdev(dev_get_drvdata(master), NULL);
+}
+
+static const struct component_ops hdmi_ops = {
+		.bind   = hdmi_bind,
+		.unbind = hdmi_unbind,
+};
+
+static int hdmi_dev_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &hdmi_ops);
+}
+
 static int hdmi_dev_remove(struct platform_device *pdev)
 {
-	hdmi_pdev = NULL;
+	component_del(&pdev->dev, &hdmi_ops);
 	return 0;
 }
 
@@ -351,7 +374,6 @@ static const struct of_device_id dt_match[] = {
 	{ .compatible = "qcom,hdmi-tx" },
 	{}
 };
-MODULE_DEVICE_TABLE(of, dt_match);
 
 static struct platform_driver hdmi_driver = {
 	.probe = hdmi_dev_probe,
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index 41b29add70b1..9fafee6a3e43 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -22,6 +22,7 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
+#include <linux/hdmi.h>
 
 #include "msm_drv.h"
 #include "hdmi.xml.h"
@@ -30,6 +31,12 @@
 struct hdmi_phy;
 struct hdmi_platform_config;
 
+struct hdmi_audio {
+	bool enabled;
+	struct hdmi_audio_infoframe infoframe;
+	int rate;
+};
+
 struct hdmi {
 	struct kref refcount;
 
@@ -38,6 +45,13 @@ struct hdmi {
 
 	const struct hdmi_platform_config *config;
 
+	/* audio state: */
+	struct hdmi_audio audio;
+
+	/* video state: */
+	bool power_on;
+	unsigned long int pixclock;
+
 	void __iomem *mmio;
 
 	struct regulator *hpd_regs[2];
@@ -132,6 +146,17 @@ struct hdmi_phy *hdmi_phy_8x60_init(struct hdmi *hdmi);
 struct hdmi_phy *hdmi_phy_8x74_init(struct hdmi *hdmi);
 
 /*
+ * audio:
+ */
+
+int hdmi_audio_update(struct hdmi *hdmi);
+int hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled,
+	uint32_t num_of_channels, uint32_t channel_allocation,
+	uint32_t level_shift, bool down_mix);
+void hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate);
+
+
+/*
  * hdmi bridge:
  */
 
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
new file mode 100644
index 000000000000..872485f60134
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/hdmi.h>
+#include "hdmi.h"
+
+
+/* Supported HDMI Audio channels */
+#define MSM_HDMI_AUDIO_CHANNEL_2		0
+#define MSM_HDMI_AUDIO_CHANNEL_4		1
+#define MSM_HDMI_AUDIO_CHANNEL_6		2
+#define MSM_HDMI_AUDIO_CHANNEL_8		3
+
+/* maps MSM_HDMI_AUDIO_CHANNEL_n consts used by audio driver to # of channels: */
+static int nchannels[] = { 2, 4, 6, 8 };
+
+/* Supported HDMI Audio sample rates */
+#define MSM_HDMI_SAMPLE_RATE_32KHZ		0
+#define MSM_HDMI_SAMPLE_RATE_44_1KHZ		1
+#define MSM_HDMI_SAMPLE_RATE_48KHZ		2
+#define MSM_HDMI_SAMPLE_RATE_88_2KHZ		3
+#define MSM_HDMI_SAMPLE_RATE_96KHZ		4
+#define MSM_HDMI_SAMPLE_RATE_176_4KHZ		5
+#define MSM_HDMI_SAMPLE_RATE_192KHZ		6
+#define MSM_HDMI_SAMPLE_RATE_MAX		7
+
+
+struct hdmi_msm_audio_acr {
+	uint32_t n;	/* N parameter for clock regeneration */
+	uint32_t cts;	/* CTS parameter for clock regeneration */
+};
+
+struct hdmi_msm_audio_arcs {
+	unsigned long int pixclock;
+	struct hdmi_msm_audio_acr lut[MSM_HDMI_SAMPLE_RATE_MAX];
+};
+
+#define HDMI_MSM_AUDIO_ARCS(pclk, ...) { (1000 * (pclk)), __VA_ARGS__ }
+
+/* Audio constants lookup table for hdmi_msm_audio_acr_setup */
+/* Valid Pixel-Clock rates: 25.2MHz, 27MHz, 27.03MHz, 74.25MHz, 148.5MHz */
+static const struct hdmi_msm_audio_arcs acr_lut[] = {
+	/*  25.200MHz  */
+	HDMI_MSM_AUDIO_ARCS(25200, {
+		{4096, 25200}, {6272, 28000}, {6144, 25200}, {12544, 28000},
+		{12288, 25200}, {25088, 28000}, {24576, 25200} }),
+	/*  27.000MHz  */
+	HDMI_MSM_AUDIO_ARCS(27000, {
+		{4096, 27000}, {6272, 30000}, {6144, 27000}, {12544, 30000},
+		{12288, 27000}, {25088, 30000}, {24576, 27000} }),
+	/*  27.027MHz */
+	HDMI_MSM_AUDIO_ARCS(27030, {
+		{4096, 27027}, {6272, 30030}, {6144, 27027}, {12544, 30030},
+		{12288, 27027}, {25088, 30030}, {24576, 27027} }),
+	/*  74.250MHz */
+	HDMI_MSM_AUDIO_ARCS(74250, {
+		{4096, 74250}, {6272, 82500}, {6144, 74250}, {12544, 82500},
+		{12288, 74250}, {25088, 82500}, {24576, 74250} }),
+	/* 148.500MHz */
+	HDMI_MSM_AUDIO_ARCS(148500, {
+		{4096, 148500}, {6272, 165000}, {6144, 148500}, {12544, 165000},
+		{12288, 148500}, {25088, 165000}, {24576, 148500} }),
+};
+
+static const struct hdmi_msm_audio_arcs *get_arcs(unsigned long int pixclock)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(acr_lut); i++) {
+		const struct hdmi_msm_audio_arcs *arcs = &acr_lut[i];
+		if (arcs->pixclock == pixclock)
+			return arcs;
+	}
+
+	return NULL;
+}
+
+int hdmi_audio_update(struct hdmi *hdmi)
+{
+	struct hdmi_audio *audio = &hdmi->audio;
+	struct hdmi_audio_infoframe *info = &audio->infoframe;
+	const struct hdmi_msm_audio_arcs *arcs = NULL;
+	bool enabled = audio->enabled;
+	uint32_t acr_pkt_ctrl, vbi_pkt_ctrl, aud_pkt_ctrl;
+	uint32_t infofrm_ctrl, audio_config;
+
+	DBG("audio: enabled=%d, channels=%d, channel_allocation=0x%x, "
+		"level_shift_value=%d, downmix_inhibit=%d, rate=%d",
+		audio->enabled, info->channels,  info->channel_allocation,
+		info->level_shift_value, info->downmix_inhibit, audio->rate);
+	DBG("video: power_on=%d, pixclock=%lu", hdmi->power_on, hdmi->pixclock);
+
+	if (enabled && !(hdmi->power_on && hdmi->pixclock)) {
+		DBG("disabling audio: no video");
+		enabled = false;
+	}
+
+	if (enabled) {
+		arcs = get_arcs(hdmi->pixclock);
+		if (!arcs) {
+			DBG("disabling audio: unsupported pixclock: %lu",
+					hdmi->pixclock);
+			enabled = false;
+		}
+	}
+
+	/* Read first before writing */
+	acr_pkt_ctrl = hdmi_read(hdmi, REG_HDMI_ACR_PKT_CTRL);
+	vbi_pkt_ctrl = hdmi_read(hdmi, REG_HDMI_VBI_PKT_CTRL);
+	aud_pkt_ctrl = hdmi_read(hdmi, REG_HDMI_AUDIO_PKT_CTRL1);
+	infofrm_ctrl = hdmi_read(hdmi, REG_HDMI_INFOFRAME_CTRL0);
+	audio_config = hdmi_read(hdmi, REG_HDMI_AUDIO_CFG);
+
+	/* Clear N/CTS selection bits */
+	acr_pkt_ctrl &= ~HDMI_ACR_PKT_CTRL_SELECT__MASK;
+
+	if (enabled) {
+		uint32_t n, cts, multiplier;
+		enum hdmi_acr_cts select;
+		uint8_t buf[14];
+
+		n   = arcs->lut[audio->rate].n;
+		cts = arcs->lut[audio->rate].cts;
+
+		if ((MSM_HDMI_SAMPLE_RATE_192KHZ == audio->rate) ||
+				(MSM_HDMI_SAMPLE_RATE_176_4KHZ == audio->rate)) {
+			multiplier = 4;
+			n >>= 2; /* divide N by 4 and use multiplier */
+		} else if ((MSM_HDMI_SAMPLE_RATE_96KHZ == audio->rate) ||
+				(MSM_HDMI_SAMPLE_RATE_88_2KHZ == audio->rate)) {
+			multiplier = 2;
+			n >>= 1; /* divide N by 2 and use multiplier */
+		} else {
+			multiplier = 1;
+		}
+
+		DBG("n=%u, cts=%u, multiplier=%u", n, cts, multiplier);
+
+		acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_SOURCE;
+		acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_AUDIO_PRIORITY;
+		acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_N_MULTIPLIER(multiplier);
+
+		if ((MSM_HDMI_SAMPLE_RATE_48KHZ == audio->rate) ||
+				(MSM_HDMI_SAMPLE_RATE_96KHZ == audio->rate) ||
+				(MSM_HDMI_SAMPLE_RATE_192KHZ == audio->rate))
+			select = ACR_48;
+		else if ((MSM_HDMI_SAMPLE_RATE_44_1KHZ == audio->rate) ||
+				(MSM_HDMI_SAMPLE_RATE_88_2KHZ == audio->rate) ||
+				(MSM_HDMI_SAMPLE_RATE_176_4KHZ == audio->rate))
+			select = ACR_44;
+		else /* default to 32k */
+			select = ACR_32;
+
+		acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_SELECT(select);
+
+		hdmi_write(hdmi, REG_HDMI_ACR_0(select - 1),
+				HDMI_ACR_0_CTS(cts));
+		hdmi_write(hdmi, REG_HDMI_ACR_1(select - 1),
+				HDMI_ACR_1_N(n));
+
+		hdmi_write(hdmi, REG_HDMI_AUDIO_PKT_CTRL2,
+				COND(info->channels != 2, HDMI_AUDIO_PKT_CTRL2_LAYOUT) |
+				HDMI_AUDIO_PKT_CTRL2_OVERRIDE);
+
+		acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_CONT;
+		acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_SEND;
+
+		/* configure infoframe: */
+		hdmi_audio_infoframe_pack(info, buf, sizeof(buf));
+		hdmi_write(hdmi, REG_HDMI_AUDIO_INFO0,
+				(buf[3] <<  0) || (buf[4] <<  8) ||
+				(buf[5] << 16) || (buf[6] << 24));
+		hdmi_write(hdmi, REG_HDMI_AUDIO_INFO1,
+				(buf[7] <<  0) || (buf[8] << 8));
+
+		hdmi_write(hdmi, REG_HDMI_GC, 0);
+
+		vbi_pkt_ctrl |= HDMI_VBI_PKT_CTRL_GC_ENABLE;
+		vbi_pkt_ctrl |= HDMI_VBI_PKT_CTRL_GC_EVERY_FRAME;
+
+		aud_pkt_ctrl |= HDMI_AUDIO_PKT_CTRL1_AUDIO_SAMPLE_SEND;
+
+		infofrm_ctrl |= HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SEND;
+		infofrm_ctrl |= HDMI_INFOFRAME_CTRL0_AUDIO_INFO_CONT;
+		infofrm_ctrl |= HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SOURCE;
+		infofrm_ctrl |= HDMI_INFOFRAME_CTRL0_AUDIO_INFO_UPDATE;
+
+		audio_config &= ~HDMI_AUDIO_CFG_FIFO_WATERMARK__MASK;
+		audio_config |= HDMI_AUDIO_CFG_FIFO_WATERMARK(4);
+		audio_config |= HDMI_AUDIO_CFG_ENGINE_ENABLE;
+	} else {
+		hdmi_write(hdmi, REG_HDMI_GC, HDMI_GC_MUTE);
+		acr_pkt_ctrl &= ~HDMI_ACR_PKT_CTRL_CONT;
+		acr_pkt_ctrl &= ~HDMI_ACR_PKT_CTRL_SEND;
+		vbi_pkt_ctrl &= ~HDMI_VBI_PKT_CTRL_GC_ENABLE;
+		vbi_pkt_ctrl &= ~HDMI_VBI_PKT_CTRL_GC_EVERY_FRAME;
+		aud_pkt_ctrl &= ~HDMI_AUDIO_PKT_CTRL1_AUDIO_SAMPLE_SEND;
+		infofrm_ctrl &= ~HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SEND;
+		infofrm_ctrl &= ~HDMI_INFOFRAME_CTRL0_AUDIO_INFO_CONT;
+		infofrm_ctrl &= ~HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SOURCE;
+		infofrm_ctrl &= ~HDMI_INFOFRAME_CTRL0_AUDIO_INFO_UPDATE;
+		audio_config &= ~HDMI_AUDIO_CFG_ENGINE_ENABLE;
+	}
+
+	hdmi_write(hdmi, REG_HDMI_ACR_PKT_CTRL, acr_pkt_ctrl);
+	hdmi_write(hdmi, REG_HDMI_VBI_PKT_CTRL, vbi_pkt_ctrl);
+	hdmi_write(hdmi, REG_HDMI_AUDIO_PKT_CTRL1, aud_pkt_ctrl);
+	hdmi_write(hdmi, REG_HDMI_INFOFRAME_CTRL0, infofrm_ctrl);
+
+	hdmi_write(hdmi, REG_HDMI_AUD_INT,
+			COND(enabled, HDMI_AUD_INT_AUD_FIFO_URUN_INT) |
+			COND(enabled, HDMI_AUD_INT_AUD_SAM_DROP_INT));
+
+	hdmi_write(hdmi, REG_HDMI_AUDIO_CFG, audio_config);
+
+
+	DBG("audio %sabled", enabled ? "en" : "dis");
+
+	return 0;
+}
+
+int hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled,
+	uint32_t num_of_channels, uint32_t channel_allocation,
+	uint32_t level_shift, bool down_mix)
+{
+	struct hdmi_audio *audio;
+
+	if (!hdmi)
+		return -ENXIO;
+
+	audio = &hdmi->audio;
+
+	if (num_of_channels >= ARRAY_SIZE(nchannels))
+		return -EINVAL;
+
+	audio->enabled = enabled;
+	audio->infoframe.channels = nchannels[num_of_channels];
+	audio->infoframe.channel_allocation = channel_allocation;
+	audio->infoframe.level_shift_value = level_shift;
+	audio->infoframe.downmix_inhibit = down_mix;
+
+	return hdmi_audio_update(hdmi);
+}
+
+void hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate)
+{
+	struct hdmi_audio *audio;
+
+	if (!hdmi)
+		return;
+
+	audio = &hdmi->audio;
+
+	if ((rate < 0) || (rate >= MSM_HDMI_SAMPLE_RATE_MAX))
+		return;
+
+	audio->rate = rate;
+	hdmi_audio_update(hdmi);
+}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
index 7d10e55403c6..f6cf745c249e 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
@@ -19,11 +19,7 @@
 
 struct hdmi_bridge {
 	struct drm_bridge base;
-
 	struct hdmi *hdmi;
-	bool power_on;
-
-	unsigned long int pixclock;
 };
 #define to_hdmi_bridge(x) container_of(x, struct hdmi_bridge, base)
 
@@ -52,8 +48,8 @@ static void power_on(struct drm_bridge *bridge)
 	}
 
 	if (config->pwr_clk_cnt > 0) {
-		DBG("pixclock: %lu", hdmi_bridge->pixclock);
-		ret = clk_set_rate(hdmi->pwr_clks[0], hdmi_bridge->pixclock);
+		DBG("pixclock: %lu", hdmi->pixclock);
+		ret = clk_set_rate(hdmi->pwr_clks[0], hdmi->pixclock);
 		if (ret) {
 			dev_err(dev->dev, "failed to set pixel clk: %s (%d)\n",
 					config->pwr_clk_names[0], ret);
@@ -102,12 +98,13 @@ static void hdmi_bridge_pre_enable(struct drm_bridge *bridge)
 
 	DBG("power up");
 
-	if (!hdmi_bridge->power_on) {
+	if (!hdmi->power_on) {
 		power_on(bridge);
-		hdmi_bridge->power_on = true;
+		hdmi->power_on = true;
+		hdmi_audio_update(hdmi);
 	}
 
-	phy->funcs->powerup(phy, hdmi_bridge->pixclock);
+	phy->funcs->powerup(phy, hdmi->pixclock);
 	hdmi_set_mode(hdmi, true);
 }
 
@@ -129,9 +126,10 @@ static void hdmi_bridge_post_disable(struct drm_bridge *bridge)
 	hdmi_set_mode(hdmi, false);
 	phy->funcs->powerdown(phy);
 
-	if (hdmi_bridge->power_on) {
+	if (hdmi->power_on) {
 		power_off(bridge);
-		hdmi_bridge->power_on = false;
+		hdmi->power_on = false;
+		hdmi_audio_update(hdmi);
 	}
 }
 
@@ -146,7 +144,7 @@ static void hdmi_bridge_mode_set(struct drm_bridge *bridge,
 
 	mode = adjusted_mode;
 
-	hdmi_bridge->pixclock = mode->clock * 1000;
+	hdmi->pixclock = mode->clock * 1000;
 
 	hdmi->hdmi_mode = drm_match_cea_mode(mode) > 1;
 
@@ -194,9 +192,7 @@ static void hdmi_bridge_mode_set(struct drm_bridge *bridge,
 	DBG("frame_ctrl=%08x", frame_ctrl);
 	hdmi_write(hdmi, REG_HDMI_FRAME_CTRL, frame_ctrl);
 
-	// TODO until we have audio, this might be safest:
-	if (hdmi->hdmi_mode)
-		hdmi_write(hdmi, REG_HDMI_GC, HDMI_GC_MUTE);
+	hdmi_audio_update(hdmi);
 }
 
 static const struct drm_bridge_funcs hdmi_bridge_funcs = {
diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.c b/drivers/gpu/drm/msm/mdp/mdp_kms.c
index 3be48f7c36be..03455b64a245 100644
--- a/drivers/gpu/drm/msm/mdp/mdp_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp_kms.c
@@ -101,7 +101,8 @@ void mdp_irq_wait(struct mdp_kms *mdp_kms, uint32_t irqmask)
 		.count = 1,
 	};
 	mdp_irq_register(mdp_kms, &wait.irq);
-	wait_event(wait_event, (wait.count <= 0));
+	wait_event_timeout(wait_event, (wait.count <= 0),
+			msecs_to_jiffies(100));
 	mdp_irq_unregister(mdp_kms, &wait.irq);
 }
 
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index e6adafc7eff3..f9de156b9e65 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -56,6 +56,10 @@ static char *vram;
 MODULE_PARM_DESC(vram, "Configure VRAM size (for devices without IOMMU/GPUMMU");
 module_param(vram, charp, 0);
 
+/*
+ * Util/helpers:
+ */
+
 void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
 		const char *dbgname)
 {
@@ -143,6 +147,8 @@ static int msm_unload(struct drm_device *dev)
 				priv->vram.paddr, &attrs);
 	}
 
+	component_unbind_all(dev->dev, dev);
+
 	dev->dev_private = NULL;
 
 	kfree(priv);
@@ -175,6 +181,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags)
 	struct msm_kms *kms;
 	int ret;
 
+
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv) {
 		dev_err(dev->dev, "failed to allocate private data\n");
@@ -226,6 +233,13 @@ static int msm_load(struct drm_device *dev, unsigned long flags)
 				(uint32_t)(priv->vram.paddr + size));
 	}
 
+	platform_set_drvdata(pdev, dev);
+
+	/* Bind all our sub-components: */
+	ret = component_bind_all(dev->dev, dev);
+	if (ret)
+		return ret;
+
 	switch (get_mdp_ver(pdev)) {
 	case 4:
 		kms = mdp4_kms_init(dev);
@@ -281,8 +295,6 @@ static int msm_load(struct drm_device *dev, unsigned long flags)
 		goto fail;
 	}
 
-	platform_set_drvdata(pdev, dev);
-
 #ifdef CONFIG_DRM_MSM_FBDEV
 	priv->fbdev = msm_fbdev_init(dev);
 #endif
@@ -311,7 +323,6 @@ static void load_gpu(struct drm_device *dev)
 		gpu = NULL;
 		/* not fatal */
 	}
-	mutex_unlock(&dev->struct_mutex);
 
 	if (gpu) {
 		int ret;
@@ -321,10 +332,16 @@ static void load_gpu(struct drm_device *dev)
 			dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
 			gpu->funcs->destroy(gpu);
 			gpu = NULL;
+		} else {
+			/* give inactive pm a chance to kick in: */
+			msm_gpu_retire(gpu);
 		}
+
 	}
 
 	priv->gpu = gpu;
+
+	mutex_unlock(&dev->struct_mutex);
 }
 
 static int msm_open(struct drm_device *dev, struct drm_file *file)
@@ -647,6 +664,12 @@ static int msm_ioctl_gem_new(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
 	struct drm_msm_gem_new *args = data;
+
+	if (args->flags & ~MSM_BO_FLAGS) {
+		DRM_ERROR("invalid flags: %08x\n", args->flags);
+		return -EINVAL;
+	}
+
 	return msm_gem_new_handle(dev, file, args->size,
 			args->flags, &args->handle);
 }
@@ -660,6 +683,11 @@ static int msm_ioctl_gem_cpu_prep(struct drm_device *dev, void *data,
 	struct drm_gem_object *obj;
 	int ret;
 
+	if (args->op & ~MSM_PREP_FLAGS) {
+		DRM_ERROR("invalid op: %08x\n", args->op);
+		return -EINVAL;
+	}
+
 	obj = drm_gem_object_lookup(dev, file, args->handle);
 	if (!obj)
 		return -ENOENT;
@@ -714,7 +742,14 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
 	struct drm_msm_wait_fence *args = data;
-	return msm_wait_fence_interruptable(dev, args->fence, &TS(args->timeout));
+
+	if (args->pad) {
+		DRM_ERROR("invalid pad: %08x\n", args->pad);
+		return -EINVAL;
+	}
+
+	return msm_wait_fence_interruptable(dev, args->fence,
+			&TS(args->timeout));
 }
 
 static const struct drm_ioctl_desc msm_ioctls[] = {
@@ -819,18 +854,110 @@ static const struct dev_pm_ops msm_pm_ops = {
 };
 
 /*
+ * Componentized driver support:
+ */
+
+#ifdef CONFIG_OF
+/* NOTE: the CONFIG_OF case duplicates the same code as exynos or imx
+ * (or probably any other).. so probably some room for some helpers
+ */
+static int compare_of(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+static int msm_drm_add_components(struct device *master, struct master *m)
+{
+	struct device_node *np = master->of_node;
+	unsigned i;
+	int ret;
+
+	for (i = 0; ; i++) {
+		struct device_node *node;
+
+		node = of_parse_phandle(np, "connectors", i);
+		if (!node)
+			break;
+
+		ret = component_master_add_child(m, compare_of, node);
+		of_node_put(node);
+
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+#else
+static int compare_dev(struct device *dev, void *data)
+{
+	return dev == data;
+}
+
+static int msm_drm_add_components(struct device *master, struct master *m)
+{
+	/* For non-DT case, it kinda sucks.  We don't actually have a way
+	 * to know whether or not we are waiting for certain devices (or if
+	 * they are simply not present).  But for non-DT we only need to
+	 * care about apq8064/apq8060/etc (all mdp4/a3xx):
+	 */
+	static const char *devnames[] = {
+			"hdmi_msm.0", "kgsl-3d0.0",
+	};
+	int i;
+
+	DBG("Adding components..");
+
+	for (i = 0; i < ARRAY_SIZE(devnames); i++) {
+		struct device *dev;
+		int ret;
+
+		dev = bus_find_device_by_name(&platform_bus_type,
+				NULL, devnames[i]);
+		if (!dev) {
+			dev_info(master, "still waiting for %s\n", devnames[i]);
+			return -EPROBE_DEFER;
+		}
+
+		ret = component_master_add_child(m, compare_dev, dev);
+		if (ret) {
+			DBG("could not add child: %d", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static int msm_drm_bind(struct device *dev)
+{
+	return drm_platform_init(&msm_driver, to_platform_device(dev));
+}
+
+static void msm_drm_unbind(struct device *dev)
+{
+	drm_put_dev(platform_get_drvdata(to_platform_device(dev)));
+}
+
+static const struct component_master_ops msm_drm_ops = {
+		.add_components = msm_drm_add_components,
+		.bind = msm_drm_bind,
+		.unbind = msm_drm_unbind,
+};
+
+/*
  * Platform driver:
  */
 
 static int msm_pdev_probe(struct platform_device *pdev)
 {
 	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-	return drm_platform_init(&msm_driver, pdev);
+	return component_master_add(&pdev->dev, &msm_drm_ops);
 }
 
 static int msm_pdev_remove(struct platform_device *pdev)
 {
-	drm_put_dev(platform_get_drvdata(pdev));
+	component_master_del(&pdev->dev, &msm_drm_ops);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 3d63269c5b29..9d10ee0b5aac 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -22,6 +22,7 @@
 #include <linux/clk.h>
 #include <linux/cpufreq.h>
 #include <linux/module.h>
+#include <linux/component.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
@@ -69,6 +70,9 @@ struct msm_drm_private {
 
 	struct msm_kms *kms;
 
+	/* subordinate devices, if present: */
+	struct platform_device *hdmi_pdev, *gpu_pdev;
+
 	/* when we have more than one 'msm_gpu' these need to be an array: */
 	struct msm_gpu *gpu;
 	struct msm_file_private *lastctx;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 5423e914e491..1f1f4cffdaed 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -23,7 +23,6 @@
  * Cmdstream submission:
  */
 
-#define BO_INVALID_FLAGS ~(MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE)
 /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
 #define BO_VALID    0x8000
 #define BO_LOCKED   0x4000
@@ -77,7 +76,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 			goto out_unlock;
 		}
 
-		if (submit_bo.flags & BO_INVALID_FLAGS) {
+		if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) {
 			DRM_ERROR("invalid flags: %x\n", submit_bo.flags);
 			ret = -EINVAL;
 			goto out_unlock;
@@ -369,6 +368,18 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 			goto out;
 		}
 
+		/* validate input from userspace: */
+		switch (submit_cmd.type) {
+		case MSM_SUBMIT_CMD_BUF:
+		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
+		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
+			break;
+		default:
+			DRM_ERROR("invalid type: %08x\n", submit_cmd.type);
+			ret = -EINVAL;
+			goto out;
+		}
+
 		ret = submit_bo(submit, submit_cmd.submit_idx,
 				&msm_obj, &iova, NULL);
 		if (ret)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 0cfe3f426ee4..3e667ca1f2b9 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -154,9 +154,18 @@ static int disable_axi(struct msm_gpu *gpu)
 
 int msm_gpu_pm_resume(struct msm_gpu *gpu)
 {
+	struct drm_device *dev = gpu->dev;
 	int ret;
 
-	DBG("%s", gpu->name);
+	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	if (gpu->active_cnt++ > 0)
+		return 0;
+
+	if (WARN_ON(gpu->active_cnt <= 0))
+		return -EINVAL;
 
 	ret = enable_pwrrail(gpu);
 	if (ret)
@@ -175,9 +184,18 @@ int msm_gpu_pm_resume(struct msm_gpu *gpu)
 
 int msm_gpu_pm_suspend(struct msm_gpu *gpu)
 {
+	struct drm_device *dev = gpu->dev;
 	int ret;
 
-	DBG("%s", gpu->name);
+	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	if (--gpu->active_cnt > 0)
+		return 0;
+
+	if (WARN_ON(gpu->active_cnt < 0))
+		return -EINVAL;
 
 	ret = disable_axi(gpu);
 	if (ret)
@@ -195,6 +213,55 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu)
 }
 
 /*
+ * Inactivity detection (for suspend):
+ */
+
+static void inactive_worker(struct work_struct *work)
+{
+	struct msm_gpu *gpu = container_of(work, struct msm_gpu, inactive_work);
+	struct drm_device *dev = gpu->dev;
+
+	if (gpu->inactive)
+		return;
+
+	DBG("%s: inactive!\n", gpu->name);
+	mutex_lock(&dev->struct_mutex);
+	if (!(msm_gpu_active(gpu) || gpu->inactive)) {
+		disable_axi(gpu);
+		disable_clk(gpu);
+		gpu->inactive = true;
+	}
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static void inactive_handler(unsigned long data)
+{
+	struct msm_gpu *gpu = (struct msm_gpu *)data;
+	struct msm_drm_private *priv = gpu->dev->dev_private;
+
+	queue_work(priv->wq, &gpu->inactive_work);
+}
+
+/* cancel inactive timer and make sure we are awake: */
+static void inactive_cancel(struct msm_gpu *gpu)
+{
+	DBG("%s", gpu->name);
+	del_timer(&gpu->inactive_timer);
+	if (gpu->inactive) {
+		enable_clk(gpu);
+		enable_axi(gpu);
+		gpu->inactive = false;
+	}
+}
+
+static void inactive_start(struct msm_gpu *gpu)
+{
+	DBG("%s", gpu->name);
+	mod_timer(&gpu->inactive_timer,
+			round_jiffies_up(jiffies + DRM_MSM_INACTIVE_JIFFIES));
+}
+
+/*
  * Hangcheck detection for locked gpu:
  */
 
@@ -206,7 +273,10 @@ static void recover_worker(struct work_struct *work)
 	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
 
 	mutex_lock(&dev->struct_mutex);
-	gpu->funcs->recover(gpu);
+	if (msm_gpu_active(gpu)) {
+		inactive_cancel(gpu);
+		gpu->funcs->recover(gpu);
+	}
 	mutex_unlock(&dev->struct_mutex);
 
 	msm_gpu_retire(gpu);
@@ -281,6 +351,9 @@ static void retire_worker(struct work_struct *work)
 	}
 
 	mutex_unlock(&dev->struct_mutex);
+
+	if (!msm_gpu_active(gpu))
+		inactive_start(gpu);
 }
 
 /* call from irq handler to schedule work to retire bo's */
@@ -302,6 +375,8 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 
 	gpu->submitted_fence = submit->fence;
 
+	inactive_cancel(gpu);
+
 	ret = gpu->funcs->submit(gpu, submit, ctx);
 	priv->lastctx = ctx;
 
@@ -357,11 +432,15 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	gpu->dev = drm;
 	gpu->funcs = funcs;
 	gpu->name = name;
+	gpu->inactive = true;
 
 	INIT_LIST_HEAD(&gpu->active_list);
 	INIT_WORK(&gpu->retire_work, retire_worker);
+	INIT_WORK(&gpu->inactive_work, inactive_worker);
 	INIT_WORK(&gpu->recover_work, recover_worker);
 
+	setup_timer(&gpu->inactive_timer, inactive_handler,
+			(unsigned long)gpu);
 	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
 			(unsigned long)gpu);
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 458db8c64c28..fad27008922f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -72,6 +72,10 @@ struct msm_gpu {
 
 	uint32_t submitted_fence;
 
+	/* is gpu powered/active? */
+	int active_cnt;
+	bool inactive;
+
 	/* worker for handling active-list retiring: */
 	struct work_struct retire_work;
 
@@ -91,7 +95,12 @@ struct msm_gpu {
 	uint32_t bsc;
 #endif
 
-	/* Hang Detction: */
+	/* Hang and Inactivity Detection:
+	 */
+#define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
+#define DRM_MSM_INACTIVE_JIFFIES  msecs_to_jiffies(DRM_MSM_INACTIVE_PERIOD)
+	struct timer_list inactive_timer;
+	struct work_struct inactive_work;
 #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
 #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
 	struct timer_list hangcheck_timer;
@@ -99,6 +108,11 @@ struct msm_gpu {
 	struct work_struct recover_work;
 };
 
+static inline bool msm_gpu_active(struct msm_gpu *gpu)
+{
+	return gpu->submitted_fence > gpu->funcs->last_fence(gpu);
+}
+
 static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
 {
 	msm_writel(data, gpu->mmio + (reg << 2));
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index 0d5e9b7e8bbf..d2b8c49bfb4a 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -306,7 +306,8 @@ struct drm_connector *omap_framebuffer_get_next_connector(
 	struct drm_connector *connector = from;
 
 	if (!from)
-		return list_first_entry(connector_list, typeof(*from), head);
+		return list_first_entry_or_null(connector_list, typeof(*from),
+						head);
 
 	list_for_each_entry_from(connector, connector_list, head) {
 		if (connector != from) {
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 821ab7b9409b..14e776f1d14e 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -349,7 +349,7 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
 		qxl_fence_add_release_locked(&qbo->fence, release->id);
 
 		ttm_bo_add_to_lru(bo);
-		ww_mutex_unlock(&bo->resv->lock);
+		__ttm_bo_unreserve(bo);
 		entry->reserved = false;
 	}
 	spin_unlock(&bdev->fence_lock);
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 62fefbbaf263..745143c2358f 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -1096,7 +1096,7 @@ static const u32 spectre_golden_registers[] =
 	0x8a14, 0xf000003f, 0x00000007,
 	0x8b24, 0xffffffff, 0x00ffffff,
 	0x28350, 0x3f3f3fff, 0x00000082,
-	0x28355, 0x0000003f, 0x00000000,
+	0x28354, 0x0000003f, 0x00000000,
 	0x3e78, 0x00000001, 0x00000002,
 	0x913c, 0xffff03df, 0x00000004,
 	0xc768, 0x00000008, 0x00000008,
@@ -6542,8 +6542,8 @@ void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
 		buffer[count++] = cpu_to_le32(0x00000000);
 		break;
 	case CHIP_HAWAII:
-		buffer[count++] = 0x3a00161a;
-		buffer[count++] = 0x0000002e;
+		buffer[count++] = cpu_to_le32(0x3a00161a);
+		buffer[count++] = cpu_to_le32(0x0000002e);
 		break;
 	default:
 		buffer[count++] = cpu_to_le32(0x00000000);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 15f954cd81cb..835516d2d257 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1551,10 +1551,12 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
 	/* reset hpd state */
 	radeon_hpd_init(rdev);
 	/* blat the mode back in */
-	drm_helper_resume_force_mode(dev);
-	/* turn on display hw */
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+	if (fbcon) {
+		drm_helper_resume_force_mode(dev);
+		/* turn on display hw */
+		list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+		}
 	}
 
 	drm_kms_helper_poll_enable(dev);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 5701fbb36b3c..386cfa4c194d 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -34,6 +34,8 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 
+#include <linux/gcd.h>
+
 static void avivo_crtc_load_lut(struct drm_crtc *crtc)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
@@ -792,6 +794,7 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
 	if (radeon_connector->edid) {
 		drm_mode_connector_update_edid_property(&radeon_connector->base, radeon_connector->edid);
 		ret = drm_add_edid_modes(&radeon_connector->base, radeon_connector->edid);
+		drm_edid_to_eld(&radeon_connector->base, radeon_connector->edid);
 		return ret;
 	}
 	drm_mode_connector_update_edid_property(&radeon_connector->base, NULL);
@@ -799,66 +802,57 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
 }
 
 /* avivo */
-static void avivo_get_fb_div(struct radeon_pll *pll,
-			     u32 target_clock,
-			     u32 post_div,
-			     u32 ref_div,
-			     u32 *fb_div,
-			     u32 *frac_fb_div)
-{
-	u32 tmp = post_div * ref_div;
 
-	tmp *= target_clock;
-	*fb_div = tmp / pll->reference_freq;
-	*frac_fb_div = tmp % pll->reference_freq;
-
-        if (*fb_div > pll->max_feedback_div)
-		*fb_div = pll->max_feedback_div;
-        else if (*fb_div < pll->min_feedback_div)
-                *fb_div = pll->min_feedback_div;
-}
-
-static u32 avivo_get_post_div(struct radeon_pll *pll,
-			      u32 target_clock)
+/**
+ * avivo_reduce_ratio - fractional number reduction
+ *
+ * @nom: nominator
+ * @den: denominator
+ * @nom_min: minimum value for nominator
+ * @den_min: minimum value for denominator
+ *
+ * Find the greatest common divisor and apply it on both nominator and
+ * denominator, but make nominator and denominator are at least as large
+ * as their minimum values.
+ */
+static void avivo_reduce_ratio(unsigned *nom, unsigned *den,
+			       unsigned nom_min, unsigned den_min)
 {
-	u32 vco, post_div, tmp;
-
-	if (pll->flags & RADEON_PLL_USE_POST_DIV)
-		return pll->post_div;
-
-	if (pll->flags & RADEON_PLL_PREFER_MINM_OVER_MAXP) {
-		if (pll->flags & RADEON_PLL_IS_LCD)
-			vco = pll->lcd_pll_out_min;
-		else
-			vco = pll->pll_out_min;
-	} else {
-		if (pll->flags & RADEON_PLL_IS_LCD)
-			vco = pll->lcd_pll_out_max;
-		else
-			vco = pll->pll_out_max;
+	unsigned tmp;
+
+	/* reduce the numbers to a simpler ratio */
+	tmp = gcd(*nom, *den);
+	*nom /= tmp;
+	*den /= tmp;
+
+	/* make sure nominator is large enough */
+        if (*nom < nom_min) {
+		tmp = (nom_min + *nom - 1) / *nom;
+		*nom *= tmp;
+		*den *= tmp;
 	}
 
-	post_div = vco / target_clock;
-	tmp = vco % target_clock;
-
-	if (pll->flags & RADEON_PLL_PREFER_MINM_OVER_MAXP) {
-		if (tmp)
-			post_div++;
-	} else {
-		if (!tmp)
-			post_div--;
+	/* make sure the denominator is large enough */
+	if (*den < den_min) {
+		tmp = (den_min + *den - 1) / *den;
+		*nom *= tmp;
+		*den *= tmp;
 	}
-
-	if (post_div > pll->max_post_div)
-		post_div = pll->max_post_div;
-	else if (post_div < pll->min_post_div)
-		post_div = pll->min_post_div;
-
-	return post_div;
 }
 
-#define MAX_TOLERANCE 10
-
+/**
+ * radeon_compute_pll_avivo - compute PLL paramaters
+ *
+ * @pll: information about the PLL
+ * @dot_clock_p: resulting pixel clock
+ * fb_div_p: resulting feedback divider
+ * frac_fb_div_p: fractional part of the feedback divider
+ * ref_div_p: resulting reference divider
+ * post_div_p: resulting reference divider
+ *
+ * Try to calculate the PLL parameters to generate the given frequency:
+ * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div)
+ */
 void radeon_compute_pll_avivo(struct radeon_pll *pll,
 			      u32 freq,
 			      u32 *dot_clock_p,
@@ -867,53 +861,123 @@ void radeon_compute_pll_avivo(struct radeon_pll *pll,
 			      u32 *ref_div_p,
 			      u32 *post_div_p)
 {
-	u32 target_clock = freq / 10;
-	u32 post_div = avivo_get_post_div(pll, target_clock);
-	u32 ref_div = pll->min_ref_div;
-	u32 fb_div = 0, frac_fb_div = 0, tmp;
+	unsigned fb_div_min, fb_div_max, fb_div;
+	unsigned post_div_min, post_div_max, post_div;
+	unsigned ref_div_min, ref_div_max, ref_div;
+	unsigned post_div_best, diff_best;
+	unsigned nom, den, tmp;
 
-	if (pll->flags & RADEON_PLL_USE_REF_DIV)
-		ref_div = pll->reference_div;
+	/* determine allowed feedback divider range */
+	fb_div_min = pll->min_feedback_div;
+	fb_div_max = pll->max_feedback_div;
 
 	if (pll->flags & RADEON_PLL_USE_FRAC_FB_DIV) {
-		avivo_get_fb_div(pll, target_clock, post_div, ref_div, &fb_div, &frac_fb_div);
-		frac_fb_div = (100 * frac_fb_div) / pll->reference_freq;
-		if (frac_fb_div >= 5) {
-			frac_fb_div -= 5;
-			frac_fb_div = frac_fb_div / 10;
-			frac_fb_div++;
+		fb_div_min *= 10;
+		fb_div_max *= 10;
+	}
+
+	/* determine allowed ref divider range */
+	if (pll->flags & RADEON_PLL_USE_REF_DIV)
+		ref_div_min = pll->reference_div;
+	else
+		ref_div_min = pll->min_ref_div;
+	ref_div_max = pll->max_ref_div;
+
+	/* determine allowed post divider range */
+	if (pll->flags & RADEON_PLL_USE_POST_DIV) {
+		post_div_min = pll->post_div;
+		post_div_max = pll->post_div;
+	} else {
+		unsigned target_clock = freq / 10;
+		unsigned vco_min, vco_max;
+
+		if (pll->flags & RADEON_PLL_IS_LCD) {
+			vco_min = pll->lcd_pll_out_min;
+			vco_max = pll->lcd_pll_out_max;
+		} else {
+			vco_min = pll->pll_out_min;
+			vco_max = pll->pll_out_max;
 		}
-		if (frac_fb_div >= 10) {
-			fb_div++;
-			frac_fb_div = 0;
+
+		post_div_min = vco_min / target_clock;
+		if ((target_clock * post_div_min) < vco_min)
+			++post_div_min;
+		if (post_div_min < pll->min_post_div)
+			post_div_min = pll->min_post_div;
+
+		post_div_max = vco_max / target_clock;
+		if ((target_clock * post_div_max) > vco_max)
+			--post_div_max;
+		if (post_div_max > pll->max_post_div)
+			post_div_max = pll->max_post_div;
+	}
+
+	/* represent the searched ratio as fractional number */
+	nom = pll->flags & RADEON_PLL_USE_FRAC_FB_DIV ? freq : freq / 10;
+	den = pll->reference_freq;
+
+	/* reduce the numbers to a simpler ratio */
+	avivo_reduce_ratio(&nom, &den, fb_div_min, post_div_min);
+
+	/* now search for a post divider */
+	if (pll->flags & RADEON_PLL_PREFER_MINM_OVER_MAXP)
+		post_div_best = post_div_min;
+	else
+		post_div_best = post_div_max;
+	diff_best = ~0;
+
+	for (post_div = post_div_min; post_div <= post_div_max; ++post_div) {
+		unsigned diff = abs(den - den / post_div * post_div);
+		if (diff < diff_best || (diff == diff_best &&
+		    !(pll->flags & RADEON_PLL_PREFER_MINM_OVER_MAXP))) {
+
+			post_div_best = post_div;
+			diff_best = diff;
 		}
+	}
+	post_div = post_div_best;
+
+	/* get matching reference and feedback divider */
+	ref_div = max(den / post_div, 1u);
+	fb_div = nom;
+
+	/* we're almost done, but reference and feedback
+	   divider might be to large now */
+
+	tmp = ref_div;
+
+        if (fb_div > fb_div_max) {
+		ref_div = ref_div * fb_div_max / fb_div;
+		fb_div = fb_div_max;
+	}
+
+	if (ref_div > ref_div_max) {
+		ref_div = ref_div_max;
+		fb_div = nom * ref_div_max / tmp;
+	}
+
+	/* reduce the numbers to a simpler ratio once more */
+	/* this also makes sure that the reference divider is large enough */
+	avivo_reduce_ratio(&fb_div, &ref_div, fb_div_min, ref_div_min);
+
+	/* and finally save the result */
+	if (pll->flags & RADEON_PLL_USE_FRAC_FB_DIV) {
+		*fb_div_p = fb_div / 10;
+		*frac_fb_div_p = fb_div % 10;
 	} else {
-		while (ref_div <= pll->max_ref_div) {
-			avivo_get_fb_div(pll, target_clock, post_div, ref_div,
-					 &fb_div, &frac_fb_div);
-			if (frac_fb_div >= (pll->reference_freq / 2))
-				fb_div++;
-			frac_fb_div = 0;
-			tmp = (pll->reference_freq * fb_div) / (post_div * ref_div);
-			tmp = (tmp * 10000) / target_clock;
-
-			if (tmp > (10000 + MAX_TOLERANCE))
-				ref_div++;
-			else if (tmp >= (10000 - MAX_TOLERANCE))
-				break;
-			else
-				ref_div++;
-		}
+		*fb_div_p = fb_div;
+		*frac_fb_div_p = 0;
 	}
 
-	*dot_clock_p = ((pll->reference_freq * fb_div * 10) + (pll->reference_freq * frac_fb_div)) /
-		(ref_div * post_div * 10);
-	*fb_div_p = fb_div;
-	*frac_fb_div_p = frac_fb_div;
+	*dot_clock_p = ((pll->reference_freq * *fb_div_p * 10) +
+			(pll->reference_freq * *frac_fb_div_p)) /
+		       (ref_div * post_div * 10);
 	*ref_div_p = ref_div;
 	*post_div_p = post_div;
-	DRM_DEBUG_KMS("%d, pll dividers - fb: %d.%d ref: %d, post %d\n",
-		      *dot_clock_p, fb_div, frac_fb_div, ref_div, post_div);
+
+	DRM_DEBUG_KMS("%d - %d, pll dividers - fb: %d.%d ref: %d, post %d\n",
+		      freq, *dot_clock_p, *fb_div_p, *frac_fb_div_p,
+		      ref_div, post_div);
 }
 
 /* pre-avivo */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index e8b0284e34bb..d0eba48dd74e 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -405,11 +405,15 @@ static int radeon_pmops_runtime_suspend(struct device *dev)
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	int ret;
 
-	if (radeon_runtime_pm == 0)
-		return -EINVAL;
+	if (radeon_runtime_pm == 0) {
+		pm_runtime_forbid(dev);
+		return -EBUSY;
+	}
 
-	if (radeon_runtime_pm == -1 && !radeon_is_px())
-		return -EINVAL;
+	if (radeon_runtime_pm == -1 && !radeon_is_px()) {
+		pm_runtime_forbid(dev);
+		return -EBUSY;
+	}
 
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 	drm_kms_helper_poll_disable(drm_dev);
@@ -458,12 +462,15 @@ static int radeon_pmops_runtime_idle(struct device *dev)
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	struct drm_crtc *crtc;
 
-	if (radeon_runtime_pm == 0)
+	if (radeon_runtime_pm == 0) {
+		pm_runtime_forbid(dev);
 		return -EBUSY;
+	}
 
 	/* are we PX enabled? */
 	if (radeon_runtime_pm == -1 && !radeon_is_px()) {
 		DRM_DEBUG_DRIVER("failing to power off - not px\n");
+		pm_runtime_forbid(dev);
 		return -EBUSY;
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 1375ff85b08a..19bec0dbfa38 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -104,7 +104,7 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 
 void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 {
-	u32 c = 0;
+	u32 c = 0, i;
 
 	rbo->placement.fpfn = 0;
 	rbo->placement.lpfn = 0;
@@ -131,6 +131,17 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 		rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 	rbo->placement.num_placement = c;
 	rbo->placement.num_busy_placement = c;
+
+	/*
+	 * Use two-ended allocation depending on the buffer size to
+	 * improve fragmentation quality.
+	 * 512kb was measured as the most optimal number.
+	 */
+	if (rbo->tbo.mem.size > 512 * 1024) {
+		for (i = 0; i < c; i++) {
+			rbo->placements[i] |= TTM_PL_FLAG_TOPDOWN;
+		}
+	}
 }
 
 int radeon_bo_create(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 8b0dfdd23793..f8050f5429e2 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -262,6 +262,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)
 		r = radeon_ib_test(rdev, i, ring);
 		if (r) {
 			ring->ready = false;
+			rdev->needs_reset = false;
 
 			if (i == RADEON_RING_TYPE_GFX_INDEX) {
 				/* oh, oh, that's really bad */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index fbeabd9a281f..a87edfac111f 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -248,7 +248,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
 			continue;
 		}
 
-		rcar_du_encoder_init(rcdu, pdata->type, pdata->output, pdata);
+		ret = rcar_du_encoder_init(rcdu, pdata->type, pdata->output,
+					   pdata);
+		if (ret < 0)
+			return ret;
 	}
 
 	/* Set the possible CRTCs and possible clones. There's always at least
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9df79ac7b8f5..4ab9f7171c4f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -412,7 +412,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 	int ret;
 
 	spin_lock(&glob->lru_lock);
-	ret = ttm_bo_reserve_nolru(bo, false, true, false, 0);
+	ret = __ttm_bo_reserve(bo, false, true, false, 0);
 
 	spin_lock(&bdev->fence_lock);
 	(void) ttm_bo_wait(bo, false, false, true);
@@ -443,7 +443,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 			ttm_bo_add_to_lru(bo);
 		}
 
-		ww_mutex_unlock(&bo->resv->lock);
+		__ttm_bo_unreserve(bo);
 	}
 
 	kref_get(&bo->list_kref);
@@ -494,7 +494,7 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 		sync_obj = driver->sync_obj_ref(bo->sync_obj);
 		spin_unlock(&bdev->fence_lock);
 
-		ww_mutex_unlock(&bo->resv->lock);
+		__ttm_bo_unreserve(bo);
 		spin_unlock(&glob->lru_lock);
 
 		ret = driver->sync_obj_wait(sync_obj, false, interruptible);
@@ -514,7 +514,7 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 			return ret;
 
 		spin_lock(&glob->lru_lock);
-		ret = ttm_bo_reserve_nolru(bo, false, true, false, 0);
+		ret = __ttm_bo_reserve(bo, false, true, false, 0);
 
 		/*
 		 * We raced, and lost, someone else holds the reservation now,
@@ -532,7 +532,7 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 		spin_unlock(&bdev->fence_lock);
 
 	if (ret || unlikely(list_empty(&bo->ddestroy))) {
-		ww_mutex_unlock(&bo->resv->lock);
+		__ttm_bo_unreserve(bo);
 		spin_unlock(&glob->lru_lock);
 		return ret;
 	}
@@ -577,11 +577,11 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
 			kref_get(&nentry->list_kref);
 		}
 
-		ret = ttm_bo_reserve_nolru(entry, false, true, false, 0);
+		ret = __ttm_bo_reserve(entry, false, true, false, 0);
 		if (remove_all && ret) {
 			spin_unlock(&glob->lru_lock);
-			ret = ttm_bo_reserve_nolru(entry, false, false,
-						   false, 0);
+			ret = __ttm_bo_reserve(entry, false, false,
+					       false, 0);
 			spin_lock(&glob->lru_lock);
 		}
 
@@ -726,7 +726,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 
 	spin_lock(&glob->lru_lock);
 	list_for_each_entry(bo, &man->lru, lru) {
-		ret = ttm_bo_reserve_nolru(bo, false, true, false, 0);
+		ret = __ttm_bo_reserve(bo, false, true, false, 0);
 		if (!ret)
 			break;
 	}
@@ -1630,7 +1630,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 
 	spin_lock(&glob->lru_lock);
 	list_for_each_entry(bo, &glob->swap_lru, swap) {
-		ret = ttm_bo_reserve_nolru(bo, false, true, false, 0);
+		ret = __ttm_bo_reserve(bo, false, true, false, 0);
 		if (!ret)
 			break;
 	}
@@ -1697,7 +1697,7 @@ out:
 	 * already swapped buffer.
 	 */
 
-	ww_mutex_unlock(&bo->resv->lock);
+	__ttm_bo_unreserve(bo);
 	kref_put(&bo->list_kref, ttm_bo_release_list);
 	return ret;
 }
@@ -1731,10 +1731,10 @@ int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
 		return -ERESTARTSYS;
 	if (!ww_mutex_is_locked(&bo->resv->lock))
 		goto out_unlock;
-	ret = ttm_bo_reserve_nolru(bo, true, false, false, NULL);
+	ret = __ttm_bo_reserve(bo, true, false, false, NULL);
 	if (unlikely(ret != 0))
 		goto out_unlock;
-	ww_mutex_unlock(&bo->resv->lock);
+	__ttm_bo_unreserve(bo);
 
 out_unlock:
 	mutex_unlock(&bo->wu_mutex);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index c58eba33bd5f..bd850c9f4bca 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -55,6 +55,7 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 	struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
 	struct drm_mm *mm = &rman->mm;
 	struct drm_mm_node *node = NULL;
+	enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT;
 	unsigned long lpfn;
 	int ret;
 
@@ -66,11 +67,15 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 	if (!node)
 		return -ENOMEM;
 
+	if (bo->mem.placement & TTM_PL_FLAG_TOPDOWN)
+		aflags = DRM_MM_CREATE_TOP;
+
 	spin_lock(&rman->lock);
-	ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
-					  mem->page_alignment,
+	ret = drm_mm_insert_node_in_range_generic(mm, node, mem->num_pages,
+					  mem->page_alignment, 0,
 					  placement->fpfn, lpfn,
-					  DRM_MM_SEARCH_BEST);
+					  DRM_MM_SEARCH_BEST,
+					  aflags);
 	spin_unlock(&rman->lock);
 
 	if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 479e9418e3d7..e8dac8758528 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -46,7 +46,7 @@ static void ttm_eu_backoff_reservation_locked(struct list_head *list)
 			ttm_bo_add_to_lru(bo);
 			entry->removed = false;
 		}
-		ww_mutex_unlock(&bo->resv->lock);
+		__ttm_bo_unreserve(bo);
 	}
 }
 
@@ -140,8 +140,8 @@ retry:
 		if (entry->reserved)
 			continue;
 
-		ret = ttm_bo_reserve_nolru(bo, true, (ticket == NULL), true,
-					   ticket);
+		ret = __ttm_bo_reserve(bo, true, (ticket == NULL), true,
+				       ticket);
 
 		if (ret == -EDEADLK) {
 			/* uh oh, we lost out, drop every reservation and try
@@ -224,7 +224,7 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 		entry->old_sync_obj = bo->sync_obj;
 		bo->sync_obj = driver->sync_obj_ref(sync_obj);
 		ttm_bo_add_to_lru(bo);
-		ww_mutex_unlock(&bo->resv->lock);
+		__ttm_bo_unreserve(bo);
 		entry->reserved = false;
 	}
 	spin_unlock(&bdev->fence_lock);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
index 701d5207def6..8bb26dcd9eae 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
@@ -117,10 +117,10 @@ static void vmw_hw_context_destroy(struct vmw_resource *res)
 		(void) vmw_context_binding_state_kill
 			(&container_of(res, struct vmw_user_context, res)->cbs);
 		(void) vmw_gb_context_destroy(res);
+		mutex_unlock(&dev_priv->binding_mutex);
 		if (dev_priv->pinned_bo != NULL &&
 		    !dev_priv->query_cid_valid)
 			__vmw_execbuf_release_pinned_bo(dev_priv, NULL);
-		mutex_unlock(&dev_priv->binding_mutex);
 		mutex_unlock(&dev_priv->cmdbuf_mutex);
 		return;
 	}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c7009581bb23..4a223bbea3b3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -171,10 +171,10 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 	VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl,
-		      DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+		      DRM_UNLOCKED | DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_FENCE_SIGNALED,
 		      vmw_fence_obj_signaled_ioctl,
-		      DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+		      DRM_UNLOCKED | DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl,
 		      DRM_UNLOCKED | DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_FENCE_EVENT, vmw_fence_event_ioctl,
@@ -205,7 +205,7 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 		      DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_SYNCCPU,
 		      vmw_user_dmabuf_synccpu_ioctl,
-		      DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+		      DRM_UNLOCKED | DRM_RENDER_ALLOW),
 };
 
 static struct pci_device_id vmw_pci_id_list[] = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 9699bd174ae4..a89ad938eacf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -147,7 +147,7 @@ static int vmw_fb_check_var(struct fb_var_screeninfo *var,
 	}
 
 	if (!vmw_kms_validate_mode_vram(vmw_priv,
-					info->fix.line_length,
+					var->xres * var->bits_per_pixel/8,
 					var->yoffset + var->yres)) {
 		DRM_ERROR("Requested geom can not fit in framebuffer\n");
 		return -EINVAL;
@@ -162,6 +162,8 @@ static int vmw_fb_set_par(struct fb_info *info)
 	struct vmw_private *vmw_priv = par->vmw_priv;
 	int ret;
 
+	info->fix.line_length = info->var.xres * info->var.bits_per_pixel/8;
+
 	ret = vmw_kms_write_svga(vmw_priv, info->var.xres, info->var.yres,
 				 info->fix.line_length,
 				 par->bpp, par->depth);
@@ -177,6 +179,7 @@ static int vmw_fb_set_par(struct fb_info *info)
 		vmw_write(vmw_priv, SVGA_REG_DISPLAY_POSITION_Y, info->var.yoffset);
 		vmw_write(vmw_priv, SVGA_REG_DISPLAY_WIDTH, info->var.xres);
 		vmw_write(vmw_priv, SVGA_REG_DISPLAY_HEIGHT, info->var.yres);
+		vmw_write(vmw_priv, SVGA_REG_BYTES_PER_LINE, info->fix.line_length);
 		vmw_write(vmw_priv, SVGA_REG_DISPLAY_ID, SVGA_ID_INVALID);
 	}
 
diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h
index 89b4d7db1ebd..2a3cea91606d 100644
--- a/include/drm/drm_gem_cma_helper.h
+++ b/include/drm/drm_gem_cma_helper.h
@@ -1,6 +1,8 @@
 #ifndef __DRM_GEM_CMA_HELPER_H__
 #define __DRM_GEM_CMA_HELPER_H__
 
+#include <drm/drmP.h>
+
 struct drm_gem_cma_object {
 	struct drm_gem_object base;
 	dma_addr_t paddr;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 8b6981ab3fcf..a24addfdfcec 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -47,8 +47,17 @@
 enum drm_mm_search_flags {
 	DRM_MM_SEARCH_DEFAULT =		0,
 	DRM_MM_SEARCH_BEST =		1 << 0,
+	DRM_MM_SEARCH_BELOW =		1 << 1,
 };
 
+enum drm_mm_allocator_flags {
+	DRM_MM_CREATE_DEFAULT =		0,
+	DRM_MM_CREATE_TOP =		1 << 0,
+};
+
+#define DRM_MM_BOTTOMUP DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT
+#define DRM_MM_TOPDOWN DRM_MM_SEARCH_BELOW, DRM_MM_CREATE_TOP
+
 struct drm_mm_node {
 	struct list_head node_list;
 	struct list_head hole_stack;
@@ -186,6 +195,9 @@ static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
  * Implementation Note:
  * We need to inline list_for_each_entry in order to be able to set hole_start
  * and hole_end on each iteration while keeping the macro sane.
+ *
+ * The __drm_mm_for_each_hole version is similar, but with added support for
+ * going backwards.
  */
 #define drm_mm_for_each_hole(entry, mm, hole_start, hole_end) \
 	for (entry = list_entry((mm)->hole_stack.next, struct drm_mm_node, hole_stack); \
@@ -195,6 +207,14 @@ static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node)
 	     1 : 0; \
 	     entry = list_entry(entry->hole_stack.next, struct drm_mm_node, hole_stack))
 
+#define __drm_mm_for_each_hole(entry, mm, hole_start, hole_end, backwards) \
+	for (entry = list_entry((backwards) ? (mm)->hole_stack.prev : (mm)->hole_stack.next, struct drm_mm_node, hole_stack); \
+	     &entry->hole_stack != &(mm)->hole_stack ? \
+	     hole_start = drm_mm_hole_node_start(entry), \
+	     hole_end = drm_mm_hole_node_end(entry), \
+	     1 : 0; \
+	     entry = list_entry((backwards) ? entry->hole_stack.prev : entry->hole_stack.next, struct drm_mm_node, hole_stack))
+
 /*
  * Basic range manager support (drm_mm.c)
  */
@@ -205,7 +225,8 @@ int drm_mm_insert_node_generic(struct drm_mm *mm,
 			       unsigned long size,
 			       unsigned alignment,
 			       unsigned long color,
-			       enum drm_mm_search_flags flags);
+			       enum drm_mm_search_flags sflags,
+			       enum drm_mm_allocator_flags aflags);
 /**
  * drm_mm_insert_node - search for space and insert @node
  * @mm: drm_mm to allocate from
@@ -228,7 +249,8 @@ static inline int drm_mm_insert_node(struct drm_mm *mm,
 				     unsigned alignment,
 				     enum drm_mm_search_flags flags)
 {
-	return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags);
+	return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags,
+					  DRM_MM_CREATE_DEFAULT);
 }
 
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
@@ -238,7 +260,8 @@ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
 					unsigned long color,
 					unsigned long start,
 					unsigned long end,
-					enum drm_mm_search_flags flags);
+					enum drm_mm_search_flags sflags,
+					enum drm_mm_allocator_flags aflags);
 /**
  * drm_mm_insert_node_in_range - ranged search for space and insert @node
  * @mm: drm_mm to allocate from
@@ -266,7 +289,8 @@ static inline int drm_mm_insert_node_in_range(struct drm_mm *mm,
 					      enum drm_mm_search_flags flags)
 {
 	return drm_mm_insert_node_in_range_generic(mm, node, size, alignment,
-						   0, start, end, flags);
+						   0, start, end, flags,
+						   DRM_MM_CREATE_DEFAULT);
 }
 
 void drm_mm_remove_node(struct drm_mm_node *node);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 5d8aabe68f6c..a5183da3ef92 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -788,7 +788,7 @@ extern void ttm_bo_del_sub_from_lru(struct ttm_buffer_object *bo);
 extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo);
 
 /**
- * ttm_bo_reserve_nolru:
+ * __ttm_bo_reserve:
  *
  * @bo: A pointer to a struct ttm_buffer_object.
  * @interruptible: Sleep interruptible if waiting.
@@ -809,10 +809,10 @@ extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo);
  * -EALREADY: Bo already reserved using @ticket. This error code will only
  * be returned if @use_ticket is set to true.
  */
-static inline int ttm_bo_reserve_nolru(struct ttm_buffer_object *bo,
-				       bool interruptible,
-				       bool no_wait, bool use_ticket,
-				       struct ww_acquire_ctx *ticket)
+static inline int __ttm_bo_reserve(struct ttm_buffer_object *bo,
+				   bool interruptible,
+				   bool no_wait, bool use_ticket,
+				   struct ww_acquire_ctx *ticket)
 {
 	int ret = 0;
 
@@ -888,8 +888,7 @@ static inline int ttm_bo_reserve(struct ttm_buffer_object *bo,
 
 	WARN_ON(!atomic_read(&bo->kref.refcount));
 
-	ret = ttm_bo_reserve_nolru(bo, interruptible, no_wait, use_ticket,
-				    ticket);
+	ret = __ttm_bo_reserve(bo, interruptible, no_wait, use_ticket, ticket);
 	if (likely(ret == 0))
 		ttm_bo_del_sub_from_lru(bo);
 
@@ -929,20 +928,14 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
 }
 
 /**
- * ttm_bo_unreserve_ticket
+ * __ttm_bo_unreserve
  * @bo: A pointer to a struct ttm_buffer_object.
- * @ticket: ww_acquire_ctx used for reserving
  *
- * Unreserve a previous reservation of @bo made with @ticket.
+ * Unreserve a previous reservation of @bo where the buffer object is
+ * already on lru lists.
  */
-static inline void ttm_bo_unreserve_ticket(struct ttm_buffer_object *bo,
-					   struct ww_acquire_ctx *t)
+static inline void __ttm_bo_unreserve(struct ttm_buffer_object *bo)
 {
-	if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
-		spin_lock(&bo->glob->lru_lock);
-		ttm_bo_add_to_lru(bo);
-		spin_unlock(&bo->glob->lru_lock);
-	}
 	ww_mutex_unlock(&bo->resv->lock);
 }
 
@@ -955,7 +948,25 @@ static inline void ttm_bo_unreserve_ticket(struct ttm_buffer_object *bo,
  */
 static inline void ttm_bo_unreserve(struct ttm_buffer_object *bo)
 {
-	ttm_bo_unreserve_ticket(bo, NULL);
+	if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
+		spin_lock(&bo->glob->lru_lock);
+		ttm_bo_add_to_lru(bo);
+		spin_unlock(&bo->glob->lru_lock);
+	}
+	__ttm_bo_unreserve(bo);
+}
+
+/**
+ * ttm_bo_unreserve_ticket
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @ticket: ww_acquire_ctx used for reserving
+ *
+ * Unreserve a previous reservation of @bo made with @ticket.
+ */
+static inline void ttm_bo_unreserve_ticket(struct ttm_buffer_object *bo,
+					   struct ww_acquire_ctx *t)
+{
+	ttm_bo_unreserve(bo);
 }
 
 /*
diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h
index c84ff153a564..8ed44f9bbdfb 100644
--- a/include/drm/ttm/ttm_placement.h
+++ b/include/drm/ttm/ttm_placement.h
@@ -65,6 +65,8 @@
  * reference the buffer.
  * TTM_PL_FLAG_NO_EVICT means that the buffer may never
  * be evicted to make room for other buffers.
+ * TTM_PL_FLAG_TOPDOWN requests to be placed from the
+ * top of the memory area, instead of the bottom.
  */
 
 #define TTM_PL_FLAG_CACHED      (1 << 16)
@@ -72,6 +74,7 @@
 #define TTM_PL_FLAG_WC          (1 << 18)
 #define TTM_PL_FLAG_SHARED      (1 << 20)
 #define TTM_PL_FLAG_NO_EVICT    (1 << 21)
+#define TTM_PL_FLAG_TOPDOWN     (1 << 22)
 
 #define TTM_PL_MASK_CACHING     (TTM_PL_FLAG_CACHED | \
 				 TTM_PL_FLAG_UNCACHED | \
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index d3c62074016d..0664c31f010c 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -50,6 +50,7 @@ struct drm_msm_timespec {
 
 #define MSM_PARAM_GPU_ID     0x01
 #define MSM_PARAM_GMEM_SIZE  0x02
+#define MSM_PARAM_CHIP_ID    0x03
 
 struct drm_msm_param {
 	uint32_t pipe;           /* in, MSM_PIPE_x */
@@ -69,6 +70,12 @@ struct drm_msm_param {
 #define MSM_BO_WC            0x00020000
 #define MSM_BO_UNCACHED      0x00040000
 
+#define MSM_BO_FLAGS         (MSM_BO_SCANOUT | \
+                              MSM_BO_GPU_READONLY | \
+                              MSM_BO_CACHED | \
+                              MSM_BO_WC | \
+                              MSM_BO_UNCACHED)
+
 struct drm_msm_gem_new {
 	uint64_t size;           /* in */
 	uint32_t flags;          /* in, mask of MSM_BO_x */
@@ -85,6 +92,8 @@ struct drm_msm_gem_info {
 #define MSM_PREP_WRITE       0x02
 #define MSM_PREP_NOSYNC      0x04
 
+#define MSM_PREP_FLAGS       (MSM_PREP_READ | MSM_PREP_WRITE | MSM_PREP_NOSYNC)
+
 struct drm_msm_gem_cpu_prep {
 	uint32_t handle;         /* in */
 	uint32_t op;             /* in, mask of MSM_PREP_x */
@@ -152,6 +161,9 @@ struct drm_msm_gem_submit_cmd {
  */
 #define MSM_SUBMIT_BO_READ             0x0001
 #define MSM_SUBMIT_BO_WRITE            0x0002
+
+#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE)
+
 struct drm_msm_gem_submit_bo {
 	uint32_t flags;          /* in, mask of MSM_SUBMIT_BO_x */
 	uint32_t handle;         /* in, GEM handle */