Merge branch 'l4t/l4t-r16-r2' into colibri

Conflicts: arch/arm/mach-tegra/tegra3_usb_phy.c arch/arm/mach-tegra/usb_phy.c drivers/usb/gadget/tegra_udc.c drivers/usb/otg/Makefile drivers/video/tegra/fb.c sound/soc/tegra/tegra_pcm.c
author: Marcel Ziswiler <marcel.ziswiler@toradex.com> 2012-11-12 15:28:39 +0100
committer: Marcel Ziswiler <marcel.ziswiler@toradex.com> 2012-11-12 15:28:39 +0100
commit: f987e832a9e79d2ce8009a5ea9c7b677624b3b30 (patch)
tree: 0dd09a5e6b4c60ee0a9916907dfc2cda83f3e496 /drivers/video
parent: f737b7f46a72c099cf8ac88baff02fbf61b1a47c (diff)
parent: fc993d9bc48f772133d8cd156c67c296477db070 (diff)
39 files changed, 1540 insertions, 555 deletions
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index debd41c9313e..400cf43c5972 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_BACKLIGHT_OMAP1)	+= omap1_bl.o
 obj-$(CONFIG_BACKLIGHT_PROGEAR) += progear_bl.o
 obj-$(CONFIG_BACKLIGHT_CARILLO_RANCH) += cr_bllcd.o
 obj-$(CONFIG_BACKLIGHT_PWM)	+= pwm_bl.o
+CFLAGS_tegra_pwm_bl.o	= -Werror
 obj-$(CONFIG_BACKLIGHT_TEGRA_PWM) += tegra_pwm_bl.o
 obj-$(CONFIG_BACKLIGHT_DA903X)	+= da903x_bl.o
 obj-$(CONFIG_BACKLIGHT_MAX8925)	+= max8925_bl.o
diff --git a/drivers/video/tegra/Kconfig b/drivers/video/tegra/Kconfig
index b5540a5793b2..68cf54264403 100644
--- a/drivers/video/tegra/Kconfig
+++ b/drivers/video/tegra/Kconfig
@@ -119,7 +119,7 @@ config NVMAP_PAGE_POOL_SIZE
 	default 0x0
 
 config NVMAP_CACHE_MAINT_BY_SET_WAYS
-	bool "Enalbe cache maintenance by set/ways"
+	bool "Enable cache maintenance by set/ways"
 	depends on TEGRA_NVMAP
 	help
 	 Say Y here to reduce cache maintenance overhead by MVA.
@@ -127,6 +127,14 @@ config NVMAP_CACHE_MAINT_BY_SET_WAYS
 	 where inner cache includes only L1. For the systems, where inner cache
 	 includes L1 and L2, keep this option disabled.
 
+config NVMAP_OUTER_CACHE_MAINT_BY_SET_WAYS
+	bool "Enable outer cache maintenance by set/ways"
+	depends on TEGRA_NVMAP
+	help
+	  Say Y here if you want to optimize cache maintenance for ranges
+	  bigger than size of outer cache. This option has no effect on
+	  system without outer cache.
+
 config NVMAP_VPR
 	bool "Enable VPR Heap."
 	depends on TEGRA_NVMAP
@@ -135,6 +143,15 @@ config NVMAP_VPR
 	  Say Y here to enable Video Protection Region(VPR) heap.
 	  if unsure, say N.
 
+config NVMAP_FORCE_ZEROED_USER_PAGES
+	bool "Only alloc zeroed pages for user space"
+	depends on TEGRA_NVMAP
+	help
+	  Say Y here to force zeroing of pages allocated for user space. This
+	  avoids leaking kernel secure data to user space. This can add
+	  significant overhead to allocation operations depending on the
+	  allocation size requested.
+
 config TEGRA_DSI
 	bool "Enable DSI panel."
 	default n
diff --git a/drivers/video/tegra/dc/csc.c b/drivers/video/tegra/dc/csc.c
index 74fa900352a1..09db5fee4c6f 100644
--- a/drivers/video/tegra/dc/csc.c
+++ b/drivers/video/tegra/dc/csc.c
@@ -54,10 +54,12 @@ int tegra_dc_update_csc(struct tegra_dc *dc, int win_idx)
 		return -EFAULT;
 	}
 
+	tegra_dc_hold_dc_out(dc);
 	tegra_dc_writel(dc, WINDOW_A_SELECT << win_idx,
 			DC_CMD_DISPLAY_WINDOW_HEADER);
 
 	tegra_dc_set_csc(dc, &dc->windows[win_idx].csc);
+	tegra_dc_release_dc_out(dc);
 
 	mutex_unlock(&dc->lock);
 
diff --git a/drivers/video/tegra/dc/dc.c b/drivers/video/tegra/dc/dc.c
index 1f7e2ce67682..d01df2f520b8 100644
--- a/drivers/video/tegra/dc/dc.c
+++ b/drivers/video/tegra/dc/dc.c
@@ -82,7 +82,7 @@ struct tegra_dc *tegra_dcs[TEGRA_MAX_DC];
 DEFINE_MUTEX(tegra_dc_lock);
 DEFINE_MUTEX(shared_lock);
 
-static inline void tegra_dc_clk_enable(struct tegra_dc *dc)
+void tegra_dc_clk_enable(struct tegra_dc *dc)
 {
 	if (!tegra_is_clk_enabled(dc->clk)) {
 		clk_enable(dc->clk);
@@ -90,7 +90,7 @@ static inline void tegra_dc_clk_enable(struct tegra_dc *dc)
 	}
 }
 
-static inline void tegra_dc_clk_disable(struct tegra_dc *dc)
+void tegra_dc_clk_disable(struct tegra_dc *dc)
 {
 	if (tegra_is_clk_enabled(dc->clk)) {
 		clk_disable(dc->clk);
@@ -98,6 +98,18 @@ static inline void tegra_dc_clk_disable(struct tegra_dc *dc)
 	}
 }
 
+void tegra_dc_hold_dc_out(struct tegra_dc *dc)
+{
+	if (dc->out_ops->hold)
+		dc->out_ops->hold(dc);
+}
+
+void tegra_dc_release_dc_out(struct tegra_dc *dc)
+{
+	if (dc->out_ops->release)
+		dc->out_ops->release(dc);
+}
+
 #define DUMP_REG(a) do {			\
 	snprintf(buff, sizeof(buff), "%-32s\t%03x\t%08lx\n", \
 		 #a, a, tegra_dc_readl(dc, a));		      \
@@ -121,8 +133,9 @@ static void _dump_regs(struct tegra_dc *dc, void *data,
 	int i;
 	char buff[256];
 
+	mutex_lock(&dc->lock);
+	tegra_dc_hold_dc_out(dc);
 	tegra_dc_io_start(dc);
-	tegra_dc_clk_enable(dc);
 
 	DUMP_REG(DC_CMD_DISPLAY_COMMAND_OPTION0);
 	DUMP_REG(DC_CMD_DISPLAY_COMMAND);
@@ -272,8 +285,9 @@ static void _dump_regs(struct tegra_dc *dc, void *data,
 	DUMP_REG(DC_COM_PM1_DUTY_CYCLE);
 	DUMP_REG(DC_DISP_SD_CONTROL);
 
-	tegra_dc_clk_disable(dc);
 	tegra_dc_io_end(dc);
+	tegra_dc_release_dc_out(dc);
+	mutex_unlock(&dc->lock);
 }
 
 #undef DUMP_REG
@@ -488,9 +502,13 @@ int tegra_dc_get_stride(struct tegra_dc *dc, unsigned win)
 	if (!dc->enabled)
 		return 0;
 	BUG_ON(win > DC_N_WINDOWS);
+	mutex_lock(&dc->lock);
+	tegra_dc_hold_dc_out(dc);
 	tegra_dc_writel(dc, WINDOW_A_SELECT << win,
 		DC_CMD_DISPLAY_WINDOW_HEADER);
 	stride = tegra_dc_readl(dc, DC_WIN_LINE_STRIDE);
+	tegra_dc_release_dc_out(dc);
+	mutex_unlock(&dc->lock);
 	return GET_LINE_STRIDE(stride);
 }
 EXPORT_SYMBOL(tegra_dc_get_stride);
@@ -550,17 +568,6 @@ static void tegra_dc_set_scaling_filter(struct tegra_dc *dc)
 	}
 }
 
-void tegra_dc_host_suspend(struct tegra_dc *dc)
-{
-	tegra_dsi_host_suspend(dc);
-	tegra_dc_clk_disable(dc);
-}
-
-void tegra_dc_host_resume(struct tegra_dc *dc) {
-	tegra_dc_clk_enable(dc);
-	tegra_dsi_host_resume(dc);
-}
-
 static inline void disable_dc_irq(unsigned int irq)
 {
 	disable_irq(irq);
@@ -577,9 +584,11 @@ u32 tegra_dc_incr_syncpt_max(struct tegra_dc *dc, int i)
 	u32 max;
 
 	mutex_lock(&dc->lock);
+	tegra_dc_hold_dc_out(dc);
 	max = nvhost_syncpt_incr_max_ext(dc->ndev,
 		dc->syncpt[i].id, ((dc->enabled) ? 1 : 0));
 	dc->syncpt[i].max = max;
+	tegra_dc_release_dc_out(dc);
 	mutex_unlock(&dc->lock);
 
 	return max;
@@ -588,11 +597,14 @@ u32 tegra_dc_incr_syncpt_max(struct tegra_dc *dc, int i)
 void tegra_dc_incr_syncpt_min(struct tegra_dc *dc, int i, u32 val)
 {
 	mutex_lock(&dc->lock);
-	if ( dc->enabled )
+	if (dc->enabled) {
+		tegra_dc_hold_dc_out(dc);
 		while (dc->syncpt[i].min < val) {
 			dc->syncpt[i].min++;
 			nvhost_syncpt_cpu_incr_ext(dc->ndev, dc->syncpt[i].id);
 		}
+		tegra_dc_release_dc_out(dc);
+	}
 	mutex_unlock(&dc->lock);
 }
 
@@ -609,8 +621,7 @@ tegra_dc_config_pwm(struct tegra_dc *dc, struct tegra_dc_pwm_params *cfg)
 		return;
 	}
 
-	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE)
-		tegra_dc_host_resume(dc);
+	tegra_dc_hold_dc_out(dc);
 
 	ctrl = ((cfg->period << PM_PERIOD_SHIFT) |
 		(cfg->clk_div << PM_CLK_DIVIDER_SHIFT) |
@@ -644,6 +655,7 @@ tegra_dc_config_pwm(struct tegra_dc *dc, struct tegra_dc_pwm_params *cfg)
 		break;
 	}
 	tegra_dc_writel(dc, cmd_state, DC_CMD_STATE_ACCESS);
+	tegra_dc_release_dc_out(dc);
 	mutex_unlock(&dc->lock);
 }
 EXPORT_SYMBOL(tegra_dc_config_pwm);
@@ -790,6 +802,9 @@ EXPORT_SYMBOL(tegra_dc_get_out_max_pixclock);
 void tegra_dc_enable_crc(struct tegra_dc *dc)
 {
 	u32 val;
+
+	mutex_lock(&dc->lock);
+	tegra_dc_hold_dc_out(dc);
 	tegra_dc_io_start(dc);
 
 	val = CRC_ALWAYS_ENABLE | CRC_INPUT_DATA_ACTIVE_DATA |
@@ -797,15 +812,21 @@ void tegra_dc_enable_crc(struct tegra_dc *dc)
 	tegra_dc_writel(dc, val, DC_COM_CRC_CONTROL);
 	tegra_dc_writel(dc, GENERAL_UPDATE, DC_CMD_STATE_CONTROL);
 	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
+	tegra_dc_release_dc_out(dc);
+	mutex_unlock(&dc->lock);
 }
 
 void tegra_dc_disable_crc(struct tegra_dc *dc)
 {
+	mutex_lock(&dc->lock);
+	tegra_dc_hold_dc_out(dc);
 	tegra_dc_writel(dc, 0x0, DC_COM_CRC_CONTROL);
 	tegra_dc_writel(dc, GENERAL_UPDATE, DC_CMD_STATE_CONTROL);
 	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
 
 	tegra_dc_io_end(dc);
+	tegra_dc_release_dc_out(dc);
+	mutex_unlock(&dc->lock);
 }
 
 u32 tegra_dc_read_checksum_latched(struct tegra_dc *dc)
@@ -821,7 +842,11 @@ u32 tegra_dc_read_checksum_latched(struct tegra_dc *dc)
 	 * DC_COM_CRC_CHECKSUM_LATCHED is available after VBLANK */
 	mdelay(TEGRA_CRC_LATCHED_DELAY);
 
+	mutex_lock(&dc->lock);
+	tegra_dc_hold_dc_out(dc);
 	crc = tegra_dc_readl(dc, DC_COM_CRC_CHECKSUM_LATCHED);
+	tegra_dc_release_dc_out(dc);
+	mutex_unlock(&dc->lock);
 crc_error:
 	return crc;
 }
@@ -848,6 +873,36 @@ static inline void enable_dc_irq(unsigned int irq)
 #endif
 }
 
+void tegra_dc_get_fbvblank(struct tegra_dc *dc, struct fb_vblank *vblank)
+{
+	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
+		vblank->flags = FB_VBLANK_HAVE_VSYNC;
+}
+
+int tegra_dc_wait_for_vsync(struct tegra_dc *dc)
+{
+	int ret = -ENOTTY;
+
+	if (!(dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) || !dc->enabled)
+		return ret;
+
+	/*
+	 * Logic is as follows
+	 * a) Indicate we need a vblank.
+	 * b) Wait for completion to be signalled from isr.
+	 * c) Initialize completion for next iteration.
+	 */
+
+	tegra_dc_hold_dc_out(dc);
+	dc->out->user_needs_vblank = true;
+
+	ret = wait_for_completion_interruptible(&dc->out->user_vblank_comp);
+	init_completion(&dc->out->user_vblank_comp);
+	tegra_dc_release_dc_out(dc);
+
+	return ret;
+}
+
 static void tegra_dc_vblank(struct work_struct *work)
 {
 	struct tegra_dc *dc = container_of(work, struct tegra_dc, vblank_work);
@@ -860,6 +915,7 @@ static void tegra_dc_vblank(struct work_struct *work)
 		return;
 	}
 
+	tegra_dc_hold_dc_out(dc);
 	/* use the new frame's bandwidth setting instead of max(current, new),
 	 * skip this if we're using tegra_dc_one_shot_worker() */
 	if (!(dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE))
@@ -886,6 +942,7 @@ static void tegra_dc_vblank(struct work_struct *work)
 	if (!dc->vblank_ref_count)
 		tegra_dc_mask_interrupt(dc, V_BLANK_INT);
 
+	tegra_dc_release_dc_out(dc);
 	mutex_unlock(&dc->lock);
 
 	/* Do the actual brightness update outside of the mutex */
@@ -908,8 +965,8 @@ static void tegra_dc_one_shot_worker(struct work_struct *work)
 	/* memory client has gone idle */
 	tegra_dc_clear_bandwidth(dc);
 
-	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE)
-		tegra_dc_host_suspend(dc);
+	if (dc->out_ops->idle)
+		dc->out_ops->idle(dc);
 
 	mutex_unlock(&dc->lock);
 }
@@ -962,9 +1019,9 @@ static void tegra_dc_underflow_handler(struct tegra_dc *dc)
 #endif
 #ifdef CONFIG_ARCH_TEGRA_3x_SOC
 			if (dc->windows[i].underflows > 4) {
-				printk("%s:dc in underflow state."
+				trace_printk("%s:window %c in underflow state."
 					" enable UF_LINE_FLUSH to clear up\n",
-					__func__);
+					dc->ndev->name, (65 + i));
 				tegra_dc_writel(dc, UF_LINE_FLUSH,
 						DC_DISP_DISP_MISC_CONTROL);
 				tegra_dc_writel(dc, GENERAL_UPDATE,
@@ -996,6 +1053,13 @@ static void tegra_dc_underflow_handler(struct tegra_dc *dc)
 #ifndef CONFIG_TEGRA_FPGA_PLATFORM
 static void tegra_dc_one_shot_irq(struct tegra_dc *dc, unsigned long status)
 {
+	/* pending user vblank, so wakeup */
+	if ((status & (V_BLANK_INT | MSF_INT)) &&
+	    (dc->out->user_needs_vblank)) {
+		dc->out->user_needs_vblank = false;
+		complete(&dc->out->user_vblank_comp);
+	}
+
 	if (status & V_BLANK_INT) {
 		/* Sync up windows. */
 		tegra_dc_trigger_windows(dc);
@@ -1018,6 +1082,10 @@ static void tegra_dc_continuous_irq(struct tegra_dc *dc, unsigned long status)
 		queue_work(system_freezable_wq, &dc->vblank_work);
 
 	if (status & FRAME_END_INT) {
+		struct timespec tm = CURRENT_TIME;
+		dc->frame_end_timestamp = timespec_to_ns(&tm);
+		wake_up(&dc->timestamp_wq);
+
 		/* Mark the frame_end as complete. */
 		if (!completion_done(&dc->frame_end_complete))
 			complete(&dc->frame_end_complete);
@@ -1025,6 +1093,22 @@ static void tegra_dc_continuous_irq(struct tegra_dc *dc, unsigned long status)
 		tegra_dc_trigger_windows(dc);
 	}
 }
+
+/* XXX: Not sure if we limit look ahead to 1 frame */
+bool tegra_dc_is_within_n_vsync(struct tegra_dc *dc, s64 ts)
+{
+	BUG_ON(!dc->frametime_ns);
+	return ((ts - dc->frame_end_timestamp) < dc->frametime_ns);
+}
+
+bool tegra_dc_does_vsync_separate(struct tegra_dc *dc, s64 new_ts, s64 old_ts)
+{
+	BUG_ON(!dc->frametime_ns);
+	return (((new_ts - old_ts) > dc->frametime_ns)
+		|| (div_s64((new_ts - dc->frame_end_timestamp), dc->frametime_ns)
+			!= div_s64((old_ts - dc->frame_end_timestamp),
+				dc->frametime_ns)));
+}
 #endif
 
 static irqreturn_t tegra_dc_irq(int irq, void *ptr)
@@ -1187,6 +1271,7 @@ static u32 get_syncpt(struct tegra_dc *dc, int idx)
 static int tegra_dc_init(struct tegra_dc *dc)
 {
 	int i;
+	int int_enable;
 
 	tegra_dc_writel(dc, 0x00000100, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
 	if (dc->ndev->id == 0) {
@@ -1222,8 +1307,12 @@ static int tegra_dc_init(struct tegra_dc *dc)
 	tegra_dc_writel(dc, 0x00000000, DC_DISP_DISP_MISC_CONTROL);
 #endif
 	/* enable interrupts for vblank, frame_end and underflows */
-	tegra_dc_writel(dc, (FRAME_END_INT | V_BLANK_INT | ALL_UF_INT),
-		DC_CMD_INT_ENABLE);
+	int_enable = (FRAME_END_INT | V_BLANK_INT | ALL_UF_INT);
+	/* for panels with one-shot mode enable tearing effect interrupt */
+	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
+		int_enable |= MSF_INT;
+
+	tegra_dc_writel(dc, int_enable, DC_CMD_INT_ENABLE);
 	tegra_dc_writel(dc, ALL_UF_INT, DC_CMD_INT_MASK);
 
 	tegra_dc_writel(dc, 0x00000000, DC_DISP_BORDER_COLOR);
@@ -1272,14 +1361,18 @@ static bool _tegra_dc_controller_enable(struct tegra_dc *dc)
 	tegra_dc_clk_enable(dc);
 
 	/* do not accept interrupts during initialization */
-	tegra_dc_writel(dc, 0, DC_CMD_INT_ENABLE);
 	tegra_dc_writel(dc, 0, DC_CMD_INT_MASK);
 
 	enable_dc_irq(dc->irq);
 
 	failed_init = tegra_dc_init(dc);
 	if (failed_init) {
-		_tegra_dc_controller_disable(dc);
+		tegra_dc_writel(dc, 0, DC_CMD_INT_MASK);
+		disable_irq(dc->irq);
+		tegra_dc_clear_bandwidth(dc);
+		tegra_dc_clk_disable(dc);
+		if (dc->out && dc->out->disable)
+			dc->out->disable();
 		return false;
 	}
 
@@ -1367,19 +1460,14 @@ static bool _tegra_dc_controller_reset_enable(struct tegra_dc *dc)
 
 static int _tegra_dc_set_default_videomode(struct tegra_dc *dc)
 {
-	return tegra_dc_set_fb_mode(dc, &tegra_dc_hdmi_fallback_mode, 0);
-}
-
-static bool _tegra_dc_enable(struct tegra_dc *dc)
-{
 	if (dc->mode.pclk == 0) {
 		switch (dc->out->type) {
 		case TEGRA_DC_OUT_HDMI:
 		/* DC enable called but no videomode is loaded.
 		     Check if HDMI is connected, then set fallback mdoe */
 		if (tegra_dc_hpd(dc)) {
-			if (_tegra_dc_set_default_videomode(dc))
-				return false;
+			return tegra_dc_set_fb_mode(dc,
+					&tegra_dc_hdmi_fallback_mode, 0);
 		} else
 			return false;
 
@@ -1395,12 +1483,24 @@ static bool _tegra_dc_enable(struct tegra_dc *dc)
 		}
 	}
 
+	return false;
+}
+
+static bool _tegra_dc_enable(struct tegra_dc *dc)
+{
+	if (dc->mode.pclk == 0)
+		return false;
+
 	if (!dc->out)
 		return false;
 
 	tegra_dc_io_start(dc);
 
-	return _tegra_dc_controller_enable(dc);
+	if (!_tegra_dc_controller_enable(dc)) {
+		tegra_dc_io_end(dc);
+		return false;
+	}
+	return true;
 }
 
 void tegra_dc_enable(struct tegra_dc *dc)
@@ -1505,8 +1605,20 @@ void tegra_dc_blank(struct tegra_dc *dc)
 
 static void _tegra_dc_disable(struct tegra_dc *dc)
 {
+	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) {
+		mutex_lock(&dc->one_shot_lock);
+		cancel_delayed_work_sync(&dc->one_shot_work);
+	}
+
+	tegra_dc_hold_dc_out(dc);
+
 	_tegra_dc_controller_disable(dc);
 	tegra_dc_io_end(dc);
+
+	tegra_dc_release_dc_out(dc);
+
+	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
+		mutex_unlock(&dc->one_shot_lock);
 }
 
 void tegra_dc_disable(struct tegra_dc *dc)
@@ -1516,16 +1628,9 @@ void tegra_dc_disable(struct tegra_dc *dc)
 	/* it's important that new underflow work isn't scheduled before the
 	 * lock is acquired. */
 	cancel_delayed_work_sync(&dc->underflow_work);
-	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) {
-		mutex_lock(&dc->one_shot_lock);
-		cancel_delayed_work_sync(&dc->one_shot_work);
-	}
 
 	mutex_lock(&dc->lock);
 
-	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE)
-		tegra_dc_host_resume(dc);
-
 	if (dc->enabled) {
 		dc->enabled = false;
 
@@ -1538,8 +1643,6 @@ void tegra_dc_disable(struct tegra_dc *dc)
 #endif
 
 	mutex_unlock(&dc->lock);
-	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
-		mutex_unlock(&dc->one_shot_lock);
 	print_mode_info(dc, dc->mode);
 }
 
@@ -1605,12 +1708,12 @@ static void tegra_dc_underflow_worker(struct work_struct *work)
 		to_delayed_work(work), struct tegra_dc, underflow_work);
 
 	mutex_lock(&dc->lock);
-	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE)
-		tegra_dc_host_resume(dc);
+	tegra_dc_hold_dc_out(dc);
 
 	if (dc->enabled) {
 		tegra_dc_underflow_handler(dc);
 	}
+	tegra_dc_release_dc_out(dc);
 	mutex_unlock(&dc->lock);
 }
 
@@ -1721,6 +1824,7 @@ static int tegra_dc_probe(struct nvhost_device *ndev,
 	mutex_init(&dc->one_shot_lock);
 	init_completion(&dc->frame_end_complete);
 	init_waitqueue_head(&dc->wq);
+	init_waitqueue_head(&dc->timestamp_wq);
 #ifdef CONFIG_ARCH_TEGRA_2x_SOC
 	INIT_WORK(&dc->reset_work, tegra_dc_reset_worker);
 #endif
@@ -1772,8 +1876,10 @@ static int tegra_dc_probe(struct nvhost_device *ndev,
 	}
 
 	mutex_lock(&dc->lock);
-	if (dc->pdata->flags & TEGRA_DC_FLAG_ENABLED)
+	if (dc->pdata->flags & TEGRA_DC_FLAG_ENABLED) {
+		_tegra_dc_set_default_videomode(dc);
 		dc->enabled = _tegra_dc_enable(dc);
+	}
 	mutex_unlock(&dc->lock);
 
 	/* interrupt handler must be registered before tegra_fb_register() */
@@ -1789,7 +1895,7 @@ static int tegra_dc_probe(struct nvhost_device *ndev,
 	dev_info(&ndev->dev, "probed\n");
 
 	if (dc->pdata->fb) {
-		if (dc->pdata->fb->bits_per_pixel == -1) {
+		if (dc->enabled && dc->pdata->fb->bits_per_pixel == -1) {
 			unsigned long fmt;
 			tegra_dc_writel(dc,
 					WINDOW_A_SELECT << dc->pdata->fb->win,
@@ -1921,8 +2027,10 @@ static int tegra_dc_resume(struct nvhost_device *ndev)
 	mutex_lock(&dc->lock);
 	dc->suspended = false;
 
-	if (dc->enabled)
+	if (dc->enabled) {
+		_tegra_dc_set_default_videomode(dc);
 		_tegra_dc_enable(dc);
+	}
 
 	if (dc->out && dc->out->hotplug_init)
 		dc->out->hotplug_init();
diff --git a/drivers/video/tegra/dc/dc_priv.h b/drivers/video/tegra/dc/dc_priv.h
index fb1243593587..75c3a2a29658 100644
--- a/drivers/video/tegra/dc/dc_priv.h
+++ b/drivers/video/tegra/dc/dc_priv.h
@@ -75,6 +75,12 @@ struct tegra_dc_out_ops {
 	void (*enable)(struct tegra_dc *dc);
 	/* disable output.  dc clocks are on at this point */
 	void (*disable)(struct tegra_dc *dc);
+	/* hold output.  keeps dc clocks on. */
+	void (*hold)(struct tegra_dc *dc);
+	/* release output.  dc clocks may turn off after this. */
+	void (*release)(struct tegra_dc *dc);
+	/* idle routine of output.  dc clocks may turn off after this. */
+	void (*idle)(struct tegra_dc *dc);
 	/* suspend output.  dc clocks are on at this point */
 	void (*suspend)(struct tegra_dc *dc);
 	/* resume output.  dc clocks are on at this point */
@@ -107,12 +113,14 @@ struct tegra_dc {
 	void				*out_data;
 
 	struct tegra_dc_mode		mode;
+	s64				frametime_ns;
 
 	struct tegra_dc_win		windows[DC_N_WINDOWS];
 	struct tegra_dc_blend		blend;
 	int				n_windows;
 
 	wait_queue_head_t		wq;
+	wait_queue_head_t		timestamp_wq;
 
 	struct mutex			lock;
 	struct mutex			one_shot_lock;
@@ -157,6 +165,7 @@ struct tegra_dc {
 	struct delayed_work		underflow_work;
 	u32				one_shot_delay_ms;
 	struct delayed_work		one_shot_work;
+	s64				frame_end_timestamp;
 };
 
 #define print_mode_info(dc, mode) do {					\
@@ -366,9 +375,17 @@ void tegra_dc_disable_crc(struct tegra_dc *dc);
 void tegra_dc_set_out_pin_polars(struct tegra_dc *dc,
 				const struct tegra_dc_out_pin *pins,
 				const unsigned int n_pins);
-/* defined in dc.c, used in bandwidth.c */
+/* defined in dc.c, used in bandwidth.c and ext/dev.c */
 unsigned int tegra_dc_has_multiple_dc(void);
 
+/* defined in dc.c, used in dsi.c */
+void tegra_dc_clk_enable(struct tegra_dc *dc);
+void tegra_dc_clk_disable(struct tegra_dc *dc);
+
+/* defined in dc.c, used in nvsd.c and dsi.c */
+void tegra_dc_hold_dc_out(struct tegra_dc *dc);
+void tegra_dc_release_dc_out(struct tegra_dc *dc);
+
 /* defined in bandwidth.c, used in dc.c */
 void tegra_dc_clear_bandwidth(struct tegra_dc *dc);
 void tegra_dc_program_bandwidth(struct tegra_dc *dc, bool use_new);
diff --git a/drivers/video/tegra/dc/dc_sysfs.c b/drivers/video/tegra/dc/dc_sysfs.c
index bf27e963f233..09a8e2dbb5b1 100644
--- a/drivers/video/tegra/dc/dc_sysfs.c
+++ b/drivers/video/tegra/dc/dc_sysfs.c
@@ -313,6 +313,14 @@ static ssize_t nvdps_store(struct device *dev,
 
 static DEVICE_ATTR(nvdps, S_IRUGO|S_IWUSR, nvdps_show, nvdps_store);
 
+static ssize_t smart_panel_show(struct device *device,
+	struct device_attribute *attr, char  *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "1\n");
+}
+
+static DEVICE_ATTR(smart_panel, S_IRUGO, smart_panel_show, NULL);
+
 void __devexit tegra_dc_remove_sysfs(struct device *dev)
 {
 	struct nvhost_device *ndev = to_nvhost_device(dev);
@@ -332,6 +340,9 @@ void __devexit tegra_dc_remove_sysfs(struct device *dev)
 
 	if (sd_settings)
 		nvsd_remove_sysfs(dev);
+
+	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
+		device_remove_file(dev, &dev_attr_smart_panel);
 }
 
 void tegra_dc_create_sysfs(struct device *dev)
@@ -355,6 +366,9 @@ void tegra_dc_create_sysfs(struct device *dev)
 	if (sd_settings)
 		error |= nvsd_create_sysfs(dev);
 
+	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
+		error |= device_create_file(dev, &dev_attr_smart_panel);
+
 	if (error)
 		dev_err(&ndev->dev, "Failed to create sysfs attributes!\n");
 }
diff --git a/drivers/video/tegra/dc/dsi.c b/drivers/video/tegra/dc/dsi.c
index 7ee9375f58f1..d3b1d40d535b 100644
--- a/drivers/video/tegra/dc/dsi.c
+++ b/drivers/video/tegra/dc/dsi.c
@@ -79,6 +79,8 @@
 #define DSI_LP_OP_WRITE			0x1
 #define DSI_LP_OP_READ			0x2
 
+#define DSI_HOST_IDLE_PERIOD		1000
+
 static bool enable_read_debug;
 module_param(enable_read_debug, bool, 0644);
 MODULE_PARM_DESC(enable_read_debug,
@@ -129,6 +131,11 @@ struct tegra_dc_dsi_data {
 	bool ulpm;
 	bool enabled;
 	bool host_suspended;
+	struct mutex host_resume_lock;
+	struct delayed_work idle_work;
+	unsigned long idle_delay;
+	spinlock_t host_ref_lock;
+	u8 host_ref;
 
 	u8 driven_mode;
 	u8 controller_index;
@@ -294,6 +301,10 @@ const u32 init_reg[] = {
 	DSI_PKT_LEN_6_7,
 };
 
+static int tegra_dsi_host_suspend(struct tegra_dc *dc);
+static int tegra_dsi_host_resume(struct tegra_dc *dc);
+static void tegra_dc_dsi_idle_work(struct work_struct *work);
+
 inline unsigned long tegra_dsi_readl(struct tegra_dc_dsi_data *dsi, u32 reg)
 {
 	unsigned long ret;
@@ -331,6 +342,7 @@ static int dbg_dsi_show(struct seq_file *s, void *unused)
 	DUMP_REG(DSI_CTXSW);
 	DUMP_REG(DSI_POWER_CONTROL);
 	DUMP_REG(DSI_INT_ENABLE);
+	DUMP_REG(DSI_HOST_DSI_CONTROL);
 	DUMP_REG(DSI_CONTROL);
 	DUMP_REG(DSI_SOL_DELAY);
 	DUMP_REG(DSI_MAX_THRESHOLD);
@@ -650,6 +662,13 @@ static void tegra_dsi_init_sw(struct tegra_dc *dc,
 		dsi->info.video_clock_mode = TEGRA_DSI_VIDEO_CLOCK_CONTINUOUS;
 	}
 
+	dsi->host_ref = 0;
+	dsi->host_suspended = false;
+	spin_lock_init(&dsi->host_ref_lock);
+	mutex_init(&dsi->host_resume_lock);
+	init_completion(&dc->out->user_vblank_comp);
+	INIT_DELAYED_WORK(&dsi->idle_work, tegra_dc_dsi_idle_work);
+	dsi->idle_delay = msecs_to_jiffies(DSI_HOST_IDLE_PERIOD);
 }
 
 #define SELECT_T_PHY(platform_t_phy_ns, default_phy, clk_ns, hw_inc) ( \
@@ -1341,9 +1360,38 @@ static void tegra_dsi_reset_underflow_overflow
 	}
 }
 
+static void tegra_dsi_soft_reset(struct tegra_dc_dsi_data *dsi)
+{
+	u32 trigger;
+
+	tegra_dsi_writel(dsi,
+		DSI_POWER_CONTROL_LEG_DSI_ENABLE(TEGRA_DSI_DISABLE),
+		DSI_POWER_CONTROL);
+	/* stabilization delay */
+	udelay(300);
+
+	tegra_dsi_writel(dsi,
+		DSI_POWER_CONTROL_LEG_DSI_ENABLE(TEGRA_DSI_ENABLE),
+		DSI_POWER_CONTROL);
+	/* stabilization delay */
+	udelay(300);
+
+	/* dsi HW does not clear host trigger bit automatically
+	 * on dsi interface disable if host fifo is empty or in mid
+	 * of host transmission
+	 */
+	trigger = tegra_dsi_readl(dsi, DSI_TRIGGER);
+	if (trigger)
+		tegra_dsi_writel(dsi, 0x0, DSI_TRIGGER);
+}
+
 static void tegra_dsi_stop_dc_stream(struct tegra_dc *dc,
 					struct tegra_dc_dsi_data *dsi)
 {
+	/* Mask the MSF interrupt. */
+	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
+		tegra_dc_mask_interrupt(dc, MSF_INT);
+
 	tegra_dc_writel(dc, DISP_CTRL_MODE_STOP, DC_CMD_DISPLAY_COMMAND);
 	tegra_dc_writel(dc, 0, DC_DISP_DISP_WIN_OPTIONS);
 	tegra_dc_writel(dc, GENERAL_UPDATE, DC_CMD_STATE_CONTROL);
@@ -1359,13 +1407,13 @@ static void tegra_dsi_stop_dc_stream_at_frame_end(struct tegra_dc *dc,
 	long timeout;
 	u32 frame_period = DIV_ROUND_UP(S_TO_MS(1), dsi->info.refresh_rate);
 
-	/* stop dc */
-	tegra_dsi_stop_dc_stream(dc, dsi);
+	INIT_COMPLETION(dc->frame_end_complete);
 
-	/* enable frame end interrupt */
+	/* unmask frame end interrupt */
 	val = tegra_dc_readl(dc, DC_CMD_INT_MASK);
-	val |= FRAME_END_INT;
-	tegra_dc_writel(dc, val, DC_CMD_INT_MASK);
+	tegra_dc_writel(dc, val | FRAME_END_INT, DC_CMD_INT_MASK);
+
+	tegra_dsi_stop_dc_stream(dc, dsi);
 
 	/* wait for frame_end completion.
 	 * timeout is 2 frame duration to accomodate for
@@ -1375,9 +1423,14 @@ static void tegra_dsi_stop_dc_stream_at_frame_end(struct tegra_dc *dc,
 			&dc->frame_end_complete,
 			msecs_to_jiffies(2 * frame_period));
 
-	/* disable frame end interrupt */
-	val = tegra_dc_readl(dc, DC_CMD_INT_MASK);
-	val &= ~FRAME_END_INT;
+	/* give 2 line time to dsi HW to catch up
+	 * with pixels sent by dc
+	 */
+	udelay(50);
+
+	tegra_dsi_soft_reset(dsi);
+
+	/* reinstate interrupt mask */
 	tegra_dc_writel(dc, val, DC_CMD_INT_MASK);
 
 	if (timeout == 0)
@@ -1419,6 +1472,9 @@ static void tegra_dsi_start_dc_stream(struct tegra_dc *dc,
 		tegra_dc_writel(dc, GENERAL_UPDATE, DC_CMD_STATE_CONTROL);
 		tegra_dc_writel(dc, GENERAL_ACT_REQ | NC_HOST_TRIG,
 						DC_CMD_STATE_CONTROL);
+
+		/* Unmask the MSF interrupt. */
+		tegra_dc_unmask_interrupt(dc, MSF_INT);
 	} else {
 		/* set continuous mode */
 		tegra_dc_writel(dc, DISP_CTRL_MODE_C_DISPLAY,
@@ -1559,7 +1615,8 @@ static void tegra_dsi_set_control_reg_lp(struct tegra_dc_dsi_data *dsi)
 	dsi->status.vtype = DSI_VIDEO_TYPE_NOT_INIT;
 }
 
-static void tegra_dsi_set_control_reg_hs(struct tegra_dc_dsi_data *dsi)
+static void tegra_dsi_set_control_reg_hs(struct tegra_dc_dsi_data *dsi,
+						u8 driven_mode)
 {
 	u32 dsi_control;
 	u32 host_dsi_control;
@@ -1571,7 +1628,7 @@ static void tegra_dsi_set_control_reg_hs(struct tegra_dc_dsi_data *dsi)
 	max_threshold = 0;
 	dcs_cmd = 0;
 
-	if (dsi->driven_mode == TEGRA_DSI_DRIVEN_BY_HOST) {
+	if (driven_mode == TEGRA_DSI_DRIVEN_BY_HOST) {
 		dsi_control |= DSI_CTRL_HOST_DRIVEN;
 		host_dsi_control |= HOST_DSI_CTRL_HOST_DRIVEN;
 		max_threshold =
@@ -1583,17 +1640,19 @@ static void tegra_dsi_set_control_reg_hs(struct tegra_dc_dsi_data *dsi)
 		max_threshold =
 			DSI_MAX_THRESHOLD_MAX_THRESHOLD(DSI_VIDEO_FIFO_DEPTH);
 		dsi->status.driven = DSI_DRIVEN_MODE_DC;
-	}
 
-	if (dsi->info.video_data_type == TEGRA_DSI_VIDEO_TYPE_COMMAND_MODE) {
-		dsi_control |= DSI_CTRL_CMD_MODE;
-		dcs_cmd = DSI_DCS_CMDS_LT5_DCS_CMD(DSI_WRITE_MEMORY_START)|
-			DSI_DCS_CMDS_LT3_DCS_CMD(DSI_WRITE_MEMORY_CONTINUE);
-		dsi->status.vtype = DSI_VIDEO_TYPE_CMD_MODE;
-
-	} else {
-		dsi_control |= DSI_CTRL_VIDEO_MODE;
-		dsi->status.vtype = DSI_VIDEO_TYPE_VIDEO_MODE;
+		if (dsi->info.video_data_type ==
+			TEGRA_DSI_VIDEO_TYPE_COMMAND_MODE) {
+			dsi_control |= DSI_CTRL_CMD_MODE;
+			dcs_cmd = DSI_DCS_CMDS_LT5_DCS_CMD(
+				DSI_WRITE_MEMORY_START)|
+				DSI_DCS_CMDS_LT3_DCS_CMD(
+				DSI_WRITE_MEMORY_CONTINUE);
+			dsi->status.vtype = DSI_VIDEO_TYPE_CMD_MODE;
+		} else {
+			dsi_control |= DSI_CTRL_VIDEO_MODE;
+			dsi->status.vtype = DSI_VIDEO_TYPE_VIDEO_MODE;
+		}
 	}
 
 	tegra_dsi_writel(dsi, max_threshold, DSI_MAX_THRESHOLD);
@@ -1734,6 +1793,7 @@ static int tegra_dsi_set_to_lp_mode(struct tegra_dc *dc,
 
 	dsi->status.lphs = DSI_LPHS_IN_LP_MODE;
 	dsi->status.lp_op = lp_op;
+	dsi->driven_mode = TEGRA_DSI_DRIVEN_BY_HOST;
 success:
 	err = 0;
 fail:
@@ -1741,7 +1801,8 @@ fail:
 }
 
 static int tegra_dsi_set_to_hs_mode(struct tegra_dc *dc,
-					struct tegra_dc_dsi_data *dsi)
+					struct tegra_dc_dsi_data *dsi,
+					u8 driven_mode)
 {
 	int err;
 
@@ -1750,9 +1811,12 @@ static int tegra_dsi_set_to_hs_mode(struct tegra_dc *dc,
 		goto fail;
 	}
 
-	if (dsi->status.lphs == DSI_LPHS_IN_HS_MODE)
+	if (dsi->status.lphs == DSI_LPHS_IN_HS_MODE &&
+		dsi->driven_mode == driven_mode)
 		goto success;
 
+	dsi->driven_mode = driven_mode;
+
 	if (dsi->status.dc_stream == DSI_DC_STREAM_ENABLE)
 		tegra_dsi_stop_dc_stream_at_frame_end(dc, dsi);
 
@@ -1767,14 +1831,14 @@ static int tegra_dsi_set_to_hs_mode(struct tegra_dc *dc,
 
 	tegra_dsi_set_phy_timing(dsi, DSI_LPHS_IN_HS_MODE);
 
-	if (dsi->driven_mode == TEGRA_DSI_DRIVEN_BY_DC) {
+	if (driven_mode == TEGRA_DSI_DRIVEN_BY_DC) {
 		tegra_dsi_set_pkt_seq(dc, dsi);
 		tegra_dsi_set_pkt_length(dc, dsi);
 		tegra_dsi_set_sol_delay(dc, dsi);
 		tegra_dsi_set_dc_clk(dc, dsi);
 	}
 
-	tegra_dsi_set_control_reg_hs(dsi);
+	tegra_dsi_set_control_reg_hs(dsi, driven_mode);
 
 	if (dsi->status.clk_out == DSI_PHYCLK_OUT_DIS ||
 		dsi->info.enable_hs_clock_on_lp_cmd_mode)
@@ -1845,35 +1909,6 @@ fail:
 	return (err < 0 ? true : false);
 }
 
-static void tegra_dsi_soft_reset(struct tegra_dc_dsi_data *dsi)
-{
-	u32 trigger;
-	u32 status;
-
-	tegra_dsi_writel(dsi,
-		DSI_POWER_CONTROL_LEG_DSI_ENABLE(TEGRA_DSI_DISABLE),
-		DSI_POWER_CONTROL);
-	/* stabilization delay */
-	udelay(300);
-
-	tegra_dsi_writel(dsi,
-		DSI_POWER_CONTROL_LEG_DSI_ENABLE(TEGRA_DSI_ENABLE),
-		DSI_POWER_CONTROL);
-	/* stabilization delay */
-	udelay(300);
-
-	/* dsi HW does not clear host trigger bit automatically
-	 * on dsi interface disable if host fifo is empty
-	 */
-	trigger = tegra_dsi_readl(dsi, DSI_TRIGGER);
-	status = tegra_dsi_readl(dsi, DSI_STATUS);
-	if (trigger & DSI_TRIGGER_HOST_TRIGGER(0x1) &&
-		status & DSI_STATUS_IDLE(0x1)) {
-		trigger &= ~(DSI_TRIGGER_HOST_TRIGGER(0x1));
-		tegra_dsi_writel(dsi, trigger, DSI_TRIGGER);
-	}
-}
-
 static void tegra_dsi_reset_read_count(struct tegra_dc_dsi_data *dsi)
 {
 	u32 val;
@@ -1892,49 +1927,42 @@ static struct dsi_status *tegra_dsi_save_state_switch_to_host_cmd_mode(
 						struct tegra_dc *dc,
 						u8 lp_op)
 {
-	struct dsi_status *init_status;
+	struct dsi_status *init_status = NULL;
 	int err;
 
+	if (dsi->status.init != DSI_MODULE_INIT ||
+		dsi->status.lphs == DSI_LPHS_NOT_INIT) {
+		err = -EPERM;
+		goto fail;
+	}
+
 	init_status = kzalloc(sizeof(*init_status), GFP_KERNEL);
 	if (!init_status)
 		return ERR_PTR(-ENOMEM);
 
 	*init_status = dsi->status;
 
-	if (dsi->status.lphs == DSI_LPHS_IN_HS_MODE) {
-		if (dsi->status.driven == DSI_DRIVEN_MODE_DC) {
-			if (dsi->status.dc_stream == DSI_DC_STREAM_ENABLE)
-				tegra_dsi_stop_dc_stream_at_frame_end(dc, dsi);
-			dsi->driven_mode = TEGRA_DSI_DRIVEN_BY_HOST;
-			if (dsi->info.hs_cmd_mode_supported) {
-				err = tegra_dsi_set_to_hs_mode(dc, dsi);
-				if (err < 0) {
-					dev_err(&dc->ndev->dev,
-					"Switch to HS host mode failed\n");
-					goto fail;
-				}
-			}
-		}
-		if (!dsi->info.hs_cmd_mode_supported) {
-			err =
-			tegra_dsi_set_to_lp_mode(dc, dsi, lp_op);
-			if (err < 0) {
-				dev_err(&dc->ndev->dev,
-				"DSI failed to go to LP mode\n");
-				goto fail;
-			}
-		}
-	} else if (dsi->status.lphs == DSI_LPHS_IN_LP_MODE) {
-		if (dsi->status.lp_op != lp_op) {
-			err = tegra_dsi_set_to_lp_mode(dc, dsi, lp_op);
-			if (err < 0) {
-				dev_err(&dc->ndev->dev,
-				"DSI failed to go to LP mode\n");
-				goto fail;
-			}
+	if (dsi->info.hs_cmd_mode_supported) {
+		err = tegra_dsi_set_to_hs_mode(dc, dsi,
+				TEGRA_DSI_DRIVEN_BY_HOST);
+		if (err < 0) {
+			dev_err(&dc->ndev->dev,
+			"Switch to HS host mode failed\n");
+			goto fail;
 		}
+
+		goto success;
 	}
 
+	if (dsi->status.lp_op != lp_op) {
+		err = tegra_dsi_set_to_lp_mode(dc, dsi, lp_op);
+		if (err < 0) {
+			dev_err(&dc->ndev->dev,
+			"DSI failed to go to LP mode\n");
+			goto fail;
+		}
+	}
+success:
 	return init_status;
 fail:
 	kfree(init_status);
@@ -1948,6 +1976,7 @@ static struct dsi_status *tegra_dsi_prepare_host_transmission(
 {
 	int err = 0;
 	struct dsi_status *init_status;
+	bool restart_dc_stream = false;
 
 	if (dsi->status.init != DSI_MODULE_INIT ||
 		dsi->ulpm) {
@@ -1955,12 +1984,13 @@ static struct dsi_status *tegra_dsi_prepare_host_transmission(
 		goto fail;
 	}
 
+	if (dsi->status.dc_stream == DSI_DC_STREAM_ENABLE) {
+		restart_dc_stream = true;
+		tegra_dsi_stop_dc_stream_at_frame_end(dc, dsi);
+	}
+
 	if (tegra_dsi_host_busy(dsi)) {
 		tegra_dsi_soft_reset(dsi);
-
-		/* WAR to stop host write in middle */
-		tegra_dsi_writel(dsi, TEGRA_DSI_DISABLE, DSI_TRIGGER);
-
 		if (tegra_dsi_host_busy(dsi)) {
 			err = -EBUSY;
 			dev_err(&dc->ndev->dev, "DSI host busy\n");
@@ -1987,6 +2017,9 @@ static struct dsi_status *tegra_dsi_prepare_host_transmission(
 		goto fail;
 	}
 
+	if (restart_dc_stream)
+		init_status->dc_stream = DSI_DC_STREAM_ENABLE;
+
 	return init_status;
 fail:
 	return ERR_PTR(err);
@@ -1996,50 +2029,30 @@ static int tegra_dsi_restore_state(struct tegra_dc *dc,
 				struct tegra_dc_dsi_data *dsi,
 				struct dsi_status *init_status)
 {
-	bool switch_back_to_dc_mode = false;
-	bool switch_back_to_hs_mode = false;
-	bool restart_dc_stream;
 	int err = 0;
 
-	switch_back_to_dc_mode = (dsi->status.driven ==
-				DSI_DRIVEN_MODE_HOST &&
-				init_status->driven ==
-				DSI_DRIVEN_MODE_DC);
-	switch_back_to_hs_mode = (dsi->status.lphs ==
-				DSI_LPHS_IN_LP_MODE &&
-				init_status->lphs ==
-				DSI_LPHS_IN_HS_MODE);
-	restart_dc_stream = (dsi->status.dc_stream ==
-				DSI_DC_STREAM_DISABLE &&
-				init_status->dc_stream ==
-				DSI_DC_STREAM_ENABLE);
-
-	if (dsi->status.lphs == DSI_LPHS_IN_LP_MODE &&
-		init_status->lphs == DSI_LPHS_IN_LP_MODE) {
-		if (dsi->status.lp_op != init_status->lp_op) {
-			err =
-			tegra_dsi_set_to_lp_mode(dc, dsi, init_status->lp_op);
-			if (err < 0) {
-				dev_err(&dc->ndev->dev,
-					"Failed to config LP mode\n");
-				goto fail;
-			}
+	if (init_status->lphs == DSI_LPHS_IN_LP_MODE) {
+		err = tegra_dsi_set_to_lp_mode(dc, dsi, init_status->lp_op);
+		if (err < 0) {
+			dev_err(&dc->ndev->dev,
+				"Failed to config LP mode\n");
+			goto fail;
 		}
 		goto success;
 	}
 
-	if (switch_back_to_dc_mode)
-		dsi->driven_mode = TEGRA_DSI_DRIVEN_BY_DC;
-	if (switch_back_to_dc_mode || switch_back_to_hs_mode) {
-		err = tegra_dsi_set_to_hs_mode(dc, dsi);
+	if (init_status->lphs == DSI_LPHS_IN_HS_MODE) {
+		u8 driven = (init_status->driven == DSI_DRIVEN_MODE_DC) ?
+			TEGRA_DSI_DRIVEN_BY_DC : TEGRA_DSI_DRIVEN_BY_HOST;
+		err = tegra_dsi_set_to_hs_mode(dc, dsi, driven);
 		if (err < 0) {
 			dev_err(&dc->ndev->dev, "Failed to config HS mode\n");
 			goto fail;
 		}
 	}
-	if (restart_dc_stream)
-		tegra_dsi_start_dc_stream(dc, dsi);
 
+	if (init_status->dc_stream == DSI_DC_STREAM_ENABLE)
+		tegra_dsi_start_dc_stream(dc, dsi);
 success:
 fail:
 	kfree(init_status);
@@ -2081,7 +2094,6 @@ static int _tegra_dsi_write_data(struct tegra_dc_dsi_data *dsi,
 					u8 *pdata, u8 data_id, u16 data_len)
 {
 	u8 virtual_channel;
-	u8 *pval;
 	u32 val;
 	int err;
 
@@ -2104,10 +2116,9 @@ static int _tegra_dsi_write_data(struct tegra_dc_dsi_data *dsi,
 				pdata += 4;
 			} else {
 				val = 0;
-				pval = (u8 *) &val;
-				do
-					*pval++ = *pdata++;
-				while (--data_len);
+				memcpy(&val, pdata, data_len);
+				pdata += data_len;
+				data_len = 0;
 			}
 			tegra_dsi_writel(dsi, val, DSI_WR_DATA);
 		}
@@ -2120,6 +2131,49 @@ static int _tegra_dsi_write_data(struct tegra_dc_dsi_data *dsi,
 	return err;
 }
 
+static void tegra_dc_dsi_hold_host(struct tegra_dc *dc)
+{
+	struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc);
+
+	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) {
+		spin_lock(&dsi->host_ref_lock);
+		dsi->host_ref++;
+		spin_unlock(&dsi->host_ref_lock);
+		tegra_dsi_host_resume(dc);
+
+		/*
+		 * Take an extra reference to account for the clk_disable in
+		 * tegra_dc_dsi_release_host.
+		 */
+		clk_enable(dc->clk);
+	}
+}
+
+static void tegra_dc_dsi_release_host(struct tegra_dc *dc)
+{
+	struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc);
+	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) {
+		clk_disable(dc->clk);
+		spin_lock(&dsi->host_ref_lock);
+		dsi->host_ref--;
+
+		if (!dsi->host_ref &&
+		    (dsi->status.dc_stream == DSI_DC_STREAM_ENABLE))
+			schedule_delayed_work(&dsi->idle_work, dsi->idle_delay);
+
+		spin_unlock(&dsi->host_ref_lock);
+	}
+}
+
+static void tegra_dc_dsi_idle_work(struct work_struct *work)
+{
+	struct tegra_dc_dsi_data *dsi = container_of(
+		to_delayed_work(work), struct tegra_dc_dsi_data, idle_work);
+
+	if (dsi->dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE)
+		tegra_dsi_host_suspend(dsi->dc);
+}
+
 int tegra_dsi_write_data(struct tegra_dc *dc,
 			struct tegra_dc_dsi_data *dsi,
 			u8 *pdata, u8 data_id, u16 data_len)
@@ -2143,6 +2197,7 @@ fail:
 	if (err < 0)
 		dev_err(&dc->ndev->dev, "Failed to restore prev state\n");
 	tegra_dc_io_end(dc);
+
 	return err;
 }
 EXPORT_SYMBOL(tegra_dsi_write_data);
@@ -2174,7 +2229,7 @@ static int tegra_dsi_send_panel_cmd(struct tegra_dc *dc,
 	return err;
 }
 
-static u8 get_8bit_ecc(u32 header)
+static u8 tegra_dsi_ecc(u32 header)
 {
 	char ecc_parity[24] = {
 		0x07, 0x0b, 0x0d, 0x0e, 0x13, 0x15, 0x16, 0x19,
@@ -2191,75 +2246,158 @@ static u8 get_8bit_ecc(u32 header)
 	return ecc_byte;
 }
 
-/* This function is written to send DCS short write (1 parameter) only.
- * This means the cmd will contain only 1 byte of index and 1 byte of value.
- * The data type ID is fixed at 0x15 and the ECC is calculated based on the
- * data in pdata.
- * The command will be sent by hardware every frame.
- * pdata should contain both the index + value for each cmd.
- * data_len will be the total number of bytes in pdata.
- */
-int tegra_dsi_send_panel_short_cmd(struct tegra_dc *dc, u8 *pdata, u8 data_len)
+static u16 tegra_dsi_cs(char *pdata, u16 data_len)
 {
-	u8 ecc8bits = 0, data_len_orig = 0;
-	u32 val = 0, pkthdr = 0;
-	int err = 0, count = 0;
-	struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc);
+	u16 byte_cnt;
+	u8 bit_cnt;
+	char curr_byte;
+	u16 crc = 0xFFFF;
+	u16 poly = 0x8408;
 
-	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE)
-		tegra_dc_host_resume(dc);
+	if (data_len > 0) {
+		for (byte_cnt = 0; byte_cnt < data_len; byte_cnt++) {
+			curr_byte = pdata[byte_cnt];
+			for (bit_cnt = 0; bit_cnt < 8; bit_cnt++) {
+				if (((crc & 0x0001 ) ^
+					(curr_byte & 0x0001)) > 0)
+					crc = ((crc >> 1) & 0x7FFF) ^ poly;
+				else
+					crc = (crc >> 1) & 0x7FFF;
 
-	data_len_orig = data_len;
-	if (pdata != NULL) {
-		while (data_len) {
-			if (data_len >= 2) {
-				pkthdr = (CMD_SHORTW |
-					(((u16 *)pdata)[0]) << 8 | 0x00 << 24);
-				ecc8bits = get_8bit_ecc(pkthdr);
-				val = (pkthdr | (ecc8bits << 24));
-				data_len -= 2;
-				pdata += 2;
-				count++;
+				curr_byte = (curr_byte >> 1 ) & 0x7F;
 			}
-			switch (count) {
-			case 1:
-				tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_0);
-				break;
-			case 2:
-				tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_1);
-				break;
-			case 3:
-				tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_2);
-				break;
-			case 4:
-				tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_3);
-				break;
-			case 5:
-				tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_4);
-				break;
-			case 6:
-				tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_5);
-				break;
-			case 7:
-				tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_6);
-				break;
-			case 8:
-				tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_7);
-				break;
-			default:
-				err = 1;
-				break;
+		}
+	}
+	return crc;
+}
+
+static int tegra_dsi_dcs_pkt_seq_ctrl_init(struct tegra_dc_dsi_data *dsi,
+						struct tegra_dsi_cmd *cmd)
+{
+	u8 virtual_channel;
+	u32 val;
+	u16 data_len = cmd->sp_len_dly.data_len;
+	u8 seq_ctrl_reg = 0;
+
+	virtual_channel = dsi->info.virtual_channel <<
+				DSI_VIR_CHANNEL_BIT_POSITION;
+
+	val = (virtual_channel | cmd->data_id) << 0 |
+		data_len << 8;
+
+	val |= tegra_dsi_ecc(val) << 24;
+
+	tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_0 + seq_ctrl_reg++);
+
+	/* if pdata != NULL, pkt type is long pkt */
+	if (cmd->pdata != NULL) {
+		u8 *pdata;
+		u8 *pdata_mem;
+		/*  allocate memory for pdata + 2 bytes checksum */
+		pdata_mem = kzalloc(sizeof(u8) * data_len + 2, GFP_KERNEL);
+		if (!pdata_mem) {
+			dev_err(&dsi->dc->ndev->dev, "dsi: memory err\n");
+			tegra_dsi_soft_reset(dsi);
+			return -ENOMEM;
+		}
+
+		memcpy(pdata_mem, cmd->pdata, data_len);
+		pdata = pdata_mem;
+		*((u16 *)(pdata + data_len)) = tegra_dsi_cs(pdata, data_len);
+
+		/* data_len = length of pdata + 2 byte checksum */
+		data_len += 2;
+
+		while (data_len) {
+			if (data_len >= 4) {
+				val = ((u32 *) pdata)[0];
+				data_len -= 4;
+				pdata += 4;
+			} else {
+				val = 0;
+				memcpy(&val, pdata, data_len);
+				pdata += data_len;
+				data_len = 0;
 			}
+			tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_0 +
+							seq_ctrl_reg++);
 		}
+		kfree(pdata_mem);
 	}
 
-	val = DSI_INIT_SEQ_CONTROL_DSI_FRAME_INIT_BYTE_COUNT(data_len_orig * 2)
-		| DSI_INIT_SEQ_CONTROL_DSI_SEND_INIT_SEQUENCE(1);
+	return 0;
+}
+
+int tegra_dsi_start_host_cmd_v_blank_dcs(struct tegra_dc_dsi_data * dsi,
+						struct tegra_dsi_cmd *cmd)
+{
+#define PKT_HEADER_LEN_BYTE	4
+#define CHECKSUM_LEN_BYTE	2
+
+	int err = 0;
+	u32 val;
+	u16 tot_pkt_len = PKT_HEADER_LEN_BYTE;
+	struct tegra_dc *dc = dsi->dc;
+
+	if (cmd->cmd_type != TEGRA_DSI_PACKET_CMD)
+		return -EINVAL;
+
+	mutex_lock(&dsi->lock);
+	tegra_dc_dsi_hold_host(dc);
+
+	tegra_dc_io_start(dc);
+
+
+	err = tegra_dsi_dcs_pkt_seq_ctrl_init(dsi, cmd);
+	if (err < 0) {
+		dev_err(&dsi->dc->ndev->dev,
+			"dsi: dcs pkt seq ctrl init failed\n");
+		goto fail;
+	}
+
+	if (cmd->pdata) {
+		u16 data_len = cmd->sp_len_dly.data_len;
+		tot_pkt_len += data_len + CHECKSUM_LEN_BYTE;
+	}
+
+	val = DSI_INIT_SEQ_CONTROL_DSI_FRAME_INIT_BYTE_COUNT(tot_pkt_len) |
+		DSI_INIT_SEQ_CONTROL_DSI_SEND_INIT_SEQUENCE(
+						TEGRA_DSI_ENABLE);
 	tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_CONTROL);
 
+fail:
+	tegra_dc_io_end(dc);
+	tegra_dc_dsi_release_host(dc);
+	mutex_unlock(&dsi->lock);
 	return err;
+
+#undef PKT_HEADER_LEN_BYTE
+#undef CHECKSUM_LEN_BYTE
 }
-EXPORT_SYMBOL(tegra_dsi_send_panel_short_cmd);
+EXPORT_SYMBOL(tegra_dsi_start_host_cmd_v_blank_dcs);
+
+void tegra_dsi_stop_host_cmd_v_blank_dcs(struct tegra_dc_dsi_data * dsi)
+{
+	struct tegra_dc *dc = dsi->dc;
+	u32 cnt;
+
+	mutex_lock(&dsi->lock);
+	tegra_dc_dsi_hold_host(dc);
+
+	tegra_dc_io_start(dc);
+
+	tegra_dsi_writel(dsi, TEGRA_DSI_DISABLE, DSI_INIT_SEQ_CONTROL);
+
+	/* clear seq data registers */
+	for (cnt = 0; cnt < 8; cnt++)
+		tegra_dsi_writel(dsi, 0, DSI_INIT_SEQ_DATA_0 + cnt);
+
+	tegra_dc_io_end(dc);
+
+	tegra_dc_dsi_release_host(dc);
+	mutex_unlock(&dsi->lock);
+}
+EXPORT_SYMBOL(tegra_dsi_stop_host_cmd_v_blank_dcs);
 
 static int tegra_dsi_bta(struct tegra_dc_dsi_data *dsi)
 {
@@ -2429,6 +2567,7 @@ int tegra_dsi_read_data(struct tegra_dc *dc,
 	int err = 0;
 	struct dsi_status *init_status;
 
+	mutex_lock(&dsi->lock);
 	tegra_dc_io_start(dc);
 
 	init_status = tegra_dsi_prepare_host_transmission(
@@ -2487,6 +2626,7 @@ fail:
 	if (err < 0)
 		dev_err(&dc->ndev->dev, "Failed to restore prev state\n");
 	tegra_dc_io_end(dc);
+	mutex_unlock(&dsi->lock);
 	return err;
 }
 EXPORT_SYMBOL(tegra_dsi_read_data);
@@ -2625,7 +2765,8 @@ static void tegra_dsi_send_dc_frames(struct tegra_dc *dc,
 	bool switch_to_lp = (dsi->status.lphs == DSI_LPHS_IN_LP_MODE);
 
 	if (dsi->status.lphs != DSI_LPHS_IN_HS_MODE) {
-		err = tegra_dsi_set_to_hs_mode(dc, dsi);
+		err = tegra_dsi_set_to_hs_mode(dc, dsi,
+				TEGRA_DSI_DRIVEN_BY_DC);
 		if (err < 0) {
 			dev_err(&dc->ndev->dev,
 				"Switch to HS host mode failed\n");
@@ -2668,11 +2809,10 @@ static void tegra_dc_dsi_enable(struct tegra_dc *dc)
 	int err;
 	u32 val;
 
-	tegra_dc_io_start(dc);
 	mutex_lock(&dsi->lock);
+	tegra_dc_dsi_hold_host(dc);
 
-	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE)
-		tegra_dc_host_resume(dc);
+	tegra_dc_io_start(dc);
 	/* Stop DC stream before configuring DSI registers
 	 * to avoid visible glitches on panel during transition
 	 * from bootloader to kernel driver
@@ -2771,7 +2911,8 @@ static void tegra_dc_dsi_enable(struct tegra_dc *dc)
 			goto fail;
 		}
 
-		err = tegra_dsi_set_to_hs_mode(dc, dsi);
+		err = tegra_dsi_set_to_hs_mode(dc, dsi,
+				TEGRA_DSI_DRIVEN_BY_DC);
 		if (err < 0) {
 			dev_err(&dc->ndev->dev,
 				"dsi: not able to set to hs mode\n");
@@ -2784,8 +2925,9 @@ static void tegra_dc_dsi_enable(struct tegra_dc *dc)
 	if (dsi->status.driven == DSI_DRIVEN_MODE_DC)
 		tegra_dsi_start_dc_stream(dc, dsi);
 fail:
-	mutex_unlock(&dsi->lock);
 	tegra_dc_io_end(dc);
+	tegra_dc_dsi_release_host(dc);
+	mutex_unlock(&dsi->lock);
 }
 
 static void _tegra_dc_dsi_init(struct tegra_dc *dc)
@@ -3179,8 +3321,7 @@ fail:
 	return err;
 }
 
-
-int tegra_dsi_host_suspend(struct tegra_dc *dc)
+static int tegra_dsi_host_suspend(struct tegra_dc *dc)
 {
 	int err = 0;
 	struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc);
@@ -3188,6 +3329,10 @@ int tegra_dsi_host_suspend(struct tegra_dc *dc)
 	if (dsi->host_suspended)
 		return 0;
 
+	BUG_ON(!tegra_is_clk_enabled(dc->clk));
+	tegra_dc_io_start(dc);
+	dsi->host_suspended = true;
+
 	tegra_dsi_stop_dc_stream(dc, dsi);
 
 	err = tegra_dsi_deep_sleep(dc, dsi, dsi->info.suspend_aggr);
@@ -3195,21 +3340,26 @@ int tegra_dsi_host_suspend(struct tegra_dc *dc)
 		dev_err(&dc->ndev->dev,
 			"DSI failed to enter deep sleep\n");
 
-	dsi->host_suspended = true;
+	tegra_dc_clk_disable(dc);
 
+	tegra_dc_io_end(dc);
 	return err;
 }
 
-
-int tegra_dsi_host_resume(struct tegra_dc *dc)
+static int tegra_dsi_host_resume(struct tegra_dc *dc)
 {
 	int val = 0;
 	int err = 0;
 	struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc);
 
-	if (!dsi->host_suspended)
+	mutex_lock(&dsi->host_resume_lock);
+	cancel_delayed_work_sync(&dsi->idle_work);
+	if (!dsi->host_suspended) {
+		mutex_unlock(&dsi->host_resume_lock);
 		return 0;
+	}
 
+	tegra_dc_clk_enable(dc);
 	switch (dsi->info.suspend_aggr) {
 	case DSI_HOST_SUSPEND_LV0:
 		tegra_dsi_clk_enable(dsi);
@@ -3254,9 +3404,11 @@ int tegra_dsi_host_resume(struct tegra_dc *dc)
 						"is not supported.\n");
 	}
 
+	tegra_dsi_start_dc_stream(dc, dsi);
+
 	dsi->enabled = true;
 	dsi->host_suspended = false;
-	tegra_dsi_start_dc_stream(dc, dsi);
 fail:
+	mutex_unlock(&dsi->host_resume_lock);
 	return err;
 }
@@ -3298,7 +3450,6 @@ static void tegra_dc_dsi_disable(struct tegra_dc *dc)
 			}
 		}
 	}
-
 fail:
 	mutex_unlock(&dsi->lock);
 	tegra_dc_io_end(dc);
@@ -3350,6 +3501,8 @@ struct tegra_dc_out_ops tegra_dc_dsi_ops = {
 	.destroy = tegra_dc_dsi_destroy,
 	.enable = tegra_dc_dsi_enable,
 	.disable = tegra_dc_dsi_disable,
+	.hold = tegra_dc_dsi_hold_host,
+	.release = tegra_dc_dsi_release_host,
 #ifdef CONFIG_PM
 	.suspend = tegra_dc_dsi_suspend,
 	.resume = tegra_dc_dsi_resume,
diff --git a/drivers/video/tegra/dc/ext/dev.c b/drivers/video/tegra/dc/ext/dev.c
index f9c76f8f0d0d..88273e26c51c 100644
--- a/drivers/video/tegra/dc/ext/dev.c
+++ b/drivers/video/tegra/dc/ext/dev.c
@@ -56,6 +56,7 @@ struct tegra_dc_ext_flip_data {
 	struct tegra_dc_ext		*ext;
 	struct work_struct		work;
 	struct tegra_dc_ext_flip_win	win[DC_N_WINDOWS];
+	struct list_head		timestamp_node;
 };
 
 int tegra_dc_ext_get_num_outputs(void)
@@ -207,6 +208,7 @@ static int tegra_dc_ext_set_windowattr(struct tegra_dc_ext *ext,
 {
 	int err = 0;
 	struct tegra_dc_ext_win *ext_win = &ext->win[win->idx];
+	s64 timestamp_ns;
 
 	if (flip_win->handle[TEGRA_DC_Y] == NULL) {
 		win->flags = 0;
@@ -270,9 +272,56 @@ static int tegra_dc_ext_set_windowattr(struct tegra_dc_ext *ext,
 				msecs_to_jiffies(500), NULL);
 	}
 
+#ifndef CONFIG_TEGRA_SIMULATION_PLATFORM
+	timestamp_ns = timespec_to_ns(&flip_win->attr.timestamp);
+
+	if (timestamp_ns) {
+		/* XXX: Should timestamping be overridden by "no_vsync" flag */
+		tegra_dc_config_frame_end_intr(win->dc, true);
+		trace_printk("%s:Before timestamp wait\n", win->dc->ndev->name);
+		err = wait_event_interruptible(win->dc->timestamp_wq,
+				tegra_dc_is_within_n_vsync(win->dc, timestamp_ns));
+		trace_printk("%s:After timestamp wait\n", win->dc->ndev->name);
+		tegra_dc_config_frame_end_intr(win->dc, false);
+	}
+#endif
+	return err;
+}
+
+static void (*flip_callback)(void);
+static spinlock_t flip_callback_lock;
+static bool init_tegra_dc_flip_callback_called;
+
+static int __init init_tegra_dc_flip_callback(void)
+{
+	spin_lock_init(&flip_callback_lock);
+	init_tegra_dc_flip_callback_called = true;
+	return 0;
+}
+
+pure_initcall(init_tegra_dc_flip_callback);
+
+int tegra_dc_set_flip_callback(void (*callback)(void))
+{
+	WARN_ON(!init_tegra_dc_flip_callback_called);
+
+	spin_lock(&flip_callback_lock);
+	flip_callback = callback;
+	spin_unlock(&flip_callback_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_dc_set_flip_callback);
+
+int tegra_dc_unset_flip_callback(void)
+{
+	spin_lock(&flip_callback_lock);
+	flip_callback = NULL;
+	spin_unlock(&flip_callback_lock);
 
 	return 0;
 }
+EXPORT_SYMBOL(tegra_dc_unset_flip_callback);
 
 static void tegra_dc_ext_flip_worker(struct work_struct *work)
 {
@@ -288,9 +337,11 @@ static void tegra_dc_ext_flip_worker(struct work_struct *work)
 
 	for (i = 0; i < DC_N_WINDOWS; i++) {
 		struct tegra_dc_ext_flip_win *flip_win = &data->win[i];
-		int index = flip_win->attr.index;
+		int j = 0, index = flip_win->attr.index;
 		struct tegra_dc_win *win;
 		struct tegra_dc_ext_win *ext_win;
+		struct tegra_dc_ext_flip_data *temp = NULL;
+		s64 head_timestamp = 0;
 
 		if (index < 0)
 			continue;
@@ -302,6 +353,31 @@ static void tegra_dc_ext_flip_worker(struct work_struct *work)
 			(flip_win->attr.flags & TEGRA_DC_EXT_FLIP_FLAG_CURSOR))
 			skip_flip = true;
 
+		mutex_lock(&ext_win->queue_lock);
+		list_for_each_entry(temp, &ext_win->timestamp_queue,
+				timestamp_node) {
+			if (j == 0) {
+				if (unlikely(temp != data))
+					dev_err(&win->dc->ndev->dev,
+							"work queue did NOT dequeue head!!!");
+				else
+					head_timestamp =
+						timespec_to_ns(&flip_win->attr.timestamp);
+			} else {
+				s64 timestamp =
+					timespec_to_ns(&temp->win[i].attr.timestamp);
+
+				skip_flip = !tegra_dc_does_vsync_separate(ext->dc,
+						timestamp, head_timestamp);
+				/* Look ahead only one flip */
+				break;
+			}
+			j++;
+		}
+		if (!list_empty(&ext_win->timestamp_queue))
+			list_del(&data->timestamp_node);
+		mutex_unlock(&ext_win->queue_lock);
+
 		if (win->flags & TEGRA_WIN_FLAG_ENABLED) {
 			int j;
 			for (j = 0; j < TEGRA_DC_NUM_PLANES; j++) {
@@ -327,17 +403,23 @@ static void tegra_dc_ext_flip_worker(struct work_struct *work)
 		tegra_dc_update_windows(wins, nr_win);
 		/* TODO: implement swapinterval here */
 		tegra_dc_sync_windows(wins, nr_win);
-	}
+		if (!tegra_dc_has_multiple_dc()) {
+			spin_lock(&flip_callback_lock);
+			if (flip_callback)
+				flip_callback();
+			spin_unlock(&flip_callback_lock);
+		}
 
-	for (i = 0; i < DC_N_WINDOWS; i++) {
-		struct tegra_dc_ext_flip_win *flip_win = &data->win[i];
-		int index = flip_win->attr.index;
+		for (i = 0; i < DC_N_WINDOWS; i++) {
+			struct tegra_dc_ext_flip_win *flip_win = &data->win[i];
+			int index = flip_win->attr.index;
 
-		if (index < 0)
-			continue;
+			if (index < 0)
+				continue;
 
-		tegra_dc_incr_syncpt_min(ext->dc, index,
-			flip_win->syncpt_max);
+			tegra_dc_incr_syncpt_min(ext->dc, index,
+					flip_win->syncpt_max);
+		}
 	}
 
 	/* unpin and deref previous front buffers */
@@ -449,6 +531,7 @@ static int tegra_dc_ext_flip(struct tegra_dc_ext_user *user,
 	struct tegra_dc_ext_flip_data *data;
 	int work_index = -1;
 	int i, ret = 0;
+	bool has_timestamp = false;
 
 #ifdef CONFIG_ANDROID
 	int index_check[DC_N_WINDOWS] = {0, };
@@ -489,6 +572,8 @@ static int tegra_dc_ext_flip(struct tegra_dc_ext_user *user,
 		int index = args->win[i].index;
 
 		memcpy(&flip_win->attr, &args->win[i], sizeof(flip_win->attr));
+		if (timespec_to_ns(&flip_win->attr.timestamp))
+			has_timestamp = true;
 
 		if (index < 0)
 			continue;
@@ -563,6 +648,11 @@ static int tegra_dc_ext_flip(struct tegra_dc_ext_user *user,
 		ret = -EINVAL;
 		goto unlock;
 	}
+	if (has_timestamp) {
+		mutex_lock(&ext->win[work_index].queue_lock);
+		list_add_tail(&data->timestamp_node, &ext->win[work_index].timestamp_queue);
+		mutex_unlock(&ext->win[work_index].queue_lock);
+	}
 	queue_work(ext->win[work_index].flip_wq, &data->work);
 
 	unlock_windows_for_flip(user, args);
@@ -903,6 +993,8 @@ static int tegra_dc_ext_setup_windows(struct tegra_dc_ext *ext)
 		}
 
 		mutex_init(&win->lock);
+		mutex_init(&win->queue_lock);
+		INIT_LIST_HEAD(&win->timestamp_queue);
 	}
 
 	return 0;
diff --git a/drivers/video/tegra/dc/ext/tegra_dc_ext_priv.h b/drivers/video/tegra/dc/ext/tegra_dc_ext_priv.h
index f68c7d5c93c2..ef7361d1d933 100644
--- a/drivers/video/tegra/dc/ext/tegra_dc_ext_priv.h
+++ b/drivers/video/tegra/dc/ext/tegra_dc_ext_priv.h
@@ -58,6 +58,10 @@ struct tegra_dc_ext_win {
 	struct workqueue_struct	*flip_wq;
 
 	atomic_t		nr_pending_flips;
+
+	struct mutex		queue_lock;
+
+	struct list_head	timestamp_queue;
 };
 
 struct tegra_dc_ext {
diff --git a/drivers/video/tegra/dc/hdmi.c b/drivers/video/tegra/dc/hdmi.c
index 79478ea48f83..55d9163d4faf 100644
--- a/drivers/video/tegra/dc/hdmi.c
+++ b/drivers/video/tegra/dc/hdmi.c
@@ -1370,18 +1370,31 @@ bool tegra_dc_hdmi_detect_test(struct tegra_dc *dc, unsigned char *edid_ptr)
 
 	err = tegra_edid_get_monspecs_test(hdmi->edid, &specs, edid_ptr);
 	if (err < 0) {
-		dev_err(&dc->ndev->dev, "error reading edid\n");
-		goto fail;
-	}
+		/* Check if there's a hard-wired mode, if so, enable it */
+		if (dc->out->n_modes)
+			tegra_dc_enable(dc);
+		else {
+			dev_err(&dc->ndev->dev, "error reading edid\n");
+			goto fail;
+		}
+#ifdef CONFIG_SWITCH
+		hdmi->hpd_switch.state = 0;
+		switch_set_state(&hdmi->hpd_switch, 1);
+#endif
+		dev_info(&dc->ndev->dev, "display detected\n");
 
-	err = tegra_edid_get_eld(hdmi->edid, &hdmi->eld);
-	if (err < 0) {
-		dev_err(&dc->ndev->dev, "error populating eld\n");
-		goto fail;
-	}
-	hdmi->eld_retrieved = true;
+		dc->connected = true;
+		tegra_dc_ext_process_hotplug(dc->ndev->id);
+	} else {
+		err = tegra_edid_get_eld(hdmi->edid, &hdmi->eld);
+		if (err < 0) {
+			dev_err(&dc->ndev->dev, "error populating eld\n");
+			goto fail;
+		}
+		hdmi->eld_retrieved = true;
 
-	tegra_dc_hdmi_detect_config(dc, &specs);
+		tegra_dc_hdmi_detect_config(dc, &specs);
+	}
 
 	return true;
 
@@ -1406,18 +1419,30 @@ static bool tegra_dc_hdmi_detect(struct tegra_dc *dc)
 
 	err = tegra_edid_get_monspecs(hdmi->edid, &specs);
 	if (err < 0) {
-		dev_err(&dc->ndev->dev, "error reading edid\n");
-		goto fail;
-	}
+		if (dc->out->n_modes)
+			tegra_dc_enable(dc);
+		else {
+			dev_err(&dc->ndev->dev, "error reading edid\n");
+			goto fail;
+		}
+#ifdef CONFIG_SWITCH
+		hdmi->hpd_switch.state = 0;
+		switch_set_state(&hdmi->hpd_switch, 1);
+#endif
+		dev_info(&dc->ndev->dev, "display detected\n");
 
-	err = tegra_edid_get_eld(hdmi->edid, &hdmi->eld);
-	if (err < 0) {
-		dev_err(&dc->ndev->dev, "error populating eld\n");
-		goto fail;
-	}
-	hdmi->eld_retrieved = true;
+		dc->connected = true;
+		tegra_dc_ext_process_hotplug(dc->ndev->id);
+	} else {
+		err = tegra_edid_get_eld(hdmi->edid, &hdmi->eld);
+		if (err < 0) {
+			dev_err(&dc->ndev->dev, "error populating eld\n");
+			goto fail;
+		}
+		hdmi->eld_retrieved = true;
 
-	tegra_dc_hdmi_detect_config(dc, &specs);
+		tegra_dc_hdmi_detect_config(dc, &specs);
+	}
 
 	return true;
 
diff --git a/drivers/video/tegra/dc/mode.c b/drivers/video/tegra/dc/mode.c
index 49cc5f5abd53..3a95f2e7ab0e 100644
--- a/drivers/video/tegra/dc/mode.c
+++ b/drivers/video/tegra/dc/mode.c
@@ -137,6 +137,17 @@ static bool check_ref_to_sync(struct tegra_dc_mode *mode)
 	return true;
 }
 
+static s64 calc_frametime_ns(const struct tegra_dc_mode *m)
+{
+	long h_total, v_total;
+	h_total = m->h_active + m->h_front_porch + m->h_back_porch +
+		m->h_sync_width;
+	v_total = m->v_active + m->v_front_porch + m->v_back_porch +
+		m->v_sync_width;
+	return (!m->pclk) ? 0 : (s64)(div_s64(((s64)h_total * v_total *
+					1000000000ULL), m->pclk));
+}
+
 /* return in 1000ths of a Hertz */
 int tegra_dc_calc_refresh(const struct tegra_dc_mode *m)
 {
@@ -247,11 +258,25 @@ int tegra_dc_program_mode(struct tegra_dc *dc, struct tegra_dc_mode *mode)
 	return 0;
 }
 
+static int panel_sync_rate;
+
+int tegra_dc_get_panel_sync_rate(void)
+{
+	return panel_sync_rate;
+}
+EXPORT_SYMBOL(tegra_dc_get_panel_sync_rate);
+
 int tegra_dc_set_mode(struct tegra_dc *dc, const struct tegra_dc_mode *mode)
 {
 	memcpy(&dc->mode, mode, sizeof(dc->mode));
 
+	if (dc->out->type == TEGRA_DC_OUT_RGB)
+		panel_sync_rate = tegra_dc_calc_refresh(mode);
+	else if (dc->out->type == TEGRA_DC_OUT_DSI)
+		panel_sync_rate = dc->out->dsi->rated_refresh_rate * 1000;
+
 	print_mode(dc, mode, __func__);
+	dc->frametime_ns = calc_frametime_ns(mode);
 
 	return 0;
 }
diff --git a/drivers/video/tegra/dc/nvsd.c b/drivers/video/tegra/dc/nvsd.c
index e3058b596f69..6e76ee0f1702 100644
--- a/drivers/video/tegra/dc/nvsd.c
+++ b/drivers/video/tegra/dc/nvsd.c
@@ -809,9 +809,12 @@ static ssize_t nvsd_settings_store(struct kobject *kobj,
 				mutex_unlock(&dc->lock);
 				return -ENODEV;
 			}
-			mutex_unlock(&dc->lock);
 
+			tegra_dc_hold_dc_out(dc);
 			nvsd_init(dc, sd_settings);
+			tegra_dc_release_dc_out(dc);
+
+			mutex_unlock(&dc->lock);
 
 			/* Update backlight state IFF we're disabling! */
 			if (!sd_settings->enable && sd_settings->bl_device) {
diff --git a/drivers/video/tegra/dc/window.c b/drivers/video/tegra/dc/window.c
index 5161dd4f7003..cd91fab428ed 100644
--- a/drivers/video/tegra/dc/window.c
+++ b/drivers/video/tegra/dc/window.c
@@ -24,6 +24,7 @@
 #include "dc_priv.h"
 
 static int no_vsync;
+static atomic_t frame_end_ref = ATOMIC_INIT(0);
 
 module_param_named(no_vsync, no_vsync, int, S_IRUGO | S_IWUSR);
 
@@ -40,6 +41,17 @@ static bool tegra_dc_windows_are_clean(struct tegra_dc_win *windows[],
 	return true;
 }
 
+int tegra_dc_config_frame_end_intr(struct tegra_dc *dc, bool enable)
+{
+	tegra_dc_writel(dc, FRAME_END_INT, DC_CMD_INT_STATUS);
+	if (enable) {
+		atomic_inc(&frame_end_ref);
+		tegra_dc_unmask_interrupt(dc, FRAME_END_INT);
+	} else if (!atomic_dec_return(&frame_end_ref))
+		tegra_dc_mask_interrupt(dc, FRAME_END_INT);
+	return 0;
+}
+
 static int get_topmost_window(u32 *depths, unsigned long *wins)
 {
 	int idx, best = -1;
@@ -219,8 +231,7 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 		return -EFAULT;
 	}
 
-	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE)
-		tegra_dc_host_resume(dc);
+	tegra_dc_hold_dc_out(dc);
 
 	if (no_vsync)
 		tegra_dc_writel(dc, WRITE_MUX_ACTIVE | READ_MUX_ACTIVE,
@@ -407,8 +418,9 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 			FRAME_END_INT | V_BLANK_INT | ALL_UF_INT);
 	} else {
 		clear_bit(V_BLANK_FLIP, &dc->vblank_ref_count);
-		tegra_dc_mask_interrupt(dc,
-			FRAME_END_INT | V_BLANK_INT | ALL_UF_INT);
+		tegra_dc_mask_interrupt(dc, V_BLANK_INT | ALL_UF_INT);
+		if (!atomic_read(&frame_end_ref))
+			tegra_dc_mask_interrupt(dc, FRAME_END_INT);
 	}
 
 	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
@@ -424,6 +436,7 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 	tegra_dc_writel(dc, update_mask, DC_CMD_STATE_CONTROL);
 	trace_printk("%s:update_mask=%#lx\n", dc->ndev->name, update_mask);
 
+	tegra_dc_release_dc_out(dc);
 	mutex_unlock(&dc->lock);
 	if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
 		mutex_unlock(&dc->one_shot_lock);
@@ -456,7 +469,8 @@ void tegra_dc_trigger_windows(struct tegra_dc *dc)
 	}
 
 	if (!dirty) {
-		if (!(dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE))
+		if (!(dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
+			&& !atomic_read(&frame_end_ref))
 			tegra_dc_mask_interrupt(dc, FRAME_END_INT);
 	}
 
diff --git a/drivers/video/tegra/fb.c b/drivers/video/tegra/fb.c
index cb7525e049e9..59c162bb3b4c 100644
--- a/drivers/video/tegra/fb.c
+++ b/drivers/video/tegra/fb.c
@@ -96,6 +96,7 @@ static int tegra_fb_set_par(struct fb_info *info)
 {
 	struct tegra_fb_info *tegra_fb = info->par;
 	struct fb_var_screeninfo *var = &info->var;
+	struct tegra_dc *dc = tegra_fb->win->dc;
 
 	if (var->bits_per_pixel) {
 		/* we only support RGB ordering for now */
@@ -124,10 +125,13 @@ static int tegra_fb_set_par(struct fb_info *info)
 		default:
 			return -EINVAL;
 		}
-		info->fix.line_length = var->xres * var->bits_per_pixel / 8;
-		/* Pad the stride to 16-byte boundary. */
-		info->fix.line_length = round_up(info->fix.line_length,
+		/* if line_length unset, then pad the stride */
+		if (!info->fix.line_length) {
+			info->fix.line_length = var->xres * var->bits_per_pixel
+				/ 8;
+			info->fix.line_length = round_up(info->fix.line_length,
 						TEGRA_LINEAR_PITCH_ALIGNMENT);
+		}
 		tegra_fb->win->stride = info->fix.line_length;
 		tegra_fb->win->stride_uv = 0;
 		tegra_fb->win->phys_addr_u = 0;
@@ -136,17 +140,30 @@ static int tegra_fb_set_par(struct fb_info *info)
 
 	if (var->pixclock) {
 		bool stereo;
+		unsigned old_len = 0;
 		struct fb_videomode m;
+		struct fb_videomode *old_mode = NULL;
 
 		fb_var_to_videomode(&m, var);
 
+		/* Load framebuffer info with new mode details */
+		old_mode = info->mode;
+		old_len  = info->fix.line_length;
+
 		info->mode = (struct fb_videomode *)
 			fb_find_nearest_mode(&m, &info->modelist);
 		if (!info->mode) {
 			dev_warn(&tegra_fb->ndev->dev, "can't match video mode\n");
+			info->mode = old_mode;
 			return -EINVAL;
 		}
 
+		/* Update fix line_length and window stride as per new mode */
+		info->fix.line_length = var->xres * var->bits_per_pixel / 8;
+		info->fix.line_length = round_up(info->fix.line_length,
+			TEGRA_LINEAR_PITCH_ALIGNMENT);
+		tegra_fb->win->stride = info->fix.line_length;
+
 		/*
 		 * only enable stereo if the mode supports it and
 		 * client requests it
@@ -157,10 +174,22 @@ static int tegra_fb_set_par(struct fb_info *info)
 #else
 					FB_VMODE_STEREO_LEFT_RIGHT);
 #endif
-		tegra_dc_set_fb_mode(tegra_fb->win->dc, info->mode, stereo);
-		/* Reflect the mode change on dc */
-		tegra_dc_disable(tegra_fb->win->dc);
-		tegra_dc_enable(tegra_fb->win->dc);
+
+		/* Configure DC with new mode */
+		if (tegra_dc_set_fb_mode(dc, info->mode, stereo)) {
+			/* Error while configuring DC, fallback to old mode */
+			dev_warn(&tegra_fb->ndev->dev, "can't configure dc with mode %ux%u\n",
+				info->mode->xres, info->mode->yres);
+			info->mode = old_mode;
+			info->fix.line_length = old_len;
+			tegra_fb->win->stride = old_len;
+			return -EINVAL;
+		}
+
+		/* Reflect mode change on DC HW */
+		if (dc->enabled)
+			tegra_dc_disable(dc);
+		tegra_dc_enable(dc);
 
 		tegra_fb->win->w.full = dfixed_const(info->mode->xres);
 		tegra_fb->win->h.full = dfixed_const(info->mode->yres);
@@ -326,8 +355,10 @@ static void tegra_fb_imageblit(struct fb_info *info,
 
 static int tegra_fb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
 {
+	struct tegra_fb_info *tegra_fb = (struct tegra_fb_info *)info->par;
 	struct tegra_fb_modedb modedb;
 	struct fb_modelist *modelist;
+	struct fb_vblank vblank = {};
 	int i;
 
 	switch (cmd) {
@@ -370,6 +401,17 @@ static int tegra_fb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long
 			return -EFAULT;
 		break;
 
+	case FBIOGET_VBLANK:
+		tegra_dc_get_fbvblank(tegra_fb->win->dc, &vblank);
+
+		if (copy_to_user(
+			(void __user *)arg, &vblank, sizeof(vblank)))
+			return -EFAULT;
+		break;
+
+	case FBIO_WAITFORVSYNC:
+		return tegra_dc_wait_for_vsync(tegra_fb->win->dc);
+
 	default:
 		return -ENOTTY;
 	}
@@ -607,8 +649,10 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device *ndev,
 		tegra_fb->valid = true;
 	}
 
+	info->fix.line_length = fb_data->xres * fb_data->bits_per_pixel / 8;
+
 	stride = tegra_dc_get_stride(dc, 0);
-	if (!stride) /* default to pad the stride to 16-byte boundary. */
+	if (!stride) /* default to pad the stride */
 		stride = round_up(info->fix.line_length,
 			TEGRA_LINEAR_PITCH_ALIGNMENT);
 
@@ -625,7 +669,6 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device *ndev,
 	info->fix.accel		= FB_ACCEL_NONE;
 	info->fix.smem_start	= fb_phys;
 	info->fix.smem_len	= fb_size;
-	info->fix.line_length = fb_data->xres * fb_data->bits_per_pixel / 8;
 	info->fix.line_length = stride;
 
 	info->var.xres			= fb_data->xres;
@@ -683,6 +726,7 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device *ndev,
 
 	if (dc->mode.pclk > 1000) {
 		struct tegra_dc_mode *mode = &dc->mode;
+		struct fb_videomode vmode;
 
 		if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)
 			info->var.pixclock = KHZ2PICOS(mode->rated_pclk / 1000);
@@ -694,6 +738,10 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device *ndev,
 		info->var.lower_margin = mode->v_front_porch;
 		info->var.hsync_len = mode->h_sync_width;
 		info->var.vsync_len = mode->v_sync_width;
+
+		/* Keep info->var consistent with info->modelist. */
+		fb_var_to_videomode(&vmode, &info->var);
+		fb_add_videomode(&vmode, &info->modelist);
 	}
 
 	return tegra_fb;
diff --git a/drivers/video/tegra/host/bus.c b/drivers/video/tegra/host/bus.c
index 758a5ca4ad94..f22dac288051 100644
--- a/drivers/video/tegra/host/bus.c
+++ b/drivers/video/tegra/host/bus.c
@@ -96,7 +96,7 @@ static int nvhost_bus_match(struct device *_dev, struct device_driver *drv)
 	if (ndrv->id_table)
 		return nvhost_bus_match_id(dev, ndrv->id_table) != NULL;
 	else /* driver does not support id_table */
-		return !strncmp(dev->name, drv->name, strlen(drv->name));
+		return !strcmp(dev->name, drv->name);
 }
 
 static int nvhost_drv_probe(struct device *_dev)
diff --git a/drivers/video/tegra/host/bus_client.c b/drivers/video/tegra/host/bus_client.c
index 0137793b39ee..aaa038221971 100644
--- a/drivers/video/tegra/host/bus_client.c
+++ b/drivers/video/tegra/host/bus_client.c
@@ -159,7 +159,8 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp)
 	}
 	filp->private_data = priv;
 	priv->ch = ch;
-	nvhost_module_add_client(ch->dev, priv);
+	if(nvhost_module_add_client(ch->dev, priv))
+		goto fail;
 
 	if (ch->ctxhandler && ch->ctxhandler->alloc) {
 		priv->hwctx = ch->ctxhandler->alloc(ch->ctxhandler, ch);
diff --git a/drivers/video/tegra/host/chip_support.h b/drivers/video/tegra/host/chip_support.h
index f5d2811f143f..412ce8b65466 100644
--- a/drivers/video/tegra/host/chip_support.h
+++ b/drivers/video/tegra/host/chip_support.h
@@ -125,6 +125,7 @@ struct nvhost_intr_ops {
 	void (*set_syncpt_threshold)(
 		struct nvhost_intr *, u32 id, u32 thresh);
 	void (*enable_syncpt_intr)(struct nvhost_intr *, u32 id);
+	void (*disable_syncpt_intr)(struct nvhost_intr *, u32 id);
 	void (*disable_all_syncpt_intrs)(struct nvhost_intr *);
 	int  (*request_host_general_irq)(struct nvhost_intr *);
 	void (*free_host_general_irq)(struct nvhost_intr *);
diff --git a/drivers/video/tegra/host/gr3d/gr3d.c b/drivers/video/tegra/host/gr3d/gr3d.c
index 715468131d9e..775c77b0e88d 100644
--- a/drivers/video/tegra/host/gr3d/gr3d.c
+++ b/drivers/video/tegra/host/gr3d/gr3d.c
@@ -80,8 +80,10 @@ struct host1x_hwctx *nvhost_3dctx_alloc_common(struct host1x_hwctx_handler *p,
 	ctx->restore = mem_op().alloc(memmgr, p->restore_size * 4, 32,
 		map_restore ? mem_mgr_flag_write_combine
 			    : mem_mgr_flag_uncacheable);
-	if (IS_ERR_OR_NULL(ctx->restore))
+	if (IS_ERR_OR_NULL(ctx->restore)) {
+		ctx->restore = NULL;
 		goto fail;
+	}
 
 	if (map_restore) {
 		ctx->restore_virt = mem_op().mmap(ctx->restore);
diff --git a/drivers/video/tegra/host/gr3d/gr3d.h b/drivers/video/tegra/host/gr3d/gr3d.h
index 3855b237b702..61f708cea95c 100644
--- a/drivers/video/tegra/host/gr3d/gr3d.h
+++ b/drivers/video/tegra/host/gr3d/gr3d.h
@@ -29,6 +29,9 @@
 #define AR3D_PSEQ_QUAD_ID 0x545
 #define AR3D_DW_MEMORY_OUTPUT_ADDRESS 0x904
 #define AR3D_DW_MEMORY_OUTPUT_DATA 0x905
+#define AR3D_FDC_CONTROL_0 0xa00
+#define AR3D_FDC_CONTROL_0_RESET_VAL 0xe00
+#define AR3D_FDC_CONTROL_0_INVALIDATE 1
 #define AR3D_GSHIM_WRITE_MASK 0xb00
 #define AR3D_GSHIM_READ_SELECT 0xb01
 #define AR3D_GLOBAL_MEMORY_OUTPUT_READS 0xe40
diff --git a/drivers/video/tegra/host/gr3d/gr3d_t20.c b/drivers/video/tegra/host/gr3d/gr3d_t20.c
index b6e3896fe50c..694b00527790 100644
--- a/drivers/video/tegra/host/gr3d/gr3d_t20.c
+++ b/drivers/video/tegra/host/gr3d/gr3d_t20.c
@@ -144,7 +144,7 @@ static void save_push_v0(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma)
 			p->save_phys);
 }
 
-static void __init save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr)
+static void save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr)
 {
 	/* 3d: when done, increment syncpt to base+1 */
 	ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
@@ -162,7 +162,7 @@ static void __init save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr)
 			h->syncpt); /* incr 2 */
 }
 
-static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count)
+static void save_direct_v0(u32 *ptr, u32 start_reg, u32 count)
 {
 	ptr[0] = nvhost_opcode_nonincr(host1x_uclass_indoff_r(), 1);
 	ptr[1] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
@@ -170,7 +170,7 @@ static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count)
 	ptr[2] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count);
 }
 
-static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset,
+static void save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset,
 			u32 data_reg, u32 count)
 {
 	ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
@@ -183,7 +183,7 @@ static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset,
 	ptr[4] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count);
 }
 
-static void __init save_end_v0(struct host1x_hwctx_handler *h, u32 *ptr)
+static void save_end_v0(struct host1x_hwctx_handler *h, u32 *ptr)
 {
 	/* Wait for context read service to finish (cpu incr 3) */
 	ptr[0] = nvhost_opcode_nonincr(host1x_uclass_wait_syncpt_base_r(), 1);
@@ -226,7 +226,7 @@ static u32 *save_regs_v0(u32 *ptr, unsigned int *pending,
 
 /*** save ***/
 
-static void __init setup_save_regs(struct save_info *info,
+static void setup_save_regs(struct save_info *info,
 			const struct hwctx_reginfo *regs,
 			unsigned int nr_regs)
 {
@@ -284,7 +284,7 @@ static void __init setup_save_regs(struct save_info *info,
 	info->restore_count = restore_count;
 }
 
-static void __init setup_save(struct host1x_hwctx_handler *h, u32 *ptr)
+static void setup_save(struct host1x_hwctx_handler *h, u32 *ptr)
 {
 	struct save_info info = {
 		ptr,
@@ -371,7 +371,7 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t20_ctxhandler_init(
 
 	p->save_buf = mem_op().alloc(memmgr, p->save_size * sizeof(u32), 32,
 				mem_mgr_flag_write_combine);
-	if (IS_ERR(p->save_buf)) {
+	if (IS_ERR_OR_NULL(p->save_buf)) {
 		p->save_buf = NULL;
 		return NULL;
 	}
diff --git a/drivers/video/tegra/host/gr3d/gr3d_t30.c b/drivers/video/tegra/host/gr3d/gr3d_t30.c
index c35fea2f3ac2..664708c7fc80 100644
--- a/drivers/video/tegra/host/gr3d/gr3d_t30.c
+++ b/drivers/video/tegra/host/gr3d/gr3d_t30.c
@@ -125,6 +125,16 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma)
 			nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0),
 			NVHOST_OPCODE_NOOP);
 
+	/* invalidate the FDC to prevent cache-coherency issues across GPUs
+	   note that we assume FDC_CONTROL_0 is left in the reset state by all
+	   contexts.  the invalidate bit will clear itself, so the register
+	   should be unchanged after this */
+	nvhost_cdma_push(cdma,
+		nvhost_opcode_imm(AR3D_FDC_CONTROL_0,
+			AR3D_FDC_CONTROL_0_RESET_VAL
+				| AR3D_FDC_CONTROL_0_INVALIDATE),
+		NVHOST_OPCODE_NOOP);
+
 	/* set register set 0 and 1 register read memory output addresses,
 	   and send their reads to memory */
 
@@ -132,7 +142,7 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma)
 		nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, 2),
 		nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS, 1));
 	nvhost_cdma_push(cdma,
-		nvhost_opcode_nonincr(0x904, 1),
+		nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_ADDRESS, 1),
 		ctx->restore_phys + restore_set1_offset * 4);
 
 	nvhost_cdma_push(cdma,
@@ -150,7 +160,7 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma)
 			p->save_phys);
 }
 
-static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr)
+static void save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr)
 {
 	ptr[0] = nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_DATA,
 			RESTORE_BEGIN_SIZE);
@@ -158,7 +168,7 @@ static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr)
 	ptr += RESTORE_BEGIN_SIZE;
 }
 
-static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count)
+static void save_direct_v1(u32 *ptr, u32 start_reg, u32 count)
 {
 	ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
 			AR3D_DW_MEMORY_OUTPUT_DATA, 1);
@@ -172,7 +182,7 @@ static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count)
 	ptr[3] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count);
 }
 
-static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset,
+static void save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset,
 			u32 data_reg, u32 count)
 {
 	ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
@@ -189,7 +199,7 @@ static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset,
 	ptr[5] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count);
 }
 
-static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr)
+static void save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr)
 {
 	/* write end of restore buffer */
 	ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
@@ -224,7 +234,7 @@ static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr)
 
 
 
-static void __init setup_save_regs(struct save_info *info,
+static void setup_save_regs(struct save_info *info,
 			const struct hwctx_reginfo *regs,
 			unsigned int nr_regs)
 {
@@ -282,7 +292,7 @@ static void __init setup_save_regs(struct save_info *info,
 	info->restore_count = restore_count;
 }
 
-static void __init switch_gpu(struct save_info *info,
+static void switch_gpu(struct save_info *info,
 			unsigned int save_src_set,
 			u32 save_dest_sets,
 			u32 restore_dest_sets)
@@ -303,7 +313,7 @@ static void __init switch_gpu(struct save_info *info,
 	info->restore_count += 1;
 }
 
-static void __init setup_save(struct host1x_hwctx_handler *p, u32 *ptr)
+static void setup_save(struct host1x_hwctx_handler *p, u32 *ptr)
 {
 	struct save_info info = {
 		ptr,
@@ -399,7 +409,7 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t30_ctxhandler_init(
 
 	p->save_buf = mem_op().alloc(memmgr, p->save_size * 4, 32,
 				mem_mgr_flag_write_combine);
-	if (IS_ERR(p->save_buf)) {
+	if (IS_ERR_OR_NULL(p->save_buf)) {
 		p->save_buf = NULL;
 		return NULL;
 	}
diff --git a/drivers/video/tegra/host/gr3d/scale3d.c b/drivers/video/tegra/host/gr3d/scale3d.c
index 5922b55a836a..49147975a9e4 100644
--- a/drivers/video/tegra/host/gr3d/scale3d.c
+++ b/drivers/video/tegra/host/gr3d/scale3d.c
@@ -1,9 +1,9 @@
 /*
- * drivers/video/tegra/host/t20/scale3d.c
+ * drivers/video/tegra/host/gr3d/scale3d.c
  *
  * Tegra Graphics Host 3D clock scaling
  *
- * Copyright (c) 2010-2012, NVIDIA Corporation.
+ * Copyright (c) 2010-2012, NVIDIA Corporation.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -23,12 +23,11 @@
  *
  * module3d_notify_busy() is called upon submit, module3d_notify_idle() is
  * called when all outstanding submits are completed. Idle times are measured
- * over a fixed time period (scale3d.p_period). If the 3d module idle time
- * percentage goes over the limit (set in scale3d.p_idle_max), 3d clocks are
- * scaled down. If the percentage goes under the minimum limit (set in
- * scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made
- * over the time frame given in scale3d.p_fast_response for clocking up
- * quickly in response to load peaks.
+ * over a fixed time period (scale3d.p_estimation_window). If the 3d module
+ * idle time percentage goes over the limit (set in scale3d.p_idle_max), 3d
+ * clocks are scaled down. If the percentage goes under the minimum limit (set
+ * in scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made
+ * for clocking up quickly in response to load peaks.
  *
  * 3d.emc clock is scaled proportionately to 3d clock, with a quadratic-
  * bezier-like factor added to pull 3d.emc rate a bit lower.
@@ -37,10 +36,31 @@
 #include <linux/debugfs.h>
 #include <linux/types.h>
 #include <linux/clk.h>
+#include <linux/slab.h>
 #include <mach/clk.h>
 #include <mach/hardware.h>
 #include "scale3d.h"
 #include "dev.h"
+#include <media/tegra_camera.h>
+
+#define GR3D_PRINT_STATS   BIT(1)
+#define GR3D_PRINT_BUSY    BIT(2)
+#define GR3D_PRINT_IDLE    BIT(3)
+#define GR3D_PRINT_HINT    BIT(4)
+#define GR3D_PRINT_TARGET  BIT(5)
+
+/* time frame for load and hint tracking - when events come in at a larger
+ * interval, this probably indicates the current estimates are stale
+ */
+#define GR3D_TIMEFRAME 1000000 /* 1 sec */
+
+/* the number of frames to use in the running average of load estimates and
+ * throughput hints. Choosing 6 frames targets a window of about 100 msec.
+ * Large fluctuations in frame times require a window that's large enough to
+ * prevent spiky scaling behavior, which in turn exacerbates frame rate
+ * instability.
+ */
+#define GR3D_FRAME_SPAN 6
 
 static int scale3d_is_enabled(void);
 static void scale3d_enable(int enable);
@@ -48,54 +68,73 @@ static void scale3d_enable(int enable);
 #define POW2(x) ((x) * (x))
 
 /*
+ * 3D clock scaling should be treated differently when camera is on in AP37.
+ * 3D in AP37 requires 1.3V and combining it with MPE reaches to EDP limit.
+ * 3D clock really needs to be set to lower frequency which requires 1.0V.
+ * The same thing applies to 3D EMC clock.
+ */
+#define CAMERA_3D_CLK 300000000
+#define CAMERA_3D_EMC_CLK 437000000
+
+/*
  * debugfs parameters to control 3d clock scaling test
  *
- * period        - time period for clock rate evaluation
- * fast_response - time period for evaluation of 'busy' spikes
- * idle_min      - if less than [idle_min] percent idle over [fast_response]
- *                 microseconds, clock up.
- * idle_max      - if over [idle_max] percent idle over [period] microseconds,
- *                 clock down.
+ * estimation_window  - time period for clock rate evaluation
+ * idle_min           - if less than [idle_min / 10] percent idle over
+ *                      [estimation_window] microseconds, clock up.
+ * idle_max      - if over [idle_max / 10] percent idle over
+ *                 [estimation_window] microseconds, clock down.
  * max_scale     - limits rate changes to no less than (100 - max_scale)% or
  *                 (100 + 2 * max_scale)% of current clock rate
- * verbosity     - set above 5 for debug printouts
+ * verbosity     - bit flag to control debug printouts:
+ *                 1 - stats
+ *                 2 - busy
+ *                 3 - idle
+ *                 4 - hints
+ *                 5 - target frequencies
  */
 
 struct scale3d_info_rec {
 	struct mutex lock; /* lock for timestamps etc */
 	int enable;
 	int init;
-	ktime_t idle_frame;
-	ktime_t fast_frame;
-	ktime_t last_idle;
-	ktime_t last_short_term_idle;
+	ktime_t last_scale;
 	int is_idle;
-	ktime_t last_tweak;
-	ktime_t last_down;
+	ktime_t last_adjust;
 	int fast_up_count;
 	int slow_down_count;
 	int is_scaled;
-	int fast_responses;
-	unsigned long idle_total;
-	unsigned long idle_short_term_total;
-	unsigned long max_rate_3d;
 	long emc_slope;
 	long emc_offset;
 	long emc_dip_slope;
 	long emc_dip_offset;
 	long emc_xmid;
+	unsigned long max_rate_3d;
 	unsigned long min_rate_3d;
+	ktime_t last_throughput_hint;
+
 	struct work_struct work;
 	struct delayed_work idle_timer;
+
+	ktime_t last_estimation_window;
+	long last_total_idle;
+	long total_idle;
+	ktime_t estimation_window;
+	ktime_t last_notification;
+	long idle_estimate;
+
 	unsigned int scale;
-	unsigned int p_period;
-	unsigned int period;
+	unsigned int p_busy_cutoff;
+	unsigned int p_estimation_window;
+	unsigned int p_use_throughput_hint;
+	unsigned int p_throughput_lo_limit;
+	unsigned int p_throughput_lower_limit;
+	unsigned int p_throughput_hi_limit;
+	unsigned int p_scale_step;
 	unsigned int p_idle_min;
 	unsigned int idle_min;
 	unsigned int p_idle_max;
 	unsigned int idle_max;
-	unsigned int p_fast_response;
-	unsigned int fast_response;
 	unsigned int p_adjust;
 	unsigned int p_scale_emc;
 	unsigned int p_emc_dip;
@@ -103,13 +142,15 @@ struct scale3d_info_rec {
 	struct clk *clk_3d;
 	struct clk *clk_3d2;
 	struct clk *clk_3d_emc;
+	int *freqlist;
+	int freq_count;
 };
 
 static struct scale3d_info_rec scale3d;
 
-static void scale3d_clocks(unsigned long percent)
+static void scale_to_freq(unsigned long hz)
 {
-	unsigned long hz, curr;
+	unsigned long curr;
 
 	if (!tegra_is_clk_enabled(scale3d.clk_3d))
 		return;
@@ -119,7 +160,8 @@ static void scale3d_clocks(unsigned long percent)
 			return;
 
 	curr = clk_get_rate(scale3d.clk_3d);
-	hz = percent * (curr / 100);
+	if (hz == curr)
+		return;
 
 	if (!(hz >= scale3d.max_rate_3d && curr == scale3d.max_rate_3d)) {
 		if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
@@ -139,6 +181,16 @@ static void scale3d_clocks(unsigned long percent)
 	}
 }
 
+static void scale3d_clocks(unsigned long percent)
+{
+	unsigned long hz, curr;
+
+	curr = clk_get_rate(scale3d.clk_3d);
+	hz = percent * (curr / 100);
+
+	scale_to_freq(hz);
+}
+
 static void scale3d_clocks_handler(struct work_struct *work)
 {
 	unsigned int scale;
@@ -164,12 +216,26 @@ void nvhost_scale3d_suspend(struct nvhost_device *dev)
 static void reset_3d_clocks(void)
 {
 	if (clk_get_rate(scale3d.clk_3d) != scale3d.max_rate_3d) {
-		clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d);
-		if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
-			clk_set_rate(scale3d.clk_3d2, scale3d.max_rate_3d);
-		if (scale3d.p_scale_emc)
-			clk_set_rate(scale3d.clk_3d_emc,
-				clk_round_rate(scale3d.clk_3d_emc, UINT_MAX));
+		if (is_tegra_camera_on())
+			clk_set_rate(scale3d.clk_3d, CAMERA_3D_CLK);
+		else
+			clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d);
+		if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) {
+			if (is_tegra_camera_on())
+				clk_set_rate(scale3d.clk_3d2, CAMERA_3D_CLK);
+			else
+				clk_set_rate(scale3d.clk_3d2,
+							scale3d.max_rate_3d);
+		}
+		if (scale3d.p_scale_emc) {
+			if (is_tegra_camera_on())
+				clk_set_rate(scale3d.clk_3d_emc,
+					CAMERA_3D_EMC_CLK);
+			else
+				clk_set_rate(scale3d.clk_3d_emc,
+					clk_round_rate(scale3d.clk_3d_emc,
+								UINT_MAX));
+		}
 	}
 }
 
@@ -207,15 +273,6 @@ static void scale3d_enable(int enable)
 		reset_3d_clocks();
 }
 
-static void reset_scaling_counters(ktime_t time)
-{
-	scale3d.idle_total = 0;
-	scale3d.idle_short_term_total = 0;
-	scale3d.last_idle = time;
-	scale3d.last_short_term_idle = time;
-	scale3d.idle_frame = time;
-}
-
 /* scaling_adjust - use scale up / scale down hint counts to adjust scaling
  * parameters.
  *
@@ -228,8 +285,6 @@ static void reset_scaling_counters(ktime_t time)
  *
  * the parameters adjusted are
  *
- * * fast_response time
- * * period - time for scaling down estimate
  * * idle_min percentage
  * * idle_max percentage
  */
@@ -242,13 +297,11 @@ static void reset_scaling_counters(ktime_t time)
 static void scaling_adjust(ktime_t time)
 {
 	long hint_ratio;
-	long fast_response_adjustment;
-	long period_adjustment;
 	int idle_min_adjustment;
 	int idle_max_adjustment;
 	unsigned long dt;
 
-	dt = (unsigned long) ktime_us_delta(time, scale3d.last_tweak);
+	dt = (unsigned long) ktime_us_delta(time, scale3d.last_adjust);
 	if (dt < SCALING_ADJUST_PERIOD)
 		return;
 
@@ -256,13 +309,9 @@ static void scaling_adjust(ktime_t time)
 				 (scale3d.slow_down_count + 1);
 
 	if (hint_ratio > HINT_RATIO_MAX) {
-		fast_response_adjustment = -((int) scale3d.p_fast_response) / 4;
-		period_adjustment = scale3d.p_period / 2;
 		idle_min_adjustment = scale3d.p_idle_min;
 		idle_max_adjustment = scale3d.p_idle_max;
 	} else if (hint_ratio < HINT_RATIO_MIN) {
-		fast_response_adjustment = scale3d.p_fast_response / 2;
-		period_adjustment = -((int) scale3d.p_period) / 4;
 		idle_min_adjustment = -((int) scale3d.p_idle_min) / 2;
 		idle_max_adjustment = -((int) scale3d.p_idle_max) / 2;
 	} else {
@@ -277,33 +326,23 @@ static void scaling_adjust(ktime_t time)
 			diff *= 2;
 		}
 
-		fast_response_adjustment = diff *
-			(scale3d.p_fast_response / (HINT_RATIO_DIFF * 2));
-		period_adjustment =
-			diff * (scale3d.p_period / HINT_RATIO_DIFF);
 		idle_min_adjustment =
 			(factor * (int) scale3d.p_idle_min) / HINT_RATIO_DIFF;
 		idle_max_adjustment =
 			(factor * (int) scale3d.p_idle_max) / HINT_RATIO_DIFF;
 	}
 
-	scale3d.fast_response =
-		scale3d.p_fast_response + fast_response_adjustment;
-	scale3d.period = scale3d.p_period + period_adjustment;
-		scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment;
+	scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment;
 	scale3d.idle_max = scale3d.p_idle_max + idle_max_adjustment;
 
-	if (scale3d.p_verbosity >= 10)
-		pr_info("scale3d stats: + %d - %d (/ %d) f %u p %u min %u max %u\n",
+	if (scale3d.p_verbosity & GR3D_PRINT_STATS)
+		pr_info("scale3d stats: + %d - %d min %u max %u\n",
 			scale3d.fast_up_count, scale3d.slow_down_count,
-			scale3d.fast_responses, scale3d.fast_response,
-			scale3d.period, scale3d.idle_min, scale3d.idle_max);
+			scale3d.idle_min, scale3d.idle_max);
 
 	scale3d.fast_up_count = 0;
 	scale3d.slow_down_count = 0;
-	scale3d.fast_responses = 0;
-	scale3d.last_down = time;
-	scale3d.last_tweak = time;
+	scale3d.last_adjust = time;
 }
 
 #undef SCALING_ADJUST_PERIOD
@@ -316,61 +355,101 @@ static void scaling_state_check(ktime_t time)
 {
 	unsigned long dt;
 
-	/* adjustment: set scale parameters (fast_response, period) +/- 25%
+	/* adjustment: set scale parameters (idle_min, idle_max) +/- 25%
 	 * based on ratio of scale up to scale down hints
 	 */
 	if (scale3d.p_adjust)
 		scaling_adjust(time);
 	else {
-		scale3d.fast_response = scale3d.p_fast_response;
-		scale3d.period = scale3d.p_period;
 		scale3d.idle_min = scale3d.p_idle_min;
 		scale3d.idle_max = scale3d.p_idle_max;
 	}
 
-	/* check for load peaks */
-	dt = (unsigned long) ktime_us_delta(time, scale3d.fast_frame);
-	if (dt > scale3d.fast_response) {
-		unsigned long idleness =
-			(scale3d.idle_short_term_total * 100) / dt;
-		scale3d.fast_responses++;
-		scale3d.fast_frame = time;
-		/* if too busy, scale up */
-		if (idleness < scale3d.idle_min) {
-			scale3d.is_scaled = 0;
-			scale3d.fast_up_count++;
-			if (scale3d.p_verbosity >= 5)
-				pr_info("scale3d: %ld%% busy\n",
-					100 - idleness);
-
-			reset_3d_clocks();
-			reset_scaling_counters(time);
-			return;
-		}
-		scale3d.idle_short_term_total = 0;
-		scale3d.last_short_term_idle = time;
+	dt = (unsigned long) ktime_us_delta(time, scale3d.last_scale);
+	if (dt < scale3d.p_estimation_window)
+		return;
+
+	scale3d.last_scale = time;
+
+	/* if too busy, scale up */
+	if (scale3d.idle_estimate < scale3d.idle_min) {
+		scale3d.is_scaled = 0;
+		scale3d.fast_up_count++;
+		if (scale3d.p_verbosity & GR3D_PRINT_BUSY)
+			pr_info("scale3d: %ld/1000 busy\n",
+				1000 - scale3d.idle_estimate);
+
+		reset_3d_clocks();
+		return;
 	}
 
-	dt = (unsigned long) ktime_us_delta(time, scale3d.idle_frame);
-	if (dt > scale3d.period) {
-		unsigned long idleness = (scale3d.idle_total * 100) / dt;
+	if (scale3d.p_verbosity & GR3D_PRINT_IDLE)
+		pr_info("scale3d: idle %lu/1000\n",
+			scale3d.idle_estimate);
 
-		if (scale3d.p_verbosity >= 5)
-			pr_info("scale3d: idle %lu, ~%lu%%\n",
-				scale3d.idle_total, idleness);
+	if (scale3d.idle_estimate > scale3d.idle_max) {
+		if (!scale3d.is_scaled)
+			scale3d.is_scaled = 1;
 
-		if (idleness > scale3d.idle_max) {
-			if (!scale3d.is_scaled) {
-				scale3d.is_scaled = 1;
-				scale3d.last_down = time;
-			}
-			scale3d.slow_down_count++;
-			/* if idle time is high, clock down */
-			scale3d.scale = 100 - (idleness - scale3d.idle_min);
-			schedule_work(&scale3d.work);
-		}
+		scale3d.slow_down_count++;
+		/* if idle time is high, clock down */
+		scale3d.scale =
+			100 - (scale3d.idle_estimate - scale3d.idle_min) / 10;
+		schedule_work(&scale3d.work);
+	}
+}
+
+/* the idle estimate is done by keeping 2 time stamps, initially set to the
+ * same time. Once the estimation_window time has been exceeded, one time
+ * stamp is moved up to the current time. The idle estimate is calculated
+ * based on the idle time percentage from the earlier estimate. The next time
+ * an estimation_window time is exceeded, the previous idle time and estimates
+ * are moved up - this is intended to prevent abrupt changes to the idle
+ * estimate.
+ */
+static void update_load_estimate(int idle)
+{
+	unsigned long window;
+	unsigned long t;
+
+	ktime_t now = ktime_get();
+	t = ktime_us_delta(now, scale3d.last_notification);
 
-		reset_scaling_counters(time);
+	/* if the last event was over GR3D_TIMEFRAME usec ago (1 sec), the
+	 * current load tracking data is probably stale
+	 */
+	if (t > GR3D_TIMEFRAME) {
+		scale3d.is_idle = idle;
+		scale3d.last_notification = now;
+		scale3d.estimation_window = now;
+		scale3d.last_estimation_window = now;
+		scale3d.total_idle = 0;
+		scale3d.last_total_idle = 0;
+		scale3d.idle_estimate = idle ? 1000 : 0;
+		return;
+	}
+
+	if (scale3d.is_idle) {
+		scale3d.total_idle += t;
+		scale3d.last_total_idle += t;
+	}
+
+	scale3d.is_idle = idle;
+	scale3d.last_notification = now;
+
+	window = ktime_us_delta(now, scale3d.last_estimation_window);
+	/* prevent division by 0 if events come in less than 1 usec apart */
+	if (window > 0)
+		scale3d.idle_estimate =
+			(1000 * scale3d.last_total_idle) / window;
+
+	/* move up to the last estimation window */
+	if (ktime_us_delta(now, scale3d.estimation_window) >
+		scale3d.p_estimation_window) {
+		scale3d.last_estimation_window = scale3d.estimation_window;
+		scale3d.last_total_idle = scale3d.total_idle;
+		scale3d.total_idle = 0;
+		scale3d.estimation_window = now;
 	}
 }
 
@@ -378,65 +457,226 @@ void nvhost_scale3d_notify_idle(struct nvhost_device *dev)
 {
 	ktime_t t;
 	unsigned long dt;
+	int delay;
 
 	if (!scale3d.enable)
 		return;
 
-	mutex_lock(&scale3d.lock);
+	update_load_estimate(1);
 
 	t = ktime_get();
 
-	if (scale3d.is_idle) {
-		dt = ktime_us_delta(t, scale3d.last_idle);
-		scale3d.idle_total += dt;
-		dt = ktime_us_delta(t, scale3d.last_short_term_idle);
-		scale3d.idle_short_term_total += dt;
-	} else
-		scale3d.is_idle = 1;
+	/* if throughput hint enabled, and last hint is recent enough, return */
+	if (scale3d.p_use_throughput_hint) {
+		dt = ktime_us_delta(t, scale3d.last_throughput_hint);
+		if (dt < GR3D_TIMEFRAME)
+			return;
+	}
 
-	scale3d.last_idle = t;
-	scale3d.last_short_term_idle = t;
+	mutex_lock(&scale3d.lock);
 
-	scaling_state_check(scale3d.last_idle);
+	scaling_state_check(t);
 
-	/* delay idle_max % of 2 * fast_response time (given in microseconds) */
-	schedule_delayed_work(&scale3d.idle_timer,
-		msecs_to_jiffies((scale3d.idle_max * scale3d.fast_response)
-			/ 50000));
+	/* delay idle_max % of 2 * estimation_window (given in microseconds) */
+	delay = (scale3d.idle_max * scale3d.p_estimation_window) / 500000;
+	schedule_delayed_work(&scale3d.idle_timer, msecs_to_jiffies(delay));
 
 	mutex_unlock(&scale3d.lock);
 }
 
 void nvhost_scale3d_notify_busy(struct nvhost_device *dev)
 {
-	unsigned long idle;
-	unsigned long short_term_idle;
 	ktime_t t;
 
 	if (!scale3d.enable)
 		return;
 
-	mutex_lock(&scale3d.lock);
-
-	cancel_delayed_work(&scale3d.idle_timer);
+	update_load_estimate(0);
 
 	t = ktime_get();
 
-	if (scale3d.is_idle) {
-		idle = (unsigned long)
-			ktime_us_delta(t, scale3d.last_idle);
-		scale3d.idle_total += idle;
-		short_term_idle =
-			ktime_us_delta(t, scale3d.last_short_term_idle);
-		scale3d.idle_short_term_total += short_term_idle;
-		scale3d.is_idle = 0;
+	/* if throughput hint enabled, and last hint is recent enough, return */
+	if (scale3d.p_use_throughput_hint) {
+		unsigned long dt;
+		dt = ktime_us_delta(t, scale3d.last_throughput_hint);
+		if (dt < GR3D_TIMEFRAME)
+			return;
 	}
 
+	mutex_lock(&scale3d.lock);
+
+	cancel_delayed_work(&scale3d.idle_timer);
 	scaling_state_check(t);
 
 	mutex_unlock(&scale3d.lock);
 }
 
+struct score {
+	int size;		/* number of elements */
+	int pos;		/* position in ring buffer */
+	int count;		/* actual item count */
+	unsigned int sum;	/* running sum */
+	unsigned int prev;	/* previous score after 'reset' operation */
+	unsigned int list[];	/* ring buffer */
+};
+
+static struct score *score_init(int capacity)
+{
+	struct score *s;
+
+	s = kzalloc(sizeof(struct score) + capacity * sizeof(int), GFP_KERNEL);
+	if (s == NULL)
+		return NULL;
+
+	s->size = capacity;
+
+	return s;
+}
+
+static void score_delete(struct score *s)
+{
+	kfree(s);
+}
+
+#define score_get_average(s) ((s)->count ? (s)->sum / (s)->count : 0)
+
+static void score_add(struct score *s, unsigned int reading)
+{
+	if (s->count < s->size) {
+		s->sum += reading;
+		s->count++;
+	} else
+		s->sum = s->sum - s->list[s->pos] + reading;
+
+	s->list[s->pos] = reading;
+	s->pos = (s->pos + 1) % s->size;
+}
+
+
+static unsigned int score_reset(struct score *s)
+{
+	s->prev = s->sum;
+
+	s->count = 0;
+	s->pos = 0;
+	s->sum = 0;
+
+	return s->prev;
+}
+
+int freqlist_up(long target, int steps)
+{
+	int i, pos;
+
+	for (i = 0; i < scale3d.freq_count; i++)
+		if (scale3d.freqlist[i] >= target)
+			break;
+
+	pos = min(scale3d.freq_count - 1, i + steps);
+	return scale3d.freqlist[pos];
+}
+
+int freqlist_down(long target, int steps)
+{
+	int i, pos;
+
+	for (i = scale3d.freq_count - 1; i >= 0; i--)
+		if (scale3d.freqlist[i] <= target)
+			break;
+
+	pos = max(0, i - steps);
+	return scale3d.freqlist[pos];
+}
+
+static struct score *busy_history;
+static struct score *hint_history;
+
+/* When a throughput hint is given, perform scaling based on the hint and on
+ * the current idle estimation. This is done as follows:
+ *
+ * 1. On moderate loads force min frequency if the throughput hint is not too
+ *    low.
+ * 2. Otherwise, calculate target-rate = max-rate * load-percentage
+ * 3. Unless the current or average throughput hint is below the minimum
+ *    limit, in which case, choose a higher rate
+ * 4. Or the average throughput hint is above the maximum limit, in which case,
+ *    choose a lower rate.
+ */
+void nvhost_scale3d_set_throughput_hint(int hint)
+{
+	ktime_t now;
+	long busy;
+	long curr;
+	long target;
+	long dt;
+	int avg_busy, avg_hint;
+
+	if (!scale3d.enable)
+		return;
+
+	if (!scale3d.p_use_throughput_hint)
+		return;
+
+	if (scale3d.p_verbosity & GR3D_PRINT_HINT)
+		pr_info("3fds: idle %ld, hint %d\n",
+			scale3d.idle_estimate, hint);
+
+	now = ktime_get();
+	dt = ktime_us_delta(now, scale3d.last_throughput_hint);
+	if (dt > GR3D_TIMEFRAME) {
+		score_reset(busy_history);
+		score_reset(hint_history);
+	}
+
+	scale3d.last_throughput_hint = now;
+
+	busy = 1000 - scale3d.idle_estimate;
+	curr = clk_get_rate(scale3d.clk_3d);
+	target = scale3d.min_rate_3d;
+
+	score_add(busy_history, busy);
+	score_add(hint_history, hint);
+
+	avg_busy = score_get_average(busy_history);
+	avg_hint = score_get_average(hint_history);
+
+	if (busy > 0)
+		target = (curr / 1000) * busy;
+
+	/* In practice, running the gpu at min frequency is typically
+	 * sufficient to keep up performance at loads up to 70% on cases,
+	 * but the average hint value is tested to keep performance up if
+	 * needed.
+	 */
+	if (avg_busy <= scale3d.p_busy_cutoff &&
+	    avg_hint >= scale3d.p_throughput_lower_limit)
+		target = scale3d.min_rate_3d;
+	else {
+		target = (scale3d.max_rate_3d / 1000) * avg_busy;
+
+		/* Scale up if either the current hint or the running average
+		 * are below the target to prevent performance drop.
+		 */
+		if (hint <= scale3d.p_throughput_lo_limit ||
+		    avg_hint <= scale3d.p_throughput_lo_limit) {
+			if (target < curr)
+				target = curr;
+			target = freqlist_up(target, scale3d.p_scale_step);
+		} else if (avg_hint >= scale3d.p_throughput_hi_limit) {
+			if (target > curr)
+				target = curr;
+			target = freqlist_down(target, scale3d.p_scale_step);
+		}
+	}
+
+	scale_to_freq(target);
+
+	if (scale3d.p_verbosity & GR3D_PRINT_TARGET)
+		pr_info("3dfs: busy %ld <%d>, curr %ld, t %ld, hint %d <%d>\n",
+			busy, avg_busy, curr / 1000000, target, hint, avg_hint);
+}
+EXPORT_SYMBOL(nvhost_scale3d_set_throughput_hint);
+
 static void scale3d_idle_handler(struct work_struct *work)
 {
 	int notify_idle = 0;
@@ -458,19 +698,6 @@ static void scale3d_idle_handler(struct work_struct *work)
 		nvhost_scale3d_notify_idle(NULL);
 }
 
-void nvhost_scale3d_reset()
-{
-	ktime_t t;
-
-	if (!scale3d.enable)
-		return;
-
-	t = ktime_get();
-	mutex_lock(&scale3d.lock);
-	reset_scaling_counters(t);
-	mutex_unlock(&scale3d.lock);
-}
-
 /*
  * debugfs parameters to control 3d clock scaling
  */
@@ -495,13 +722,17 @@ void nvhost_scale3d_debug_init(struct dentry *de)
 		} \
 	} while (0)
 
-	CREATE_SCALE3D_FILE(fast_response);
+	CREATE_SCALE3D_FILE(estimation_window);
 	CREATE_SCALE3D_FILE(idle_min);
 	CREATE_SCALE3D_FILE(idle_max);
-	CREATE_SCALE3D_FILE(period);
 	CREATE_SCALE3D_FILE(adjust);
 	CREATE_SCALE3D_FILE(scale_emc);
 	CREATE_SCALE3D_FILE(emc_dip);
+	CREATE_SCALE3D_FILE(use_throughput_hint);
+	CREATE_SCALE3D_FILE(throughput_hi_limit);
+	CREATE_SCALE3D_FILE(throughput_lo_limit);
+	CREATE_SCALE3D_FILE(throughput_lower_limit);
+	CREATE_SCALE3D_FILE(scale_step);
 	CREATE_SCALE3D_FILE(verbosity);
 #undef CREATE_SCALE3D_FILE
 }
@@ -532,12 +763,17 @@ static ssize_t enable_3d_scaling_store(struct device *dev,
 static DEVICE_ATTR(enable_3d_scaling, S_IRUGO | S_IWUSR,
 	enable_3d_scaling_show, enable_3d_scaling_store);
 
+#define MAX_FREQ_COUNT 0x40 /* 64 frequencies should be enough for anyone */
+
 void nvhost_scale3d_init(struct nvhost_device *d)
 {
 	if (!scale3d.init) {
 		int error;
 		unsigned long max_emc, min_emc;
 		long correction;
+		long rate;
+		int freqs[MAX_FREQ_COUNT];
+
 		mutex_init(&scale3d.lock);
 
 		INIT_WORK(&scale3d.work, scale3d_clocks_handler);
@@ -632,30 +868,74 @@ void nvhost_scale3d_init(struct nvhost_device *d)
 				POW2(scale3d.max_rate_3d - scale3d.emc_xmid);
 		scale3d.emc_dip_offset -= correction;
 
+		scale3d.is_idle = 1;
+
 		/* set scaling parameter defaults */
 		scale3d.enable = 1;
-		scale3d.period = scale3d.p_period = 100000;
-		scale3d.idle_min = scale3d.p_idle_min = 10;
-		scale3d.idle_max = scale3d.p_idle_max = 15;
-		scale3d.fast_response = scale3d.p_fast_response = 7000;
+		scale3d.idle_min = scale3d.p_idle_min = 100;
+		scale3d.idle_max = scale3d.p_idle_max = 150;
 		scale3d.p_scale_emc = 1;
 		scale3d.p_emc_dip = 1;
 		scale3d.p_verbosity = 0;
 		scale3d.p_adjust = 1;
+		scale3d.p_use_throughput_hint = 1;
+		scale3d.p_throughput_lower_limit = 940;
+		scale3d.p_throughput_lo_limit = 990;
+		scale3d.p_throughput_hi_limit = 1010;
+		scale3d.p_scale_step = 1;
+		scale3d.p_estimation_window = 8000;
+		scale3d.p_busy_cutoff = 750;
 
 		error = device_create_file(&d->dev,
 				&dev_attr_enable_3d_scaling);
 		if (error)
 			dev_err(&d->dev, "failed to create sysfs attributes");
 
+		rate = 0;
+		scale3d.freq_count = 0;
+		while (rate <= scale3d.max_rate_3d) {
+			long rounded_rate;
+			if (unlikely(scale3d.freq_count == MAX_FREQ_COUNT)) {
+				pr_err("%s: too many frequencies\n", __func__);
+				break;
+			}
+			rounded_rate =
+				clk_round_rate(scale3d.clk_3d, rate);
+			freqs[scale3d.freq_count++] = rounded_rate;
+			rate = rounded_rate + 2000;
+		}
+		scale3d.freqlist =
+			kmalloc(scale3d.freq_count * sizeof(int), GFP_KERNEL);
+		if (scale3d.freqlist == NULL)
+			pr_err("%s: can\'t allocate freq table\n", __func__);
+		else	/* only copy when the allocation succeeded */
+			memcpy(scale3d.freqlist, freqs,
+				scale3d.freq_count * sizeof(int));
+
+		busy_history = score_init(GR3D_FRAME_SPAN);
+		if (busy_history == NULL)
+			pr_err("%s: can\'t init load tracking array\n",
+			       __func__);
+
+		hint_history = score_init(GR3D_FRAME_SPAN);
+		if (hint_history == NULL)
+			pr_err("%s: can\'t init throughput tracking array\n",
+			       __func__);
+
 		scale3d.init = 1;
 	}
-
-	nvhost_scale3d_reset();
 }
 
 void nvhost_scale3d_deinit(struct nvhost_device *dev)
 {
 	device_remove_file(&dev->dev, &dev_attr_enable_3d_scaling);
 	scale3d.init = 0;
+	if (scale3d.freqlist != NULL) {
+		kfree(scale3d.freqlist);
+		scale3d.freq_count = 0;
+		scale3d.freqlist = NULL;
+	}
+
+	score_delete(busy_history);
+	score_delete(hint_history);
 }
diff --git a/drivers/video/tegra/host/host1x/host1x.c b/drivers/video/tegra/host/host1x/host1x.c
index 33ebc1ff5d22..31899c78065b 100644
--- a/drivers/video/tegra/host/host1x/host1x.c
+++ b/drivers/video/tegra/host/host1x/host1x.c
@@ -308,6 +308,19 @@ static int power_off_host(struct nvhost_device *dev)
 	return 0;
 }
 
+static void clock_on_host(struct nvhost_device *dev)
+{
+	struct nvhost_master *host = nvhost_get_drvdata(dev);
+	nvhost_intr_start(&host->intr, clk_get_rate(dev->clk[0]));
+}
+
+static int clock_off_host(struct nvhost_device *dev)
+{
+	struct nvhost_master *host = nvhost_get_drvdata(dev);
+	nvhost_intr_stop(&host->intr);
+	return 0;
+}
+
 static int __devinit nvhost_user_init(struct nvhost_master *host)
 {
 	int err, devno;
@@ -516,6 +529,8 @@ static struct nvhost_driver nvhost_driver = {
 	},
 	.finalize_poweron = power_on_host,
 	.prepare_poweroff = power_off_host,
+	.finalize_clockon = clock_on_host,
+	.prepare_clockoff = clock_off_host,
 };
 
 static int __init nvhost_mod_init(void)
diff --git a/drivers/video/tegra/host/host1x/host1x_cdma.c b/drivers/video/tegra/host/host1x/host1x_cdma.c
index 2e7ff5783a37..5a29ff652efe 100644
--- a/drivers/video/tegra/host/host1x/host1x_cdma.c
+++ b/drivers/video/tegra/host/host1x/host1x_cdma.c
@@ -233,12 +233,15 @@ static void cdma_timeout_cpu_incr(struct nvhost_cdma *cdma, u32 getptr,
 	/* after CPU incr, ensure shadow is up to date */
 	nvhost_syncpt_update_min(&dev->syncpt, cdma->timeout.syncpt_id);
 
-	/* update WAITBASE_3D by same number of incrs */
-	if (waitbases) {
+	/* Synchronize wait bases. 2D wait bases are synchronized with
+	 * syncpoint 19. Hence wait bases are not updated when syncptid=18. */
+
+	if (cdma->timeout.syncpt_id != NVSYNCPT_2D_0 && waitbases) {
 		void __iomem *p;
 		p = dev->sync_aperture + host1x_sync_syncpt_base_0_r() +
-				(ffs(waitbases) * sizeof(u32));
+				(__ffs(waitbases) * sizeof(u32));
 		writel(syncval, p);
+		dev->syncpt.base_val[__ffs(waitbases)] = syncval;
 	}
 
 	/* NOP all the PB slots */
@@ -486,7 +489,7 @@ static void cdma_timeout_handler(struct work_struct *work)
 	/* stop HW, resetting channel/module */
 	cdma_op().timeout_teardown_begin(cdma);
 
-	nvhost_cdma_update_sync_queue(cdma, sp, dev->dev);
+	nvhost_cdma_update_sync_queue(cdma, sp, ch->dev);
 	mutex_unlock(&cdma->lock);
 }
 
diff --git a/drivers/video/tegra/host/host1x/host1x_channel.c b/drivers/video/tegra/host/host1x/host1x_channel.c
index 9e9fc25dc966..0274413ff698 100644
--- a/drivers/video/tegra/host/host1x/host1x_channel.c
+++ b/drivers/video/tegra/host/host1x/host1x_channel.c
@@ -365,7 +365,7 @@ static int host1x_channel_read_3d_reg(
 		if (hwctx_to_save) {
 			syncpt_incrs += hwctx_to_save->save_incrs;
 			hwctx_to_save->hwctx.valid = true;
-			channel->ctxhandler->get(&hwctx_to_save->hwctx);
+			nvhost_job_get_hwctx(job, &hwctx_to_save->hwctx);
 		}
 		channel->cur_ctx = hwctx;
 		if (channel->cur_ctx && channel->cur_ctx->valid) {
@@ -470,7 +470,8 @@ static int host1x_channel_read_3d_reg(
 	wait_event(wq,
 		nvhost_syncpt_is_expired(&nvhost_get_host(channel->dev)->syncpt,
 				p->syncpt, syncval - 2));
-	nvhost_intr_put_ref(&nvhost_get_host(channel->dev)->intr, ref);
+	nvhost_intr_put_ref(&nvhost_get_host(channel->dev)->intr, p->syncpt,
+			ref);
 
 	/* Read the register value from FIFO */
 	err = host1x_drain_read_fifo(channel, value, 1, &pending);
@@ -580,7 +581,6 @@ static int host1x_save_context(struct nvhost_channel *ch)
 	}
 
 	hwctx_to_save->valid = true;
-	ch->ctxhandler->get(hwctx_to_save);
 	ch->cur_ctx = NULL;
 	syncpt_id = to_host1x_hwctx_handler(hwctx_to_save->h)->syncpt;
 
@@ -623,7 +623,7 @@ static int host1x_save_context(struct nvhost_channel *ch)
 		nvhost_syncpt_is_expired(&nvhost_get_host(ch->dev)->syncpt,
 				syncpt_id, syncpt_val));
 
-	nvhost_intr_put_ref(&nvhost_get_host(ch->dev)->intr, ref);
+	nvhost_intr_put_ref(&nvhost_get_host(ch->dev)->intr, syncpt_id, ref);
 
 	nvhost_cdma_update(&ch->cdma);
 
diff --git a/drivers/video/tegra/host/host1x/host1x_intr.c b/drivers/video/tegra/host/host1x/host1x_intr.c
index 62fd07cbb9ba..facb818a0c24 100644
--- a/drivers/video/tegra/host/host1x/host1x_intr.c
+++ b/drivers/video/tegra/host/host1x/host1x_intr.c
@@ -131,6 +131,16 @@ static void t20_intr_enable_syncpt_intr(struct nvhost_intr *intr, u32 id)
 			BIT_WORD(id) * REGISTER_STRIDE);
 }
 
+static void t20_intr_disable_syncpt_intr(struct nvhost_intr *intr, u32 id)
+{
+	struct nvhost_master *dev = intr_to_dev(intr);
+	void __iomem *sync_regs = dev->sync_aperture;
+
+	writel(BIT_MASK(id), sync_regs +
+			host1x_sync_syncpt_thresh_int_disable_r() +
+			BIT_WORD(id) * REGISTER_STRIDE);
+}
+
 static void t20_intr_disable_all_syncpt_intrs(struct nvhost_intr *intr)
 {
 	struct nvhost_master *dev = intr_to_dev(intr);
@@ -140,7 +150,7 @@ static void t20_intr_disable_all_syncpt_intrs(struct nvhost_intr *intr)
 	for (reg = 0; reg <= BIT_WORD(dev->info.nb_pts) * REGISTER_STRIDE;
 			reg += REGISTER_STRIDE) {
 		/* disable interrupts for both cpu's */
-		writel(0, sync_regs +
+		writel(0xffffffffu, sync_regs +
 				host1x_sync_syncpt_thresh_int_disable_r() +
 				reg);
 
@@ -276,6 +286,7 @@ static const struct nvhost_intr_ops host1x_intr_ops = {
 	.set_host_clocks_per_usec = t20_intr_set_host_clocks_per_usec,
 	.set_syncpt_threshold = t20_intr_set_syncpt_threshold,
 	.enable_syncpt_intr = t20_intr_enable_syncpt_intr,
+	.disable_syncpt_intr = t20_intr_disable_syncpt_intr,
 	.disable_all_syncpt_intrs = t20_intr_disable_all_syncpt_intrs,
 	.request_host_general_irq = t20_intr_request_host_general_irq,
 	.free_host_general_irq = t20_intr_free_host_general_irq,
diff --git a/drivers/video/tegra/host/mpe/mpe.c b/drivers/video/tegra/host/mpe/mpe.c
index c738700469c6..d76ee0108eef 100644
--- a/drivers/video/tegra/host/mpe/mpe.c
+++ b/drivers/video/tegra/host/mpe/mpe.c
@@ -212,7 +212,7 @@ struct save_info {
 	unsigned int restore_count;
 };
 
-static void __init save_begin(struct host1x_hwctx_handler *h, u32 *ptr)
+static void save_begin(struct host1x_hwctx_handler *h, u32 *ptr)
 {
 	/* MPE: when done, increment syncpt to base+1 */
 	ptr[0] = nvhost_opcode_setclass(NV_VIDEO_ENCODE_MPEG_CLASS_ID, 0, 0);
@@ -229,7 +229,7 @@ static void __init save_begin(struct host1x_hwctx_handler *h, u32 *ptr)
 }
 #define SAVE_BEGIN_SIZE 5
 
-static void __init save_direct(u32 *ptr, u32 start_reg, u32 count)
+static void save_direct(u32 *ptr, u32 start_reg, u32 count)
 {
 	ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
 					host1x_uclass_indoff_r(), 1);
@@ -239,7 +239,7 @@ static void __init save_direct(u32 *ptr, u32 start_reg, u32 count)
 }
 #define SAVE_DIRECT_SIZE 3
 
-static void __init save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count)
+static void save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count)
 {
 	ptr[0] = nvhost_opcode_setclass(NV_VIDEO_ENCODE_MPEG_CLASS_ID,
 					cmd_reg, 1);
@@ -247,7 +247,7 @@ static void __init save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count)
 }
 #define SAVE_SET_RAM_CMD_SIZE 2
 
-static void __init save_read_ram_data_nasty(u32 *ptr, u32 data_reg)
+static void save_read_ram_data_nasty(u32 *ptr, u32 data_reg)
 {
 	ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
 					host1x_uclass_indoff_r(), 1);
@@ -261,7 +261,7 @@ static void __init save_read_ram_data_nasty(u32 *ptr, u32 data_reg)
 }
 #define SAVE_READ_RAM_DATA_NASTY_SIZE 5
 
-static void __init save_end(struct host1x_hwctx_handler *h, u32 *ptr)
+static void save_end(struct host1x_hwctx_handler *h, u32 *ptr)
 {
 	/* Wait for context read service to finish (cpu incr 3) */
 	ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
@@ -275,7 +275,7 @@ static void __init save_end(struct host1x_hwctx_handler *h, u32 *ptr)
 }
 #define SAVE_END_SIZE 5
 
-static void __init setup_save_regs(struct save_info *info,
+static void setup_save_regs(struct save_info *info,
 			const struct hwctx_reginfo *regs,
 			unsigned int nr_regs)
 {
@@ -304,7 +304,7 @@ static void __init setup_save_regs(struct save_info *info,
 	info->restore_count = restore_count;
 }
 
-static void __init setup_save_ram_nasty(struct save_info *info,	unsigned words,
+static void setup_save_ram_nasty(struct save_info *info,	unsigned words,
 					unsigned cmd_reg, unsigned data_reg)
 {
 	u32 *ptr = info->ptr;
@@ -330,7 +330,7 @@ static void __init setup_save_ram_nasty(struct save_info *info,	unsigned words,
 	info->restore_count = restore_count;
 }
 
-static void __init setup_save(struct host1x_hwctx_handler *h, u32 *ptr)
+static void setup_save(struct host1x_hwctx_handler *h, u32 *ptr)
 {
 	struct save_info info = {
 		ptr,
@@ -553,7 +553,7 @@ struct nvhost_hwctx_handler *nvhost_mpe_ctxhandler_init(u32 syncpt,
 
 	p->save_buf = mem_op().alloc(memmgr, p->save_size * 4, 32,
 				mem_mgr_flag_write_combine);
-	if (IS_ERR(p->save_buf)) {
+	if (IS_ERR_OR_NULL(p->save_buf)) {
 		p->save_buf = NULL;
 		return NULL;
 	}
diff --git a/drivers/video/tegra/host/nvhost_acm.c b/drivers/video/tegra/host/nvhost_acm.c
index 06005c423a21..5bde55ad2ff5 100644
--- a/drivers/video/tegra/host/nvhost_acm.c
+++ b/drivers/video/tegra/host/nvhost_acm.c
@@ -101,8 +101,17 @@ void nvhost_module_reset(struct nvhost_device *dev)
 
 static void to_state_clockgated_locked(struct nvhost_device *dev)
 {
+	struct nvhost_driver *drv = to_nvhost_driver(dev->dev.driver);
+
 	if (dev->powerstate == NVHOST_POWER_STATE_RUNNING) {
-		int i;
+		int i, err;
+		if (drv->prepare_clockoff) {
+			err = drv->prepare_clockoff(dev);
+			if (err) {
+				dev_err(&dev->dev, "error clock gating");
+				return;
+			}
+		}
 		for (i = 0; i < dev->num_clks; i++)
 			clk_disable(dev->clk[i]);
 		if (dev->dev.parent)
@@ -141,6 +150,14 @@ static void to_state_running_locked(struct nvhost_device *dev)
 			}
 		}
 
+		/* Invoke callback after enabling clock. This is used for
+		 * re-enabling host1x interrupts. */
+		if (prev_state == NVHOST_POWER_STATE_CLOCKGATED
+				&& drv->finalize_clockon)
+			drv->finalize_clockon(dev);
+
+		/* Invoke callback after power un-gating. This is used for
+		 * restoring context. */
 		if (prev_state == NVHOST_POWER_STATE_POWERGATED
 				&& drv->finalize_poweron)
 			drv->finalize_poweron(dev);
@@ -343,15 +360,17 @@ void nvhost_module_remove_client(struct nvhost_device *dev, void *priv)
 {
 	int i;
 	struct nvhost_module_client *m;
+	int found = 0;
 
 	mutex_lock(&client_list_lock);
 	list_for_each_entry(m, &dev->client_list, node) {
 		if (priv == m->priv) {
 			list_del(&m->node);
+			found = 1;
 			break;
 		}
 	}
-	if (m) {
+	if (found) {
 		kfree(m);
 		for (i = 0; i < dev->num_clks; i++)
 			nvhost_module_update_rate(dev, i);
diff --git a/drivers/video/tegra/host/nvhost_intr.c b/drivers/video/tegra/host/nvhost_intr.c
index 38a04f151e87..9788d32bd4a9 100644
--- a/drivers/video/tegra/host/nvhost_intr.c
+++ b/drivers/video/tegra/host/nvhost_intr.c
@@ -210,7 +210,9 @@ static int process_wait_list(struct nvhost_intr *intr,
 	remove_completed_waiters(&syncpt->wait_head, threshold, completed);
 
 	empty = list_empty(&syncpt->wait_head);
-	if (!empty)
+	if (empty)
+		intr_op().disable_syncpt_intr(intr, syncpt->id);
+	else
 		reset_threshold_interrupt(intr, &syncpt->wait_head,
 					  syncpt->id);
 
@@ -327,14 +329,20 @@ void *nvhost_intr_alloc_waiter()
 			GFP_KERNEL|__GFP_REPEAT);
 }
 
-void nvhost_intr_put_ref(struct nvhost_intr *intr, void *ref)
+void nvhost_intr_put_ref(struct nvhost_intr *intr, u32 id, void *ref)
 {
 	struct nvhost_waitlist *waiter = ref;
+	struct nvhost_intr_syncpt *syncpt;
+	struct nvhost_master *host = intr_to_dev(intr);
 
 	while (atomic_cmpxchg(&waiter->state,
 				WLS_PENDING, WLS_CANCELLED) == WLS_REMOVED)
 		schedule();
 
+	syncpt = intr->syncpt + id;
+	(void)process_wait_list(intr, syncpt,
+				nvhost_syncpt_update_min(&host->syncpt, id));
+
 	kref_put(&waiter->refcount, waiter_release);
 }
 
diff --git a/drivers/video/tegra/host/nvhost_intr.h b/drivers/video/tegra/host/nvhost_intr.h
index cf0b6b9e8934..d4a6157eced1 100644
--- a/drivers/video/tegra/host/nvhost_intr.h
+++ b/drivers/video/tegra/host/nvhost_intr.h
@@ -104,7 +104,7 @@ void *nvhost_intr_alloc_waiter(void);
  * You must call this if you passed non-NULL as ref.
  * @ref the ref returned from nvhost_intr_add_action()
  */
-void nvhost_intr_put_ref(struct nvhost_intr *intr, void *ref);
+void nvhost_intr_put_ref(struct nvhost_intr *intr, u32 id, void *ref);
 
 int nvhost_intr_init(struct nvhost_intr *intr, u32 irq_gen, u32 irq_sync);
 void nvhost_intr_deinit(struct nvhost_intr *intr);
diff --git a/drivers/video/tegra/host/nvhost_job.c b/drivers/video/tegra/host/nvhost_job.c
index f93d7df1a552..f0f7e64d4504 100644
--- a/drivers/video/tegra/host/nvhost_job.c
+++ b/drivers/video/tegra/host/nvhost_job.c
@@ -34,19 +34,27 @@
 /* Magic to use to fill freed handle slots */
 #define BAD_MAGIC 0xdeadbeef
 
-static int job_size(struct nvhost_submit_hdr_ext *hdr)
+static size_t job_size(struct nvhost_submit_hdr_ext *hdr)
 {
-	int num_relocs = hdr ? hdr->num_relocs : 0;
-	int num_waitchks = hdr ? hdr->num_waitchks : 0;
-	int num_cmdbufs = hdr ? hdr->num_cmdbufs : 0;
-	int num_unpins = num_cmdbufs + num_relocs;
+	s64 num_relocs = hdr ? (int)hdr->num_relocs : 0;
+	s64 num_waitchks = hdr ? (int)hdr->num_waitchks : 0;
+	s64 num_cmdbufs = hdr ? (int)hdr->num_cmdbufs : 0;
+	s64 num_unpins = num_cmdbufs + num_relocs;
+	s64 total;
 
-	return sizeof(struct nvhost_job)
+	if(num_relocs < 0 || num_waitchks < 0 || num_cmdbufs < 0)
+		return 0;
+
+	total = sizeof(struct nvhost_job)
 			+ num_relocs * sizeof(struct nvhost_reloc)
 			+ num_relocs * sizeof(struct nvhost_reloc_shift)
 			+ num_unpins * sizeof(struct mem_handle *)
 			+ num_waitchks * sizeof(struct nvhost_waitchk)
 			+ num_cmdbufs * sizeof(struct nvhost_job_gather);
+
+	if(total > ULONG_MAX)
+		return 0;
+	return (size_t)total;
 }
 
 static void init_fields(struct nvhost_job *job,
@@ -63,7 +71,11 @@ static void init_fields(struct nvhost_job *job,
 	job->priority = priority;
 	job->clientid = clientid;
 
-	/* Redistribute memory to the structs */
+	/*
+	 * Redistribute memory to the structs.
+	 * Overflows and negative counts have already been
+	 * rejected by job_size(), called from nvhost_job_alloc().
+	 */
 	mem += sizeof(struct nvhost_job);
 	job->relocarray = num_relocs ? mem : NULL;
 	mem += num_relocs * sizeof(struct nvhost_reloc);
@@ -91,8 +103,11 @@ struct nvhost_job *nvhost_job_alloc(struct nvhost_channel *ch,
 		int clientid)
 {
 	struct nvhost_job *job = NULL;
+	size_t size = job_size(hdr);
 
-	job = vzalloc(job_size(hdr));
+	if(!size)
+		goto error;
+	job = vzalloc(size);
 	if (!job)
 		goto error;
 
diff --git a/drivers/video/tegra/host/nvhost_syncpt.c b/drivers/video/tegra/host/nvhost_syncpt.c
index 9fa7d0652c1f..38c28ca116e7 100644
--- a/drivers/video/tegra/host/nvhost_syncpt.c
+++ b/drivers/video/tegra/host/nvhost_syncpt.c
@@ -235,7 +235,7 @@ int nvhost_syncpt_wait_timeout(struct nvhost_syncpt *sp, u32 id,
 			check_count++;
 		}
 	}
-	nvhost_intr_put_ref(&(syncpt_to_dev(sp)->intr), ref);
+	nvhost_intr_put_ref(&(syncpt_to_dev(sp)->intr), id, ref);
 
 done:
 	nvhost_module_idle(syncpt_to_dev(sp)->dev);
@@ -344,7 +344,7 @@ static ssize_t syncpt_min_show(struct kobject *kobj,
 	struct nvhost_syncpt_attr *syncpt_attr =
 		container_of(attr, struct nvhost_syncpt_attr, attr);
 
-	return snprintf(buf, PAGE_SIZE, "%d",
+	return snprintf(buf, PAGE_SIZE, "%u",
 			nvhost_syncpt_read(&syncpt_attr->host->syncpt,
 				syncpt_attr->id));
 }
@@ -355,7 +355,7 @@ static ssize_t syncpt_max_show(struct kobject *kobj,
 	struct nvhost_syncpt_attr *syncpt_attr =
 		container_of(attr, struct nvhost_syncpt_attr, attr);
 
-	return snprintf(buf, PAGE_SIZE, "%d",
+	return snprintf(buf, PAGE_SIZE, "%u",
 			nvhost_syncpt_read_max(&syncpt_attr->host->syncpt,
 				syncpt_attr->id));
 }
diff --git a/drivers/video/tegra/host/t30/t30.c b/drivers/video/tegra/host/t30/t30.c
index 0c8d626a4d67..334d598d5c0b 100644
--- a/drivers/video/tegra/host/t30/t30.c
+++ b/drivers/video/tegra/host/t30/t30.c
@@ -142,7 +142,7 @@ static struct nvhost_device tegra_gr2d02_device = {
 	.waitbases	= BIT(NVWAITBASE_2D_0) | BIT(NVWAITBASE_2D_1),
 	.modulemutexes	= BIT(NVMODMUTEX_2D_FULL) | BIT(NVMODMUTEX_2D_SIMPLE) |
 			  BIT(NVMODMUTEX_2D_SB_A) | BIT(NVMODMUTEX_2D_SB_B),
-	.clocks		= { {"gr2d", UINT_MAX},
+	.clocks		= { {"gr2d", 0},
 			  {"epp", 0},
 			  {"emc", 300000000} },
 	NVHOST_MODULE_NO_POWERGATE_IDS,
diff --git a/drivers/video/tegra/nvmap/nvmap.c b/drivers/video/tegra/nvmap/nvmap.c
index b7fd695d04ee..a0dcf26337f8 100644
--- a/drivers/video/tegra/nvmap/nvmap.c
+++ b/drivers/video/tegra/nvmap/nvmap.c
@@ -3,7 +3,7 @@
  *
  * Memory manager for Tegra GPU
  *
- * Copyright (c) 2009-2011, NVIDIA Corporation.
+ * Copyright (c) 2009-2012, NVIDIA Corporation.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -271,7 +271,7 @@ int nvmap_pin_ids(struct nvmap_client *client,
 	 * if the caller crashes after pinning a global handle, the handle
 	 * will be permanently leaked. */
 	nvmap_ref_lock(client);
-	for (i = 0; i < nr && !ret; i++) {
+	for (i = 0; i < nr; i++) {
 		ref = _nvmap_validate_id_locked(client, ids[i]);
 		if (ref) {
 			atomic_inc(&ref->pin);
@@ -280,19 +280,19 @@ int nvmap_pin_ids(struct nvmap_client *client,
 			struct nvmap_handle *verify;
 			nvmap_ref_unlock(client);
 			verify = nvmap_validate_get(client, ids[i]);
-			if (verify)
+			if (verify) {
 				nvmap_warn(client, "%s pinning unreferenced "
 					   "handle %p\n",
 					   current->group_leader->comm, h[i]);
-			else
+			} else {
+				h[i] = NULL;
 				ret = -EPERM;
+			}
 			nvmap_ref_lock(client);
 		}
 	}
 	nvmap_ref_unlock(client);
 
-	nr = i;
-
 	if (ret)
 		goto out;
 
@@ -317,6 +317,9 @@ out:
 	if (ret) {
 		nvmap_ref_lock(client);
 		for (i = 0; i < nr; i++) {
+			if(!ids[i])
+				continue;
+
 			ref = _nvmap_validate_id_locked(client, ids[i]);
 			if (!ref) {
 				nvmap_warn(client, "%s freed handle %p "
@@ -330,7 +333,8 @@ out:
 		nvmap_ref_unlock(client);
 
 		for (i = 0; i < nr; i++)
-			nvmap_handle_put(h[i]);
+			if(h[i])
+				nvmap_handle_put(h[i]);
 	}
 
 	return ret;
diff --git a/drivers/video/tegra/nvmap/nvmap_common.h b/drivers/video/tegra/nvmap/nvmap_common.h
index 6da010720bb2..2a1e2b4b3c4b 100644
--- a/drivers/video/tegra/nvmap/nvmap_common.h
+++ b/drivers/video/tegra/nvmap/nvmap_common.h
@@ -23,7 +23,8 @@
 extern void v7_flush_kern_cache_all(void *);
 extern void v7_clean_kern_cache_all(void *);
 
-#define FLUSH_CLEAN_BY_SET_WAY_THRESHOLD (8 * PAGE_SIZE)
+#define FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_INNER (8 * PAGE_SIZE)
+#define FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_OUTER (1024 * 1024)
 
 static inline void inner_flush_cache_all(void)
 {
diff --git a/drivers/video/tegra/nvmap/nvmap_dev.c b/drivers/video/tegra/nvmap/nvmap_dev.c
index 98b0bcc18ba5..0c12348db88e 100644
--- a/drivers/video/tegra/nvmap/nvmap_dev.c
+++ b/drivers/video/tegra/nvmap/nvmap_dev.c
@@ -290,7 +290,7 @@ int nvmap_flush_heap_block(struct nvmap_client *client,
 	if (prot == NVMAP_HANDLE_UNCACHEABLE || prot == NVMAP_HANDLE_WRITE_COMBINE)
 		goto out;
 
-	if (len >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD) {
+	if (len >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_INNER) {
 		inner_flush_cache_all();
 		if (prot != NVMAP_HANDLE_INNER_CACHEABLE)
 			outer_flush_range(block->base, block->base + len);
@@ -886,10 +886,11 @@ static void nvmap_vma_open(struct vm_area_struct *vma)
 	struct nvmap_vma_priv *priv;
 
 	priv = vma->vm_private_data;
-
 	BUG_ON(!priv);
 
 	atomic_inc(&priv->count);
+	if(priv->handle)
+		nvmap_usecount_inc(priv->handle);
 }
 
 static void nvmap_vma_close(struct vm_area_struct *vma)
@@ -898,8 +899,8 @@ static void nvmap_vma_close(struct vm_area_struct *vma)
 
 	if (priv) {
 		if (priv->handle) {
+			BUG_ON(priv->handle->usecount == 0);
 			nvmap_usecount_dec(priv->handle);
-			BUG_ON(priv->handle->usecount < 0);
 		}
 		if (!atomic_dec_return(&priv->count)) {
 			if (priv->handle)
diff --git a/drivers/video/tegra/nvmap/nvmap_handle.c b/drivers/video/tegra/nvmap/nvmap_handle.c
index 05046ed8ba79..4b7760b22190 100644
--- a/drivers/video/tegra/nvmap/nvmap_handle.c
+++ b/drivers/video/tegra/nvmap/nvmap_handle.c
@@ -3,7 +3,7 @@
  *
  * Handle allocation and freeing routines for nvmap
  *
- * Copyright (c) 2009-2012, NVIDIA Corporation.
+ * Copyright (c) 2009-2012, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -36,6 +36,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/outercache.h>
+#include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
 #include <mach/iovmm.h>
@@ -56,7 +57,7 @@
  * the kernel (i.e., not a carveout handle) includes its array of pages. to
  * preserve kmalloc space, if the array of pages exceeds PAGELIST_VMALLOC_MIN,
  * the array is allocated using vmalloc. */
-#define PAGELIST_VMALLOC_MIN	(PAGE_SIZE * 2)
+#define PAGELIST_VMALLOC_MIN	(PAGE_SIZE)
 
 #ifdef CONFIG_NVMAP_PAGE_POOLS
 
@@ -85,8 +86,11 @@ static struct page *nvmap_page_pool_alloc_locked(struct nvmap_page_pool *pool)
 {
 	struct page *page = NULL;
 
-	if (pool->npages > 0)
+	if (pool->npages > 0) {
 		page = pool->page_array[--pool->npages];
+		atomic_dec(&page->_count);
+		BUG_ON(atomic_read(&page->_count) != 1);
+	}
 	return page;
 }
 
@@ -107,7 +111,9 @@ static bool nvmap_page_pool_release_locked(struct nvmap_page_pool *pool,
 {
 	int ret = false;
 
+	BUG_ON(atomic_read(&page->_count) != 1);
 	if (enable_pp && pool->npages < pool->max_pages) {
+		atomic_inc(&page->_count);
 		pool->page_array[pool->npages++] = page;
 		ret = true;
 	}
@@ -134,6 +140,7 @@ static int nvmap_page_pool_get_available_count(struct nvmap_page_pool *pool)
 
 static int nvmap_page_pool_free(struct nvmap_page_pool *pool, int nr_free)
 {
+	int err;
 	int i = nr_free;
 	int idx = 0;
 	struct page *page;
@@ -149,8 +156,12 @@ static int nvmap_page_pool_free(struct nvmap_page_pool *pool, int nr_free)
 		i--;
 	}
 
-	if (idx)
-		set_pages_array_wb(pool->shrink_array, idx);
+	if (idx) {
+		/* This op should never fail. */
+		err = set_pages_array_wb(pool->shrink_array, idx);
+		BUG_ON(err);
+	}
+
 	while (idx--)
 		__free_page(pool->shrink_array[idx]);
 	nvmap_page_pool_unlock(pool);
@@ -367,8 +378,9 @@ POOL_SIZE_MOUDLE_PARAM_CB(wb, NVMAP_HANDLE_CACHEABLE);
 
 int nvmap_page_pool_init(struct nvmap_page_pool *pool, int flags)
 {
-	struct page *page;
 	int i;
+	int err;
+	struct page *page;
 	static int reg = 1;
 	struct sysinfo info;
 	int highmem_pages = 0;
@@ -431,7 +443,8 @@ int nvmap_page_pool_init(struct nvmap_page_pool *pool, int flags)
 		s_memtype_str[flags], highmem_pages, pool->max_pages,
 		info.totalram, info.freeram, info.totalhigh, info.freehigh);
 do_cpa:
-	(*s_cpa[flags])(pool->page_array, pool->npages);
+	err = (*s_cpa[flags])(pool->page_array, pool->npages);
+	BUG_ON(err);
 	nvmap_page_pool_unlock(pool);
 	return 0;
 fail:
@@ -444,7 +457,7 @@ fail:
 
 static inline void *altalloc(size_t len)
 {
-	if (len >= PAGELIST_VMALLOC_MIN)
+	if (len > PAGELIST_VMALLOC_MIN)
 		return vmalloc(len);
 	else
 		return kmalloc(len, GFP_KERNEL);
@@ -455,7 +468,7 @@ static inline void altfree(void *ptr, size_t len)
 	if (!ptr)
 		return;
 
-	if (len >= PAGELIST_VMALLOC_MIN)
+	if (len > PAGELIST_VMALLOC_MIN)
 		vfree(ptr);
 	else
 		kfree(ptr);
@@ -463,6 +476,7 @@ static inline void altfree(void *ptr, size_t len)
 
 void _nvmap_handle_free(struct nvmap_handle *h)
 {
+	int err;
 	struct nvmap_share *share = nvmap_get_share_from_dev(h->dev);
 	unsigned int i, nr_page, page_index = 0;
 #ifdef CONFIG_NVMAP_PAGE_POOLS
@@ -506,9 +520,12 @@ void _nvmap_handle_free(struct nvmap_handle *h)
 	/* Restore page attributes. */
 	if (h->flags == NVMAP_HANDLE_WRITE_COMBINE ||
 	    h->flags == NVMAP_HANDLE_UNCACHEABLE ||
-	    h->flags == NVMAP_HANDLE_INNER_CACHEABLE)
-		set_pages_array_wb(&h->pgalloc.pages[page_index],
+	    h->flags == NVMAP_HANDLE_INNER_CACHEABLE) {
+		/* This op should never fail. */
+		err = set_pages_array_wb(&h->pgalloc.pages[page_index],
 				nr_page - page_index);
+		BUG_ON(err);
+	}
 
 skip_attr_restore:
 	if (h->pgalloc.area)
@@ -546,6 +563,7 @@ static struct page *nvmap_alloc_pages_exact(gfp_t gfp, size_t size)
 static int handle_page_alloc(struct nvmap_client *client,
 			     struct nvmap_handle *h, bool contiguous)
 {
+	int err = 0;
 	size_t size = PAGE_ALIGN(h->size);
 	unsigned int nr_page = size >> PAGE_SHIFT;
 	pgprot_t prot;
@@ -555,6 +573,17 @@ static int handle_page_alloc(struct nvmap_client *client,
 	struct nvmap_page_pool *pool = NULL;
 	struct nvmap_share *share = nvmap_get_share_from_dev(h->dev);
 #endif
+	gfp_t gfp = GFP_NVMAP;
+	unsigned long kaddr, paddr;
+	pte_t **pte = NULL;
+
+	if (h->userflags & NVMAP_HANDLE_ZEROED_PAGES) {
+		gfp |= __GFP_ZERO;
+		prot = nvmap_pgprot(h, pgprot_kernel);
+		pte = nvmap_alloc_pte(client->dev, (void **)&kaddr);
+		if (IS_ERR(pte))
+			return -ENOMEM;
+	}
 
 	pages = altalloc(nr_page * sizeof(*pages));
 	if (!pages)
@@ -565,7 +594,7 @@ static int handle_page_alloc(struct nvmap_client *client,
 	h->pgalloc.area = NULL;
 	if (contiguous) {
 		struct page *page;
-		page = nvmap_alloc_pages_exact(GFP_NVMAP, size);
+		page = nvmap_alloc_pages_exact(gfp, size);
 		if (!page)
 			goto fail;
 
@@ -582,12 +611,29 @@ static int handle_page_alloc(struct nvmap_client *client,
 			pages[i] = nvmap_page_pool_alloc(pool);
 			if (!pages[i])
 				break;
+			if (h->userflags & NVMAP_HANDLE_ZEROED_PAGES) {
+				/*
+				 * Just memset low mem pages; they will for
+				 * sure have a virtual address. Otherwise, build
+				 * a mapping for the page in the kernel.
+				 */
+				if (!PageHighMem(pages[i])) {
+					memset(page_address(pages[i]), 0,
+					       PAGE_SIZE);
+				} else {
+					paddr = page_to_phys(pages[i]);
+					set_pte_at(&init_mm, kaddr, *pte,
+						   pfn_pte(__phys_to_pfn(paddr),
+							   prot));
+					flush_tlb_kernel_page(kaddr);
+					memset((char *)kaddr, 0, PAGE_SIZE);
+				}
+			}
 			page_index++;
 		}
 #endif
 		for (; i < nr_page; i++) {
-			pages[i] = nvmap_alloc_pages_exact(GFP_NVMAP,
-				PAGE_SIZE);
+			pages[i] = nvmap_alloc_pages_exact(gfp,	PAGE_SIZE);
 			if (!pages[i])
 				goto fail;
 		}
@@ -608,16 +654,21 @@ static int handle_page_alloc(struct nvmap_client *client,
 
 	/* Update the pages mapping in kernel page table. */
 	if (h->flags == NVMAP_HANDLE_WRITE_COMBINE)
-		set_pages_array_wc(&pages[page_index],
-				nr_page - page_index);
+		err = set_pages_array_wc(&pages[page_index],
+					nr_page - page_index);
 	else if (h->flags == NVMAP_HANDLE_UNCACHEABLE)
-		set_pages_array_uc(&pages[page_index],
-				nr_page - page_index);
+		err = set_pages_array_uc(&pages[page_index],
+					nr_page - page_index);
 	else if (h->flags == NVMAP_HANDLE_INNER_CACHEABLE)
-		set_pages_array_iwb(&pages[page_index],
-				nr_page - page_index);
+		err = set_pages_array_iwb(&pages[page_index],
+					nr_page - page_index);
+
+	if (err)
+		goto fail;
 
 skip_attr_change:
+	if (h->userflags & NVMAP_HANDLE_ZEROED_PAGES)
+		nvmap_free_pte(client->dev, pte);
 	h->size = size;
 	h->pgalloc.pages = pages;
 	h->pgalloc.contig = contiguous;
@@ -625,10 +676,12 @@ skip_attr_change:
 	return 0;
 
 fail:
-	while (i--) {
-		set_pages_array_wb(&pages[i], 1);
+	if (h->userflags & NVMAP_HANDLE_ZEROED_PAGES)
+		nvmap_free_pte(client->dev, pte);
+	err = set_pages_array_wb(pages, i);
+	BUG_ON(err);
+	while (i--)
 		__free_page(pages[i]);
-	}
 	altfree(pages, nr_page * sizeof(*pages));
 	wmb();
 	return -ENOMEM;
diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c
index a6fe78c42f87..738ba26232d3 100644
--- a/drivers/video/tegra/nvmap/nvmap_heap.c
+++ b/drivers/video/tegra/nvmap/nvmap_heap.c
@@ -3,7 +3,7 @@
  *
  * GPU heap allocator.
  *
- * Copyright (c) 2011, NVIDIA Corporation.
+ * Copyright (c) 2012, NVIDIA Corporation.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -420,6 +420,9 @@ static struct nvmap_heap_block *do_heap_alloc(struct nvmap_heap *heap,
 		list_for_each_entry(i, &heap->free_list, free_list) {
 			size_t fix_size;
 			fix_base = ALIGN(i->block.base, align);
+			if(!fix_base || fix_base >= i->block.base + i->size)
+				continue;
+
 			fix_size = i->size - (fix_base - i->block.base);
 
 			/* needed for compaction. relocated chunk
diff --git a/drivers/video/tegra/nvmap/nvmap_ioctl.c b/drivers/video/tegra/nvmap/nvmap_ioctl.c
index 44f00d2951a0..da974b2c6846 100644
--- a/drivers/video/tegra/nvmap/nvmap_ioctl.c
+++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c
@@ -3,7 +3,7 @@
  *
  * User-space interface to nvmap
  *
- * Copyright (c) 2011, NVIDIA Corporation.
+ * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -65,10 +65,10 @@ int nvmap_ioctl_pinop(struct file *filp, bool is_pin, void __user *arg)
 		return -EINVAL;
 
 	if (op.count > 1) {
-		size_t bytes = op.count * sizeof(unsigned long *);
+		size_t bytes = op.count * sizeof(*refs); /* kcalloc below will catch overflow. */
 
 		if (op.count > ARRAY_SIZE(on_stack))
-			refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
+			refs = kcalloc(op.count, sizeof(*refs), GFP_KERNEL);
 		else
 			refs = on_stack;
 
@@ -175,6 +175,9 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg)
 	/* user-space handles are aligned to page boundaries, to prevent
 	 * data leakage. */
 	op.align = max_t(size_t, op.align, PAGE_SIZE);
+#if defined(CONFIG_NVMAP_FORCE_ZEROED_USER_PAGES)
+	op.flags |= NVMAP_HANDLE_ZEROED_PAGES;
+#endif
 
 	return nvmap_alloc_handle_id(client, op.handle, op.heap_mask,
 				     op.align, op.flags);
@@ -236,6 +239,11 @@ int nvmap_map_into_caller_ptr(struct file *filp, void __user *arg)
 	if (!h)
 		return -EPERM;
 
+	if(!h->alloc) {
+		nvmap_handle_put(h);
+		return -EFAULT;
+	}
+
 	trace_nvmap_map_into_caller_ptr(client, h, op.offset,
 					op.length, op.flags);
 	down_read(&current->mm->mmap_sem);
@@ -251,7 +259,7 @@ int nvmap_map_into_caller_ptr(struct file *filp, void __user *arg)
 		goto out;
 	}
 
-	if ((op.offset + op.length) > h->size) {
+	if (op.offset > h->size || (op.offset + op.length) > h->size) {
 		err = -EADDRNOTAVAIL;
 		goto out;
 	}
@@ -542,14 +550,32 @@ static void heap_page_cache_maint(struct nvmap_client *client,
 	}
 }
 
+static bool fast_cache_maint_outer(unsigned long start,
+		unsigned long end, unsigned int op)
+{	/* Try a whole-outer-cache operation for [start, end); returns true only if one was issued. */
+	bool result = false;	/* stays false when the config option is off, the range is small, or op is unhandled */
+#if defined(CONFIG_NVMAP_OUTER_CACHE_MAINT_BY_SET_WAYS)
+	if (end - start >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_OUTER) {	/* only ranges at or above the outer threshold qualify */
+		if (op == NVMAP_CACHE_OP_WB_INV) {	/* write-back + invalidate request */
+			outer_flush_all();	/* presumably flushes the entire outer cache - confirm against the outercache API */
+			result = true;
+		}
+		if (op == NVMAP_CACHE_OP_WB) {	/* write-back only request */
+			outer_clean_all();	/* presumably cleans the entire outer cache - confirm against the outercache API */
+			result = true;
+		}
+	}
+#endif /* CONFIG_NVMAP_OUTER_CACHE_MAINT_BY_SET_WAYS */
+	return result;	/* false tells the caller no whole-cache operation was done */
+}
+
 static bool fast_cache_maint(struct nvmap_client *client, struct nvmap_handle *h,
 	unsigned long start, unsigned long end, unsigned int op)
 {
 	int ret = false;
-
 #if defined(CONFIG_NVMAP_CACHE_MAINT_BY_SET_WAYS)
 	if ((op == NVMAP_CACHE_OP_INV) ||
-		((end - start) < FLUSH_CLEAN_BY_SET_WAY_THRESHOLD))
+		((end - start) < FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_INNER))
 		goto out;
 
 	if (op == NVMAP_CACHE_OP_WB_INV)
@@ -557,13 +583,19 @@ static bool fast_cache_maint(struct nvmap_client *client, struct nvmap_handle *h
 	else if (op == NVMAP_CACHE_OP_WB)
 		inner_clean_cache_all();
 
-	if (h->heap_pgalloc && (h->flags != NVMAP_HANDLE_INNER_CACHEABLE)) {
-		heap_page_cache_maint(client, h, start, end, op,
-				false, true, NULL, 0, 0);
-	} else if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE) {
-		start += h->carveout->base;
-		end += h->carveout->base;
-		outer_cache_maint(op, start, end - start);
+	/* outer maintenance */
+	if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE ) {
+		if(!fast_cache_maint_outer(start, end, op))
+		{
+			if (h->heap_pgalloc) {
+				heap_page_cache_maint(client, h, start,
+					end, op, false, true, NULL, 0, 0);
+			} else  {
+				start += h->carveout->base;
+				end += h->carveout->base;
+				outer_cache_maint(op, start, end - start);
+			}
+		}
 	}
 	ret = true;
 out:
author	Marcel Ziswiler <marcel.ziswiler@toradex.com>	2012-11-12 15:28:39 +0100
committer	Marcel Ziswiler <marcel.ziswiler@toradex.com>	2012-11-12 15:28:39 +0100
commit	f987e832a9e79d2ce8009a5ea9c7b677624b3b30 (patch)
tree	0dd09a5e6b4c60ee0a9916907dfc2cda83f3e496 /drivers/video
parent	f737b7f46a72c099cf8ac88baff02fbf61b1a47c (diff)
parent	fc993d9bc48f772133d8cd156c67c296477db070 (diff)