tegra: dc: set EMC clock dynamically

If the screen is idle (no POST for some time), reduce the DC EMC clock according to the window sizes. If an external display is connected, the EMC clock will not be reduced. BUG 828306 Change-Id: I6fb62ce6baf3380737c76b71f16e38ad6465a667 Reviewed-on: http://git-master/r/37106 Reviewed-by: Niket Sirsi <nsirsi@nvidia.com> Tested-by: Niket Sirsi <nsirsi@nvidia.com>
author: Xin Xie <xxie@nvidia.com> 2011-06-03 20:47:14 -0700
committer: Niket Sirsi <nsirsi@nvidia.com> 2011-06-24 21:30:03 -0700
commit: 08bb9eed94479a37999be169c434d561d0074c49 (patch)
tree: a0b678a3146c1084049134fe0490cff1273998c0
parent: 683092ca422d64cb7d72d70763d1100812abfd58 (diff)
5 files changed, 276 insertions, 12 deletions
diff --git a/arch/arm/mach-tegra/include/mach/dc.h b/arch/arm/mach-tegra/include/mach/dc.h
index 05ab8953366c..ebd8259e5bc6 100644
--- a/arch/arm/mach-tegra/include/mach/dc.h
+++ b/arch/arm/mach-tegra/include/mach/dc.h
@@ -432,6 +432,9 @@ u32 tegra_dc_get_syncpt_id(const struct tegra_dc *dc);
 u32 tegra_dc_incr_syncpt_max(struct tegra_dc *dc);
 void tegra_dc_incr_syncpt_min(struct tegra_dc *dc, u32 val);
 
+int tegra_dc_set_default_emc(struct tegra_dc *dc);
+int tegra_dc_set_dynamic_emc(struct tegra_dc_win *windows[], int n);
+
 /* tegra_dc_update_windows and tegra_dc_sync_windows do not support windows
  * with differenct dcs in one call
  */
diff --git a/drivers/video/tegra/dc/dc.c b/drivers/video/tegra/dc/dc.c
index 148438df4b1d..ae2c7416115c 100644
--- a/drivers/video/tegra/dc/dc.c
+++ b/drivers/video/tegra/dc/dc.c
@@ -34,6 +34,7 @@
 #include <linux/seq_file.h>
 #include <linux/backlight.h>
 #include <linux/switch.h>
+#include <video/tegrafb.h>
 
 #include <mach/clk.h>
 #include <mach/dc.h>
@@ -50,6 +51,16 @@ static int no_vsync;
 
 module_param_named(no_vsync, no_vsync, int, S_IRUGO | S_IWUSR);
 
+static int use_dynamic_emc = 1;
+
+module_param_named(use_dynamic_emc, use_dynamic_emc, int, S_IRUGO | S_IWUSR);
+
+/* Set the default window idle time to 2000 ms for power saving purposes. */
+static int windows_idle_detection_time = 2000;
+
+module_param_named(windows_idle_detection_time, windows_idle_detection_time,
+		   int, S_IRUGO | S_IWUSR);
+
 struct tegra_dc *tegra_dcs[TEGRA_MAX_DC];
 
 DEFINE_MUTEX(tegra_dc_lock);
@@ -446,7 +457,7 @@ static inline void tegra_dc_create_debugfs(struct tegra_dc *dc) { };
 static inline void __devexit tegra_dc_remove_debugfs(struct tegra_dc *dc) { };
 #endif /* CONFIG_DEBUGFS */
 
-static int tegra_dc_add(struct tegra_dc *dc, int index)
+static int tegra_dc_set(struct tegra_dc *dc, int index)
 {
 	int ret = 0;
 
@@ -456,7 +467,7 @@ static int tegra_dc_add(struct tegra_dc *dc, int index)
 		goto out;
 	}
 
-	if (tegra_dcs[index] != NULL) {
+	if (dc != NULL && tegra_dcs[index] != NULL) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -469,6 +480,20 @@ out:
 	return ret;
 }
 
+static unsigned int tegra_dc_has_multiple_dc(void)
+{
+	unsigned int idx;
+	unsigned int cnt = 0;
+	struct tegra_dc *dc;
+
+	mutex_lock(&tegra_dc_lock);
+	for (idx = 0; idx < TEGRA_MAX_DC; idx++)
+		cnt += ((dc = tegra_dcs[idx]) != NULL && dc->enabled) ? 1 : 0;
+	mutex_unlock(&tegra_dc_lock);
+
+	return (cnt > 1);
+}
+
 struct tegra_dc *tegra_dc_get_dc(unsigned idx)
 {
 	if (idx < TEGRA_MAX_DC)
@@ -593,6 +618,186 @@ static void tegra_dc_set_scaling_filter(struct tegra_dc *dc)
 	}
 }
 
+static unsigned int tegra_dc_windows_is_overlapped(struct tegra_dc_win *a,
+						   struct tegra_dc_win *b)
+{
+	if (!WIN_IS_ENABLED(a) || !WIN_IS_ENABLED(b))
+		return 0;
+	return ((a->out_y + a->out_h > b->out_y) && (a->out_y <= b->out_y)) ||
+	       ((b->out_y + b->out_h > a->out_y) && (b->out_y <= a->out_y));
+}
+
+static unsigned int tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[],
+						unsigned int bw[], int n)
+{
+	/* We have n windows and know their geometries and bandwidths. If any
+	 * of them overlap vertically, the bandwidths of the overlapping area
+	 * are combined.
+	 *
+	 * This function finds the maximum bandwidth of any overlapping area.
+	 * If no windows overlap, it returns the maximum bandwidth of the
+	 * individual windows.
+	 */
+
+	/* We know win_2 is always overlapped with win_0 and win_1. */
+	if (tegra_dc_windows_is_overlapped(wins[0], wins[1]))
+		return bw[0] + bw[1] + bw[2];
+	else
+		return max(bw[0], bw[1]) + bw[2];
+
+}
+
+/* 8 bits per byte (1 << 3) */
+#define BIT_TO_BYTE_SHIFT 3
+/*
+ * Assuming 50% (X >> 1) efficiency: i.e. if we calculate we need 70MBps, we
+ * will request 140MBps from EMC.
+ */
+#define MEM_EFFICIENCY_SHIFT 1
+static unsigned long tegra_dc_get_emc_rate(struct tegra_dc_win *wins[], int n)
+{
+	int i;
+	unsigned int bw[TEGRA_FB_FLIP_N_WINDOWS];
+	struct tegra_dc_win *w;
+	struct tegra_dc *dc;
+	unsigned int max;
+	unsigned int ret;
+
+	dc = wins[0]->dc;
+
+	if (tegra_dc_has_multiple_dc())
+		return tegra_dc_get_default_emc_clk_rate(dc);
+
+	BUG_ON(n > ARRAY_SIZE(bw));
+	/*
+	 * Calculate peak EMC bandwidth for each enabled window =
+	 * pixel_clock * win_bpp * (use_v_filter ? 2 : 1) * H_scale_factor *
+	 * (windows_tiling ? 2 : 1)
+	 *
+	 *
+	 * note:
+	 * (*) We use a 2-tap V filter, so the BW doubles if it is used
+	 * (*) Tiling mode on T30 and DDR3 requires double BW
+	 */
+	for (i = 0; w = wins[i], bw[i] = 0, i < n; i++) {
+		if (!WIN_IS_ENABLED(w))
+			continue;
+		bw[i] = dc->mode.pclk *
+			(tegra_dc_fmt_bpp(w->fmt) >> BIT_TO_BYTE_SHIFT) *
+			(WIN_USE_V_FILTER(w) ? 2 : 1) /
+			w->out_w * w->w *
+			(WIN_IS_TILED(w) ? TILED_WINDOWS_BW_MULTIPLIER : 1);
+	}
+
+	max = tegra_dc_find_max_bandwidth(wins, bw, n) << MEM_EFFICIENCY_SHIFT;
+
+	ret = EMC_BW_TO_FREQ(max);
+
+	/*
+	 * If the calculated peak BW is bigger than board specified BW, then
+	 * either the above calculation is wrong, or board specified BW is
+	 * wrong.
+	 */
+	WARN_ON(ret > tegra_dc_get_default_emc_clk_rate(dc));
+
+	return ret;
+}
+#undef BIT_TO_BYTE_SHIFT
+#undef MEM_EFFICIENCY_SHIFT
+
+static void tegra_dc_change_emc(struct tegra_dc *dc)
+{
+	if (dc->emc_clk_rate != dc->new_emc_clk_rate) {
+		dc->emc_clk_rate = dc->new_emc_clk_rate;
+		clk_set_rate(dc->emc_clk, dc->emc_clk_rate);
+	}
+}
+
+static void tegra_dc_reduce_emc_worker(struct work_struct *work)
+{
+	struct tegra_dc *dc;
+
+	dc = container_of(to_delayed_work(work), struct tegra_dc,
+	    reduce_emc_clk_work);
+
+	mutex_lock(&dc->lock);
+
+	if (!dc->enabled) {
+		mutex_unlock(&dc->lock);
+		return;
+	}
+
+	tegra_dc_change_emc(dc);
+
+	mutex_unlock(&dc->lock);
+}
+
+int  tegra_dc_set_dynamic_emc(struct tegra_dc_win *windows[], int n)
+{
+	unsigned long new_rate;
+	struct tegra_dc *dc;
+
+	if (!use_dynamic_emc)
+		return 0;
+
+	dc = windows[0]->dc;
+
+	mutex_lock(&dc->lock);
+
+	if (!dc->enabled) {
+		mutex_unlock(&dc->lock);
+		return -EFAULT;
+	}
+
+	/* calculate the new rate based on this POST */
+	new_rate = tegra_dc_get_emc_rate(windows, n);
+
+	dc->new_emc_clk_rate = new_rate;
+
+	/*
+	 * If we don't need to set EMC immediately after a frame POST, we
+	 * schedule a work_queue to reduce EMC in the future. This work_queue
+	 * task will not be executed if another POST comes before the idle
+	 * time expires.
+	 */
+	if (NEED_UPDATE_EMC_ON_EVERY_FRAME)
+		tegra_dc_change_emc(dc);
+	else
+		schedule_delayed_work(&dc->reduce_emc_clk_work,
+			msecs_to_jiffies(windows_idle_detection_time));
+
+	mutex_unlock(&dc->lock);
+
+	return 0;
+}
+
+int  tegra_dc_set_default_emc(struct tegra_dc *dc)
+{
+	/*
+	 * A POST happens whenever this function is called. We first cancel
+	 * any reduce_emc_clk_work, then set the DC EMC clock to the default
+	 * value.
+	 */
+	cancel_delayed_work_sync(&dc->reduce_emc_clk_work);
+
+	if (NEED_UPDATE_EMC_ON_EVERY_FRAME)
+		return 0;
+
+	mutex_lock(&dc->lock);
+
+	if (!dc->enabled) {
+		mutex_unlock(&dc->lock);
+		return -EFAULT;
+	}
+
+	dc->new_emc_clk_rate = tegra_dc_get_default_emc_clk_rate(dc);
+	tegra_dc_change_emc(dc);
+
+	mutex_unlock(&dc->lock);
+
+	return 0;
+}
+
 /* does not support updating windows on multiple dcs in one call */
 int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 {
@@ -643,7 +848,7 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 		if (!no_vsync)
 			update_mask |= WIN_A_ACT_REQ << win->idx;
 
-		if (!(win->flags & TEGRA_WIN_FLAG_ENABLED)) {
+		if (!WIN_IS_ENABLED(win)) {
 			tegra_dc_writel(dc, 0, DC_WIN_WIN_OPTIONS);
 			continue;
 		}
@@ -707,7 +912,7 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 		tegra_dc_writel(dc, h_offset, DC_WINBUF_ADDR_H_OFFSET);
 		tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
 
-		if (win->flags & TEGRA_WIN_FLAG_TILED)
+		if (WIN_IS_TILED(win))
 			tegra_dc_writel(dc,
 					DC_WIN_BUFFER_ADDR_MODE_TILE |
 					DC_WIN_BUFFER_ADDR_MODE_TILE_UV,
@@ -724,9 +929,9 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 		else if (tegra_dc_fmt_bpp(win->fmt) < 24)
 			val |= COLOR_EXPAND;
 
-		if (win->w != win->out_w)
+		if (WIN_USE_H_FILTER(win))
 			val |= H_FILTER_ENABLE;
-		if (win->h != win->out_h)
+		if (WIN_USE_V_FILTER(win))
 			val |= V_FILTER_ENABLE;
 
 		if (invert_h)
@@ -1870,7 +2075,6 @@ static int tegra_dc_probe(struct nvhost_device *ndev)
 	void __iomem *base;
 	int irq;
 	int i;
-	unsigned long emc_clk_rate;
 
 	if (!ndev->dev.platform_data) {
 		dev_err(&ndev->dev, "no platform data\n");
@@ -1929,6 +2133,8 @@ static int tegra_dc_probe(struct nvhost_device *ndev)
 
 	dc->clk = clk;
 	dc->emc_clk = emc_clk;
+	INIT_DELAYED_WORK(&dc->reduce_emc_clk_work, tegra_dc_reduce_emc_worker);
+
 	dc->base_res = base_res;
 	dc->base = base;
 	dc->irq = irq;
@@ -1939,8 +2145,8 @@ static int tegra_dc_probe(struct nvhost_device *ndev)
 	 * The emc is a shared clock, it will be set based on
 	 * the requirements for each user on the bus.
 	 */
-	emc_clk_rate = dc->pdata->emc_clk_rate;
-	clk_set_rate(emc_clk, emc_clk_rate ? emc_clk_rate : ULONG_MAX);
+	dc->emc_clk_rate = tegra_dc_get_default_emc_clk_rate(dc);
+	clk_set_rate(emc_clk, dc->emc_clk_rate);
 
 	if (dc->pdata->flags & TEGRA_DC_FLAG_ENABLED)
 		dc->enabled = true;
@@ -1969,7 +2175,7 @@ static int tegra_dc_probe(struct nvhost_device *ndev)
 	/* hack to balance enable_irq calls in _tegra_dc_enable() */
 	disable_dc_irq(dc->irq);
 
-	ret = tegra_dc_add(dc, ndev->id);
+	ret = tegra_dc_set(dc, ndev->id);
 	if (ret < 0) {
 		dev_err(&ndev->dev, "can't add dc\n");
 		goto err_free_irq;
@@ -2076,6 +2282,7 @@ static int tegra_dc_remove(struct nvhost_device *ndev)
 	if (dc->fb_mem)
 		release_resource(dc->base_res);
 	kfree(dc);
+	tegra_dc_set(NULL, ndev->id);
 	return 0;
 }
 
diff --git a/drivers/video/tegra/dc/dc_priv.h b/drivers/video/tegra/dc/dc_priv.h
index d9488a93f986..b3f6eb53c38a 100644
--- a/drivers/video/tegra/dc/dc_priv.h
+++ b/drivers/video/tegra/dc/dc_priv.h
@@ -27,6 +27,34 @@
 
 #include "../host/dev.h"
 
+#define WIN_IS_TILED(win)	((win)->flags & TEGRA_WIN_FLAG_TILED)
+#define WIN_IS_ENABLED(win)	((win)->flags & TEGRA_WIN_FLAG_ENABLED)
+#define WIN_USE_V_FILTER(win)	((win)->h != (win)->out_h)
+#define WIN_USE_H_FILTER(win)	((win)->w != (win)->out_w)
+
+#define NEED_UPDATE_EMC_ON_EVERY_FRAME (windows_idle_detection_time == 0)
+
+/* DDR: 8 bytes transfer per clock */
+#define DDR_BW_TO_FREQ(bw) ((bw) / 8)
+
+#if defined(CONFIG_TEGRA_EMC_TO_DDR_CLOCK)
+#define EMC_BW_TO_FREQ(bw) (DDR_BW_TO_FREQ(bw) * CONFIG_TEGRA_EMC_TO_DDR_CLOCK)
+#else
+#define EMC_BW_TO_FREQ(bw) (DDR_BW_TO_FREQ(bw) * 2)
+#endif
+
+/*
+ * If using T30/DDR3, the 2nd 16 bytes part of DDR3 atom is 2nd line and is
+ * discarded in tiling mode.
+ */
+#if defined(CONFIG_ARCH_TEGRA_2x_SOC)
+#define TILED_WINDOWS_BW_MULTIPLIER 1
+#elif defined(CONFIG_ARCH_TEGRA_3x_SOC)
+#define TILED_WINDOWS_BW_MULTIPLIER 2
+#else
+#warning "need to revisit memory tiling effects on DC"
+#endif
+
 struct tegra_dc;
 
 struct tegra_dc_blend {
@@ -53,8 +81,6 @@ struct tegra_dc_out_ops {
 };
 
 struct tegra_dc {
-	struct list_head		list;
-
 	struct nvhost_device		*ndev;
 	struct tegra_dc_platform_data	*pdata;
 
@@ -64,6 +90,8 @@ struct tegra_dc {
 
 	struct clk			*clk;
 	struct clk			*emc_clk;
+	int				emc_clk_rate;
+	int				new_emc_clk_rate;
 
 	bool				enabled;
 	bool				suspended;
@@ -93,6 +121,7 @@ struct tegra_dc {
 
 	unsigned long			underflow_mask;
 	struct work_struct		reset_work;
+	struct delayed_work		reduce_emc_clk_work;
 
 	struct switch_dev		modeset_switch;
 
@@ -158,6 +187,12 @@ static inline void *tegra_dc_get_outdata(struct tegra_dc *dc)
 	return dc->out_data;
 }
 
+static inline unsigned long tegra_dc_get_default_emc_clk_rate(
+							struct tegra_dc *dc)
+{
+	return dc->pdata->emc_clk_rate ? dc->pdata->emc_clk_rate : ULONG_MAX;
+}
+
 void tegra_dc_setup_clk(struct tegra_dc *dc, struct clk *clk);
 
 extern struct tegra_dc_out_ops tegra_dc_rgb_ops;
diff --git a/drivers/video/tegra/dc/overlay.c b/drivers/video/tegra/dc/overlay.c
index 530944013f58..0272bd8e4107 100644
--- a/drivers/video/tegra/dc/overlay.c
+++ b/drivers/video/tegra/dc/overlay.c
@@ -310,9 +310,11 @@ static void tegra_overlay_flip_worker(struct work_struct *work)
 			dcwins[i] = tegra_dc_get_window(overlay->dc, i);
 
 		tegra_overlay_blend_reorder(&overlay->blend, dcwins);
+		tegra_dc_set_dynamic_emc(dcwins, DC_N_WINDOWS);
 		tegra_dc_update_windows(dcwins, DC_N_WINDOWS);
 		tegra_dc_sync_windows(dcwins, DC_N_WINDOWS);
 	} else {
+		tegra_dc_set_dynamic_emc(wins, nr_win);
 		tegra_dc_update_windows(wins, nr_win);
 		/* TODO: implement swapinterval here */
 		tegra_dc_sync_windows(wins, nr_win);
@@ -380,6 +382,12 @@ static int tegra_overlay_flip(struct tegra_overlay_info *overlay,
 
 	queue_work(overlay->flip_wq, &data->work);
 
+	/*
+	 * Before the queued flip_wq work gets scheduled, we set the EMC clock
+	 * to the default value in order to do the FLIP without glitches.
+	 */
+	tegra_dc_set_default_emc(overlay->dc);
+
 	args->post_syncpt_val = syncpt_max;
 	args->post_syncpt_id = tegra_dc_get_syncpt_id(overlay->dc);
 	mutex_unlock(&tegra_flip_lock);
diff --git a/drivers/video/tegra/fb.c b/drivers/video/tegra/fb.c
index 913feeb49796..2954661a4064 100644
--- a/drivers/video/tegra/fb.c
+++ b/drivers/video/tegra/fb.c
@@ -279,6 +279,8 @@ static int tegra_fb_pan_display(struct fb_var_screeninfo *var,
 		tegra_fb->win->phys_addr = addr;
 		/* TODO: update virt_addr */
 
+		tegra_dc_set_default_emc(tegra_fb->win->dc);
+		tegra_dc_set_dynamic_emc(&tegra_fb->win, 1);
 		tegra_dc_update_windows(&tegra_fb->win, 1);
 		tegra_dc_sync_windows(&tegra_fb->win, 1);
 	}
@@ -489,6 +491,7 @@ static void tegra_fb_flip_worker(struct work_struct *work)
 #endif
 	}
 
+	tegra_dc_set_dynamic_emc(wins, nr_win);
 	tegra_dc_update_windows(wins, nr_win);
 	/* TODO: implement swapinterval here */
 	tegra_dc_sync_windows(wins, nr_win);
@@ -546,6 +549,12 @@ static int tegra_fb_flip(struct tegra_fb_info *tegra_fb,
 
 	queue_work(tegra_fb->flip_wq, &data->work);
 
+	/*
+	 * Before the queued flip_wq work gets scheduled, we set the EMC clock
+	 * to the default value in order to do the FLIP without glitches.
+	 */
+	tegra_dc_set_default_emc(tegra_fb->win->dc);
+
 	args->post_syncpt_val = syncpt_max;
 	args->post_syncpt_id = tegra_dc_get_syncpt_id(tegra_fb->win->dc);
 
@@ -840,6 +849,8 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device *ndev,
 	dev_info(&ndev->dev, "probed\n");
 
 	if (fb_data->flags & TEGRA_FB_FLIP_ON_PROBE) {
+		tegra_dc_set_default_emc(tegra_fb->win->dc);
+		tegra_dc_set_dynamic_emc(&tegra_fb->win, 1);
 		tegra_dc_update_windows(&tegra_fb->win, 1);
 		tegra_dc_sync_windows(&tegra_fb->win, 1);
 	}
author	Xin Xie <xxie@nvidia.com>	2011-06-03 20:47:14 -0700
committer	Niket Sirsi <nsirsi@nvidia.com>	2011-06-24 21:30:03 -0700
commit	08bb9eed94479a37999be169c434d561d0074c49 (patch)
tree	a0b678a3146c1084049134fe0490cff1273998c0
parent	683092ca422d64cb7d72d70763d1100812abfd58 (diff)