diff options
author | Marcel Ziswiler <marcel.ziswiler@toradex.com> | 2012-11-12 15:28:39 +0100 |
---|---|---|
committer | Marcel Ziswiler <marcel.ziswiler@toradex.com> | 2012-11-12 15:28:39 +0100 |
commit | f987e832a9e79d2ce8009a5ea9c7b677624b3b30 (patch) | |
tree | 0dd09a5e6b4c60ee0a9916907dfc2cda83f3e496 /drivers/video | |
parent | f737b7f46a72c099cf8ac88baff02fbf61b1a47c (diff) | |
parent | fc993d9bc48f772133d8cd156c67c296477db070 (diff) |
Merge branch 'l4t/l4t-r16-r2' into colibri
Conflicts:
arch/arm/mach-tegra/tegra3_usb_phy.c
arch/arm/mach-tegra/usb_phy.c
drivers/usb/gadget/tegra_udc.c
drivers/usb/otg/Makefile
drivers/video/tegra/fb.c
sound/soc/tegra/tegra_pcm.c
Diffstat (limited to 'drivers/video')
39 files changed, 1540 insertions, 555 deletions
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index debd41c9313e..400cf43c5972 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o obj-$(CONFIG_BACKLIGHT_PROGEAR) += progear_bl.o obj-$(CONFIG_BACKLIGHT_CARILLO_RANCH) += cr_bllcd.o obj-$(CONFIG_BACKLIGHT_PWM) += pwm_bl.o +CFLAGS_tegra_pwm_bl.o = -Werror obj-$(CONFIG_BACKLIGHT_TEGRA_PWM) += tegra_pwm_bl.o obj-$(CONFIG_BACKLIGHT_DA903X) += da903x_bl.o obj-$(CONFIG_BACKLIGHT_MAX8925) += max8925_bl.o diff --git a/drivers/video/tegra/Kconfig b/drivers/video/tegra/Kconfig index b5540a5793b2..68cf54264403 100644 --- a/drivers/video/tegra/Kconfig +++ b/drivers/video/tegra/Kconfig @@ -119,7 +119,7 @@ config NVMAP_PAGE_POOL_SIZE default 0x0 config NVMAP_CACHE_MAINT_BY_SET_WAYS - bool "Enalbe cache maintenance by set/ways" + bool "Enable cache maintenance by set/ways" depends on TEGRA_NVMAP help Say Y here to reduce cache maintenance overhead by MVA. @@ -127,6 +127,14 @@ config NVMAP_CACHE_MAINT_BY_SET_WAYS where inner cache includes only L1. For the systems, where inner cache includes L1 and L2, keep this option disabled. +config NVMAP_OUTER_CACHE_MAINT_BY_SET_WAYS + bool "Enable outer cache maintenance by set/ways" + depends on TEGRA_NVMAP + help + Say Y here if you want to optimize cache maintenance for ranges + bigger than size of outer cache. This option has no effect on + system without outer cache. + config NVMAP_VPR bool "Enable VPR Heap." depends on TEGRA_NVMAP @@ -135,6 +143,15 @@ config NVMAP_VPR Say Y here to enable Video Protection Region(VPR) heap. if unsure, say N. +config NVMAP_FORCE_ZEROED_USER_PAGES + bool "Only alloc zeroed pages for user space" + depends on TEGRA_NVMAP + help + Say Y here to force zeroing of pages allocated for user space. This + avoids leaking kernel secure data to user space. 
This can add + significant overhead to allocation operations depending on the + allocation size requested. + config TEGRA_DSI bool "Enable DSI panel." default n diff --git a/drivers/video/tegra/dc/csc.c b/drivers/video/tegra/dc/csc.c index 74fa900352a1..09db5fee4c6f 100644 --- a/drivers/video/tegra/dc/csc.c +++ b/drivers/video/tegra/dc/csc.c @@ -54,10 +54,12 @@ int tegra_dc_update_csc(struct tegra_dc *dc, int win_idx) return -EFAULT; } + tegra_dc_hold_dc_out(dc); tegra_dc_writel(dc, WINDOW_A_SELECT << win_idx, DC_CMD_DISPLAY_WINDOW_HEADER); tegra_dc_set_csc(dc, &dc->windows[win_idx].csc); + tegra_dc_release_dc_out(dc); mutex_unlock(&dc->lock); diff --git a/drivers/video/tegra/dc/dc.c b/drivers/video/tegra/dc/dc.c index 1f7e2ce67682..d01df2f520b8 100644 --- a/drivers/video/tegra/dc/dc.c +++ b/drivers/video/tegra/dc/dc.c @@ -82,7 +82,7 @@ struct tegra_dc *tegra_dcs[TEGRA_MAX_DC]; DEFINE_MUTEX(tegra_dc_lock); DEFINE_MUTEX(shared_lock); -static inline void tegra_dc_clk_enable(struct tegra_dc *dc) +void tegra_dc_clk_enable(struct tegra_dc *dc) { if (!tegra_is_clk_enabled(dc->clk)) { clk_enable(dc->clk); @@ -90,7 +90,7 @@ static inline void tegra_dc_clk_enable(struct tegra_dc *dc) } } -static inline void tegra_dc_clk_disable(struct tegra_dc *dc) +void tegra_dc_clk_disable(struct tegra_dc *dc) { if (tegra_is_clk_enabled(dc->clk)) { clk_disable(dc->clk); @@ -98,6 +98,18 @@ static inline void tegra_dc_clk_disable(struct tegra_dc *dc) } } +void tegra_dc_hold_dc_out(struct tegra_dc *dc) +{ + if (dc->out_ops->hold) + dc->out_ops->hold(dc); +} + +void tegra_dc_release_dc_out(struct tegra_dc *dc) +{ + if (dc->out_ops->release) + dc->out_ops->release(dc); +} + #define DUMP_REG(a) do { \ snprintf(buff, sizeof(buff), "%-32s\t%03x\t%08lx\n", \ #a, a, tegra_dc_readl(dc, a)); \ @@ -121,8 +133,9 @@ static void _dump_regs(struct tegra_dc *dc, void *data, int i; char buff[256]; + mutex_lock(&dc->lock); + tegra_dc_hold_dc_out(dc); tegra_dc_io_start(dc); - tegra_dc_clk_enable(dc); 
DUMP_REG(DC_CMD_DISPLAY_COMMAND_OPTION0); DUMP_REG(DC_CMD_DISPLAY_COMMAND); @@ -272,8 +285,9 @@ static void _dump_regs(struct tegra_dc *dc, void *data, DUMP_REG(DC_COM_PM1_DUTY_CYCLE); DUMP_REG(DC_DISP_SD_CONTROL); - tegra_dc_clk_disable(dc); tegra_dc_io_end(dc); + tegra_dc_release_dc_out(dc); + mutex_unlock(&dc->lock); } #undef DUMP_REG @@ -488,9 +502,13 @@ int tegra_dc_get_stride(struct tegra_dc *dc, unsigned win) if (!dc->enabled) return 0; BUG_ON(win > DC_N_WINDOWS); + mutex_lock(&dc->lock); + tegra_dc_hold_dc_out(dc); tegra_dc_writel(dc, WINDOW_A_SELECT << win, DC_CMD_DISPLAY_WINDOW_HEADER); stride = tegra_dc_readl(dc, DC_WIN_LINE_STRIDE); + tegra_dc_release_dc_out(dc); + mutex_unlock(&dc->lock); return GET_LINE_STRIDE(stride); } EXPORT_SYMBOL(tegra_dc_get_stride); @@ -550,17 +568,6 @@ static void tegra_dc_set_scaling_filter(struct tegra_dc *dc) } } -void tegra_dc_host_suspend(struct tegra_dc *dc) -{ - tegra_dsi_host_suspend(dc); - tegra_dc_clk_disable(dc); -} - -void tegra_dc_host_resume(struct tegra_dc *dc) { - tegra_dc_clk_enable(dc); - tegra_dsi_host_resume(dc); -} - static inline void disable_dc_irq(unsigned int irq) { disable_irq(irq); @@ -577,9 +584,11 @@ u32 tegra_dc_incr_syncpt_max(struct tegra_dc *dc, int i) u32 max; mutex_lock(&dc->lock); + tegra_dc_hold_dc_out(dc); max = nvhost_syncpt_incr_max_ext(dc->ndev, dc->syncpt[i].id, ((dc->enabled) ? 
1 : 0)); dc->syncpt[i].max = max; + tegra_dc_release_dc_out(dc); mutex_unlock(&dc->lock); return max; @@ -588,11 +597,14 @@ u32 tegra_dc_incr_syncpt_max(struct tegra_dc *dc, int i) void tegra_dc_incr_syncpt_min(struct tegra_dc *dc, int i, u32 val) { mutex_lock(&dc->lock); - if ( dc->enabled ) + if (dc->enabled) { + tegra_dc_hold_dc_out(dc); while (dc->syncpt[i].min < val) { dc->syncpt[i].min++; nvhost_syncpt_cpu_incr_ext(dc->ndev, dc->syncpt[i].id); } + tegra_dc_release_dc_out(dc); + } mutex_unlock(&dc->lock); } @@ -609,8 +621,7 @@ tegra_dc_config_pwm(struct tegra_dc *dc, struct tegra_dc_pwm_params *cfg) return; } - if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) - tegra_dc_host_resume(dc); + tegra_dc_hold_dc_out(dc); ctrl = ((cfg->period << PM_PERIOD_SHIFT) | (cfg->clk_div << PM_CLK_DIVIDER_SHIFT) | @@ -644,6 +655,7 @@ tegra_dc_config_pwm(struct tegra_dc *dc, struct tegra_dc_pwm_params *cfg) break; } tegra_dc_writel(dc, cmd_state, DC_CMD_STATE_ACCESS); + tegra_dc_release_dc_out(dc); mutex_unlock(&dc->lock); } EXPORT_SYMBOL(tegra_dc_config_pwm); @@ -790,6 +802,9 @@ EXPORT_SYMBOL(tegra_dc_get_out_max_pixclock); void tegra_dc_enable_crc(struct tegra_dc *dc) { u32 val; + + mutex_lock(&dc->lock); + tegra_dc_hold_dc_out(dc); tegra_dc_io_start(dc); val = CRC_ALWAYS_ENABLE | CRC_INPUT_DATA_ACTIVE_DATA | @@ -797,15 +812,21 @@ void tegra_dc_enable_crc(struct tegra_dc *dc) tegra_dc_writel(dc, val, DC_COM_CRC_CONTROL); tegra_dc_writel(dc, GENERAL_UPDATE, DC_CMD_STATE_CONTROL); tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL); + tegra_dc_release_dc_out(dc); + mutex_unlock(&dc->lock); } void tegra_dc_disable_crc(struct tegra_dc *dc) { + mutex_lock(&dc->lock); + tegra_dc_hold_dc_out(dc); tegra_dc_writel(dc, 0x0, DC_COM_CRC_CONTROL); tegra_dc_writel(dc, GENERAL_UPDATE, DC_CMD_STATE_CONTROL); tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL); tegra_dc_io_end(dc); + tegra_dc_release_dc_out(dc); + mutex_unlock(&dc->lock); } u32 
tegra_dc_read_checksum_latched(struct tegra_dc *dc) @@ -821,7 +842,11 @@ u32 tegra_dc_read_checksum_latched(struct tegra_dc *dc) * DC_COM_CRC_CHECKSUM_LATCHED is available after VBLANK */ mdelay(TEGRA_CRC_LATCHED_DELAY); + mutex_lock(&dc->lock); + tegra_dc_hold_dc_out(dc); crc = tegra_dc_readl(dc, DC_COM_CRC_CHECKSUM_LATCHED); + tegra_dc_release_dc_out(dc); + mutex_unlock(&dc->lock); crc_error: return crc; } @@ -848,6 +873,36 @@ static inline void enable_dc_irq(unsigned int irq) #endif } +void tegra_dc_get_fbvblank(struct tegra_dc *dc, struct fb_vblank *vblank) +{ + if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) + vblank->flags = FB_VBLANK_HAVE_VSYNC; +} + +int tegra_dc_wait_for_vsync(struct tegra_dc *dc) +{ + int ret = -ENOTTY; + + if (!(dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) || !dc->enabled) + return ret; + + /* + * Logic is as follows + * a) Indicate we need a vblank. + * b) Wait for completion to be signalled from isr. + * c) Initialize completion for next iteration. + */ + + tegra_dc_hold_dc_out(dc); + dc->out->user_needs_vblank = true; + + ret = wait_for_completion_interruptible(&dc->out->user_vblank_comp); + init_completion(&dc->out->user_vblank_comp); + tegra_dc_release_dc_out(dc); + + return ret; +} + static void tegra_dc_vblank(struct work_struct *work) { struct tegra_dc *dc = container_of(work, struct tegra_dc, vblank_work); @@ -860,6 +915,7 @@ static void tegra_dc_vblank(struct work_struct *work) return; } + tegra_dc_hold_dc_out(dc); /* use the new frame's bandwidth setting instead of max(current, new), * skip this if we're using tegra_dc_one_shot_worker() */ if (!(dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)) @@ -886,6 +942,7 @@ static void tegra_dc_vblank(struct work_struct *work) if (!dc->vblank_ref_count) tegra_dc_mask_interrupt(dc, V_BLANK_INT); + tegra_dc_release_dc_out(dc); mutex_unlock(&dc->lock); /* Do the actual brightness update outside of the mutex */ @@ -908,8 +965,8 @@ static void tegra_dc_one_shot_worker(struct work_struct *work) 
/* memory client has gone idle */ tegra_dc_clear_bandwidth(dc); - if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) - tegra_dc_host_suspend(dc); + if (dc->out_ops->idle) + dc->out_ops->idle(dc); mutex_unlock(&dc->lock); } @@ -962,9 +1019,9 @@ static void tegra_dc_underflow_handler(struct tegra_dc *dc) #endif #ifdef CONFIG_ARCH_TEGRA_3x_SOC if (dc->windows[i].underflows > 4) { - printk("%s:dc in underflow state." + trace_printk("%s:window %c in underflow state." " enable UF_LINE_FLUSH to clear up\n", - __func__); + dc->ndev->name, (65 + i)); tegra_dc_writel(dc, UF_LINE_FLUSH, DC_DISP_DISP_MISC_CONTROL); tegra_dc_writel(dc, GENERAL_UPDATE, @@ -996,6 +1053,13 @@ static void tegra_dc_underflow_handler(struct tegra_dc *dc) #ifndef CONFIG_TEGRA_FPGA_PLATFORM static void tegra_dc_one_shot_irq(struct tegra_dc *dc, unsigned long status) { + /* pending user vblank, so wakeup */ + if ((status & (V_BLANK_INT | MSF_INT)) && + (dc->out->user_needs_vblank)) { + dc->out->user_needs_vblank = false; + complete(&dc->out->user_vblank_comp); + } + if (status & V_BLANK_INT) { /* Sync up windows. */ tegra_dc_trigger_windows(dc); @@ -1018,6 +1082,10 @@ static void tegra_dc_continuous_irq(struct tegra_dc *dc, unsigned long status) queue_work(system_freezable_wq, &dc->vblank_work); if (status & FRAME_END_INT) { + struct timespec tm = CURRENT_TIME; + dc->frame_end_timestamp = timespec_to_ns(&tm); + wake_up(&dc->timestamp_wq); + /* Mark the frame_end as complete. 
*/ if (!completion_done(&dc->frame_end_complete)) complete(&dc->frame_end_complete); @@ -1025,6 +1093,22 @@ static void tegra_dc_continuous_irq(struct tegra_dc *dc, unsigned long status) tegra_dc_trigger_windows(dc); } } + +/* XXX: Not sure if we limit look ahead to 1 frame */ +bool tegra_dc_is_within_n_vsync(struct tegra_dc *dc, s64 ts) +{ + BUG_ON(!dc->frametime_ns); + return ((ts - dc->frame_end_timestamp) < dc->frametime_ns); +} + +bool tegra_dc_does_vsync_separate(struct tegra_dc *dc, s64 new_ts, s64 old_ts) +{ + BUG_ON(!dc->frametime_ns); + return (((new_ts - old_ts) > dc->frametime_ns) + || (div_s64((new_ts - dc->frame_end_timestamp), dc->frametime_ns) + != div_s64((old_ts - dc->frame_end_timestamp), + dc->frametime_ns))); +} #endif static irqreturn_t tegra_dc_irq(int irq, void *ptr) @@ -1187,6 +1271,7 @@ static u32 get_syncpt(struct tegra_dc *dc, int idx) static int tegra_dc_init(struct tegra_dc *dc) { int i; + int int_enable; tegra_dc_writel(dc, 0x00000100, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL); if (dc->ndev->id == 0) { @@ -1222,8 +1307,12 @@ static int tegra_dc_init(struct tegra_dc *dc) tegra_dc_writel(dc, 0x00000000, DC_DISP_DISP_MISC_CONTROL); #endif /* enable interrupts for vblank, frame_end and underflows */ - tegra_dc_writel(dc, (FRAME_END_INT | V_BLANK_INT | ALL_UF_INT), - DC_CMD_INT_ENABLE); + int_enable = (FRAME_END_INT | V_BLANK_INT | ALL_UF_INT); + /* for panels with one-shot mode enable tearing effect interrupt */ + if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) + int_enable |= MSF_INT; + + tegra_dc_writel(dc, int_enable, DC_CMD_INT_ENABLE); tegra_dc_writel(dc, ALL_UF_INT, DC_CMD_INT_MASK); tegra_dc_writel(dc, 0x00000000, DC_DISP_BORDER_COLOR); @@ -1272,14 +1361,18 @@ static bool _tegra_dc_controller_enable(struct tegra_dc *dc) tegra_dc_clk_enable(dc); /* do not accept interrupts during initialization */ - tegra_dc_writel(dc, 0, DC_CMD_INT_ENABLE); tegra_dc_writel(dc, 0, DC_CMD_INT_MASK); enable_dc_irq(dc->irq); failed_init = tegra_dc_init(dc); 
if (failed_init) { - _tegra_dc_controller_disable(dc); + tegra_dc_writel(dc, 0, DC_CMD_INT_MASK); + disable_irq(dc->irq); + tegra_dc_clear_bandwidth(dc); + tegra_dc_clk_disable(dc); + if (dc->out && dc->out->disable) + dc->out->disable(); return false; } @@ -1367,19 +1460,14 @@ static bool _tegra_dc_controller_reset_enable(struct tegra_dc *dc) static int _tegra_dc_set_default_videomode(struct tegra_dc *dc) { - return tegra_dc_set_fb_mode(dc, &tegra_dc_hdmi_fallback_mode, 0); -} - -static bool _tegra_dc_enable(struct tegra_dc *dc) -{ if (dc->mode.pclk == 0) { switch (dc->out->type) { case TEGRA_DC_OUT_HDMI: /* DC enable called but no videomode is loaded. Check if HDMI is connected, then set fallback mdoe */ if (tegra_dc_hpd(dc)) { - if (_tegra_dc_set_default_videomode(dc)) - return false; + return tegra_dc_set_fb_mode(dc, + &tegra_dc_hdmi_fallback_mode, 0); } else return false; @@ -1395,12 +1483,24 @@ static bool _tegra_dc_enable(struct tegra_dc *dc) } } + return false; +} + +static bool _tegra_dc_enable(struct tegra_dc *dc) +{ + if (dc->mode.pclk == 0) + return false; + if (!dc->out) return false; tegra_dc_io_start(dc); - return _tegra_dc_controller_enable(dc); + if (!_tegra_dc_controller_enable(dc)) { + tegra_dc_io_end(dc); + return false; + } + return true; } void tegra_dc_enable(struct tegra_dc *dc) @@ -1505,8 +1605,20 @@ void tegra_dc_blank(struct tegra_dc *dc) static void _tegra_dc_disable(struct tegra_dc *dc) { + if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) { + mutex_lock(&dc->one_shot_lock); + cancel_delayed_work_sync(&dc->one_shot_work); + } + + tegra_dc_hold_dc_out(dc); + _tegra_dc_controller_disable(dc); tegra_dc_io_end(dc); + + tegra_dc_release_dc_out(dc); + + if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) + mutex_unlock(&dc->one_shot_lock); } void tegra_dc_disable(struct tegra_dc *dc) @@ -1516,16 +1628,9 @@ void tegra_dc_disable(struct tegra_dc *dc) /* it's important that new underflow work isn't scheduled before the * lock is acquired. 
*/ cancel_delayed_work_sync(&dc->underflow_work); - if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) { - mutex_lock(&dc->one_shot_lock); - cancel_delayed_work_sync(&dc->one_shot_work); - } mutex_lock(&dc->lock); - if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) - tegra_dc_host_resume(dc); - if (dc->enabled) { dc->enabled = false; @@ -1538,8 +1643,6 @@ void tegra_dc_disable(struct tegra_dc *dc) #endif mutex_unlock(&dc->lock); - if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) - mutex_unlock(&dc->one_shot_lock); print_mode_info(dc, dc->mode); } @@ -1605,12 +1708,12 @@ static void tegra_dc_underflow_worker(struct work_struct *work) to_delayed_work(work), struct tegra_dc, underflow_work); mutex_lock(&dc->lock); - if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) - tegra_dc_host_resume(dc); + tegra_dc_hold_dc_out(dc); if (dc->enabled) { tegra_dc_underflow_handler(dc); } + tegra_dc_release_dc_out(dc); mutex_unlock(&dc->lock); } @@ -1721,6 +1824,7 @@ static int tegra_dc_probe(struct nvhost_device *ndev, mutex_init(&dc->one_shot_lock); init_completion(&dc->frame_end_complete); init_waitqueue_head(&dc->wq); + init_waitqueue_head(&dc->timestamp_wq); #ifdef CONFIG_ARCH_TEGRA_2x_SOC INIT_WORK(&dc->reset_work, tegra_dc_reset_worker); #endif @@ -1772,8 +1876,10 @@ static int tegra_dc_probe(struct nvhost_device *ndev, } mutex_lock(&dc->lock); - if (dc->pdata->flags & TEGRA_DC_FLAG_ENABLED) + if (dc->pdata->flags & TEGRA_DC_FLAG_ENABLED) { + _tegra_dc_set_default_videomode(dc); dc->enabled = _tegra_dc_enable(dc); + } mutex_unlock(&dc->lock); /* interrupt handler must be registered before tegra_fb_register() */ @@ -1789,7 +1895,7 @@ static int tegra_dc_probe(struct nvhost_device *ndev, dev_info(&ndev->dev, "probed\n"); if (dc->pdata->fb) { - if (dc->pdata->fb->bits_per_pixel == -1) { + if (dc->enabled && dc->pdata->fb->bits_per_pixel == -1) { unsigned long fmt; tegra_dc_writel(dc, WINDOW_A_SELECT << dc->pdata->fb->win, @@ -1921,8 +2027,10 @@ static int tegra_dc_resume(struct 
nvhost_device *ndev) mutex_lock(&dc->lock); dc->suspended = false; - if (dc->enabled) + if (dc->enabled) { + _tegra_dc_set_default_videomode(dc); _tegra_dc_enable(dc); + } if (dc->out && dc->out->hotplug_init) dc->out->hotplug_init(); diff --git a/drivers/video/tegra/dc/dc_priv.h b/drivers/video/tegra/dc/dc_priv.h index fb1243593587..75c3a2a29658 100644 --- a/drivers/video/tegra/dc/dc_priv.h +++ b/drivers/video/tegra/dc/dc_priv.h @@ -75,6 +75,12 @@ struct tegra_dc_out_ops { void (*enable)(struct tegra_dc *dc); /* disable output. dc clocks are on at this point */ void (*disable)(struct tegra_dc *dc); + /* hold output. keeps dc clocks on. */ + void (*hold)(struct tegra_dc *dc); + /* release output. dc clocks may turn off after this. */ + void (*release)(struct tegra_dc *dc); + /* idle routine of output. dc clocks may turn off after this. */ + void (*idle)(struct tegra_dc *dc); /* suspend output. dc clocks are on at this point */ void (*suspend)(struct tegra_dc *dc); /* resume output. dc clocks are on at this point */ @@ -107,12 +113,14 @@ struct tegra_dc { void *out_data; struct tegra_dc_mode mode; + s64 frametime_ns; struct tegra_dc_win windows[DC_N_WINDOWS]; struct tegra_dc_blend blend; int n_windows; wait_queue_head_t wq; + wait_queue_head_t timestamp_wq; struct mutex lock; struct mutex one_shot_lock; @@ -157,6 +165,7 @@ struct tegra_dc { struct delayed_work underflow_work; u32 one_shot_delay_ms; struct delayed_work one_shot_work; + s64 frame_end_timestamp; }; #define print_mode_info(dc, mode) do { \ @@ -366,9 +375,17 @@ void tegra_dc_disable_crc(struct tegra_dc *dc); void tegra_dc_set_out_pin_polars(struct tegra_dc *dc, const struct tegra_dc_out_pin *pins, const unsigned int n_pins); -/* defined in dc.c, used in bandwidth.c */ +/* defined in dc.c, used in bandwidth.c and ext/dev.c */ unsigned int tegra_dc_has_multiple_dc(void); +/* defined in dc.c, used in dsi.c */ +void tegra_dc_clk_enable(struct tegra_dc *dc); +void tegra_dc_clk_disable(struct tegra_dc *dc); + 
+/* defined in dc.c, used in nvsd.c and dsi.c */ +void tegra_dc_hold_dc_out(struct tegra_dc *dc); +void tegra_dc_release_dc_out(struct tegra_dc *dc); + /* defined in bandwidth.c, used in dc.c */ void tegra_dc_clear_bandwidth(struct tegra_dc *dc); void tegra_dc_program_bandwidth(struct tegra_dc *dc, bool use_new); diff --git a/drivers/video/tegra/dc/dc_sysfs.c b/drivers/video/tegra/dc/dc_sysfs.c index bf27e963f233..09a8e2dbb5b1 100644 --- a/drivers/video/tegra/dc/dc_sysfs.c +++ b/drivers/video/tegra/dc/dc_sysfs.c @@ -313,6 +313,14 @@ static ssize_t nvdps_store(struct device *dev, static DEVICE_ATTR(nvdps, S_IRUGO|S_IWUSR, nvdps_show, nvdps_store); +static ssize_t smart_panel_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "1\n"); +} + +static DEVICE_ATTR(smart_panel, S_IRUGO, smart_panel_show, NULL); + void __devexit tegra_dc_remove_sysfs(struct device *dev) { struct nvhost_device *ndev = to_nvhost_device(dev); @@ -332,6 +340,9 @@ void __devexit tegra_dc_remove_sysfs(struct device *dev) if (sd_settings) nvsd_remove_sysfs(dev); + + if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) + device_remove_file(dev, &dev_attr_smart_panel); } void tegra_dc_create_sysfs(struct device *dev) @@ -355,6 +366,9 @@ void tegra_dc_create_sysfs(struct device *dev) if (sd_settings) error |= nvsd_create_sysfs(dev); + if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) + error |= device_create_file(dev, &dev_attr_smart_panel); + if (error) dev_err(&ndev->dev, "Failed to create sysfs attributes!\n"); } diff --git a/drivers/video/tegra/dc/dsi.c b/drivers/video/tegra/dc/dsi.c index 7ee9375f58f1..d3b1d40d535b 100644 --- a/drivers/video/tegra/dc/dsi.c +++ b/drivers/video/tegra/dc/dsi.c @@ -79,6 +79,8 @@ #define DSI_LP_OP_WRITE 0x1 #define DSI_LP_OP_READ 0x2 +#define DSI_HOST_IDLE_PERIOD 1000 + static bool enable_read_debug; module_param(enable_read_debug, bool, 0644); MODULE_PARM_DESC(enable_read_debug, @@ -129,6 +131,11 @@ struct 
tegra_dc_dsi_data { bool ulpm; bool enabled; bool host_suspended; + struct mutex host_resume_lock; + struct delayed_work idle_work; + unsigned long idle_delay; + spinlock_t host_ref_lock; + u8 host_ref; u8 driven_mode; u8 controller_index; @@ -294,6 +301,10 @@ const u32 init_reg[] = { DSI_PKT_LEN_6_7, }; +static int tegra_dsi_host_suspend(struct tegra_dc *dc); +static int tegra_dsi_host_resume(struct tegra_dc *dc); +static void tegra_dc_dsi_idle_work(struct work_struct *work); + inline unsigned long tegra_dsi_readl(struct tegra_dc_dsi_data *dsi, u32 reg) { unsigned long ret; @@ -331,6 +342,7 @@ static int dbg_dsi_show(struct seq_file *s, void *unused) DUMP_REG(DSI_CTXSW); DUMP_REG(DSI_POWER_CONTROL); DUMP_REG(DSI_INT_ENABLE); + DUMP_REG(DSI_HOST_DSI_CONTROL); DUMP_REG(DSI_CONTROL); DUMP_REG(DSI_SOL_DELAY); DUMP_REG(DSI_MAX_THRESHOLD); @@ -650,6 +662,13 @@ static void tegra_dsi_init_sw(struct tegra_dc *dc, dsi->info.video_clock_mode = TEGRA_DSI_VIDEO_CLOCK_CONTINUOUS; } + dsi->host_ref = 0; + dsi->host_suspended = false; + spin_lock_init(&dsi->host_ref_lock); + mutex_init(&dsi->host_resume_lock); + init_completion(&dc->out->user_vblank_comp); + INIT_DELAYED_WORK(&dsi->idle_work, tegra_dc_dsi_idle_work); + dsi->idle_delay = msecs_to_jiffies(DSI_HOST_IDLE_PERIOD); } #define SELECT_T_PHY(platform_t_phy_ns, default_phy, clk_ns, hw_inc) ( \ @@ -1341,9 +1360,38 @@ static void tegra_dsi_reset_underflow_overflow } } +static void tegra_dsi_soft_reset(struct tegra_dc_dsi_data *dsi) +{ + u32 trigger; + + tegra_dsi_writel(dsi, + DSI_POWER_CONTROL_LEG_DSI_ENABLE(TEGRA_DSI_DISABLE), + DSI_POWER_CONTROL); + /* stabilization delay */ + udelay(300); + + tegra_dsi_writel(dsi, + DSI_POWER_CONTROL_LEG_DSI_ENABLE(TEGRA_DSI_ENABLE), + DSI_POWER_CONTROL); + /* stabilization delay */ + udelay(300); + + /* dsi HW does not clear host trigger bit automatically + * on dsi interface disable if host fifo is empty or in mid + * of host transmission + */ + trigger = tegra_dsi_readl(dsi, 
DSI_TRIGGER); + if (trigger) + tegra_dsi_writel(dsi, 0x0, DSI_TRIGGER); +} + static void tegra_dsi_stop_dc_stream(struct tegra_dc *dc, struct tegra_dc_dsi_data *dsi) { + /* Mask the MSF interrupt. */ + if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) + tegra_dc_mask_interrupt(dc, MSF_INT); + tegra_dc_writel(dc, DISP_CTRL_MODE_STOP, DC_CMD_DISPLAY_COMMAND); tegra_dc_writel(dc, 0, DC_DISP_DISP_WIN_OPTIONS); tegra_dc_writel(dc, GENERAL_UPDATE, DC_CMD_STATE_CONTROL); @@ -1359,13 +1407,13 @@ static void tegra_dsi_stop_dc_stream_at_frame_end(struct tegra_dc *dc, long timeout; u32 frame_period = DIV_ROUND_UP(S_TO_MS(1), dsi->info.refresh_rate); - /* stop dc */ - tegra_dsi_stop_dc_stream(dc, dsi); + INIT_COMPLETION(dc->frame_end_complete); - /* enable frame end interrupt */ + /* unmask frame end interrupt */ val = tegra_dc_readl(dc, DC_CMD_INT_MASK); - val |= FRAME_END_INT; - tegra_dc_writel(dc, val, DC_CMD_INT_MASK); + tegra_dc_writel(dc, val | FRAME_END_INT, DC_CMD_INT_MASK); + + tegra_dsi_stop_dc_stream(dc, dsi); /* wait for frame_end completion. * timeout is 2 frame duration to accomodate for @@ -1375,9 +1423,14 @@ static void tegra_dsi_stop_dc_stream_at_frame_end(struct tegra_dc *dc, &dc->frame_end_complete, msecs_to_jiffies(2 * frame_period)); - /* disable frame end interrupt */ - val = tegra_dc_readl(dc, DC_CMD_INT_MASK); - val &= ~FRAME_END_INT; + /* give 2 line time to dsi HW to catch up + * with pixels sent by dc + */ + udelay(50); + + tegra_dsi_soft_reset(dsi); + + /* reinstate interrupt mask */ tegra_dc_writel(dc, val, DC_CMD_INT_MASK); if (timeout == 0) @@ -1419,6 +1472,9 @@ static void tegra_dsi_start_dc_stream(struct tegra_dc *dc, tegra_dc_writel(dc, GENERAL_UPDATE, DC_CMD_STATE_CONTROL); tegra_dc_writel(dc, GENERAL_ACT_REQ | NC_HOST_TRIG, DC_CMD_STATE_CONTROL); + + /* Unmask the MSF interrupt. 
*/ + tegra_dc_unmask_interrupt(dc, MSF_INT); } else { /* set continuous mode */ tegra_dc_writel(dc, DISP_CTRL_MODE_C_DISPLAY, @@ -1559,7 +1615,8 @@ static void tegra_dsi_set_control_reg_lp(struct tegra_dc_dsi_data *dsi) dsi->status.vtype = DSI_VIDEO_TYPE_NOT_INIT; } -static void tegra_dsi_set_control_reg_hs(struct tegra_dc_dsi_data *dsi) +static void tegra_dsi_set_control_reg_hs(struct tegra_dc_dsi_data *dsi, + u8 driven_mode) { u32 dsi_control; u32 host_dsi_control; @@ -1571,7 +1628,7 @@ static void tegra_dsi_set_control_reg_hs(struct tegra_dc_dsi_data *dsi) max_threshold = 0; dcs_cmd = 0; - if (dsi->driven_mode == TEGRA_DSI_DRIVEN_BY_HOST) { + if (driven_mode == TEGRA_DSI_DRIVEN_BY_HOST) { dsi_control |= DSI_CTRL_HOST_DRIVEN; host_dsi_control |= HOST_DSI_CTRL_HOST_DRIVEN; max_threshold = @@ -1583,17 +1640,19 @@ static void tegra_dsi_set_control_reg_hs(struct tegra_dc_dsi_data *dsi) max_threshold = DSI_MAX_THRESHOLD_MAX_THRESHOLD(DSI_VIDEO_FIFO_DEPTH); dsi->status.driven = DSI_DRIVEN_MODE_DC; - } - if (dsi->info.video_data_type == TEGRA_DSI_VIDEO_TYPE_COMMAND_MODE) { - dsi_control |= DSI_CTRL_CMD_MODE; - dcs_cmd = DSI_DCS_CMDS_LT5_DCS_CMD(DSI_WRITE_MEMORY_START)| - DSI_DCS_CMDS_LT3_DCS_CMD(DSI_WRITE_MEMORY_CONTINUE); - dsi->status.vtype = DSI_VIDEO_TYPE_CMD_MODE; - - } else { - dsi_control |= DSI_CTRL_VIDEO_MODE; - dsi->status.vtype = DSI_VIDEO_TYPE_VIDEO_MODE; + if (dsi->info.video_data_type == + TEGRA_DSI_VIDEO_TYPE_COMMAND_MODE) { + dsi_control |= DSI_CTRL_CMD_MODE; + dcs_cmd = DSI_DCS_CMDS_LT5_DCS_CMD( + DSI_WRITE_MEMORY_START)| + DSI_DCS_CMDS_LT3_DCS_CMD( + DSI_WRITE_MEMORY_CONTINUE); + dsi->status.vtype = DSI_VIDEO_TYPE_CMD_MODE; + } else { + dsi_control |= DSI_CTRL_VIDEO_MODE; + dsi->status.vtype = DSI_VIDEO_TYPE_VIDEO_MODE; + } } tegra_dsi_writel(dsi, max_threshold, DSI_MAX_THRESHOLD); @@ -1734,6 +1793,7 @@ static int tegra_dsi_set_to_lp_mode(struct tegra_dc *dc, dsi->status.lphs = DSI_LPHS_IN_LP_MODE; dsi->status.lp_op = lp_op; + dsi->driven_mode = 
TEGRA_DSI_DRIVEN_BY_HOST; success: err = 0; fail: @@ -1741,7 +1801,8 @@ fail: } static int tegra_dsi_set_to_hs_mode(struct tegra_dc *dc, - struct tegra_dc_dsi_data *dsi) + struct tegra_dc_dsi_data *dsi, + u8 driven_mode) { int err; @@ -1750,9 +1811,12 @@ static int tegra_dsi_set_to_hs_mode(struct tegra_dc *dc, goto fail; } - if (dsi->status.lphs == DSI_LPHS_IN_HS_MODE) + if (dsi->status.lphs == DSI_LPHS_IN_HS_MODE && + dsi->driven_mode == driven_mode) goto success; + dsi->driven_mode = driven_mode; + if (dsi->status.dc_stream == DSI_DC_STREAM_ENABLE) tegra_dsi_stop_dc_stream_at_frame_end(dc, dsi); @@ -1767,14 +1831,14 @@ static int tegra_dsi_set_to_hs_mode(struct tegra_dc *dc, tegra_dsi_set_phy_timing(dsi, DSI_LPHS_IN_HS_MODE); - if (dsi->driven_mode == TEGRA_DSI_DRIVEN_BY_DC) { + if (driven_mode == TEGRA_DSI_DRIVEN_BY_DC) { tegra_dsi_set_pkt_seq(dc, dsi); tegra_dsi_set_pkt_length(dc, dsi); tegra_dsi_set_sol_delay(dc, dsi); tegra_dsi_set_dc_clk(dc, dsi); } - tegra_dsi_set_control_reg_hs(dsi); + tegra_dsi_set_control_reg_hs(dsi, driven_mode); if (dsi->status.clk_out == DSI_PHYCLK_OUT_DIS || dsi->info.enable_hs_clock_on_lp_cmd_mode) @@ -1845,35 +1909,6 @@ fail: return (err < 0 ? 
true : false); } -static void tegra_dsi_soft_reset(struct tegra_dc_dsi_data *dsi) -{ - u32 trigger; - u32 status; - - tegra_dsi_writel(dsi, - DSI_POWER_CONTROL_LEG_DSI_ENABLE(TEGRA_DSI_DISABLE), - DSI_POWER_CONTROL); - /* stabilization delay */ - udelay(300); - - tegra_dsi_writel(dsi, - DSI_POWER_CONTROL_LEG_DSI_ENABLE(TEGRA_DSI_ENABLE), - DSI_POWER_CONTROL); - /* stabilization delay */ - udelay(300); - - /* dsi HW does not clear host trigger bit automatically - * on dsi interface disable if host fifo is empty - */ - trigger = tegra_dsi_readl(dsi, DSI_TRIGGER); - status = tegra_dsi_readl(dsi, DSI_STATUS); - if (trigger & DSI_TRIGGER_HOST_TRIGGER(0x1) && - status & DSI_STATUS_IDLE(0x1)) { - trigger &= ~(DSI_TRIGGER_HOST_TRIGGER(0x1)); - tegra_dsi_writel(dsi, trigger, DSI_TRIGGER); - } -} - static void tegra_dsi_reset_read_count(struct tegra_dc_dsi_data *dsi) { u32 val; @@ -1892,49 +1927,42 @@ static struct dsi_status *tegra_dsi_save_state_switch_to_host_cmd_mode( struct tegra_dc *dc, u8 lp_op) { - struct dsi_status *init_status; + struct dsi_status *init_status = NULL; int err; + if (dsi->status.init != DSI_MODULE_INIT || + dsi->status.lphs == DSI_LPHS_NOT_INIT) { + err = -EPERM; + goto fail; + } + init_status = kzalloc(sizeof(*init_status), GFP_KERNEL); if (!init_status) return ERR_PTR(-ENOMEM); *init_status = dsi->status; - if (dsi->status.lphs == DSI_LPHS_IN_HS_MODE) { - if (dsi->status.driven == DSI_DRIVEN_MODE_DC) { - if (dsi->status.dc_stream == DSI_DC_STREAM_ENABLE) - tegra_dsi_stop_dc_stream_at_frame_end(dc, dsi); - dsi->driven_mode = TEGRA_DSI_DRIVEN_BY_HOST; - if (dsi->info.hs_cmd_mode_supported) { - err = tegra_dsi_set_to_hs_mode(dc, dsi); - if (err < 0) { - dev_err(&dc->ndev->dev, - "Switch to HS host mode failed\n"); - goto fail; - } - } - } - if (!dsi->info.hs_cmd_mode_supported) { - err = - tegra_dsi_set_to_lp_mode(dc, dsi, lp_op); - if (err < 0) { - dev_err(&dc->ndev->dev, - "DSI failed to go to LP mode\n"); - goto fail; - } - } - } else if 
(dsi->status.lphs == DSI_LPHS_IN_LP_MODE) { - if (dsi->status.lp_op != lp_op) { - err = tegra_dsi_set_to_lp_mode(dc, dsi, lp_op); - if (err < 0) { - dev_err(&dc->ndev->dev, - "DSI failed to go to LP mode\n"); - goto fail; - } + if (dsi->info.hs_cmd_mode_supported) { + err = tegra_dsi_set_to_hs_mode(dc, dsi, + TEGRA_DSI_DRIVEN_BY_HOST); + if (err < 0) { + dev_err(&dc->ndev->dev, + "Switch to HS host mode failed\n"); + goto fail; } + + goto success; } + if (dsi->status.lp_op != lp_op) { + err = tegra_dsi_set_to_lp_mode(dc, dsi, lp_op); + if (err < 0) { + dev_err(&dc->ndev->dev, + "DSI failed to go to LP mode\n"); + goto fail; + } + } +success: return init_status; fail: kfree(init_status); @@ -1948,6 +1976,7 @@ static struct dsi_status *tegra_dsi_prepare_host_transmission( { int err = 0; struct dsi_status *init_status; + bool restart_dc_stream = false; if (dsi->status.init != DSI_MODULE_INIT || dsi->ulpm) { @@ -1955,12 +1984,13 @@ static struct dsi_status *tegra_dsi_prepare_host_transmission( goto fail; } + if (dsi->status.dc_stream == DSI_DC_STREAM_ENABLE) { + restart_dc_stream = true; + tegra_dsi_stop_dc_stream_at_frame_end(dc, dsi); + } + if (tegra_dsi_host_busy(dsi)) { tegra_dsi_soft_reset(dsi); - - /* WAR to stop host write in middle */ - tegra_dsi_writel(dsi, TEGRA_DSI_DISABLE, DSI_TRIGGER); - if (tegra_dsi_host_busy(dsi)) { err = -EBUSY; dev_err(&dc->ndev->dev, "DSI host busy\n"); @@ -1987,6 +2017,9 @@ static struct dsi_status *tegra_dsi_prepare_host_transmission( goto fail; } + if (restart_dc_stream) + init_status->dc_stream = DSI_DC_STREAM_ENABLE; + return init_status; fail: return ERR_PTR(err); @@ -1996,50 +2029,30 @@ static int tegra_dsi_restore_state(struct tegra_dc *dc, struct tegra_dc_dsi_data *dsi, struct dsi_status *init_status) { - bool switch_back_to_dc_mode = false; - bool switch_back_to_hs_mode = false; - bool restart_dc_stream; int err = 0; - switch_back_to_dc_mode = (dsi->status.driven == - DSI_DRIVEN_MODE_HOST && - init_status->driven == - 
DSI_DRIVEN_MODE_DC); - switch_back_to_hs_mode = (dsi->status.lphs == - DSI_LPHS_IN_LP_MODE && - init_status->lphs == - DSI_LPHS_IN_HS_MODE); - restart_dc_stream = (dsi->status.dc_stream == - DSI_DC_STREAM_DISABLE && - init_status->dc_stream == - DSI_DC_STREAM_ENABLE); - - if (dsi->status.lphs == DSI_LPHS_IN_LP_MODE && - init_status->lphs == DSI_LPHS_IN_LP_MODE) { - if (dsi->status.lp_op != init_status->lp_op) { - err = - tegra_dsi_set_to_lp_mode(dc, dsi, init_status->lp_op); - if (err < 0) { - dev_err(&dc->ndev->dev, - "Failed to config LP mode\n"); - goto fail; - } + if (init_status->lphs == DSI_LPHS_IN_LP_MODE) { + err = tegra_dsi_set_to_lp_mode(dc, dsi, init_status->lp_op); + if (err < 0) { + dev_err(&dc->ndev->dev, + "Failed to config LP mode\n"); + goto fail; } goto success; } - if (switch_back_to_dc_mode) - dsi->driven_mode = TEGRA_DSI_DRIVEN_BY_DC; - if (switch_back_to_dc_mode || switch_back_to_hs_mode) { - err = tegra_dsi_set_to_hs_mode(dc, dsi); + if (init_status->lphs == DSI_LPHS_IN_HS_MODE) { + u8 driven = (init_status->driven == DSI_DRIVEN_MODE_DC) ? 
+ TEGRA_DSI_DRIVEN_BY_DC : TEGRA_DSI_DRIVEN_BY_HOST; + err = tegra_dsi_set_to_hs_mode(dc, dsi, driven); if (err < 0) { dev_err(&dc->ndev->dev, "Failed to config HS mode\n"); goto fail; } } - if (restart_dc_stream) - tegra_dsi_start_dc_stream(dc, dsi); + if (init_status->dc_stream == DSI_DC_STREAM_ENABLE) + tegra_dsi_start_dc_stream(dc, dsi); success: fail: kfree(init_status); @@ -2081,7 +2094,6 @@ static int _tegra_dsi_write_data(struct tegra_dc_dsi_data *dsi, u8 *pdata, u8 data_id, u16 data_len) { u8 virtual_channel; - u8 *pval; u32 val; int err; @@ -2104,10 +2116,9 @@ static int _tegra_dsi_write_data(struct tegra_dc_dsi_data *dsi, pdata += 4; } else { val = 0; - pval = (u8 *) &val; - do - *pval++ = *pdata++; - while (--data_len); + memcpy(&val, pdata, data_len); + pdata += data_len; + data_len = 0; } tegra_dsi_writel(dsi, val, DSI_WR_DATA); } @@ -2120,6 +2131,49 @@ static int _tegra_dsi_write_data(struct tegra_dc_dsi_data *dsi, return err; } +static void tegra_dc_dsi_hold_host(struct tegra_dc *dc) +{ + struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc); + + if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) { + spin_lock(&dsi->host_ref_lock); + dsi->host_ref++; + spin_unlock(&dsi->host_ref_lock); + tegra_dsi_host_resume(dc); + + /* + * Take an extra refrence to count for the clk_disable in + * tegra_dc_release_host. 
+ */ + clk_enable(dc->clk); + } +} + +static void tegra_dc_dsi_release_host(struct tegra_dc *dc) +{ + struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc); + if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) { + clk_disable(dc->clk); + spin_lock(&dsi->host_ref_lock); + dsi->host_ref--; + + if (!dsi->host_ref && + (dsi->status.dc_stream == DSI_DC_STREAM_ENABLE)) + schedule_delayed_work(&dsi->idle_work, dsi->idle_delay); + + spin_unlock(&dsi->host_ref_lock); + } +} + +static void tegra_dc_dsi_idle_work(struct work_struct *work) +{ + struct tegra_dc_dsi_data *dsi = container_of( + to_delayed_work(work), struct tegra_dc_dsi_data, idle_work); + + if (dsi->dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) + tegra_dsi_host_suspend(dsi->dc); +} + int tegra_dsi_write_data(struct tegra_dc *dc, struct tegra_dc_dsi_data *dsi, u8 *pdata, u8 data_id, u16 data_len) @@ -2143,6 +2197,7 @@ fail: if (err < 0) dev_err(&dc->ndev->dev, "Failed to restore prev state\n"); tegra_dc_io_end(dc); + return err; } EXPORT_SYMBOL(tegra_dsi_write_data); @@ -2174,7 +2229,7 @@ static int tegra_dsi_send_panel_cmd(struct tegra_dc *dc, return err; } -static u8 get_8bit_ecc(u32 header) +static u8 tegra_dsi_ecc(u32 header) { char ecc_parity[24] = { 0x07, 0x0b, 0x0d, 0x0e, 0x13, 0x15, 0x16, 0x19, @@ -2191,75 +2246,158 @@ static u8 get_8bit_ecc(u32 header) return ecc_byte; } -/* This function is written to send DCS short write (1 parameter) only. - * This means the cmd will contain only 1 byte of index and 1 byte of value. - * The data type ID is fixed at 0x15 and the ECC is calculated based on the - * data in pdata. - * The command will be sent by hardware every frame. - * pdata should contain both the index + value for each cmd. - * data_len will be the total number of bytes in pdata. 
- */ -int tegra_dsi_send_panel_short_cmd(struct tegra_dc *dc, u8 *pdata, u8 data_len) +static u16 tegra_dsi_cs(char *pdata, u16 data_len) { - u8 ecc8bits = 0, data_len_orig = 0; - u32 val = 0, pkthdr = 0; - int err = 0, count = 0; - struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc); + u16 byte_cnt; + u8 bit_cnt; + char curr_byte; + u16 crc = 0xFFFF; + u16 poly = 0x8408; - if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) - tegra_dc_host_resume(dc); + if (data_len > 0) { + for (byte_cnt = 0; byte_cnt < data_len; byte_cnt++) { + curr_byte = pdata[byte_cnt]; + for (bit_cnt = 0; bit_cnt < 8; bit_cnt++) { + if (((crc & 0x0001 ) ^ + (curr_byte & 0x0001)) > 0) + crc = ((crc >> 1) & 0x7FFF) ^ poly; + else + crc = (crc >> 1) & 0x7FFF; - data_len_orig = data_len; - if (pdata != NULL) { - while (data_len) { - if (data_len >= 2) { - pkthdr = (CMD_SHORTW | - (((u16 *)pdata)[0]) << 8 | 0x00 << 24); - ecc8bits = get_8bit_ecc(pkthdr); - val = (pkthdr | (ecc8bits << 24)); - data_len -= 2; - pdata += 2; - count++; + curr_byte = (curr_byte >> 1 ) & 0x7F; } - switch (count) { - case 1: - tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_0); - break; - case 2: - tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_1); - break; - case 3: - tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_2); - break; - case 4: - tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_3); - break; - case 5: - tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_4); - break; - case 6: - tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_5); - break; - case 7: - tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_6); - break; - case 8: - tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_7); - break; - default: - err = 1; - break; + } + } + return crc; +} + +static int tegra_dsi_dcs_pkt_seq_ctrl_init(struct tegra_dc_dsi_data *dsi, + struct tegra_dsi_cmd *cmd) +{ + u8 virtual_channel; + u32 val; + u16 data_len = cmd->sp_len_dly.data_len; + u8 seq_ctrl_reg = 0; + + virtual_channel = dsi->info.virtual_channel << + DSI_VIR_CHANNEL_BIT_POSITION; + + val = 
(virtual_channel | cmd->data_id) << 0 | + data_len << 8; + + val |= tegra_dsi_ecc(val) << 24; + + tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_0 + seq_ctrl_reg++); + + /* if pdata != NULL, pkt type is long pkt */ + if (cmd->pdata != NULL) { + u8 *pdata; + u8 *pdata_mem; + /* allocate memory for pdata + 2 bytes checksum */ + pdata_mem = kzalloc(sizeof(u8) * data_len + 2, GFP_KERNEL); + if (!pdata_mem) { + dev_err(&dsi->dc->ndev->dev, "dsi: memory err\n"); + tegra_dsi_soft_reset(dsi); + return -ENOMEM; + } + + memcpy(pdata_mem, cmd->pdata, data_len); + pdata = pdata_mem; + *((u16 *)(pdata + data_len)) = tegra_dsi_cs(pdata, data_len); + + /* data_len = length of pdata + 2 byte checksum */ + data_len += 2; + + while (data_len) { + if (data_len >= 4) { + val = ((u32 *) pdata)[0]; + data_len -= 4; + pdata += 4; + } else { + val = 0; + memcpy(&val, pdata, data_len); + pdata += data_len; + data_len = 0; } + tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_DATA_0 + + seq_ctrl_reg++); } + kfree(pdata_mem); } - val = DSI_INIT_SEQ_CONTROL_DSI_FRAME_INIT_BYTE_COUNT(data_len_orig * 2) - | DSI_INIT_SEQ_CONTROL_DSI_SEND_INIT_SEQUENCE(1); + return 0; +} + +int tegra_dsi_start_host_cmd_v_blank_dcs(struct tegra_dc_dsi_data * dsi, + struct tegra_dsi_cmd *cmd) +{ +#define PKT_HEADER_LEN_BYTE 4 +#define CHECKSUM_LEN_BYTE 2 + + int err = 0; + u32 val; + u16 tot_pkt_len = PKT_HEADER_LEN_BYTE; + struct tegra_dc *dc = dsi->dc; + + if (cmd->cmd_type != TEGRA_DSI_PACKET_CMD) + return -EINVAL; + + mutex_lock(&dsi->lock); + tegra_dc_dsi_hold_host(dc); + + tegra_dc_io_start(dc); + + + err = tegra_dsi_dcs_pkt_seq_ctrl_init(dsi, cmd); + if (err < 0) { + dev_err(&dsi->dc->ndev->dev, + "dsi: dcs pkt seq ctrl init failed\n"); + goto fail; + } + + if (cmd->pdata) { + u16 data_len = cmd->sp_len_dly.data_len; + tot_pkt_len += data_len + CHECKSUM_LEN_BYTE; + } + + val = DSI_INIT_SEQ_CONTROL_DSI_FRAME_INIT_BYTE_COUNT(tot_pkt_len) | + DSI_INIT_SEQ_CONTROL_DSI_SEND_INIT_SEQUENCE( + TEGRA_DSI_ENABLE); 
tegra_dsi_writel(dsi, val, DSI_INIT_SEQ_CONTROL); +fail: + tegra_dc_io_end(dc); + tegra_dc_dsi_release_host(dc); + mutex_unlock(&dsi->lock); return err; + +#undef PKT_HEADER_LEN_BYTE +#undef CHECKSUM_LEN_BYTE } -EXPORT_SYMBOL(tegra_dsi_send_panel_short_cmd); +EXPORT_SYMBOL(tegra_dsi_start_host_cmd_v_blank_dcs); + +void tegra_dsi_stop_host_cmd_v_blank_dcs(struct tegra_dc_dsi_data * dsi) +{ + struct tegra_dc *dc = dsi->dc; + u32 cnt; + + mutex_lock(&dsi->lock); + tegra_dc_dsi_hold_host(dc); + + tegra_dc_io_start(dc); + + tegra_dsi_writel(dsi, TEGRA_DSI_DISABLE, DSI_INIT_SEQ_CONTROL); + + /* clear seq data registers */ + for (cnt = 0; cnt < 8; cnt++) + tegra_dsi_writel(dsi, 0, DSI_INIT_SEQ_DATA_0 + cnt); + + tegra_dc_io_end(dc); + + tegra_dc_dsi_release_host(dc); + mutex_unlock(&dsi->lock); +} +EXPORT_SYMBOL(tegra_dsi_stop_host_cmd_v_blank_dcs); static int tegra_dsi_bta(struct tegra_dc_dsi_data *dsi) { @@ -2429,6 +2567,7 @@ int tegra_dsi_read_data(struct tegra_dc *dc, int err = 0; struct dsi_status *init_status; + mutex_lock(&dsi->lock); tegra_dc_io_start(dc); init_status = tegra_dsi_prepare_host_transmission( @@ -2487,6 +2626,7 @@ fail: if (err < 0) dev_err(&dc->ndev->dev, "Failed to restore prev state\n"); tegra_dc_io_end(dc); + mutex_unlock(&dsi->lock); return err; } EXPORT_SYMBOL(tegra_dsi_read_data); @@ -2625,7 +2765,8 @@ static void tegra_dsi_send_dc_frames(struct tegra_dc *dc, bool switch_to_lp = (dsi->status.lphs == DSI_LPHS_IN_LP_MODE); if (dsi->status.lphs != DSI_LPHS_IN_HS_MODE) { - err = tegra_dsi_set_to_hs_mode(dc, dsi); + err = tegra_dsi_set_to_hs_mode(dc, dsi, + TEGRA_DSI_DRIVEN_BY_DC); if (err < 0) { dev_err(&dc->ndev->dev, "Switch to HS host mode failed\n"); @@ -2668,11 +2809,10 @@ static void tegra_dc_dsi_enable(struct tegra_dc *dc) int err; u32 val; - tegra_dc_io_start(dc); mutex_lock(&dsi->lock); + tegra_dc_dsi_hold_host(dc); - if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) - tegra_dc_host_resume(dc); + tegra_dc_io_start(dc); /* Stop DC stream 
before configuring DSI registers * to avoid visible glitches on panel during transition * from bootloader to kernel driver @@ -2771,7 +2911,8 @@ static void tegra_dc_dsi_enable(struct tegra_dc *dc) goto fail; } - err = tegra_dsi_set_to_hs_mode(dc, dsi); + err = tegra_dsi_set_to_hs_mode(dc, dsi, + TEGRA_DSI_DRIVEN_BY_DC); if (err < 0) { dev_err(&dc->ndev->dev, "dsi: not able to set to hs mode\n"); @@ -2784,8 +2925,9 @@ static void tegra_dc_dsi_enable(struct tegra_dc *dc) if (dsi->status.driven == DSI_DRIVEN_MODE_DC) tegra_dsi_start_dc_stream(dc, dsi); fail: - mutex_unlock(&dsi->lock); tegra_dc_io_end(dc); + tegra_dc_dsi_release_host(dc); + mutex_unlock(&dsi->lock); } static void _tegra_dc_dsi_init(struct tegra_dc *dc) @@ -3179,8 +3321,7 @@ fail: return err; } - -int tegra_dsi_host_suspend(struct tegra_dc *dc) +static int tegra_dsi_host_suspend(struct tegra_dc *dc) { int err = 0; struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc); @@ -3188,6 +3329,10 @@ int tegra_dsi_host_suspend(struct tegra_dc *dc) if (dsi->host_suspended) return 0; + BUG_ON(!tegra_is_clk_enabled(dc->clk)); + tegra_dc_io_start(dc); + dsi->host_suspended = true; + tegra_dsi_stop_dc_stream(dc, dsi); err = tegra_dsi_deep_sleep(dc, dsi, dsi->info.suspend_aggr); @@ -3195,21 +3340,26 @@ int tegra_dsi_host_suspend(struct tegra_dc *dc) dev_err(&dc->ndev->dev, "DSI failed to enter deep sleep\n"); - dsi->host_suspended = true; + tegra_dc_clk_disable(dc); + tegra_dc_io_end(dc); return err; } - -int tegra_dsi_host_resume(struct tegra_dc *dc) +static int tegra_dsi_host_resume(struct tegra_dc *dc) { int val = 0; int err = 0; struct tegra_dc_dsi_data *dsi = tegra_dc_get_outdata(dc); - if (!dsi->host_suspended) + mutex_lock(&dsi->host_resume_lock); + cancel_delayed_work_sync(&dsi->idle_work); + if (!dsi->host_suspended) { + mutex_unlock(&dsi->host_resume_lock); return 0; + } + tegra_dc_clk_enable(dc); switch (dsi->info.suspend_aggr) { case DSI_HOST_SUSPEND_LV0: tegra_dsi_clk_enable(dsi); @@ -3254,9 +3404,11 
@@ int tegra_dsi_host_resume(struct tegra_dc *dc) "is not supported.\n"); } + tegra_dsi_start_dc_stream(dc, dsi); + dsi->enabled = true; dsi->host_suspended = false; - tegra_dsi_start_dc_stream(dc, dsi); + mutex_unlock(&dsi->host_resume_lock); fail: return err; } @@ -3298,7 +3450,6 @@ static void tegra_dc_dsi_disable(struct tegra_dc *dc) } } } - fail: mutex_unlock(&dsi->lock); tegra_dc_io_end(dc); @@ -3350,6 +3501,8 @@ struct tegra_dc_out_ops tegra_dc_dsi_ops = { .destroy = tegra_dc_dsi_destroy, .enable = tegra_dc_dsi_enable, .disable = tegra_dc_dsi_disable, + .hold = tegra_dc_dsi_hold_host, + .release = tegra_dc_dsi_release_host, #ifdef CONFIG_PM .suspend = tegra_dc_dsi_suspend, .resume = tegra_dc_dsi_resume, diff --git a/drivers/video/tegra/dc/ext/dev.c b/drivers/video/tegra/dc/ext/dev.c index f9c76f8f0d0d..88273e26c51c 100644 --- a/drivers/video/tegra/dc/ext/dev.c +++ b/drivers/video/tegra/dc/ext/dev.c @@ -56,6 +56,7 @@ struct tegra_dc_ext_flip_data { struct tegra_dc_ext *ext; struct work_struct work; struct tegra_dc_ext_flip_win win[DC_N_WINDOWS]; + struct list_head timestamp_node; }; int tegra_dc_ext_get_num_outputs(void) @@ -207,6 +208,7 @@ static int tegra_dc_ext_set_windowattr(struct tegra_dc_ext *ext, { int err = 0; struct tegra_dc_ext_win *ext_win = &ext->win[win->idx]; + s64 timestamp_ns; if (flip_win->handle[TEGRA_DC_Y] == NULL) { win->flags = 0; @@ -270,9 +272,56 @@ static int tegra_dc_ext_set_windowattr(struct tegra_dc_ext *ext, msecs_to_jiffies(500), NULL); } +#ifndef CONFIG_TEGRA_SIMULATION_PLATFORM + timestamp_ns = timespec_to_ns(&flip_win->attr.timestamp); + + if (timestamp_ns) { + /* XXX: Should timestamping be overridden by "no_vsync" flag */ + tegra_dc_config_frame_end_intr(win->dc, true); + trace_printk("%s:Before timestamp wait\n", win->dc->ndev->name); + err = wait_event_interruptible(win->dc->timestamp_wq, + tegra_dc_is_within_n_vsync(win->dc, timestamp_ns)); + trace_printk("%s:After timestamp wait\n", win->dc->ndev->name); + 
tegra_dc_config_frame_end_intr(win->dc, false); + } +#endif + return err; +} + +static void (*flip_callback)(void); +static spinlock_t flip_callback_lock; +static bool init_tegra_dc_flip_callback_called; + +static int __init init_tegra_dc_flip_callback(void) +{ + spin_lock_init(&flip_callback_lock); + init_tegra_dc_flip_callback_called = true; + return 0; +} + +pure_initcall(init_tegra_dc_flip_callback); + +int tegra_dc_set_flip_callback(void (*callback)(void)) +{ + WARN_ON(!init_tegra_dc_flip_callback_called); + + spin_lock(&flip_callback_lock); + flip_callback = callback; + spin_unlock(&flip_callback_lock); + + return 0; +} +EXPORT_SYMBOL(tegra_dc_set_flip_callback); + +int tegra_dc_unset_flip_callback() +{ + spin_lock(&flip_callback_lock); + flip_callback = NULL; + spin_unlock(&flip_callback_lock); return 0; } +EXPORT_SYMBOL(tegra_dc_unset_flip_callback); static void tegra_dc_ext_flip_worker(struct work_struct *work) { @@ -288,9 +337,11 @@ static void tegra_dc_ext_flip_worker(struct work_struct *work) for (i = 0; i < DC_N_WINDOWS; i++) { struct tegra_dc_ext_flip_win *flip_win = &data->win[i]; - int index = flip_win->attr.index; + int j = 0, index = flip_win->attr.index; struct tegra_dc_win *win; struct tegra_dc_ext_win *ext_win; + struct tegra_dc_ext_flip_data *temp = NULL; + s64 head_timestamp = 0; if (index < 0) continue; @@ -302,6 +353,31 @@ static void tegra_dc_ext_flip_worker(struct work_struct *work) (flip_win->attr.flags & TEGRA_DC_EXT_FLIP_FLAG_CURSOR)) skip_flip = true; + mutex_lock(&ext_win->queue_lock); + list_for_each_entry(temp, &ext_win->timestamp_queue, + timestamp_node) { + if (j == 0) { + if (unlikely(temp != data)) + dev_err(&win->dc->ndev->dev, + "work queue did NOT dequeue head!!!"); + else + head_timestamp = + timespec_to_ns(&flip_win->attr.timestamp); + } else { + s64 timestamp = + timespec_to_ns(&temp->win[i].attr.timestamp); + + skip_flip = !tegra_dc_does_vsync_separate(ext->dc, + timestamp, head_timestamp); + /* Look ahead only one flip 
*/ + break; + } + j++; + } + if (!list_empty(&ext_win->timestamp_queue)) + list_del(&data->timestamp_node); + mutex_unlock(&ext_win->queue_lock); + if (win->flags & TEGRA_WIN_FLAG_ENABLED) { int j; for (j = 0; j < TEGRA_DC_NUM_PLANES; j++) { @@ -327,17 +403,23 @@ static void tegra_dc_ext_flip_worker(struct work_struct *work) tegra_dc_update_windows(wins, nr_win); /* TODO: implement swapinterval here */ tegra_dc_sync_windows(wins, nr_win); - } + if (!tegra_dc_has_multiple_dc()) { + spin_lock(&flip_callback_lock); + if (flip_callback) + flip_callback(); + spin_unlock(&flip_callback_lock); + } - for (i = 0; i < DC_N_WINDOWS; i++) { - struct tegra_dc_ext_flip_win *flip_win = &data->win[i]; - int index = flip_win->attr.index; + for (i = 0; i < DC_N_WINDOWS; i++) { + struct tegra_dc_ext_flip_win *flip_win = &data->win[i]; + int index = flip_win->attr.index; - if (index < 0) - continue; + if (index < 0) + continue; - tegra_dc_incr_syncpt_min(ext->dc, index, - flip_win->syncpt_max); + tegra_dc_incr_syncpt_min(ext->dc, index, + flip_win->syncpt_max); + } } /* unpin and deref previous front buffers */ @@ -449,6 +531,7 @@ static int tegra_dc_ext_flip(struct tegra_dc_ext_user *user, struct tegra_dc_ext_flip_data *data; int work_index = -1; int i, ret = 0; + bool has_timestamp = false; #ifdef CONFIG_ANDROID int index_check[DC_N_WINDOWS] = {0, }; @@ -489,6 +572,8 @@ static int tegra_dc_ext_flip(struct tegra_dc_ext_user *user, int index = args->win[i].index; memcpy(&flip_win->attr, &args->win[i], sizeof(flip_win->attr)); + if (timespec_to_ns(&flip_win->attr.timestamp)) + has_timestamp = true; if (index < 0) continue; @@ -563,6 +648,11 @@ static int tegra_dc_ext_flip(struct tegra_dc_ext_user *user, ret = -EINVAL; goto unlock; } + if (has_timestamp) { + mutex_lock(&ext->win[work_index].queue_lock); + list_add_tail(&data->timestamp_node, &ext->win[work_index].timestamp_queue); + mutex_unlock(&ext->win[work_index].queue_lock); + } queue_work(ext->win[work_index].flip_wq, 
&data->work); unlock_windows_for_flip(user, args); @@ -903,6 +993,8 @@ static int tegra_dc_ext_setup_windows(struct tegra_dc_ext *ext) } mutex_init(&win->lock); + mutex_init(&win->queue_lock); + INIT_LIST_HEAD(&win->timestamp_queue); } return 0; diff --git a/drivers/video/tegra/dc/ext/tegra_dc_ext_priv.h b/drivers/video/tegra/dc/ext/tegra_dc_ext_priv.h index f68c7d5c93c2..ef7361d1d933 100644 --- a/drivers/video/tegra/dc/ext/tegra_dc_ext_priv.h +++ b/drivers/video/tegra/dc/ext/tegra_dc_ext_priv.h @@ -58,6 +58,10 @@ struct tegra_dc_ext_win { struct workqueue_struct *flip_wq; atomic_t nr_pending_flips; + + struct mutex queue_lock; + + struct list_head timestamp_queue; }; struct tegra_dc_ext { diff --git a/drivers/video/tegra/dc/hdmi.c b/drivers/video/tegra/dc/hdmi.c index 79478ea48f83..55d9163d4faf 100644 --- a/drivers/video/tegra/dc/hdmi.c +++ b/drivers/video/tegra/dc/hdmi.c @@ -1370,18 +1370,31 @@ bool tegra_dc_hdmi_detect_test(struct tegra_dc *dc, unsigned char *edid_ptr) err = tegra_edid_get_monspecs_test(hdmi->edid, &specs, edid_ptr); if (err < 0) { - dev_err(&dc->ndev->dev, "error reading edid\n"); - goto fail; - } + /* Check if there's a hard-wired mode, if so, enable it */ + if (dc->out->n_modes) + tegra_dc_enable(dc); + else { + dev_err(&dc->ndev->dev, "error reading edid\n"); + goto fail; + } +#ifdef CONFIG_SWITCH + hdmi->hpd_switch.state = 0; + switch_set_state(&hdmi->hpd_switch, 1); +#endif + dev_info(&dc->ndev->dev, "display detected\n"); - err = tegra_edid_get_eld(hdmi->edid, &hdmi->eld); - if (err < 0) { - dev_err(&dc->ndev->dev, "error populating eld\n"); - goto fail; - } - hdmi->eld_retrieved = true; + dc->connected = true; + tegra_dc_ext_process_hotplug(dc->ndev->id); + } else { + err = tegra_edid_get_eld(hdmi->edid, &hdmi->eld); + if (err < 0) { + dev_err(&dc->ndev->dev, "error populating eld\n"); + goto fail; + } + hdmi->eld_retrieved = true; - tegra_dc_hdmi_detect_config(dc, &specs); + tegra_dc_hdmi_detect_config(dc, &specs); + } return true; @@ 
-1406,18 +1419,30 @@ static bool tegra_dc_hdmi_detect(struct tegra_dc *dc) err = tegra_edid_get_monspecs(hdmi->edid, &specs); if (err < 0) { - dev_err(&dc->ndev->dev, "error reading edid\n"); - goto fail; - } + if (dc->out->n_modes) + tegra_dc_enable(dc); + else { + dev_err(&dc->ndev->dev, "error reading edid\n"); + goto fail; + } +#ifdef CONFIG_SWITCH + hdmi->hpd_switch.state = 0; + switch_set_state(&hdmi->hpd_switch, 1); +#endif + dev_info(&dc->ndev->dev, "display detected\n"); - err = tegra_edid_get_eld(hdmi->edid, &hdmi->eld); - if (err < 0) { - dev_err(&dc->ndev->dev, "error populating eld\n"); - goto fail; - } - hdmi->eld_retrieved = true; + dc->connected = true; + tegra_dc_ext_process_hotplug(dc->ndev->id); + } else { + err = tegra_edid_get_eld(hdmi->edid, &hdmi->eld); + if (err < 0) { + dev_err(&dc->ndev->dev, "error populating eld\n"); + goto fail; + } + hdmi->eld_retrieved = true; - tegra_dc_hdmi_detect_config(dc, &specs); + tegra_dc_hdmi_detect_config(dc, &specs); + } return true; diff --git a/drivers/video/tegra/dc/mode.c b/drivers/video/tegra/dc/mode.c index 49cc5f5abd53..3a95f2e7ab0e 100644 --- a/drivers/video/tegra/dc/mode.c +++ b/drivers/video/tegra/dc/mode.c @@ -137,6 +137,17 @@ static bool check_ref_to_sync(struct tegra_dc_mode *mode) return true; } +static s64 calc_frametime_ns(const struct tegra_dc_mode *m) +{ + long h_total, v_total; + h_total = m->h_active + m->h_front_porch + m->h_back_porch + + m->h_sync_width; + v_total = m->v_active + m->v_front_porch + m->v_back_porch + + m->v_sync_width; + return (!m->pclk) ? 
0 : (s64)(div_s64(((s64)h_total * v_total * + 1000000000ULL), m->pclk)); +} + /* return in 1000ths of a Hertz */ int tegra_dc_calc_refresh(const struct tegra_dc_mode *m) { @@ -247,11 +258,25 @@ int tegra_dc_program_mode(struct tegra_dc *dc, struct tegra_dc_mode *mode) return 0; } +static int panel_sync_rate; + +int tegra_dc_get_panel_sync_rate(void) +{ + return panel_sync_rate; +} +EXPORT_SYMBOL(tegra_dc_get_panel_sync_rate); + int tegra_dc_set_mode(struct tegra_dc *dc, const struct tegra_dc_mode *mode) { memcpy(&dc->mode, mode, sizeof(dc->mode)); + if (dc->out->type == TEGRA_DC_OUT_RGB) + panel_sync_rate = tegra_dc_calc_refresh(mode); + else if (dc->out->type == TEGRA_DC_OUT_DSI) + panel_sync_rate = dc->out->dsi->rated_refresh_rate * 1000; + print_mode(dc, mode, __func__); + dc->frametime_ns = calc_frametime_ns(mode); return 0; } diff --git a/drivers/video/tegra/dc/nvsd.c b/drivers/video/tegra/dc/nvsd.c index e3058b596f69..6e76ee0f1702 100644 --- a/drivers/video/tegra/dc/nvsd.c +++ b/drivers/video/tegra/dc/nvsd.c @@ -809,9 +809,12 @@ static ssize_t nvsd_settings_store(struct kobject *kobj, mutex_unlock(&dc->lock); return -ENODEV; } - mutex_unlock(&dc->lock); + tegra_dc_hold_dc_out(dc); nvsd_init(dc, sd_settings); + tegra_dc_release_dc_out(dc); + + mutex_unlock(&dc->lock); /* Update backlight state IFF we're disabling! 
*/ if (!sd_settings->enable && sd_settings->bl_device) { diff --git a/drivers/video/tegra/dc/window.c b/drivers/video/tegra/dc/window.c index 5161dd4f7003..cd91fab428ed 100644 --- a/drivers/video/tegra/dc/window.c +++ b/drivers/video/tegra/dc/window.c @@ -24,6 +24,7 @@ #include "dc_priv.h" static int no_vsync; +static atomic_t frame_end_ref = ATOMIC_INIT(0); module_param_named(no_vsync, no_vsync, int, S_IRUGO | S_IWUSR); @@ -40,6 +41,17 @@ static bool tegra_dc_windows_are_clean(struct tegra_dc_win *windows[], return true; } +int tegra_dc_config_frame_end_intr(struct tegra_dc *dc, bool enable) +{ + tegra_dc_writel(dc, FRAME_END_INT, DC_CMD_INT_STATUS); + if (enable) { + atomic_inc(&frame_end_ref); + tegra_dc_unmask_interrupt(dc, FRAME_END_INT); + } else if (!atomic_dec_return(&frame_end_ref)) + tegra_dc_mask_interrupt(dc, FRAME_END_INT); + return 0; +} + static int get_topmost_window(u32 *depths, unsigned long *wins) { int idx, best = -1; @@ -219,8 +231,7 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n) return -EFAULT; } - if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_LP_MODE) - tegra_dc_host_resume(dc); + tegra_dc_hold_dc_out(dc); if (no_vsync) tegra_dc_writel(dc, WRITE_MUX_ACTIVE | READ_MUX_ACTIVE, @@ -407,8 +418,9 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n) FRAME_END_INT | V_BLANK_INT | ALL_UF_INT); } else { clear_bit(V_BLANK_FLIP, &dc->vblank_ref_count); - tegra_dc_mask_interrupt(dc, - FRAME_END_INT | V_BLANK_INT | ALL_UF_INT); + tegra_dc_mask_interrupt(dc, V_BLANK_INT | ALL_UF_INT); + if (!atomic_read(&frame_end_ref)) + tegra_dc_mask_interrupt(dc, FRAME_END_INT); } if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) @@ -424,6 +436,7 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n) tegra_dc_writel(dc, update_mask, DC_CMD_STATE_CONTROL); trace_printk("%s:update_mask=%#lx\n", dc->ndev->name, update_mask); + tegra_dc_release_dc_out(dc); mutex_unlock(&dc->lock); if (dc->out->flags & 
TEGRA_DC_OUT_ONE_SHOT_MODE) mutex_unlock(&dc->one_shot_lock); @@ -456,7 +469,8 @@ void tegra_dc_trigger_windows(struct tegra_dc *dc) } if (!dirty) { - if (!(dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE)) + if (!(dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) + && !atomic_read(&frame_end_ref)) tegra_dc_mask_interrupt(dc, FRAME_END_INT); } diff --git a/drivers/video/tegra/fb.c b/drivers/video/tegra/fb.c index cb7525e049e9..59c162bb3b4c 100644 --- a/drivers/video/tegra/fb.c +++ b/drivers/video/tegra/fb.c @@ -96,6 +96,7 @@ static int tegra_fb_set_par(struct fb_info *info) { struct tegra_fb_info *tegra_fb = info->par; struct fb_var_screeninfo *var = &info->var; + struct tegra_dc *dc = tegra_fb->win->dc; if (var->bits_per_pixel) { /* we only support RGB ordering for now */ @@ -124,10 +125,13 @@ static int tegra_fb_set_par(struct fb_info *info) default: return -EINVAL; } - info->fix.line_length = var->xres * var->bits_per_pixel / 8; - /* Pad the stride to 16-byte boundary. */ - info->fix.line_length = round_up(info->fix.line_length, + /* if line_length unset, then pad the stride */ + if (!info->fix.line_length) { + info->fix.line_length = var->xres * var->bits_per_pixel + / 8; + info->fix.line_length = round_up(info->fix.line_length, TEGRA_LINEAR_PITCH_ALIGNMENT); + } tegra_fb->win->stride = info->fix.line_length; tegra_fb->win->stride_uv = 0; tegra_fb->win->phys_addr_u = 0; @@ -136,17 +140,30 @@ static int tegra_fb_set_par(struct fb_info *info) if (var->pixclock) { bool stereo; + unsigned old_len = 0; struct fb_videomode m; + struct fb_videomode *old_mode = NULL; fb_var_to_videomode(&m, var); + /* Load framebuffer info with new mode details*/ + old_mode = info->mode; + old_len = info->fix.line_length; + info->mode = (struct fb_videomode *) fb_find_nearest_mode(&m, &info->modelist); if (!info->mode) { dev_warn(&tegra_fb->ndev->dev, "can't match video mode\n"); + info->mode = old_mode; return -EINVAL; } + /* Update fix line_length and window stride as per new mode */ + 
info->fix.line_length = var->xres * var->bits_per_pixel / 8; + info->fix.line_length = round_up(info->fix.line_length, + TEGRA_LINEAR_PITCH_ALIGNMENT); + tegra_fb->win->stride = info->fix.line_length; + /* * only enable stereo if the mode supports it and * client requests it @@ -157,10 +174,22 @@ static int tegra_fb_set_par(struct fb_info *info) #else FB_VMODE_STEREO_LEFT_RIGHT); #endif - tegra_dc_set_fb_mode(tegra_fb->win->dc, info->mode, stereo); - /* Reflect the mode change on dc */ - tegra_dc_disable(tegra_fb->win->dc); - tegra_dc_enable(tegra_fb->win->dc); + + /* Configure DC with new mode */ + if (tegra_dc_set_fb_mode(dc, info->mode, stereo)) { + /* Error while configuring DC, fallback to old mode */ + dev_warn(&tegra_fb->ndev->dev, "can't configure dc with mode %ux%u\n", + info->mode->xres, info->mode->yres); + info->mode = old_mode; + info->fix.line_length = old_len; + tegra_fb->win->stride = old_len; + return -EINVAL; + } + + /* Reflect mode chnage on DC HW */ + if (dc->enabled) + tegra_dc_disable(dc); + tegra_dc_enable(dc); tegra_fb->win->w.full = dfixed_const(info->mode->xres); tegra_fb->win->h.full = dfixed_const(info->mode->yres); @@ -326,8 +355,10 @@ static void tegra_fb_imageblit(struct fb_info *info, static int tegra_fb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg) { + struct tegra_fb_info *tegra_fb = (struct tegra_fb_info *)info->par; struct tegra_fb_modedb modedb; struct fb_modelist *modelist; + struct fb_vblank vblank = {}; int i; switch (cmd) { @@ -370,6 +401,17 @@ static int tegra_fb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long return -EFAULT; break; + case FBIOGET_VBLANK: + tegra_dc_get_fbvblank(tegra_fb->win->dc, &vblank); + + if (copy_to_user( + (void __user *)arg, &vblank, sizeof(vblank))) + return -EFAULT; + break; + + case FBIO_WAITFORVSYNC: + return tegra_dc_wait_for_vsync(tegra_fb->win->dc); + default: return -ENOTTY; } @@ -607,8 +649,10 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device 
*ndev, tegra_fb->valid = true; } + info->fix.line_length = fb_data->xres * fb_data->bits_per_pixel / 8; + stride = tegra_dc_get_stride(dc, 0); - if (!stride) /* default to pad the stride to 16-byte boundary. */ + if (!stride) /* default to pad the stride */ stride = round_up(info->fix.line_length, TEGRA_LINEAR_PITCH_ALIGNMENT); @@ -625,7 +669,6 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device *ndev, info->fix.accel = FB_ACCEL_NONE; info->fix.smem_start = fb_phys; info->fix.smem_len = fb_size; - info->fix.line_length = fb_data->xres * fb_data->bits_per_pixel / 8; info->fix.line_length = stride; info->var.xres = fb_data->xres; @@ -683,6 +726,7 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device *ndev, if (dc->mode.pclk > 1000) { struct tegra_dc_mode *mode = &dc->mode; + struct fb_videomode vmode; if (dc->out->flags & TEGRA_DC_OUT_ONE_SHOT_MODE) info->var.pixclock = KHZ2PICOS(mode->rated_pclk / 1000); @@ -694,6 +738,10 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device *ndev, info->var.lower_margin = mode->v_front_porch; info->var.hsync_len = mode->h_sync_width; info->var.vsync_len = mode->v_sync_width; + + /* Keep info->var consistent with info->modelist. 
*/ + fb_var_to_videomode(&vmode, &info->var); + fb_add_videomode(&vmode, &info->modelist); } return tegra_fb; diff --git a/drivers/video/tegra/host/bus.c b/drivers/video/tegra/host/bus.c index 758a5ca4ad94..f22dac288051 100644 --- a/drivers/video/tegra/host/bus.c +++ b/drivers/video/tegra/host/bus.c @@ -96,7 +96,7 @@ static int nvhost_bus_match(struct device *_dev, struct device_driver *drv) if (ndrv->id_table) return nvhost_bus_match_id(dev, ndrv->id_table) != NULL; else /* driver does not support id_table */ - return !strncmp(dev->name, drv->name, strlen(drv->name)); + return !strcmp(dev->name, drv->name); } static int nvhost_drv_probe(struct device *_dev) diff --git a/drivers/video/tegra/host/bus_client.c b/drivers/video/tegra/host/bus_client.c index 0137793b39ee..aaa038221971 100644 --- a/drivers/video/tegra/host/bus_client.c +++ b/drivers/video/tegra/host/bus_client.c @@ -159,7 +159,8 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp) } filp->private_data = priv; priv->ch = ch; - nvhost_module_add_client(ch->dev, priv); + if(nvhost_module_add_client(ch->dev, priv)) + goto fail; if (ch->ctxhandler && ch->ctxhandler->alloc) { priv->hwctx = ch->ctxhandler->alloc(ch->ctxhandler, ch); diff --git a/drivers/video/tegra/host/chip_support.h b/drivers/video/tegra/host/chip_support.h index f5d2811f143f..412ce8b65466 100644 --- a/drivers/video/tegra/host/chip_support.h +++ b/drivers/video/tegra/host/chip_support.h @@ -125,6 +125,7 @@ struct nvhost_intr_ops { void (*set_syncpt_threshold)( struct nvhost_intr *, u32 id, u32 thresh); void (*enable_syncpt_intr)(struct nvhost_intr *, u32 id); + void (*disable_syncpt_intr)(struct nvhost_intr *, u32 id); void (*disable_all_syncpt_intrs)(struct nvhost_intr *); int (*request_host_general_irq)(struct nvhost_intr *); void (*free_host_general_irq)(struct nvhost_intr *); diff --git a/drivers/video/tegra/host/gr3d/gr3d.c b/drivers/video/tegra/host/gr3d/gr3d.c index 715468131d9e..775c77b0e88d 100644 --- 
a/drivers/video/tegra/host/gr3d/gr3d.c +++ b/drivers/video/tegra/host/gr3d/gr3d.c @@ -80,8 +80,10 @@ struct host1x_hwctx *nvhost_3dctx_alloc_common(struct host1x_hwctx_handler *p, ctx->restore = mem_op().alloc(memmgr, p->restore_size * 4, 32, map_restore ? mem_mgr_flag_write_combine : mem_mgr_flag_uncacheable); - if (IS_ERR_OR_NULL(ctx->restore)) + if (IS_ERR_OR_NULL(ctx->restore)) { + ctx->restore = NULL; goto fail; + } if (map_restore) { ctx->restore_virt = mem_op().mmap(ctx->restore); diff --git a/drivers/video/tegra/host/gr3d/gr3d.h b/drivers/video/tegra/host/gr3d/gr3d.h index 3855b237b702..61f708cea95c 100644 --- a/drivers/video/tegra/host/gr3d/gr3d.h +++ b/drivers/video/tegra/host/gr3d/gr3d.h @@ -29,6 +29,9 @@ #define AR3D_PSEQ_QUAD_ID 0x545 #define AR3D_DW_MEMORY_OUTPUT_ADDRESS 0x904 #define AR3D_DW_MEMORY_OUTPUT_DATA 0x905 +#define AR3D_FDC_CONTROL_0 0xa00 +#define AR3D_FDC_CONTROL_0_RESET_VAL 0xe00 +#define AR3D_FDC_CONTROL_0_INVALIDATE 1 #define AR3D_GSHIM_WRITE_MASK 0xb00 #define AR3D_GSHIM_READ_SELECT 0xb01 #define AR3D_GLOBAL_MEMORY_OUTPUT_READS 0xe40 diff --git a/drivers/video/tegra/host/gr3d/gr3d_t20.c b/drivers/video/tegra/host/gr3d/gr3d_t20.c index b6e3896fe50c..694b00527790 100644 --- a/drivers/video/tegra/host/gr3d/gr3d_t20.c +++ b/drivers/video/tegra/host/gr3d/gr3d_t20.c @@ -144,7 +144,7 @@ static void save_push_v0(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) p->save_phys); } -static void __init save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr) +static void save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr) { /* 3d: when done, increment syncpt to base+1 */ ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0); @@ -162,7 +162,7 @@ static void __init save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr) h->syncpt); /* incr 2 */ } -static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count) +static void save_direct_v0(u32 *ptr, u32 start_reg, u32 count) { ptr[0] = 
nvhost_opcode_nonincr(host1x_uclass_indoff_r(), 1); ptr[1] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D, @@ -170,7 +170,7 @@ static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count) ptr[2] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count); } -static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset, +static void save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset, u32 data_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, @@ -183,7 +183,7 @@ static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset, ptr[4] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count); } -static void __init save_end_v0(struct host1x_hwctx_handler *h, u32 *ptr) +static void save_end_v0(struct host1x_hwctx_handler *h, u32 *ptr) { /* Wait for context read service to finish (cpu incr 3) */ ptr[0] = nvhost_opcode_nonincr(host1x_uclass_wait_syncpt_base_r(), 1); @@ -226,7 +226,7 @@ static u32 *save_regs_v0(u32 *ptr, unsigned int *pending, /*** save ***/ -static void __init setup_save_regs(struct save_info *info, +static void setup_save_regs(struct save_info *info, const struct hwctx_reginfo *regs, unsigned int nr_regs) { @@ -284,7 +284,7 @@ static void __init setup_save_regs(struct save_info *info, info->restore_count = restore_count; } -static void __init setup_save(struct host1x_hwctx_handler *h, u32 *ptr) +static void setup_save(struct host1x_hwctx_handler *h, u32 *ptr) { struct save_info info = { ptr, @@ -371,7 +371,7 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t20_ctxhandler_init( p->save_buf = mem_op().alloc(memmgr, p->save_size * sizeof(u32), 32, mem_mgr_flag_write_combine); - if (IS_ERR(p->save_buf)) { + if (IS_ERR_OR_NULL(p->save_buf)) { p->save_buf = NULL; return NULL; } diff --git a/drivers/video/tegra/host/gr3d/gr3d_t30.c b/drivers/video/tegra/host/gr3d/gr3d_t30.c index c35fea2f3ac2..664708c7fc80 100644 --- a/drivers/video/tegra/host/gr3d/gr3d_t30.c +++ 
b/drivers/video/tegra/host/gr3d/gr3d_t30.c @@ -125,6 +125,16 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0), NVHOST_OPCODE_NOOP); + /* invalidate the FDC to prevent cache-coherency issues across GPUs + note that we assume FDC_CONTROL_0 is left in the reset state by all + contexts. the invalidate bit will clear itself, so the register + should be unchanged after this */ + nvhost_cdma_push(cdma, + nvhost_opcode_imm(AR3D_FDC_CONTROL_0, + AR3D_FDC_CONTROL_0_RESET_VAL + | AR3D_FDC_CONTROL_0_INVALIDATE), + NVHOST_OPCODE_NOOP); + /* set register set 0 and 1 register read memory output addresses, and send their reads to memory */ @@ -132,7 +142,7 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, 2), nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS, 1)); nvhost_cdma_push(cdma, - nvhost_opcode_nonincr(0x904, 1), + nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_ADDRESS, 1), ctx->restore_phys + restore_set1_offset * 4); nvhost_cdma_push(cdma, @@ -150,7 +160,7 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) p->save_phys); } -static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr) +static void save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr) { ptr[0] = nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_DATA, RESTORE_BEGIN_SIZE); @@ -158,7 +168,7 @@ static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr) ptr += RESTORE_BEGIN_SIZE; } -static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count) +static void save_direct_v1(u32 *ptr, u32 start_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, AR3D_DW_MEMORY_OUTPUT_DATA, 1); @@ -172,7 +182,7 @@ static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count) ptr[3] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count); } -static void __init save_indirect_v1(u32 *ptr, 
u32 offset_reg, u32 offset, +static void save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset, u32 data_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0); @@ -189,7 +199,7 @@ static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset, ptr[5] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count); } -static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr) +static void save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr) { /* write end of restore buffer */ ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, @@ -224,7 +234,7 @@ static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr) -static void __init setup_save_regs(struct save_info *info, +static void setup_save_regs(struct save_info *info, const struct hwctx_reginfo *regs, unsigned int nr_regs) { @@ -282,7 +292,7 @@ static void __init setup_save_regs(struct save_info *info, info->restore_count = restore_count; } -static void __init switch_gpu(struct save_info *info, +static void switch_gpu(struct save_info *info, unsigned int save_src_set, u32 save_dest_sets, u32 restore_dest_sets) @@ -303,7 +313,7 @@ static void __init switch_gpu(struct save_info *info, info->restore_count += 1; } -static void __init setup_save(struct host1x_hwctx_handler *p, u32 *ptr) +static void setup_save(struct host1x_hwctx_handler *p, u32 *ptr) { struct save_info info = { ptr, @@ -399,7 +409,7 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t30_ctxhandler_init( p->save_buf = mem_op().alloc(memmgr, p->save_size * 4, 32, mem_mgr_flag_write_combine); - if (IS_ERR(p->save_buf)) { + if (IS_ERR_OR_NULL(p->save_buf)) { p->save_buf = NULL; return NULL; } diff --git a/drivers/video/tegra/host/gr3d/scale3d.c b/drivers/video/tegra/host/gr3d/scale3d.c index 5922b55a836a..49147975a9e4 100644 --- a/drivers/video/tegra/host/gr3d/scale3d.c +++ b/drivers/video/tegra/host/gr3d/scale3d.c @@ -1,9 +1,9 @@ /* - * drivers/video/tegra/host/t20/scale3d.c + * 
drivers/video/tegra/host/gr3d/scale3d.c * * Tegra Graphics Host 3D clock scaling * - * Copyright (c) 2010-2012, NVIDIA Corporation. + * Copyright (c) 2010-2012, NVIDIA Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -23,12 +23,11 @@ * * module3d_notify_busy() is called upon submit, module3d_notify_idle() is * called when all outstanding submits are completed. Idle times are measured - * over a fixed time period (scale3d.p_period). If the 3d module idle time - * percentage goes over the limit (set in scale3d.p_idle_max), 3d clocks are - * scaled down. If the percentage goes under the minimum limit (set in - * scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made - * over the time frame given in scale3d.p_fast_response for clocking up - * quickly in response to load peaks. + * over a fixed time period (scale3d.p_estimation_window). If the 3d module + * idle time percentage goes over the limit (set in scale3d.p_idle_max), 3d + * clocks are scaled down. If the percentage goes under the minimum limit (set + * in scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made + * for clocking up quickly in response to load peaks. * * 3d.emc clock is scaled proportionately to 3d clock, with a quadratic- * bezier-like factor added to pull 3d.emc rate a bit lower. 
@@ -37,10 +36,31 @@ #include <linux/debugfs.h> #include <linux/types.h> #include <linux/clk.h> +#include <linux/slab.h> #include <mach/clk.h> #include <mach/hardware.h> #include "scale3d.h" #include "dev.h" +#include <media/tegra_camera.h> + +#define GR3D_PRINT_STATS BIT(1) +#define GR3D_PRINT_BUSY BIT(2) +#define GR3D_PRINT_IDLE BIT(3) +#define GR3D_PRINT_HINT BIT(4) +#define GR3D_PRINT_TARGET BIT(5) + +/* time frame for load and hint tracking - when events come in at a larger + * interval, this probably indicates the current estimates are stale + */ +#define GR3D_TIMEFRAME 1000000 /* 1 sec */ + +/* the number of frames to use in the running average of load estimates and + * throughput hints. Choosing 6 frames targets a window of about 100 msec. + * Large fluctuations in frame times require a window that's large enough to + * prevent spiky scaling behavior, which in turn exacerbates frame rate + * instability. + */ +#define GR3D_FRAME_SPAN 6 static int scale3d_is_enabled(void); static void scale3d_enable(int enable); @@ -48,54 +68,73 @@ static void scale3d_enable(int enable); #define POW2(x) ((x) * (x)) /* + * 3D clock scaling should be treated differently when camera is on in AP37. + * 3D in AP37 requires 1.3V and combining it with MPE reaches to EDP limit. + * 3D clock really needs to be set to lower frequency which requires 1.0V. + * The same thing applies to 3D EMC clock. + */ +#define CAMERA_3D_CLK 300000000 +#define CAMERA_3D_EMC_CLK 437000000 + +/* * debugfs parameters to control 3d clock scaling test * - * period - time period for clock rate evaluation - * fast_response - time period for evaluation of 'busy' spikes - * idle_min - if less than [idle_min] percent idle over [fast_response] - * microseconds, clock up. - * idle_max - if over [idle_max] percent idle over [period] microseconds, - * clock down. 
+ * estimation_window - time period for clock rate evaluation + * idle_min - if less than [idle_min / 10] percent idle over + * [estimation_window] microseconds, clock up. + * idle_max - if over [idle_max / 10] percent idle over [estimation_window] + * microseconds, clock down. * max_scale - limits rate changes to no less than (100 - max_scale)% or * (100 + 2 * max_scale)% of current clock rate - * verbosity - set above 5 for debug printouts + * verbosity - bit flag to control debug printouts: + * 1 - stats + * 2 - busy + * 3 - idle + * 4 - hints + * 5 - target frequencies */ struct scale3d_info_rec { struct mutex lock; /* lock for timestamps etc */ int enable; int init; - ktime_t idle_frame; - ktime_t fast_frame; - ktime_t last_idle; - ktime_t last_short_term_idle; + ktime_t last_scale; int is_idle; - ktime_t last_tweak; - ktime_t last_down; + ktime_t last_adjust; int fast_up_count; int slow_down_count; int is_scaled; - int fast_responses; - unsigned long idle_total; - unsigned long idle_short_term_total; - unsigned long max_rate_3d; long emc_slope; long emc_offset; long emc_dip_slope; long emc_dip_offset; long emc_xmid; + unsigned long max_rate_3d; unsigned long min_rate_3d; + ktime_t last_throughput_hint; + struct work_struct work; struct delayed_work idle_timer; + + ktime_t last_estimation_window; + long last_total_idle; + long total_idle; + ktime_t estimation_window; + ktime_t last_notification; + long idle_estimate; + unsigned int scale; - unsigned int p_period; - unsigned int period; + unsigned int p_busy_cutoff; + unsigned int p_estimation_window; + unsigned int p_use_throughput_hint; + unsigned int p_throughput_lo_limit; + unsigned int p_throughput_lower_limit; + unsigned int p_throughput_hi_limit; + unsigned int p_scale_step; unsigned int p_idle_min; unsigned int idle_min; unsigned int p_idle_max; unsigned int idle_max; - unsigned int p_fast_response; - unsigned int fast_response; unsigned int p_adjust; unsigned int p_scale_emc; unsigned int p_emc_dip; @@ 
-103,13 +142,15 @@ struct scale3d_info_rec { struct clk *clk_3d; struct clk *clk_3d2; struct clk *clk_3d_emc; + int *freqlist; + int freq_count; }; static struct scale3d_info_rec scale3d; -static void scale3d_clocks(unsigned long percent) +static void scale_to_freq(unsigned long hz) { - unsigned long hz, curr; + unsigned long curr; if (!tegra_is_clk_enabled(scale3d.clk_3d)) return; @@ -119,7 +160,8 @@ static void scale3d_clocks(unsigned long percent) return; curr = clk_get_rate(scale3d.clk_3d); - hz = percent * (curr / 100); + if (hz == curr) + return; if (!(hz >= scale3d.max_rate_3d && curr == scale3d.max_rate_3d)) { if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) @@ -139,6 +181,16 @@ static void scale3d_clocks(unsigned long percent) } } +static void scale3d_clocks(unsigned long percent) +{ + unsigned long hz, curr; + + curr = clk_get_rate(scale3d.clk_3d); + hz = percent * (curr / 100); + + scale_to_freq(hz); +} + static void scale3d_clocks_handler(struct work_struct *work) { unsigned int scale; @@ -164,12 +216,26 @@ void nvhost_scale3d_suspend(struct nvhost_device *dev) static void reset_3d_clocks(void) { if (clk_get_rate(scale3d.clk_3d) != scale3d.max_rate_3d) { - clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d); - if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) - clk_set_rate(scale3d.clk_3d2, scale3d.max_rate_3d); - if (scale3d.p_scale_emc) - clk_set_rate(scale3d.clk_3d_emc, - clk_round_rate(scale3d.clk_3d_emc, UINT_MAX)); + if (is_tegra_camera_on()) + clk_set_rate(scale3d.clk_3d, CAMERA_3D_CLK); + else + clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d); + if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) { + if (is_tegra_camera_on()) + clk_set_rate(scale3d.clk_3d2, CAMERA_3D_CLK); + else + clk_set_rate(scale3d.clk_3d2, + scale3d.max_rate_3d); + } + if (scale3d.p_scale_emc) { + if (is_tegra_camera_on()) + clk_set_rate(scale3d.clk_3d_emc, + CAMERA_3D_EMC_CLK); + else + clk_set_rate(scale3d.clk_3d_emc, + clk_round_rate(scale3d.clk_3d_emc, + UINT_MAX)); + } } } @@ 
-207,15 +273,6 @@ static void scale3d_enable(int enable) reset_3d_clocks(); } -static void reset_scaling_counters(ktime_t time) -{ - scale3d.idle_total = 0; - scale3d.idle_short_term_total = 0; - scale3d.last_idle = time; - scale3d.last_short_term_idle = time; - scale3d.idle_frame = time; -} - /* scaling_adjust - use scale up / scale down hint counts to adjust scaling * parameters. * @@ -228,8 +285,6 @@ static void reset_scaling_counters(ktime_t time) * * the parameters adjusted are * - * * fast_response time - * * period - time for scaling down estimate * * idle_min percentage * * idle_max percentage */ @@ -242,13 +297,11 @@ static void reset_scaling_counters(ktime_t time) static void scaling_adjust(ktime_t time) { long hint_ratio; - long fast_response_adjustment; - long period_adjustment; int idle_min_adjustment; int idle_max_adjustment; unsigned long dt; - dt = (unsigned long) ktime_us_delta(time, scale3d.last_tweak); + dt = (unsigned long) ktime_us_delta(time, scale3d.last_adjust); if (dt < SCALING_ADJUST_PERIOD) return; @@ -256,13 +309,9 @@ static void scaling_adjust(ktime_t time) (scale3d.slow_down_count + 1); if (hint_ratio > HINT_RATIO_MAX) { - fast_response_adjustment = -((int) scale3d.p_fast_response) / 4; - period_adjustment = scale3d.p_period / 2; idle_min_adjustment = scale3d.p_idle_min; idle_max_adjustment = scale3d.p_idle_max; } else if (hint_ratio < HINT_RATIO_MIN) { - fast_response_adjustment = scale3d.p_fast_response / 2; - period_adjustment = -((int) scale3d.p_period) / 4; idle_min_adjustment = -((int) scale3d.p_idle_min) / 2; idle_max_adjustment = -((int) scale3d.p_idle_max) / 2; } else { @@ -277,33 +326,23 @@ static void scaling_adjust(ktime_t time) diff *= 2; } - fast_response_adjustment = diff * - (scale3d.p_fast_response / (HINT_RATIO_DIFF * 2)); - period_adjustment = - diff * (scale3d.p_period / HINT_RATIO_DIFF); idle_min_adjustment = (factor * (int) scale3d.p_idle_min) / HINT_RATIO_DIFF; idle_max_adjustment = (factor * (int) 
scale3d.p_idle_max) / HINT_RATIO_DIFF; } - scale3d.fast_response = - scale3d.p_fast_response + fast_response_adjustment; - scale3d.period = scale3d.p_period + period_adjustment; - scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment; + scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment; scale3d.idle_max = scale3d.p_idle_max + idle_max_adjustment; - if (scale3d.p_verbosity >= 10) - pr_info("scale3d stats: + %d - %d (/ %d) f %u p %u min %u max %u\n", + if (scale3d.p_verbosity & GR3D_PRINT_STATS) + pr_info("scale3d stats: + %d - %d min %u max %u\n", scale3d.fast_up_count, scale3d.slow_down_count, - scale3d.fast_responses, scale3d.fast_response, - scale3d.period, scale3d.idle_min, scale3d.idle_max); + scale3d.idle_min, scale3d.idle_max); scale3d.fast_up_count = 0; scale3d.slow_down_count = 0; - scale3d.fast_responses = 0; - scale3d.last_down = time; - scale3d.last_tweak = time; + scale3d.last_adjust = time; } #undef SCALING_ADJUST_PERIOD @@ -316,61 +355,101 @@ static void scaling_state_check(ktime_t time) { unsigned long dt; - /* adjustment: set scale parameters (fast_response, period) +/- 25% + /* adjustment: set scale parameters (idle_min, idle_max) +/- 25% * based on ratio of scale up to scale down hints */ if (scale3d.p_adjust) scaling_adjust(time); else { - scale3d.fast_response = scale3d.p_fast_response; - scale3d.period = scale3d.p_period; scale3d.idle_min = scale3d.p_idle_min; scale3d.idle_max = scale3d.p_idle_max; } - /* check for load peaks */ - dt = (unsigned long) ktime_us_delta(time, scale3d.fast_frame); - if (dt > scale3d.fast_response) { - unsigned long idleness = - (scale3d.idle_short_term_total * 100) / dt; - scale3d.fast_responses++; - scale3d.fast_frame = time; - /* if too busy, scale up */ - if (idleness < scale3d.idle_min) { - scale3d.is_scaled = 0; - scale3d.fast_up_count++; - if (scale3d.p_verbosity >= 5) - pr_info("scale3d: %ld%% busy\n", - 100 - idleness); - - reset_3d_clocks(); - reset_scaling_counters(time); - return; - } - 
scale3d.idle_short_term_total = 0; - scale3d.last_short_term_idle = time; + dt = (unsigned long) ktime_us_delta(time, scale3d.last_scale); + if (dt < scale3d.p_estimation_window) + return; + + scale3d.last_scale = time; + + /* if too busy, scale up */ + if (scale3d.idle_estimate < scale3d.idle_min) { + scale3d.is_scaled = 0; + scale3d.fast_up_count++; + if (scale3d.p_verbosity & GR3D_PRINT_BUSY) + pr_info("scale3d: %ld/1000 busy\n", + 1000 - scale3d.idle_estimate); + + reset_3d_clocks(); + return; } - dt = (unsigned long) ktime_us_delta(time, scale3d.idle_frame); - if (dt > scale3d.period) { - unsigned long idleness = (scale3d.idle_total * 100) / dt; + if (scale3d.p_verbosity & GR3D_PRINT_IDLE) + pr_info("scale3d: idle %lu/1000\n", + scale3d.idle_estimate); - if (scale3d.p_verbosity >= 5) - pr_info("scale3d: idle %lu, ~%lu%%\n", - scale3d.idle_total, idleness); + if (scale3d.idle_estimate > scale3d.idle_max) { + if (!scale3d.is_scaled) + scale3d.is_scaled = 1; - if (idleness > scale3d.idle_max) { - if (!scale3d.is_scaled) { - scale3d.is_scaled = 1; - scale3d.last_down = time; - } - scale3d.slow_down_count++; - /* if idle time is high, clock down */ - scale3d.scale = 100 - (idleness - scale3d.idle_min); - schedule_work(&scale3d.work); - } + scale3d.slow_down_count++; + /* if idle time is high, clock down */ + scale3d.scale = + 100 - (scale3d.idle_estimate - scale3d.idle_min) / 10; + schedule_work(&scale3d.work); + } +} + +/* the idle estimate is done by keeping 2 time stamps, initially set to the + * same time. Once the estimation_window time has been exceeded, one time + * stamp is moved up to the current time. The idle estimate is calculated + * based on the idle time percentage from the earlier estimate. The next time + * an estimation_window time is exceeded, the previous idle time and estimates + * are moved up - this is intended to prevent abrupt changes to the idle + * estimate. 
+ */ +static void update_load_estimate(int idle) +{ + unsigned long window; + unsigned long t; + + ktime_t now = ktime_get(); + t = ktime_us_delta(now, scale3d.last_notification); - reset_scaling_counters(time); + /* if the last event was over GR3D_TIMEFRAME usec ago (1 sec), the + * current load tracking data is probably stale + */ + if (t > GR3D_TIMEFRAME) { + scale3d.is_idle = idle; + scale3d.last_notification = now; + scale3d.estimation_window = now; + scale3d.last_estimation_window = now; + scale3d.total_idle = 0; + scale3d.last_total_idle = 0; + scale3d.idle_estimate = idle ? 1000 : 0; + return; + } + + if (scale3d.is_idle) { + scale3d.total_idle += t; + scale3d.last_total_idle += t; + } + + scale3d.is_idle = idle; + scale3d.last_notification = now; + + window = ktime_us_delta(now, scale3d.last_estimation_window); + /* prevent division by 0 if events come in less than 1 usec apart */ + if (window > 0) + scale3d.idle_estimate = + (1000 * scale3d.last_total_idle) / window; + + /* move up to the last estimation window */ + if (ktime_us_delta(now, scale3d.estimation_window) > + scale3d.p_estimation_window) { + scale3d.last_estimation_window = scale3d.estimation_window; + scale3d.last_total_idle = scale3d.total_idle; + scale3d.total_idle = 0; + scale3d.estimation_window = now; } } @@ -378,65 +457,226 @@ void nvhost_scale3d_notify_idle(struct nvhost_device *dev) { ktime_t t; unsigned long dt; + int delay; if (!scale3d.enable) return; - mutex_lock(&scale3d.lock); + update_load_estimate(1); t = ktime_get(); - if (scale3d.is_idle) { - dt = ktime_us_delta(t, scale3d.last_idle); - scale3d.idle_total += dt; - dt = ktime_us_delta(t, scale3d.last_short_term_idle); - scale3d.idle_short_term_total += dt; - } else - scale3d.is_idle = 1; + /* if throughput hint enabled, and last hint is recent enough, return */ + if (scale3d.p_use_throughput_hint) { + dt = ktime_us_delta(t, scale3d.last_throughput_hint); + if (dt < GR3D_TIMEFRAME) + return; + } - scale3d.last_idle = t; - 
scale3d.last_short_term_idle = t; + mutex_lock(&scale3d.lock); - scaling_state_check(scale3d.last_idle); + scaling_state_check(t); - /* delay idle_max % of 2 * fast_response time (given in microseconds) */ - schedule_delayed_work(&scale3d.idle_timer, - msecs_to_jiffies((scale3d.idle_max * scale3d.fast_response) - / 50000)); + /* delay idle_max % of 2 * estimation_window (given in microseconds) */ + delay = (scale3d.idle_max * scale3d.p_estimation_window) / 500000; + schedule_delayed_work(&scale3d.idle_timer, msecs_to_jiffies(delay)); mutex_unlock(&scale3d.lock); } void nvhost_scale3d_notify_busy(struct nvhost_device *dev) { - unsigned long idle; - unsigned long short_term_idle; ktime_t t; if (!scale3d.enable) return; - mutex_lock(&scale3d.lock); - - cancel_delayed_work(&scale3d.idle_timer); + update_load_estimate(0); t = ktime_get(); - if (scale3d.is_idle) { - idle = (unsigned long) - ktime_us_delta(t, scale3d.last_idle); - scale3d.idle_total += idle; - short_term_idle = - ktime_us_delta(t, scale3d.last_short_term_idle); - scale3d.idle_short_term_total += short_term_idle; - scale3d.is_idle = 0; + /* if throughput hint enabled, and last hint is recent enough, return */ + if (scale3d.p_use_throughput_hint) { + unsigned long dt; + dt = ktime_us_delta(t, scale3d.last_throughput_hint); + if (dt < GR3D_TIMEFRAME) + return; } + mutex_lock(&scale3d.lock); + + cancel_delayed_work(&scale3d.idle_timer); scaling_state_check(t); mutex_unlock(&scale3d.lock); } +struct score { + int size; /* number of elements */ + int pos; /* position in ring buffer */ + int count; /* actual item count */ + unsigned int sum; /* running sum */ + unsigned int prev; /* previous score after 'reset' operation */ + unsigned int list[]; /* ring buffer */ +}; + +static struct score *score_init(int capacity) +{ + struct score *s; + + s = kzalloc(sizeof(struct score) + capacity * sizeof(int), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->size = capacity; + + return s; +} + +static void 
score_delete(struct score *s) +{ + kfree(s); +} + +#define score_get_average(s) ((s)->count ? (s)->sum / (s)->count : 0) + +static void score_add(struct score *s, unsigned int reading) +{ + if (s->count < s->size) { + s->sum += reading; + s->count++; + } else + s->sum = s->sum - s->list[s->pos] + reading; + + s->list[s->pos] = reading; + s->pos = (s->pos + 1) % s->size; +} + + +static unsigned int score_reset(struct score *s) +{ + s->prev = s->sum; + + s->count = 0; + s->pos = 0; + s->sum = 0; + + return s->prev; +} + +int freqlist_up(long target, int steps) +{ + int i, pos; + + for (i = 0; i < scale3d.freq_count; i++) + if (scale3d.freqlist[i] >= target) + break; + + pos = min(scale3d.freq_count - 1, i + steps); + return scale3d.freqlist[pos]; +} + +int freqlist_down(long target, int steps) +{ + int i, pos; + + for (i = scale3d.freq_count - 1; i >= 0; i--) + if (scale3d.freqlist[i] <= target) + break; + + pos = max(0, i - steps); + return scale3d.freqlist[pos]; +} + +static struct score *busy_history; +static struct score *hint_history; + +/* When a throughput hint is given, perform scaling based on the hint and on + * the current idle estimation. This is done as follows: + * + * 1. On moderate loads force min frequency if the throughput hint is not too + * low. + * 2. Otherwise, calculate target-rate = max-rate * load-percentage + * 3. Unless the current or average throughput hint is below the minimum + * limit, in which case, choose a higher rate + * 4. Or the average throughput hint is above the maximum limit, in which case, + * choose a lower rate. 
+ */ +void nvhost_scale3d_set_throughput_hint(int hint) +{ + ktime_t now; + long busy; + long curr; + long target; + long dt; + int avg_busy, avg_hint; + + if (!scale3d.enable) + return; + + if (!scale3d.p_use_throughput_hint) + return; + + if (scale3d.p_verbosity & GR3D_PRINT_HINT) + pr_info("3fds: idle %ld, hint %d\n", + scale3d.idle_estimate, hint); + + now = ktime_get(); + dt = ktime_us_delta(now, scale3d.last_throughput_hint); + if (dt > GR3D_TIMEFRAME) { + score_reset(busy_history); + score_reset(hint_history); + } + + scale3d.last_throughput_hint = now; + + busy = 1000 - scale3d.idle_estimate; + curr = clk_get_rate(scale3d.clk_3d); + target = scale3d.min_rate_3d; + + score_add(busy_history, busy); + score_add(hint_history, hint); + + avg_busy = score_get_average(busy_history); + avg_hint = score_get_average(hint_history); + + if (busy > 0) + target = (curr / 1000) * busy; + + /* In practice, running the gpu at min frequency is typically + * sufficient to keep up performance at loads up to 70% on cases, + * but the average hint value is tested to keep performance up if + * needed. + */ + if (avg_busy <= scale3d.p_busy_cutoff && + avg_hint >= scale3d.p_throughput_lower_limit) + target = scale3d.min_rate_3d; + else { + target = (scale3d.max_rate_3d / 1000) * avg_busy; + + /* Scale up if either the current hint or the running average + * are below the target to prevent performance drop. 
+ */ + if (hint <= scale3d.p_throughput_lo_limit || + avg_hint <= scale3d.p_throughput_lo_limit) { + if (target < curr) + target = curr; + target = freqlist_up(target, scale3d.p_scale_step); + } else if (avg_hint >= scale3d.p_throughput_hi_limit) { + if (target > curr) + target = curr; + target = freqlist_down(target, scale3d.p_scale_step); + } + } + + scale_to_freq(target); + + if (scale3d.p_verbosity & GR3D_PRINT_TARGET) + pr_info("3dfs: busy %ld <%d>, curr %ld, t %ld, hint %d <%d>\n", + busy, avg_busy, curr / 1000000, target, hint, avg_hint); +} +EXPORT_SYMBOL(nvhost_scale3d_set_throughput_hint); + static void scale3d_idle_handler(struct work_struct *work) { int notify_idle = 0; @@ -458,19 +698,6 @@ static void scale3d_idle_handler(struct work_struct *work) nvhost_scale3d_notify_idle(NULL); } -void nvhost_scale3d_reset() -{ - ktime_t t; - - if (!scale3d.enable) - return; - - t = ktime_get(); - mutex_lock(&scale3d.lock); - reset_scaling_counters(t); - mutex_unlock(&scale3d.lock); -} - /* * debugfs parameters to control 3d clock scaling */ @@ -495,13 +722,17 @@ void nvhost_scale3d_debug_init(struct dentry *de) } \ } while (0) - CREATE_SCALE3D_FILE(fast_response); + CREATE_SCALE3D_FILE(estimation_window); CREATE_SCALE3D_FILE(idle_min); CREATE_SCALE3D_FILE(idle_max); - CREATE_SCALE3D_FILE(period); CREATE_SCALE3D_FILE(adjust); CREATE_SCALE3D_FILE(scale_emc); CREATE_SCALE3D_FILE(emc_dip); + CREATE_SCALE3D_FILE(use_throughput_hint); + CREATE_SCALE3D_FILE(throughput_hi_limit); + CREATE_SCALE3D_FILE(throughput_lo_limit); + CREATE_SCALE3D_FILE(throughput_lower_limit); + CREATE_SCALE3D_FILE(scale_step); CREATE_SCALE3D_FILE(verbosity); #undef CREATE_SCALE3D_FILE } @@ -532,12 +763,17 @@ static ssize_t enable_3d_scaling_store(struct device *dev, static DEVICE_ATTR(enable_3d_scaling, S_IRUGO | S_IWUSR, enable_3d_scaling_show, enable_3d_scaling_store); +#define MAX_FREQ_COUNT 0x40 /* 64 frequencies should be enough for anyone */ + void nvhost_scale3d_init(struct nvhost_device 
*d) { if (!scale3d.init) { int error; unsigned long max_emc, min_emc; long correction; + long rate; + int freqs[MAX_FREQ_COUNT]; + mutex_init(&scale3d.lock); INIT_WORK(&scale3d.work, scale3d_clocks_handler); @@ -632,30 +868,74 @@ void nvhost_scale3d_init(struct nvhost_device *d) POW2(scale3d.max_rate_3d - scale3d.emc_xmid); scale3d.emc_dip_offset -= correction; + scale3d.is_idle = 1; + /* set scaling parameter defaults */ scale3d.enable = 1; - scale3d.period = scale3d.p_period = 100000; - scale3d.idle_min = scale3d.p_idle_min = 10; - scale3d.idle_max = scale3d.p_idle_max = 15; - scale3d.fast_response = scale3d.p_fast_response = 7000; + scale3d.idle_min = scale3d.p_idle_min = 100; + scale3d.idle_max = scale3d.p_idle_max = 150; scale3d.p_scale_emc = 1; scale3d.p_emc_dip = 1; scale3d.p_verbosity = 0; scale3d.p_adjust = 1; + scale3d.p_use_throughput_hint = 1; + scale3d.p_throughput_lower_limit = 940; + scale3d.p_throughput_lo_limit = 990; + scale3d.p_throughput_hi_limit = 1010; + scale3d.p_scale_step = 1; + scale3d.p_estimation_window = 8000; + scale3d.p_busy_cutoff = 750; error = device_create_file(&d->dev, &dev_attr_enable_3d_scaling); if (error) dev_err(&d->dev, "failed to create sysfs attributes"); + rate = 0; + scale3d.freq_count = 0; + while (rate <= scale3d.max_rate_3d) { + long rounded_rate; + if (unlikely(scale3d.freq_count == MAX_FREQ_COUNT)) { + pr_err("%s: too many frequencies\n", __func__); + break; + } + rounded_rate = + clk_round_rate(scale3d.clk_3d, rate); + freqs[scale3d.freq_count++] = rounded_rate; + rate = rounded_rate + 2000; + } + scale3d.freqlist = + kmalloc(scale3d.freq_count * sizeof(int), GFP_KERNEL); + if (scale3d.freqlist == NULL) + pr_err("%s: can\'t allocate freq table\n", __func__); + + memcpy(scale3d.freqlist, freqs, + scale3d.freq_count * sizeof(int)); + + busy_history = score_init(GR3D_FRAME_SPAN); + if (busy_history == NULL) + pr_err("%s: can\'t init load tracking array\n", + __func__); + + hint_history = score_init(GR3D_FRAME_SPAN); 
+ if (hint_history == NULL) + pr_err("%s: can\'t init throughput tracking array\n", + __func__); + scale3d.init = 1; } - - nvhost_scale3d_reset(); } void nvhost_scale3d_deinit(struct nvhost_device *dev) { device_remove_file(&dev->dev, &dev_attr_enable_3d_scaling); scale3d.init = 0; + if (scale3d.freqlist != NULL) { + kfree(scale3d.freqlist); + scale3d.freq_count = 0; + scale3d.freqlist = NULL; + } + + score_delete(busy_history); + score_delete(hint_history); } diff --git a/drivers/video/tegra/host/host1x/host1x.c b/drivers/video/tegra/host/host1x/host1x.c index 33ebc1ff5d22..31899c78065b 100644 --- a/drivers/video/tegra/host/host1x/host1x.c +++ b/drivers/video/tegra/host/host1x/host1x.c @@ -308,6 +308,19 @@ static int power_off_host(struct nvhost_device *dev) return 0; } +static void clock_on_host(struct nvhost_device *dev) +{ + struct nvhost_master *host = nvhost_get_drvdata(dev); + nvhost_intr_start(&host->intr, clk_get_rate(dev->clk[0])); +} + +static int clock_off_host(struct nvhost_device *dev) +{ + struct nvhost_master *host = nvhost_get_drvdata(dev); + nvhost_intr_stop(&host->intr); + return 0; +} + static int __devinit nvhost_user_init(struct nvhost_master *host) { int err, devno; @@ -516,6 +529,8 @@ static struct nvhost_driver nvhost_driver = { }, .finalize_poweron = power_on_host, .prepare_poweroff = power_off_host, + .finalize_clockon = clock_on_host, + .prepare_clockoff = clock_off_host, }; static int __init nvhost_mod_init(void) diff --git a/drivers/video/tegra/host/host1x/host1x_cdma.c b/drivers/video/tegra/host/host1x/host1x_cdma.c index 2e7ff5783a37..5a29ff652efe 100644 --- a/drivers/video/tegra/host/host1x/host1x_cdma.c +++ b/drivers/video/tegra/host/host1x/host1x_cdma.c @@ -233,12 +233,15 @@ static void cdma_timeout_cpu_incr(struct nvhost_cdma *cdma, u32 getptr, /* after CPU incr, ensure shadow is up to date */ nvhost_syncpt_update_min(&dev->syncpt, cdma->timeout.syncpt_id); - /* update WAITBASE_3D by same number of incrs */ - if (waitbases) { + 
/* Synchronize wait bases. 2D wait bases are synchronized with + * syncpoint 19. Hence wait bases are not updated when syncptid=18. */ + + if (cdma->timeout.syncpt_id != NVSYNCPT_2D_0 && waitbases) { void __iomem *p; p = dev->sync_aperture + host1x_sync_syncpt_base_0_r() + - (ffs(waitbases) * sizeof(u32)); + (__ffs(waitbases) * sizeof(u32)); writel(syncval, p); + dev->syncpt.base_val[__ffs(waitbases)] = syncval; } /* NOP all the PB slots */ @@ -486,7 +489,7 @@ static void cdma_timeout_handler(struct work_struct *work) /* stop HW, resetting channel/module */ cdma_op().timeout_teardown_begin(cdma); - nvhost_cdma_update_sync_queue(cdma, sp, dev->dev); + nvhost_cdma_update_sync_queue(cdma, sp, ch->dev); mutex_unlock(&cdma->lock); } diff --git a/drivers/video/tegra/host/host1x/host1x_channel.c b/drivers/video/tegra/host/host1x/host1x_channel.c index 9e9fc25dc966..0274413ff698 100644 --- a/drivers/video/tegra/host/host1x/host1x_channel.c +++ b/drivers/video/tegra/host/host1x/host1x_channel.c @@ -365,7 +365,7 @@ static int host1x_channel_read_3d_reg( if (hwctx_to_save) { syncpt_incrs += hwctx_to_save->save_incrs; hwctx_to_save->hwctx.valid = true; - channel->ctxhandler->get(&hwctx_to_save->hwctx); + nvhost_job_get_hwctx(job, &hwctx_to_save->hwctx); } channel->cur_ctx = hwctx; if (channel->cur_ctx && channel->cur_ctx->valid) { @@ -470,7 +470,8 @@ static int host1x_channel_read_3d_reg( wait_event(wq, nvhost_syncpt_is_expired(&nvhost_get_host(channel->dev)->syncpt, p->syncpt, syncval - 2)); - nvhost_intr_put_ref(&nvhost_get_host(channel->dev)->intr, ref); + nvhost_intr_put_ref(&nvhost_get_host(channel->dev)->intr, p->syncpt, + ref); /* Read the register value from FIFO */ err = host1x_drain_read_fifo(channel, value, 1, &pending); @@ -580,7 +581,6 @@ static int host1x_save_context(struct nvhost_channel *ch) } hwctx_to_save->valid = true; - ch->ctxhandler->get(hwctx_to_save); ch->cur_ctx = NULL; syncpt_id = to_host1x_hwctx_handler(hwctx_to_save->h)->syncpt; @@ -623,7 +623,7 @@ 
static int host1x_save_context(struct nvhost_channel *ch) nvhost_syncpt_is_expired(&nvhost_get_host(ch->dev)->syncpt, syncpt_id, syncpt_val)); - nvhost_intr_put_ref(&nvhost_get_host(ch->dev)->intr, ref); + nvhost_intr_put_ref(&nvhost_get_host(ch->dev)->intr, syncpt_id, ref); nvhost_cdma_update(&ch->cdma); diff --git a/drivers/video/tegra/host/host1x/host1x_intr.c b/drivers/video/tegra/host/host1x/host1x_intr.c index 62fd07cbb9ba..facb818a0c24 100644 --- a/drivers/video/tegra/host/host1x/host1x_intr.c +++ b/drivers/video/tegra/host/host1x/host1x_intr.c @@ -131,6 +131,16 @@ static void t20_intr_enable_syncpt_intr(struct nvhost_intr *intr, u32 id) BIT_WORD(id) * REGISTER_STRIDE); } +static void t20_intr_disable_syncpt_intr(struct nvhost_intr *intr, u32 id) +{ + struct nvhost_master *dev = intr_to_dev(intr); + void __iomem *sync_regs = dev->sync_aperture; + + writel(BIT_MASK(id), sync_regs + + host1x_sync_syncpt_thresh_int_disable_r() + + BIT_WORD(id) * REGISTER_STRIDE); +} + static void t20_intr_disable_all_syncpt_intrs(struct nvhost_intr *intr) { struct nvhost_master *dev = intr_to_dev(intr); @@ -140,7 +150,7 @@ static void t20_intr_disable_all_syncpt_intrs(struct nvhost_intr *intr) for (reg = 0; reg <= BIT_WORD(dev->info.nb_pts) * REGISTER_STRIDE; reg += REGISTER_STRIDE) { /* disable interrupts for both cpu's */ - writel(0, sync_regs + + writel(0xffffffffu, sync_regs + host1x_sync_syncpt_thresh_int_disable_r() + reg); @@ -276,6 +286,7 @@ static const struct nvhost_intr_ops host1x_intr_ops = { .set_host_clocks_per_usec = t20_intr_set_host_clocks_per_usec, .set_syncpt_threshold = t20_intr_set_syncpt_threshold, .enable_syncpt_intr = t20_intr_enable_syncpt_intr, + .disable_syncpt_intr = t20_intr_disable_syncpt_intr, .disable_all_syncpt_intrs = t20_intr_disable_all_syncpt_intrs, .request_host_general_irq = t20_intr_request_host_general_irq, .free_host_general_irq = t20_intr_free_host_general_irq, diff --git a/drivers/video/tegra/host/mpe/mpe.c 
b/drivers/video/tegra/host/mpe/mpe.c index c738700469c6..d76ee0108eef 100644 --- a/drivers/video/tegra/host/mpe/mpe.c +++ b/drivers/video/tegra/host/mpe/mpe.c @@ -212,7 +212,7 @@ struct save_info { unsigned int restore_count; }; -static void __init save_begin(struct host1x_hwctx_handler *h, u32 *ptr) +static void save_begin(struct host1x_hwctx_handler *h, u32 *ptr) { /* MPE: when done, increment syncpt to base+1 */ ptr[0] = nvhost_opcode_setclass(NV_VIDEO_ENCODE_MPEG_CLASS_ID, 0, 0); @@ -229,7 +229,7 @@ static void __init save_begin(struct host1x_hwctx_handler *h, u32 *ptr) } #define SAVE_BEGIN_SIZE 5 -static void __init save_direct(u32 *ptr, u32 start_reg, u32 count) +static void save_direct(u32 *ptr, u32 start_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, host1x_uclass_indoff_r(), 1); @@ -239,7 +239,7 @@ static void __init save_direct(u32 *ptr, u32 start_reg, u32 count) } #define SAVE_DIRECT_SIZE 3 -static void __init save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count) +static void save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_VIDEO_ENCODE_MPEG_CLASS_ID, cmd_reg, 1); @@ -247,7 +247,7 @@ static void __init save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count) } #define SAVE_SET_RAM_CMD_SIZE 2 -static void __init save_read_ram_data_nasty(u32 *ptr, u32 data_reg) +static void save_read_ram_data_nasty(u32 *ptr, u32 data_reg) { ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, host1x_uclass_indoff_r(), 1); @@ -261,7 +261,7 @@ static void __init save_read_ram_data_nasty(u32 *ptr, u32 data_reg) } #define SAVE_READ_RAM_DATA_NASTY_SIZE 5 -static void __init save_end(struct host1x_hwctx_handler *h, u32 *ptr) +static void save_end(struct host1x_hwctx_handler *h, u32 *ptr) { /* Wait for context read service to finish (cpu incr 3) */ ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, @@ -275,7 +275,7 @@ static void __init save_end(struct host1x_hwctx_handler *h, u32 *ptr) } #define SAVE_END_SIZE 5 -static void 
__init setup_save_regs(struct save_info *info, +static void setup_save_regs(struct save_info *info, const struct hwctx_reginfo *regs, unsigned int nr_regs) { @@ -304,7 +304,7 @@ static void __init setup_save_regs(struct save_info *info, info->restore_count = restore_count; } -static void __init setup_save_ram_nasty(struct save_info *info, unsigned words, +static void setup_save_ram_nasty(struct save_info *info, unsigned words, unsigned cmd_reg, unsigned data_reg) { u32 *ptr = info->ptr; @@ -330,7 +330,7 @@ static void __init setup_save_ram_nasty(struct save_info *info, unsigned words, info->restore_count = restore_count; } -static void __init setup_save(struct host1x_hwctx_handler *h, u32 *ptr) +static void setup_save(struct host1x_hwctx_handler *h, u32 *ptr) { struct save_info info = { ptr, @@ -553,7 +553,7 @@ struct nvhost_hwctx_handler *nvhost_mpe_ctxhandler_init(u32 syncpt, p->save_buf = mem_op().alloc(memmgr, p->save_size * 4, 32, mem_mgr_flag_write_combine); - if (IS_ERR(p->save_buf)) { + if (IS_ERR_OR_NULL(p->save_buf)) { p->save_buf = NULL; return NULL; } diff --git a/drivers/video/tegra/host/nvhost_acm.c b/drivers/video/tegra/host/nvhost_acm.c index 06005c423a21..5bde55ad2ff5 100644 --- a/drivers/video/tegra/host/nvhost_acm.c +++ b/drivers/video/tegra/host/nvhost_acm.c @@ -101,8 +101,17 @@ void nvhost_module_reset(struct nvhost_device *dev) static void to_state_clockgated_locked(struct nvhost_device *dev) { + struct nvhost_driver *drv = to_nvhost_driver(dev->dev.driver); + if (dev->powerstate == NVHOST_POWER_STATE_RUNNING) { - int i; + int i, err; + if (drv->prepare_clockoff) { + err = drv->prepare_clockoff(dev); + if (err) { + dev_err(&dev->dev, "error clock gating"); + return; + } + } for (i = 0; i < dev->num_clks; i++) clk_disable(dev->clk[i]); if (dev->dev.parent) @@ -141,6 +150,14 @@ static void to_state_running_locked(struct nvhost_device *dev) } } + /* Invoke callback after enabling clock. This is used for + * re-enabling host1x interrupts. 
*/ + if (prev_state == NVHOST_POWER_STATE_CLOCKGATED + && drv->finalize_clockon) + drv->finalize_clockon(dev); + + /* Invoke callback after power un-gating. This is used for + * restoring context. */ if (prev_state == NVHOST_POWER_STATE_POWERGATED && drv->finalize_poweron) drv->finalize_poweron(dev); @@ -343,15 +360,17 @@ void nvhost_module_remove_client(struct nvhost_device *dev, void *priv) { int i; struct nvhost_module_client *m; + int found = 0; mutex_lock(&client_list_lock); list_for_each_entry(m, &dev->client_list, node) { if (priv == m->priv) { list_del(&m->node); + found = 1; break; } } - if (m) { + if (found) { kfree(m); for (i = 0; i < dev->num_clks; i++) nvhost_module_update_rate(dev, i); diff --git a/drivers/video/tegra/host/nvhost_intr.c b/drivers/video/tegra/host/nvhost_intr.c index 38a04f151e87..9788d32bd4a9 100644 --- a/drivers/video/tegra/host/nvhost_intr.c +++ b/drivers/video/tegra/host/nvhost_intr.c @@ -210,7 +210,9 @@ static int process_wait_list(struct nvhost_intr *intr, remove_completed_waiters(&syncpt->wait_head, threshold, completed); empty = list_empty(&syncpt->wait_head); - if (!empty) + if (empty) + intr_op().disable_syncpt_intr(intr, syncpt->id); + else reset_threshold_interrupt(intr, &syncpt->wait_head, syncpt->id); @@ -327,14 +329,20 @@ void *nvhost_intr_alloc_waiter() GFP_KERNEL|__GFP_REPEAT); } -void nvhost_intr_put_ref(struct nvhost_intr *intr, void *ref) +void nvhost_intr_put_ref(struct nvhost_intr *intr, u32 id, void *ref) { struct nvhost_waitlist *waiter = ref; + struct nvhost_intr_syncpt *syncpt; + struct nvhost_master *host = intr_to_dev(intr); while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) == WLS_REMOVED) schedule(); + syncpt = intr->syncpt + id; + (void)process_wait_list(intr, syncpt, + nvhost_syncpt_update_min(&host->syncpt, id)); + kref_put(&waiter->refcount, waiter_release); } diff --git a/drivers/video/tegra/host/nvhost_intr.h b/drivers/video/tegra/host/nvhost_intr.h index cf0b6b9e8934..d4a6157eced1 
100644 --- a/drivers/video/tegra/host/nvhost_intr.h +++ b/drivers/video/tegra/host/nvhost_intr.h @@ -104,7 +104,7 @@ void *nvhost_intr_alloc_waiter(void); * You must call this if you passed non-NULL as ref. * @ref the ref returned from nvhost_intr_add_action() */ -void nvhost_intr_put_ref(struct nvhost_intr *intr, void *ref); +void nvhost_intr_put_ref(struct nvhost_intr *intr, u32 id, void *ref); int nvhost_intr_init(struct nvhost_intr *intr, u32 irq_gen, u32 irq_sync); void nvhost_intr_deinit(struct nvhost_intr *intr); diff --git a/drivers/video/tegra/host/nvhost_job.c b/drivers/video/tegra/host/nvhost_job.c index f93d7df1a552..f0f7e64d4504 100644 --- a/drivers/video/tegra/host/nvhost_job.c +++ b/drivers/video/tegra/host/nvhost_job.c @@ -34,19 +34,27 @@ /* Magic to use to fill freed handle slots */ #define BAD_MAGIC 0xdeadbeef -static int job_size(struct nvhost_submit_hdr_ext *hdr) +static size_t job_size(struct nvhost_submit_hdr_ext *hdr) { - int num_relocs = hdr ? hdr->num_relocs : 0; - int num_waitchks = hdr ? hdr->num_waitchks : 0; - int num_cmdbufs = hdr ? hdr->num_cmdbufs : 0; - int num_unpins = num_cmdbufs + num_relocs; + s64 num_relocs = hdr ? (int)hdr->num_relocs : 0; + s64 num_waitchks = hdr ? (int)hdr->num_waitchks : 0; + s64 num_cmdbufs = hdr ? 
(int)hdr->num_cmdbufs : 0; + s64 num_unpins = num_cmdbufs + num_relocs; + s64 total; - return sizeof(struct nvhost_job) + if(num_relocs < 0 || num_waitchks < 0 || num_cmdbufs < 0) + return 0; + + total = sizeof(struct nvhost_job) + num_relocs * sizeof(struct nvhost_reloc) + num_relocs * sizeof(struct nvhost_reloc_shift) + num_unpins * sizeof(struct mem_handle *) + num_waitchks * sizeof(struct nvhost_waitchk) + num_cmdbufs * sizeof(struct nvhost_job_gather); + + if(total > ULONG_MAX) + return 0; + return (size_t)total; } static void init_fields(struct nvhost_job *job, @@ -63,7 +71,11 @@ static void init_fields(struct nvhost_job *job, job->priority = priority; job->clientid = clientid; - /* Redistribute memory to the structs */ + /* + * Redistribute memory to the structs. + * Overflows and negative conditions have + * already been checked in job_alloc(). + */ mem += sizeof(struct nvhost_job); job->relocarray = num_relocs ? mem : NULL; mem += num_relocs * sizeof(struct nvhost_reloc); @@ -91,8 +103,11 @@ struct nvhost_job *nvhost_job_alloc(struct nvhost_channel *ch, int clientid) { struct nvhost_job *job = NULL; + size_t size = job_size(hdr); - job = vzalloc(job_size(hdr)); + if(!size) + goto error; + job = vzalloc(size); if (!job) goto error; diff --git a/drivers/video/tegra/host/nvhost_syncpt.c b/drivers/video/tegra/host/nvhost_syncpt.c index 9fa7d0652c1f..38c28ca116e7 100644 --- a/drivers/video/tegra/host/nvhost_syncpt.c +++ b/drivers/video/tegra/host/nvhost_syncpt.c @@ -235,7 +235,7 @@ int nvhost_syncpt_wait_timeout(struct nvhost_syncpt *sp, u32 id, check_count++; } } - nvhost_intr_put_ref(&(syncpt_to_dev(sp)->intr), ref); + nvhost_intr_put_ref(&(syncpt_to_dev(sp)->intr), id, ref); done: nvhost_module_idle(syncpt_to_dev(sp)->dev); @@ -344,7 +344,7 @@ static ssize_t syncpt_min_show(struct kobject *kobj, struct nvhost_syncpt_attr *syncpt_attr = container_of(attr, struct nvhost_syncpt_attr, attr); - return snprintf(buf, PAGE_SIZE, "%d", + return snprintf(buf, 
PAGE_SIZE, "%u", nvhost_syncpt_read(&syncpt_attr->host->syncpt, syncpt_attr->id)); } @@ -355,7 +355,7 @@ static ssize_t syncpt_max_show(struct kobject *kobj, struct nvhost_syncpt_attr *syncpt_attr = container_of(attr, struct nvhost_syncpt_attr, attr); - return snprintf(buf, PAGE_SIZE, "%d", + return snprintf(buf, PAGE_SIZE, "%u", nvhost_syncpt_read_max(&syncpt_attr->host->syncpt, syncpt_attr->id)); } diff --git a/drivers/video/tegra/host/t30/t30.c b/drivers/video/tegra/host/t30/t30.c index 0c8d626a4d67..334d598d5c0b 100644 --- a/drivers/video/tegra/host/t30/t30.c +++ b/drivers/video/tegra/host/t30/t30.c @@ -142,7 +142,7 @@ static struct nvhost_device tegra_gr2d02_device = { .waitbases = BIT(NVWAITBASE_2D_0) | BIT(NVWAITBASE_2D_1), .modulemutexes = BIT(NVMODMUTEX_2D_FULL) | BIT(NVMODMUTEX_2D_SIMPLE) | BIT(NVMODMUTEX_2D_SB_A) | BIT(NVMODMUTEX_2D_SB_B), - .clocks = { {"gr2d", UINT_MAX}, + .clocks = { {"gr2d", 0}, {"epp", 0}, {"emc", 300000000} }, NVHOST_MODULE_NO_POWERGATE_IDS, diff --git a/drivers/video/tegra/nvmap/nvmap.c b/drivers/video/tegra/nvmap/nvmap.c index b7fd695d04ee..a0dcf26337f8 100644 --- a/drivers/video/tegra/nvmap/nvmap.c +++ b/drivers/video/tegra/nvmap/nvmap.c @@ -3,7 +3,7 @@ * * Memory manager for Tegra GPU * - * Copyright (c) 2009-2011, NVIDIA Corporation. + * Copyright (c) 2009-2012, NVIDIA Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -271,7 +271,7 @@ int nvmap_pin_ids(struct nvmap_client *client, * if the caller crashes after pinning a global handle, the handle * will be permanently leaked. 
*/ nvmap_ref_lock(client); - for (i = 0; i < nr && !ret; i++) { + for (i = 0; i < nr; i++) { ref = _nvmap_validate_id_locked(client, ids[i]); if (ref) { atomic_inc(&ref->pin); @@ -280,19 +280,19 @@ int nvmap_pin_ids(struct nvmap_client *client, struct nvmap_handle *verify; nvmap_ref_unlock(client); verify = nvmap_validate_get(client, ids[i]); - if (verify) + if (verify) { nvmap_warn(client, "%s pinning unreferenced " "handle %p\n", current->group_leader->comm, h[i]); - else + } else { + h[i] = NULL; ret = -EPERM; + } nvmap_ref_lock(client); } } nvmap_ref_unlock(client); - nr = i; - if (ret) goto out; @@ -317,6 +317,9 @@ out: if (ret) { nvmap_ref_lock(client); for (i = 0; i < nr; i++) { + if(!ids[i]) + continue; + ref = _nvmap_validate_id_locked(client, ids[i]); if (!ref) { nvmap_warn(client, "%s freed handle %p " @@ -330,7 +333,8 @@ out: nvmap_ref_unlock(client); for (i = 0; i < nr; i++) - nvmap_handle_put(h[i]); + if(h[i]) + nvmap_handle_put(h[i]); } return ret; diff --git a/drivers/video/tegra/nvmap/nvmap_common.h b/drivers/video/tegra/nvmap/nvmap_common.h index 6da010720bb2..2a1e2b4b3c4b 100644 --- a/drivers/video/tegra/nvmap/nvmap_common.h +++ b/drivers/video/tegra/nvmap/nvmap_common.h @@ -23,7 +23,8 @@ extern void v7_flush_kern_cache_all(void *); extern void v7_clean_kern_cache_all(void *); -#define FLUSH_CLEAN_BY_SET_WAY_THRESHOLD (8 * PAGE_SIZE) +#define FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_INNER (8 * PAGE_SIZE) +#define FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_OUTER (1024 * 1024) static inline void inner_flush_cache_all(void) { diff --git a/drivers/video/tegra/nvmap/nvmap_dev.c b/drivers/video/tegra/nvmap/nvmap_dev.c index 98b0bcc18ba5..0c12348db88e 100644 --- a/drivers/video/tegra/nvmap/nvmap_dev.c +++ b/drivers/video/tegra/nvmap/nvmap_dev.c @@ -290,7 +290,7 @@ int nvmap_flush_heap_block(struct nvmap_client *client, if (prot == NVMAP_HANDLE_UNCACHEABLE || prot == NVMAP_HANDLE_WRITE_COMBINE) goto out; - if (len >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD) { + if (len >= 
FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_INNER) { inner_flush_cache_all(); if (prot != NVMAP_HANDLE_INNER_CACHEABLE) outer_flush_range(block->base, block->base + len); @@ -886,10 +886,11 @@ static void nvmap_vma_open(struct vm_area_struct *vma) struct nvmap_vma_priv *priv; priv = vma->vm_private_data; - BUG_ON(!priv); atomic_inc(&priv->count); + if(priv->handle) + nvmap_usecount_inc(priv->handle); } static void nvmap_vma_close(struct vm_area_struct *vma) @@ -898,8 +899,8 @@ static void nvmap_vma_close(struct vm_area_struct *vma) if (priv) { if (priv->handle) { + BUG_ON(priv->handle->usecount == 0); nvmap_usecount_dec(priv->handle); - BUG_ON(priv->handle->usecount < 0); } if (!atomic_dec_return(&priv->count)) { if (priv->handle) diff --git a/drivers/video/tegra/nvmap/nvmap_handle.c b/drivers/video/tegra/nvmap/nvmap_handle.c index 05046ed8ba79..4b7760b22190 100644 --- a/drivers/video/tegra/nvmap/nvmap_handle.c +++ b/drivers/video/tegra/nvmap/nvmap_handle.c @@ -3,7 +3,7 @@ * * Handle allocation and freeing routines for nvmap * - * Copyright (c) 2009-2012, NVIDIA Corporation. + * Copyright (c) 2009-2012, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +36,7 @@ #include <asm/cacheflush.h> #include <asm/outercache.h> +#include <asm/tlbflush.h> #include <asm/pgtable.h> #include <mach/iovmm.h> @@ -56,7 +57,7 @@ * the kernel (i.e., not a carveout handle) includes its array of pages. to * preserve kmalloc space, if the array of pages exceeds PAGELIST_VMALLOC_MIN, * the array is allocated using vmalloc. 
*/ -#define PAGELIST_VMALLOC_MIN (PAGE_SIZE * 2) +#define PAGELIST_VMALLOC_MIN (PAGE_SIZE) #ifdef CONFIG_NVMAP_PAGE_POOLS @@ -85,8 +86,11 @@ static struct page *nvmap_page_pool_alloc_locked(struct nvmap_page_pool *pool) { struct page *page = NULL; - if (pool->npages > 0) + if (pool->npages > 0) { page = pool->page_array[--pool->npages]; + atomic_dec(&page->_count); + BUG_ON(atomic_read(&page->_count) != 1); + } return page; } @@ -107,7 +111,9 @@ static bool nvmap_page_pool_release_locked(struct nvmap_page_pool *pool, { int ret = false; + BUG_ON(atomic_read(&page->_count) != 1); if (enable_pp && pool->npages < pool->max_pages) { + atomic_inc(&page->_count); pool->page_array[pool->npages++] = page; ret = true; } @@ -134,6 +140,7 @@ static int nvmap_page_pool_get_available_count(struct nvmap_page_pool *pool) static int nvmap_page_pool_free(struct nvmap_page_pool *pool, int nr_free) { + int err; int i = nr_free; int idx = 0; struct page *page; @@ -149,8 +156,12 @@ static int nvmap_page_pool_free(struct nvmap_page_pool *pool, int nr_free) i--; } - if (idx) - set_pages_array_wb(pool->shrink_array, idx); + if (idx) { + /* This op should never fail. 
*/ + err = set_pages_array_wb(pool->shrink_array, idx); + BUG_ON(err); + } + while (idx--) __free_page(pool->shrink_array[idx]); nvmap_page_pool_unlock(pool); @@ -367,8 +378,9 @@ POOL_SIZE_MOUDLE_PARAM_CB(wb, NVMAP_HANDLE_CACHEABLE); int nvmap_page_pool_init(struct nvmap_page_pool *pool, int flags) { - struct page *page; int i; + int err; + struct page *page; static int reg = 1; struct sysinfo info; int highmem_pages = 0; @@ -431,7 +443,8 @@ int nvmap_page_pool_init(struct nvmap_page_pool *pool, int flags) s_memtype_str[flags], highmem_pages, pool->max_pages, info.totalram, info.freeram, info.totalhigh, info.freehigh); do_cpa: - (*s_cpa[flags])(pool->page_array, pool->npages); + err = (*s_cpa[flags])(pool->page_array, pool->npages); + BUG_ON(err); nvmap_page_pool_unlock(pool); return 0; fail: @@ -444,7 +457,7 @@ fail: static inline void *altalloc(size_t len) { - if (len >= PAGELIST_VMALLOC_MIN) + if (len > PAGELIST_VMALLOC_MIN) return vmalloc(len); else return kmalloc(len, GFP_KERNEL); @@ -455,7 +468,7 @@ static inline void altfree(void *ptr, size_t len) if (!ptr) return; - if (len >= PAGELIST_VMALLOC_MIN) + if (len > PAGELIST_VMALLOC_MIN) vfree(ptr); else kfree(ptr); @@ -463,6 +476,7 @@ static inline void altfree(void *ptr, size_t len) void _nvmap_handle_free(struct nvmap_handle *h) { + int err; struct nvmap_share *share = nvmap_get_share_from_dev(h->dev); unsigned int i, nr_page, page_index = 0; #ifdef CONFIG_NVMAP_PAGE_POOLS @@ -506,9 +520,12 @@ void _nvmap_handle_free(struct nvmap_handle *h) /* Restore page attributes. */ if (h->flags == NVMAP_HANDLE_WRITE_COMBINE || h->flags == NVMAP_HANDLE_UNCACHEABLE || - h->flags == NVMAP_HANDLE_INNER_CACHEABLE) - set_pages_array_wb(&h->pgalloc.pages[page_index], + h->flags == NVMAP_HANDLE_INNER_CACHEABLE) { + /* This op should never fail. 
*/ + err = set_pages_array_wb(&h->pgalloc.pages[page_index], nr_page - page_index); + BUG_ON(err); + } skip_attr_restore: if (h->pgalloc.area) @@ -546,6 +563,7 @@ static struct page *nvmap_alloc_pages_exact(gfp_t gfp, size_t size) static int handle_page_alloc(struct nvmap_client *client, struct nvmap_handle *h, bool contiguous) { + int err = 0; size_t size = PAGE_ALIGN(h->size); unsigned int nr_page = size >> PAGE_SHIFT; pgprot_t prot; @@ -555,6 +573,17 @@ static int handle_page_alloc(struct nvmap_client *client, struct nvmap_page_pool *pool = NULL; struct nvmap_share *share = nvmap_get_share_from_dev(h->dev); #endif + gfp_t gfp = GFP_NVMAP; + unsigned long kaddr, paddr; + pte_t **pte = NULL; + + if (h->userflags & NVMAP_HANDLE_ZEROED_PAGES) { + gfp |= __GFP_ZERO; + prot = nvmap_pgprot(h, pgprot_kernel); + pte = nvmap_alloc_pte(client->dev, (void **)&kaddr); + if (IS_ERR(pte)) + return -ENOMEM; + } pages = altalloc(nr_page * sizeof(*pages)); if (!pages) @@ -565,7 +594,7 @@ static int handle_page_alloc(struct nvmap_client *client, h->pgalloc.area = NULL; if (contiguous) { struct page *page; - page = nvmap_alloc_pages_exact(GFP_NVMAP, size); + page = nvmap_alloc_pages_exact(gfp, size); if (!page) goto fail; @@ -582,12 +611,29 @@ static int handle_page_alloc(struct nvmap_client *client, pages[i] = nvmap_page_pool_alloc(pool); if (!pages[i]) break; + if (h->userflags & NVMAP_HANDLE_ZEROED_PAGES) { + /* + * Just memset low mem pages; they will for + * sure have a virtual address. Otherwise, build + * a mapping for the page in the kernel. 
+ */ + if (!PageHighMem(pages[i])) { + memset(page_address(pages[i]), 0, + PAGE_SIZE); + } else { + paddr = page_to_phys(pages[i]); + set_pte_at(&init_mm, kaddr, *pte, + pfn_pte(__phys_to_pfn(paddr), + prot)); + flush_tlb_kernel_page(kaddr); + memset((char *)kaddr, 0, PAGE_SIZE); + } + } page_index++; } #endif for (; i < nr_page; i++) { - pages[i] = nvmap_alloc_pages_exact(GFP_NVMAP, - PAGE_SIZE); + pages[i] = nvmap_alloc_pages_exact(gfp, PAGE_SIZE); if (!pages[i]) goto fail; } @@ -608,16 +654,21 @@ static int handle_page_alloc(struct nvmap_client *client, /* Update the pages mapping in kernel page table. */ if (h->flags == NVMAP_HANDLE_WRITE_COMBINE) - set_pages_array_wc(&pages[page_index], - nr_page - page_index); + err = set_pages_array_wc(&pages[page_index], + nr_page - page_index); else if (h->flags == NVMAP_HANDLE_UNCACHEABLE) - set_pages_array_uc(&pages[page_index], - nr_page - page_index); + err = set_pages_array_uc(&pages[page_index], + nr_page - page_index); else if (h->flags == NVMAP_HANDLE_INNER_CACHEABLE) - set_pages_array_iwb(&pages[page_index], - nr_page - page_index); + err = set_pages_array_iwb(&pages[page_index], + nr_page - page_index); + + if (err) + goto fail; skip_attr_change: + if (h->userflags & NVMAP_HANDLE_ZEROED_PAGES) + nvmap_free_pte(client->dev, pte); h->size = size; h->pgalloc.pages = pages; h->pgalloc.contig = contiguous; @@ -625,10 +676,12 @@ skip_attr_change: return 0; fail: - while (i--) { - set_pages_array_wb(&pages[i], 1); + if (h->userflags & NVMAP_HANDLE_ZEROED_PAGES) + nvmap_free_pte(client->dev, pte); + err = set_pages_array_wb(pages, i); + BUG_ON(err); + while (i--) __free_page(pages[i]); - } altfree(pages, nr_page * sizeof(*pages)); wmb(); return -ENOMEM; diff --git a/drivers/video/tegra/nvmap/nvmap_heap.c b/drivers/video/tegra/nvmap/nvmap_heap.c index a6fe78c42f87..738ba26232d3 100644 --- a/drivers/video/tegra/nvmap/nvmap_heap.c +++ b/drivers/video/tegra/nvmap/nvmap_heap.c @@ -3,7 +3,7 @@ * * GPU heap allocator. 
* - * Copyright (c) 2011, NVIDIA Corporation. + * Copyright (c) 2012, NVIDIA Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -420,6 +420,9 @@ static struct nvmap_heap_block *do_heap_alloc(struct nvmap_heap *heap, list_for_each_entry(i, &heap->free_list, free_list) { size_t fix_size; fix_base = ALIGN(i->block.base, align); + if(!fix_base || fix_base >= i->block.base + i->size) + continue; + fix_size = i->size - (fix_base - i->block.base); /* needed for compaction. relocated chunk diff --git a/drivers/video/tegra/nvmap/nvmap_ioctl.c b/drivers/video/tegra/nvmap/nvmap_ioctl.c index 44f00d2951a0..da974b2c6846 100644 --- a/drivers/video/tegra/nvmap/nvmap_ioctl.c +++ b/drivers/video/tegra/nvmap/nvmap_ioctl.c @@ -3,7 +3,7 @@ * * User-space interface to nvmap * - * Copyright (c) 2011, NVIDIA Corporation. + * Copyright (c) 2011-2012, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -65,10 +65,10 @@ int nvmap_ioctl_pinop(struct file *filp, bool is_pin, void __user *arg) return -EINVAL; if (op.count > 1) { - size_t bytes = op.count * sizeof(unsigned long *); + size_t bytes = op.count * sizeof(*refs); /* kcalloc below will catch overflow. */ if (op.count > ARRAY_SIZE(on_stack)) - refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL); + refs = kcalloc(op.count, sizeof(*refs), GFP_KERNEL); else refs = on_stack; @@ -175,6 +175,9 @@ int nvmap_ioctl_alloc(struct file *filp, void __user *arg) /* user-space handles are aligned to page boundaries, to prevent * data leakage. 
*/ op.align = max_t(size_t, op.align, PAGE_SIZE); +#if defined(CONFIG_NVMAP_FORCE_ZEROED_USER_PAGES) + op.flags |= NVMAP_HANDLE_ZEROED_PAGES; +#endif return nvmap_alloc_handle_id(client, op.handle, op.heap_mask, op.align, op.flags); @@ -236,6 +239,11 @@ int nvmap_map_into_caller_ptr(struct file *filp, void __user *arg) if (!h) return -EPERM; + if(!h->alloc) { + nvmap_handle_put(h); + return -EFAULT; + } + trace_nvmap_map_into_caller_ptr(client, h, op.offset, op.length, op.flags); down_read(&current->mm->mmap_sem); @@ -251,7 +259,7 @@ int nvmap_map_into_caller_ptr(struct file *filp, void __user *arg) goto out; } - if ((op.offset + op.length) > h->size) { + if (op.offset > h->size || (op.offset + op.length) > h->size) { err = -EADDRNOTAVAIL; goto out; } @@ -542,14 +550,32 @@ static void heap_page_cache_maint(struct nvmap_client *client, } } +static bool fast_cache_maint_outer(unsigned long start, + unsigned long end, unsigned int op) +{ + bool result = false; +#if defined(CONFIG_NVMAP_OUTER_CACHE_MAINT_BY_SET_WAYS) + if (end - start >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_OUTER) { + if (op == NVMAP_CACHE_OP_WB_INV) { + outer_flush_all(); + result = true; + } + if (op == NVMAP_CACHE_OP_WB) { + outer_clean_all(); + result = true; + } + } +#endif + return result; +} + static bool fast_cache_maint(struct nvmap_client *client, struct nvmap_handle *h, unsigned long start, unsigned long end, unsigned int op) { int ret = false; - #if defined(CONFIG_NVMAP_CACHE_MAINT_BY_SET_WAYS) if ((op == NVMAP_CACHE_OP_INV) || - ((end - start) < FLUSH_CLEAN_BY_SET_WAY_THRESHOLD)) + ((end - start) < FLUSH_CLEAN_BY_SET_WAY_THRESHOLD_INNER)) goto out; if (op == NVMAP_CACHE_OP_WB_INV) @@ -557,13 +583,19 @@ static bool fast_cache_maint(struct nvmap_client *client, struct nvmap_handle *h else if (op == NVMAP_CACHE_OP_WB) inner_clean_cache_all(); - if (h->heap_pgalloc && (h->flags != NVMAP_HANDLE_INNER_CACHEABLE)) { - heap_page_cache_maint(client, h, start, end, op, - false, true, NULL, 0, 0); - } else if
(h->flags != NVMAP_HANDLE_INNER_CACHEABLE) { - start += h->carveout->base; - end += h->carveout->base; - outer_cache_maint(op, start, end - start); + /* outer maintenance */ + if (h->flags != NVMAP_HANDLE_INNER_CACHEABLE ) { + if(!fast_cache_maint_outer(start, end, op)) + { + if (h->heap_pgalloc) { + heap_page_cache_maint(client, h, start, + end, op, false, true, NULL, 0, 0); + } else { + start += h->carveout->base; + end += h->carveout->base; + outer_cache_maint(op, start, end - start); + } + } } ret = true; out: |