author     Chris Johnson <cwj@nvidia.com>        2011-08-12 09:04:09 +0300
committer  Varun Colbert <vcolbert@nvidia.com>   2011-08-18 11:37:49 -0700
commit     04bb6c2b2ee1bc07bf6b0248bb64f41509415fb7 (patch)
tree       a64aaa90098af1bd9c1a73998f4a768ebbb839f6 /drivers/video/tegra/host/t20
parent     75794f7f04952642b6747bf3a99fea35e2a8f8a7 (diff)
video: tegra: nvhost: add submit timeout support
In this change, nvhost_cdma starts a timer (if a timeout is specified in the
userctx) for the buffer at the head of the sync_queue that has not reached its
syncpt threshold.

If the timeout fires, nvhost_cdma initiates a channel / module reset. It then
detects how far into the sync_queue execution stopped (based on the current HW
syncpt value). For any remaining uncompleted buffers in the context,
nvhost_cdma NOPs the entry and CPU-increments the syncpt to the value it would
have reached had the buffer completed. If one of the sync_queue entries
belongs to another context, the syncpt increments for this context are still
done, but via the PB as a GATHER opcode. At the end, CDMA is restarted so the
buffers are refetched (either with NOP slots, or GATHERs to increment
syncpts). The buffer then appears to have completed, and the associated
resources are released.

For testing, debugfs entries have been added under /d/tegra_nvhost:
  force_timeout_val     - set the timeout value, in ms
  force_timeout_channel - channel ID where timeout checks occur
  force_timeout_pid     - process ID used to locate the userctx

The idea is to set the timeout_val, then the timeout_channel (e.g. for 3D, the
channel ID is 1) and then the process ID, obtained from running "adb shell ps".

Bug 625545

Change-Id: I659e9255f1105f3439ce23e9169a19739b83ea52
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/42655
Reviewed-by: Scott Williams <scwilliams@nvidia.com>
Reviewed-by: Varun Colbert <vcolbert@nvidia.com>
Tested-by: Varun Colbert <vcolbert@nvidia.com>
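Exercising the timeout path from user space amounts to writing the three
debugfs files in the order described above. A minimal sketch, assuming the
/d/tegra_nvhost paths from the commit message; the timeout, channel and PID
values here are placeholders, not part of the change:

/* Hypothetical user-space helper for the debugfs test hooks above. */
#include <stdio.h>

static int write_debugfs(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	/* 1000 ms timeout on channel 1 (3D), PID taken from "adb shell ps" */
	write_debugfs("/d/tegra_nvhost/force_timeout_val", "1000");
	write_debugfs("/d/tegra_nvhost/force_timeout_channel", "1");
	write_debugfs("/d/tegra_nvhost/force_timeout_pid", "1234");
	return 0;
}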
Diffstat (limited to 'drivers/video/tegra/host/t20')
-rw-r--r--  drivers/video/tegra/host/t20/3dctx_t20.c     21
-rw-r--r--  drivers/video/tegra/host/t20/cdma_t20.c     444
-rw-r--r--  drivers/video/tegra/host/t20/channel_t20.c   63
-rw-r--r--  drivers/video/tegra/host/t20/hardware_t20.h  20
4 files changed, 516 insertions, 32 deletions
diff --git a/drivers/video/tegra/host/t20/3dctx_t20.c b/drivers/video/tegra/host/t20/3dctx_t20.c
index dadfbed3434a..7ad7166b2d3a 100644
--- a/drivers/video/tegra/host/t20/3dctx_t20.c
+++ b/drivers/video/tegra/host/t20/3dctx_t20.c
@@ -216,11 +216,12 @@ static void setup_restore_v0(u32 *ptr)
/*** save ***/
/* the same context save command sequence is used for all contexts. */
-static struct nvmap_handle_ref *save_buf = NULL;
-static phys_addr_t save_phys = 0;
-static unsigned int save_size = 0;
-static unsigned int save_incrs = 0;
-static unsigned int save_thresh = 0;
+static struct nvmap_handle_ref *save_buf;
+static phys_addr_t save_phys;
+static unsigned int save_size;
+static unsigned int save_incrs;
+static unsigned int save_thresh;
+static unsigned int save_slots;
static void __init setup_save_regs(const struct ctx_saver *saver,
struct save_info *info,
@@ -648,6 +649,7 @@ static struct nvhost_hwctx *ctx3d_alloc_common(struct nvhost_channel *ch,
ctx->save = save_buf;
ctx->save_incrs = save_incrs;
ctx->save_thresh = save_thresh;
+ ctx->save_slots = save_slots;
ctx->restore_phys = nvmap_pin(nvmap, ctx->restore);
ctx->restore_size = restore_size;
ctx->restore_incrs = restore_incrs;
@@ -769,6 +771,15 @@ int __init t20_nvhost_3dctx_handler_init(struct nvhost_hwctx_handler *h)
return err;
}
+ save_slots = 1; /* save_push_v0() */
+ if (s_is_v1) {
+ save_slots = 6; /* save_push_v1() */
+ if (register_sets == 2)
+ save_slots += 2;
+ if (s_war_insert_syncpoints)
+ save_slots += 1;
+ }
+
save_ptr = nvmap_mmap(save_buf);
if (!save_ptr) {
nvmap_free(nvmap, save_buf);
diff --git a/drivers/video/tegra/host/t20/cdma_t20.c b/drivers/video/tegra/host/t20/cdma_t20.c
index eaba1c78af92..69c3039357a8 100644
--- a/drivers/video/tegra/host/t20/cdma_t20.c
+++ b/drivers/video/tegra/host/t20/cdma_t20.c
@@ -25,6 +25,9 @@
#include "../dev.h"
#include "hardware_t20.h"
+#include "syncpt_t20.h"
+
+static void t20_cdma_timeout_handler(struct work_struct *work);
/*
* push_buffer
@@ -155,6 +158,266 @@ static u32 t20_push_buffer_putptr(struct push_buffer *pb)
return pb->phys + pb->cur;
}
+/*
+ * The syncpt incr buffer is filled with methods to increment syncpts, which
+ * is later GATHER-ed into the mainline PB. It's used when a timed out context
+ * is interleaved with other work, so needs to inline the syncpt increments
+ * to maintain the count (but otherwise does no work).
+ */
+
+/**
+ * Init timeout and syncpt incr buffer resources
+ */
+static int t20_cdma_timeout_init(struct nvhost_cdma *cdma,
+ u32 syncpt_id)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct nvmap_client *nvmap = cdma_to_nvmap(cdma);
+ struct syncpt_buffer *sb = &cdma->syncpt_buffer;
+ struct nvhost_channel *ch = cdma_to_channel(cdma);
+ u32 i = 0;
+
+ if (syncpt_id == NVSYNCPT_INVALID)
+ return -EINVAL;
+
+ /* allocate and map syncpt incr memory */
+ sb->mem = nvmap_alloc(nvmap,
+ (SYNCPT_INCR_BUFFER_SIZE_WORDS * sizeof(u32)), 32,
+ NVMAP_HANDLE_WRITE_COMBINE);
+ if (IS_ERR_OR_NULL(sb->mem)) {
+ sb->mem = NULL;
+ goto fail;
+ }
+ sb->mapped = nvmap_mmap(sb->mem);
+ if (sb->mapped == NULL)
+ goto fail;
+
+ /* pin syncpt buffer and get physical address */
+ sb->phys = nvmap_pin(nvmap, sb->mem);
+ if (sb->phys >= 0xfffff000) {
+ sb->phys = 0;
+ goto fail;
+ }
+
+ dev_dbg(&dev->pdev->dev, "%s: SYNCPT_INCR buffer at 0x%x\n",
+ __func__, sb->phys);
+
+ sb->words_per_incr = (syncpt_id == NVSYNCPT_3D) ? 5 : 3;
+ sb->incr_per_buffer = (SYNCPT_INCR_BUFFER_SIZE_WORDS /
+ sb->words_per_incr);
+
+ /* init buffer with SETCL and INCR_SYNCPT methods */
+ while (i < sb->incr_per_buffer) {
+ sb->mapped[i++] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
+ 0, 0);
+ sb->mapped[i++] = nvhost_opcode_imm_incr_syncpt(
+ NV_CLASS_HOST_SYNCPT_IMMEDIATE,
+ syncpt_id);
+ if (syncpt_id == NVSYNCPT_3D) {
+ /* also contains base increments */
+ sb->mapped[i++] = nvhost_opcode_nonincr(
+ NV_CLASS_HOST_INCR_SYNCPT_BASE,
+ 1);
+ sb->mapped[i++] = nvhost_class_host_incr_syncpt_base(
+ NVWAITBASE_3D, 1);
+ }
+ sb->mapped[i++] = nvhost_opcode_setclass(ch->desc->class,
+ 0, 0);
+ }
+ wmb();
+
+ INIT_DELAYED_WORK(&cdma->timeout.wq, t20_cdma_timeout_handler);
+ cdma->timeout.initialized = true;
+
+ return 0;
+fail:
+ cdma_op(cdma).timeout_destroy(cdma);
+ return -ENOMEM;
+}
+
+/**
+ * Clean up timeout syncpt buffer resources
+ */
+static void t20_cdma_timeout_destroy(struct nvhost_cdma *cdma)
+{
+ struct nvmap_client *nvmap = cdma_to_nvmap(cdma);
+ struct syncpt_buffer *sb = &cdma->syncpt_buffer;
+
+ if (sb->mapped)
+ nvmap_munmap(sb->mem, sb->mapped);
+
+ if (sb->phys != 0)
+ nvmap_unpin(nvmap, sb->mem);
+
+ if (sb->mem)
+ nvmap_free(nvmap, sb->mem);
+
+ sb->mem = NULL;
+ sb->mapped = NULL;
+ sb->phys = 0;
+
+ if (cdma->timeout.initialized)
+ cancel_delayed_work(&cdma->timeout.wq);
+ cdma->timeout.initialized = false;
+}
+
+/**
+ * Increment timedout buffer's syncpt via CPU.
+ */
+static void t20_cdma_timeout_cpu_incr(struct nvhost_cdma *cdma, u32 getptr,
+ u32 syncpt_incrs, u32 nr_slots)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct push_buffer *pb = &cdma->push_buffer;
+ u32 i, getidx;
+
+ for (i = 0; i < syncpt_incrs; i++)
+ nvhost_syncpt_cpu_incr(&dev->syncpt, cdma->timeout.syncpt_id);
+
+ /* after CPU incr, ensure shadow is up to date */
+ nvhost_syncpt_update_min(&dev->syncpt, cdma->timeout.syncpt_id);
+
+ /* update WAITBASE_3D by same number of incrs */
+ if (cdma->timeout.syncpt_id == NVSYNCPT_3D) {
+ void __iomem *p;
+ p = dev->sync_aperture + HOST1X_SYNC_SYNCPT_BASE_0 +
+ (NVWAITBASE_3D * sizeof(u32));
+ writel(readl(p) + syncpt_incrs, p);
+ }
+
+ /* NOP all the PB slots */
+ getidx = getptr - pb->phys;
+ while (nr_slots--) {
+ u32 *p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = NVHOST_OPCODE_NOOP;
+ *(p++) = NVHOST_OPCODE_NOOP;
+ dev_dbg(&dev->pdev->dev, "%s: NOP at 0x%x\n",
+ __func__, pb->phys + getidx);
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ }
+ wmb();
+}
+
+/**
+ * This routine is called at the point we transition back into a timed
+ * ctx. The syncpts are incremented via pushbuffer with a flag indicating
+ * whether there's a CTXSAVE that should be still executed (for the
+ * preceding HW ctx).
+ */
+static void t20_cdma_timeout_pb_incr(struct nvhost_cdma *cdma, u32 getptr,
+ u32 syncpt_incrs, u32 nr_slots,
+ bool exec_ctxsave)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct syncpt_buffer *sb = &cdma->syncpt_buffer;
+ struct push_buffer *pb = &cdma->push_buffer;
+ struct nvhost_userctx_timeout *timeout = cdma->timeout.ctx_timeout;
+ u32 getidx, *p;
+
+ /* should have enough slots to incr to desired count */
+ BUG_ON(syncpt_incrs > (nr_slots * sb->incr_per_buffer));
+
+ getidx = getptr - pb->phys;
+ if (exec_ctxsave) {
+ /* don't disrupt the CTXSAVE of a good/non-timed out ctx */
+ nr_slots -= timeout->hwctx->save_slots;
+ syncpt_incrs -= timeout->hwctx->save_incrs;
+
+ getidx += (timeout->hwctx->save_slots * 8);
+ getidx &= (PUSH_BUFFER_SIZE - 1);
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: exec CTXSAVE of prev ctx (slots %d, incrs %d)\n",
+ __func__, nr_slots, syncpt_incrs);
+ }
+
+ while (syncpt_incrs) {
+ u32 incrs, count;
+
+ /* GATHER count are incrs * number of DWORDs per incr */
+ incrs = min(syncpt_incrs, sb->incr_per_buffer);
+ count = incrs * sb->words_per_incr;
+
+ p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = nvhost_opcode_gather(count);
+ *(p++) = sb->phys;
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: GATHER at 0x%x, from 0x%x, dcount = %d\n",
+ __func__,
+ pb->phys + getidx, sb->phys,
+ (incrs * sb->words_per_incr));
+
+ syncpt_incrs -= incrs;
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ nr_slots--;
+ }
+
+ /* NOP remaining slots */
+ while (nr_slots--) {
+ p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = NVHOST_OPCODE_NOOP;
+ *(p++) = NVHOST_OPCODE_NOOP;
+ dev_dbg(&dev->pdev->dev, "%s: NOP at 0x%x\n",
+ __func__, pb->phys + getidx);
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ }
+ wmb();
+}
+
+/**
+ * Clear a context switch save for a timed out context that's been
+ * queued up in a non-timed out context.
+ */
+static void t20_cdma_timeout_clear_ctxsave(struct nvhost_cdma *cdma,
+ u32 getptr, u32 nr_slots)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct syncpt_buffer *sb = &cdma->syncpt_buffer;
+ struct push_buffer *pb = &cdma->push_buffer;
+ struct nvhost_userctx_timeout *timeout = cdma->timeout.ctx_timeout;
+ u32 getidx, *p;
+
+ getidx = getptr - pb->phys;
+ p = (u32 *)((u32)pb->mapped + getidx);
+
+ if (timeout->hwctx) {
+ u32 incrs, slots_to_clear;
+
+ slots_to_clear = timeout->hwctx->save_slots;
+ incrs = timeout->hwctx->save_incrs;
+
+ BUG_ON(slots_to_clear > nr_slots);
+ BUG_ON(incrs > sb->incr_per_buffer);
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: clearing CTXSAVE at 0x%x, for %d slots %d incrs\n",
+ __func__, pb->phys + getidx, slots_to_clear, incrs);
+
+ /* first, GATHER incr for ctxsave */
+ if (incrs) {
+ u32 count = incrs * sb->words_per_incr;
+
+ p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = nvhost_opcode_gather(count);
+ *(p++) = sb->phys;
+
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ slots_to_clear--;
+ }
+
+ /* NOP remaining slots */
+ while (slots_to_clear--) {
+ p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = NVHOST_OPCODE_NOOP;
+ *(p++) = NVHOST_OPCODE_NOOP;
+ dev_dbg(&dev->pdev->dev, "%s: NOP at 0x%x\n",
+ __func__, pb->phys + getidx);
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ }
+ }
+ wmb();
+}
/**
* Start channel DMA
@@ -167,7 +430,6 @@ static void t20_cdma_start(struct nvhost_cdma *cdma)
return;
BUG_ON(!cdma_pb_op(cdma).putptr);
-
cdma->last_put = cdma_pb_op(cdma).putptr(&cdma->push_buffer);
writel(nvhost_channel_dmactrl(true, false, false),
@@ -190,6 +452,53 @@ static void t20_cdma_start(struct nvhost_cdma *cdma)
}
/**
+ * Similar to t20_cdma_start(), but rather than starting from an idle
+ * state (where DMA GET is set to DMA PUT), on a timeout we restore
+ * DMA GET from an explicit value (so DMA may again be pending).
+ */
+static void t20_cdma_timeout_restart(struct nvhost_cdma *cdma, u32 getptr)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ void __iomem *chan_regs = cdma_to_channel(cdma)->aperture;
+
+ if (cdma->running)
+ return;
+
+ BUG_ON(!cdma_pb_op(cdma).putptr);
+ cdma->last_put = cdma_pb_op(cdma).putptr(&cdma->push_buffer);
+
+ writel(nvhost_channel_dmactrl(true, false, false),
+ chan_regs + HOST1X_CHANNEL_DMACTRL);
+
+ /* set base, end pointer (all of memory) */
+ writel(0, chan_regs + HOST1X_CHANNEL_DMASTART);
+ writel(0xFFFFFFFF, chan_regs + HOST1X_CHANNEL_DMAEND);
+
+ /* set GET, by loading the value in PUT (then reset GET) */
+ writel(getptr, chan_regs + HOST1X_CHANNEL_DMAPUT);
+ writel(nvhost_channel_dmactrl(true, true, true),
+ chan_regs + HOST1X_CHANNEL_DMACTRL);
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
+ __func__,
+ readl(chan_regs + HOST1X_CHANNEL_DMAGET),
+ readl(chan_regs + HOST1X_CHANNEL_DMAPUT),
+ cdma->last_put);
+
+ /* deassert GET reset and set PUT */
+ writel(nvhost_channel_dmactrl(true, false, false),
+ chan_regs + HOST1X_CHANNEL_DMACTRL);
+ writel(cdma->last_put, chan_regs + HOST1X_CHANNEL_DMAPUT);
+
+ /* start the command DMA */
+ writel(nvhost_channel_dmactrl(false, false, false),
+ chan_regs + HOST1X_CHANNEL_DMACTRL);
+
+ cdma->running = true;
+}
+
+/**
* Kick channel DMA into action by writing its PUT offset (if it has changed)
*/
static void t20_cdma_kick(struct nvhost_cdma *cdma)
@@ -235,12 +544,145 @@ void t20_cdma_peek(struct nvhost_cdma *cdma,
out[1] = p[offset + 1];
}
+/**
+ * Stops both channel's command processor and CDMA immediately.
+ * Also, tears down the channel and resets corresponding module.
+ */
+void t20_cdma_timeout_teardown_begin(struct nvhost_cdma *cdma)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct nvhost_channel *ch = cdma_to_channel(cdma);
+ u32 cmdproc_stop;
+
+ BUG_ON(cdma->torndown);
+
+ dev_dbg(&dev->pdev->dev,
+ "begin channel teardown (channel id %d)\n", ch->chid);
+
+ cmdproc_stop = readl(dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+ cmdproc_stop = nvhost_sync_cmdproc_stop_chid(cmdproc_stop, ch->chid);
+ writel(cmdproc_stop, dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
+ __func__,
+ readl(ch->aperture + HOST1X_CHANNEL_DMAGET),
+ readl(ch->aperture + HOST1X_CHANNEL_DMAPUT),
+ cdma->last_put);
+
+ writel(nvhost_channel_dmactrl(true, false, false),
+ ch->aperture + HOST1X_CHANNEL_DMACTRL);
+
+ writel(BIT(ch->chid), dev->sync_aperture + HOST1X_SYNC_CH_TEARDOWN);
+ nvhost_module_reset(&ch->mod);
+
+ cdma->running = false;
+ cdma->torndown = true;
+}
+
+void t20_cdma_timeout_teardown_end(struct nvhost_cdma *cdma, u32 getptr)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct nvhost_channel *ch = cdma_to_channel(cdma);
+ u32 cmdproc_stop;
+
+ BUG_ON(!cdma->torndown || cdma->running);
+
+ dev_dbg(&dev->pdev->dev,
+ "end channel teardown (id %d, DMAGET restart = 0x%x)\n",
+ ch->chid, getptr);
+
+ cmdproc_stop = readl(dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+ cmdproc_stop = nvhost_sync_cmdproc_run_chid(cmdproc_stop, ch->chid);
+ writel(cmdproc_stop, dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+
+ cdma->torndown = false;
+ t20_cdma_timeout_restart(cdma, getptr);
+}
+
+/**
+ * If this timeout fires, it indicates the current sync_queue entry has
+ * exceeded its TTL and the userctx should be timed out and remaining
+ * submits already issued cleaned up (future submits return an error).
+ */
+static void t20_cdma_timeout_handler(struct work_struct *work)
+{
+ struct nvhost_cdma *cdma;
+ struct nvhost_master *dev;
+ struct nvhost_syncpt *sp;
+ struct nvhost_channel *ch;
+
+ u32 syncpt_val;
+
+ u32 prev_cmdproc, cmdproc_stop;
+
+ cdma = container_of(to_delayed_work(work), struct nvhost_cdma,
+ timeout.wq);
+ dev = cdma_to_dev(cdma);
+ sp = &dev->syncpt;
+ ch = cdma_to_channel(cdma);
+
+ mutex_lock(&cdma->lock);
+
+ if (!cdma->timeout.ctx_timeout) {
+ dev_dbg(&dev->pdev->dev,
+ "cdma_timeout: expired, but has NULL context\n");
+ mutex_unlock(&cdma->lock);
+ return;
+ }
+
+ /* stop processing to get a clean snapshot */
+ prev_cmdproc = readl(dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+ cmdproc_stop = nvhost_sync_cmdproc_stop_chid(prev_cmdproc, ch->chid);
+ writel(cmdproc_stop, dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+
+ dev_dbg(&dev->pdev->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n",
+ prev_cmdproc, cmdproc_stop);
+
+ syncpt_val = nvhost_syncpt_update_min(&dev->syncpt,
+ cdma->timeout.syncpt_id);
+
+ /* has buffer actually completed? */
+ if ((s32)(syncpt_val - cdma->timeout.syncpt_val) >= 0) {
+ dev_dbg(&dev->pdev->dev,
+ "cdma_timeout: expired, but buffer had completed\n");
+ /* restore */
+ cmdproc_stop = nvhost_sync_cmdproc_run_chid(prev_cmdproc,
+ ch->chid);
+ writel(cmdproc_stop,
+ dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+ mutex_unlock(&cdma->lock);
+ return;
+ }
+
+ dev_warn(&dev->pdev->dev,
+ "%s: timeout: %d (%s) ctx 0x%p, HW thresh %d, done %d\n",
+ __func__,
+ cdma->timeout.syncpt_id,
+ syncpt_op(sp).name(sp, cdma->timeout.syncpt_id),
+ cdma->timeout.ctx_timeout,
+ syncpt_val, cdma->timeout.syncpt_val);
+
+ /* stop HW, resetting channel/module */
+ cdma_op(cdma).timeout_teardown_begin(cdma);
+
+ nvhost_cdma_update_sync_queue(cdma, sp, &dev->pdev->dev);
+}
+
int nvhost_init_t20_cdma_support(struct nvhost_master *host)
{
host->op.cdma.start = t20_cdma_start;
host->op.cdma.stop = t20_cdma_stop;
host->op.cdma.kick = t20_cdma_kick;
+ host->op.cdma.timeout_init = t20_cdma_timeout_init;
+ host->op.cdma.timeout_destroy = t20_cdma_timeout_destroy;
+ host->op.cdma.timeout_teardown_begin = t20_cdma_timeout_teardown_begin;
+ host->op.cdma.timeout_teardown_end = t20_cdma_timeout_teardown_end;
+ host->op.cdma.timeout_cpu_incr = t20_cdma_timeout_cpu_incr;
+ host->op.cdma.timeout_pb_incr = t20_cdma_timeout_pb_incr;
+ host->op.cdma.timeout_clear_ctxsave = t20_cdma_timeout_clear_ctxsave;
+
host->sync_queue_size = NVHOST_SYNC_QUEUE_SIZE;
host->op.push_buffer.reset = t20_push_buffer_reset;
diff --git a/drivers/video/tegra/host/t20/channel_t20.c b/drivers/video/tegra/host/t20/channel_t20.c
index fdbf6ba7355d..b45c00421ec9 100644
--- a/drivers/video/tegra/host/t20/channel_t20.c
+++ b/drivers/video/tegra/host/t20/channel_t20.c
@@ -27,6 +27,7 @@
#include "hardware_t20.h"
#include "syncpt_t20.h"
+#include "../dev.h"
#define NVHOST_NUMCHANNELS (NV_HOST1X_CHANNELS - 1)
#define NVHOST_CHANNEL_BASE 0
@@ -42,10 +43,7 @@
#define NVMODMUTEX_DSI (9)
#define NV_FIFO_READ_TIMEOUT 200000
-static void power_2d(struct nvhost_module *mod, enum nvhost_power_action action);
static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action);
-static void power_mpe(struct nvhost_module *mod, enum nvhost_power_action action);
-
static const struct nvhost_channeldesc channelmap[] = {
@@ -74,7 +72,6 @@ static const struct nvhost_channeldesc channelmap[] = {
.waitbases = BIT(NVWAITBASE_2D_0) | BIT(NVWAITBASE_2D_1),
.modulemutexes = BIT(NVMODMUTEX_2D_FULL) | BIT(NVMODMUTEX_2D_SIMPLE) |
BIT(NVMODMUTEX_2D_SB_A) | BIT(NVMODMUTEX_2D_SB_B),
- .power = power_2d,
},
{
/* channel 3 */
@@ -98,7 +95,6 @@ static const struct nvhost_channeldesc channelmap[] = {
BIT(NVSYNCPT_MPE_WR_SAFE),
.waitbases = BIT(NVWAITBASE_MPE),
.class = NV_VIDEO_ENCODE_MPEG_CLASS_ID,
- .power = power_mpe,
.exclusive = true,
.keepalive = true,
},
@@ -138,6 +134,7 @@ static int t20_channel_init(struct nvhost_channel *ch,
struct nvhost_master *dev, int index)
{
ch->dev = dev;
+ ch->chid = index;
ch->desc = channelmap + index;
mutex_init(&ch->reflock);
mutex_init(&ch->submitlock);
@@ -161,6 +158,7 @@ static int t20_channel_submit(struct nvhost_channel *channel,
int nr_unpins,
u32 syncpt_id,
u32 syncpt_incrs,
+ struct nvhost_userctx_timeout *timeout,
u32 *syncpt_value,
bool null_kickoff)
{
@@ -176,6 +174,9 @@ static int t20_channel_submit(struct nvhost_channel *channel,
if (strcmp(channel->mod.name, "gr3d") == 0)
module3d_notify_busy();
+ /* before error checks, return current max */
+ *syncpt_value = nvhost_syncpt_read_max(sp, syncpt_id);
+
/* get submit lock */
err = mutex_lock_interruptible(&channel->submitlock);
if (err) {
@@ -198,11 +199,26 @@ static int t20_channel_submit(struct nvhost_channel *channel,
}
}
+ /* begin a CDMA submit */
+ err = nvhost_cdma_begin(&channel->cdma, timeout);
+ if (err) {
+ mutex_unlock(&channel->submitlock);
+ nvhost_module_idle(&channel->mod);
+ return err;
+ }
+
/* context switch */
if (channel->cur_ctx != hwctx) {
trace_nvhost_channel_context_switch(channel->desc->name,
channel->cur_ctx, hwctx);
hwctx_to_save = channel->cur_ctx;
+ if (hwctx_to_save && hwctx_to_save->timeout &&
+ hwctx_to_save->timeout->has_timedout) {
+ hwctx_to_save = NULL;
+ dev_dbg(&channel->dev->pdev->dev,
+ "%s: skip save of timed out context (0x%p)\n",
+ __func__, channel->cur_ctx->timeout);
+ }
if (hwctx_to_save) {
syncpt_incrs += hwctx_to_save->save_incrs;
hwctx_to_save->valid = true;
@@ -223,9 +239,6 @@ static int t20_channel_submit(struct nvhost_channel *channel,
syncval = nvhost_syncpt_incr_max(sp,
syncpt_id, syncpt_incrs);
- /* begin a CDMA submit */
- nvhost_cdma_begin(&channel->cdma);
-
/* push save buffer (pre-gather setup depends on unit) */
if (hwctx_to_save)
channel->ctxhandler.save_push(&channel->cdma, hwctx_to_save);
@@ -281,7 +294,8 @@ static int t20_channel_submit(struct nvhost_channel *channel,
/* end CDMA submit & stash pinned hMems into sync queue */
nvhost_cdma_end(&channel->cdma, user_nvmap,
- syncpt_id, syncval, unpins, nr_unpins);
+ syncpt_id, syncval, unpins, nr_unpins,
+ timeout);
trace_nvhost_channel_submitted(channel->desc->name,
syncval-syncpt_incrs, syncval);
@@ -308,23 +322,16 @@ static int t20_channel_submit(struct nvhost_channel *channel,
return 0;
}
-static void power_2d(struct nvhost_module *mod, enum nvhost_power_action action)
-{
- /* TODO: [ahatala 2010-06-17] reimplement EPP hang war */
- if (action == NVHOST_POWER_ACTION_OFF) {
- /* TODO: [ahatala 2010-06-17] reset EPP */
- }
-}
-
static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action)
{
struct nvhost_channel *ch = container_of(mod, struct nvhost_channel, mod);
struct nvhost_hwctx *hwctx_to_save;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
u32 syncpt_incrs, syncpt_val;
+ int err;
void *ref;
- if (action != NVHOST_POWER_ACTION_OFF)
+ if ((action != NVHOST_POWER_ACTION_OFF) || !mod->can_powergate)
return;
mutex_lock(&ch->submitlock);
@@ -337,6 +344,12 @@ static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action)
if (strcmp(mod->name, "gr3d") == 0)
module3d_notify_busy();
+ err = nvhost_cdma_begin(&ch->cdma, hwctx_to_save->timeout);
+ if (err) {
+ mutex_unlock(&ch->submitlock);
+ return;
+ }
+
hwctx_to_save->valid = true;
ch->ctxhandler.get(hwctx_to_save);
ch->cur_ctx = NULL;
@@ -345,9 +358,9 @@ static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action)
syncpt_val = nvhost_syncpt_incr_max(&ch->dev->syncpt,
NVSYNCPT_3D, syncpt_incrs);
- nvhost_cdma_begin(&ch->cdma);
ch->ctxhandler.save_push(&ch->cdma, hwctx_to_save);
- nvhost_cdma_end(&ch->cdma, ch->dev->nvmap, NVSYNCPT_3D, syncpt_val, NULL, 0);
+ nvhost_cdma_end(&ch->cdma, ch->dev->nvmap, NVSYNCPT_3D, syncpt_val,
+ NULL, 0, hwctx_to_save->timeout);
nvhost_intr_add_action(&ch->dev->intr, NVSYNCPT_3D,
syncpt_val - syncpt_incrs + hwctx_to_save->save_thresh,
@@ -366,13 +379,10 @@ static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action)
mutex_unlock(&ch->submitlock);
}
-static void power_mpe(struct nvhost_module *mod, enum nvhost_power_action action)
-{
-}
-
static int t20_channel_read_3d_reg(
struct nvhost_channel *channel,
struct nvhost_hwctx *hwctx,
+ struct nvhost_userctx_timeout *timeout,
u32 offset,
u32 *value)
{
@@ -414,7 +424,7 @@ static int t20_channel_read_3d_reg(
NVSYNCPT_3D, syncpt_incrs);
/* begin a CDMA submit */
- nvhost_cdma_begin(&channel->cdma);
+ nvhost_cdma_begin(&channel->cdma, timeout);
/* push save buffer (pre-gather setup depends on unit) */
if (hwctx_to_save)
@@ -463,7 +473,8 @@ static int t20_channel_read_3d_reg(
/* end CDMA submit */
nvhost_cdma_end(&channel->cdma, channel->dev->nvmap,
- NVSYNCPT_3D, syncval, NULL, 0);
+ NVSYNCPT_3D, syncval, NULL, 0,
+ timeout);
/*
* schedule a context save interrupt (to drain the host FIFO
diff --git a/drivers/video/tegra/host/t20/hardware_t20.h b/drivers/video/tegra/host/t20/hardware_t20.h
index 1e68bdcde0fa..4245a44c6bc2 100644
--- a/drivers/video/tegra/host/t20/hardware_t20.h
+++ b/drivers/video/tegra/host/t20/hardware_t20.h
@@ -91,6 +91,8 @@ enum {
HOST1X_SYNC_SYNCPT_THRESH_CPU1_INT_STATUS = 0x48,
HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE = 0x60,
HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0 = 0x68,
+ HOST1X_SYNC_CMDPROC_STOP = 0xac,
+ HOST1X_SYNC_CH_TEARDOWN = 0xb0,
HOST1X_SYNC_USEC_CLK = 0x1a4,
HOST1X_SYNC_CTXSW_TIMEOUT_CFG = 0x1a8,
HOST1X_SYNC_IP_BUSY_TIMEOUT = 0x1bc,
@@ -129,6 +131,20 @@ static inline unsigned int nvhost_sync_mlock_owner_owner_chid(u32 reg)
return (reg >> 8) & 0xf;
}
+static inline unsigned int nvhost_sync_cmdproc_stop_chid(u32 reg, u32 chid)
+{
+ return reg | BIT(chid);
+}
+
+static inline unsigned int nvhost_sync_cmdproc_run_chid(u32 reg, u32 chid)
+{
+ return reg & ~(BIT(chid));
+}
+
+static inline unsigned int nvhost_sync_ch_teardown_chid(u32 reg, u32 chid)
+{
+ return reg | BIT(chid);
+}
/* host class methods */
enum {
@@ -271,4 +287,8 @@ int nvhost_drain_read_fifo(void __iomem *chan_regs,
/* 8 bytes per slot. (This number does not include the final RESTART.) */
#define PUSH_BUFFER_SIZE (NVHOST_GATHER_QUEUE_SIZE * 8)
+/* 4K page containing GATHERed methods to increment channel syncpts
+ * and replaces the original timed out contexts GATHER slots */
+#define SYNCPT_INCR_BUFFER_SIZE_WORDS (4096 / sizeof(u32))
+
#endif /* __NVHOST_HARDWARE_T20_H */