diff options
author | Paul Mundt <lethal@linux-sh.org> | 2010-05-24 08:52:55 +0900 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2010-05-24 08:52:55 +0900 |
commit | 1f782fee18b39b9ad438ebbd82c2915a16c879ee (patch) | |
tree | f292930065e6c860714c134790ab8882680ac739 /drivers/dma | |
parent | 8eda2f21ed9c936a54fd7bc16cbfa5ee656635c2 (diff) | |
parent | f4b87dee923342505e1ddba8d34ce9de33e75050 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'drivers/dma')
-rw-r--r-- | drivers/dma/Kconfig | 14 | ||||
-rw-r--r-- | drivers/dma/Makefile | 2 | ||||
-rw-r--r-- | drivers/dma/at_hdmac.c | 35 | ||||
-rw-r--r-- | drivers/dma/coh901318.c | 263 | ||||
-rw-r--r-- | drivers/dma/dmaengine.c | 22 | ||||
-rw-r--r-- | drivers/dma/dw_dmac.c | 24 | ||||
-rw-r--r-- | drivers/dma/fsldma.c | 28 | ||||
-rw-r--r-- | drivers/dma/ioat/dma.c | 12 | ||||
-rw-r--r-- | drivers/dma/ioat/dma.h | 19 | ||||
-rw-r--r-- | drivers/dma/ioat/dma_v2.c | 186 | ||||
-rw-r--r-- | drivers/dma/ioat/dma_v2.h | 33 | ||||
-rw-r--r-- | drivers/dma/ioat/dma_v3.c | 143 | ||||
-rw-r--r-- | drivers/dma/ioat/pci.c | 7 | ||||
-rw-r--r-- | drivers/dma/iop-adma.c | 39 | ||||
-rw-r--r-- | drivers/dma/ipu/ipu_idmac.c | 34 | ||||
-rw-r--r-- | drivers/dma/mpc512x_dma.c | 15 | ||||
-rw-r--r-- | drivers/dma/mv_xor.c | 25 | ||||
-rw-r--r-- | drivers/dma/ppc4xx/adma.c | 19 | ||||
-rw-r--r-- | drivers/dma/shdma.c | 27 | ||||
-rw-r--r-- | drivers/dma/ste_dma40.c | 2657 | ||||
-rw-r--r-- | drivers/dma/ste_dma40_ll.c | 454 | ||||
-rw-r--r-- | drivers/dma/ste_dma40_ll.h | 354 | ||||
-rw-r--r-- | drivers/dma/timb_dma.c | 860 | ||||
-rw-r--r-- | drivers/dma/txx9dmac.c | 23 |
24 files changed, 4845 insertions, 450 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 9d8ca990dde6..dab6f17fbbc7 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -141,6 +141,13 @@ config COH901318 help Enable support for ST-Ericsson COH 901 318 DMA. +config STE_DMA40 + bool "ST-Ericsson DMA40 support" + depends on ARCH_U8500 + select DMA_ENGINE + help + Support for ST-Ericsson DMA40 controller + config AMCC_PPC440SPE_ADMA tristate "AMCC PPC440SPe ADMA support" depends on 440SPe || 440SP @@ -149,6 +156,13 @@ config AMCC_PPC440SPE_ADMA help Enable support for the AMCC PPC440SPe RAID engines. +config TIMB_DMA + tristate "Timberdale FPGA DMA support" + depends on MFD_TIMBERDALE || HAS_IOMEM + select DMA_ENGINE + help + Enable support for the Timberdale FPGA DMA engine. + config ARCH_HAS_ASYNC_TX_FIND_CHANNEL bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 22bba3d5e2b6..20881426c1ac 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -20,3 +20,5 @@ obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o obj-$(CONFIG_SH_DMAE) += shdma.o obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ +obj-$(CONFIG_TIMB_DMA) += timb_dma.o +obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 278cf5bceef2..bd5250e8c00c 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -760,13 +760,18 @@ err_desc_get: return NULL; } -static void atc_terminate_all(struct dma_chan *chan) +static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); struct at_desc *desc, *_desc; LIST_HEAD(list); + /* Only supports DMA_TERMINATE_ALL */ + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + /* * This is only called when something went wrong elsewhere, so * we don't really care about the data. Just disable the @@ -790,32 +795,30 @@ static void atc_terminate_all(struct dma_chan *chan) /* Flush all pending and queued descriptors */ list_for_each_entry_safe(desc, _desc, &list, desc_node) atc_chain_complete(atchan, desc); + + return 0; } /** - * atc_is_tx_complete - poll for transaction completion + * atc_tx_status - poll for transaction completion * @chan: DMA channel * @cookie: transaction identifier to check status of - * @done: if not %NULL, updated with last completed transaction - * @used: if not %NULL, updated with last used transaction + * @txstate: if not %NULL updated with transaction state * - * If @done and @used are passed in, upon return they reflect the driver + * If @txstate is passed in, upon return it reflect the driver * internal state and can be used with dma_async_is_complete() to check * the status of multiple cookies without re-checking hardware state. */ static enum dma_status -atc_is_tx_complete(struct dma_chan *chan, +atc_tx_status(struct dma_chan *chan, dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used) + struct dma_tx_state *txstate) { struct at_dma_chan *atchan = to_at_dma_chan(chan); dma_cookie_t last_used; dma_cookie_t last_complete; enum dma_status ret; - dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n", - cookie, done ? *done : 0, used ? *used : 0); - spin_lock_bh(&atchan->lock); last_complete = atchan->completed_cookie; @@ -833,10 +836,10 @@ atc_is_tx_complete(struct dma_chan *chan, spin_unlock_bh(&atchan->lock); - if (done) - *done = last_complete; - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); + dev_vdbg(chan2dev(chan), "tx_status: %d (d%d, u%d)\n", + cookie, last_complete ? last_complete : 0, + last_used ? last_used : 0); return ret; } @@ -1082,7 +1085,7 @@ static int __init at_dma_probe(struct platform_device *pdev) /* set base routines */ atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources; atdma->dma_common.device_free_chan_resources = atc_free_chan_resources; - atdma->dma_common.device_is_tx_complete = atc_is_tx_complete; + atdma->dma_common.device_tx_status = atc_tx_status; atdma->dma_common.device_issue_pending = atc_issue_pending; atdma->dma_common.dev = &pdev->dev; @@ -1092,7 +1095,7 @@ static int __init at_dma_probe(struct platform_device *pdev) if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; - atdma->dma_common.device_terminate_all = atc_terminate_all; + atdma->dma_common.device_control = atc_control; } dma_writel(atdma, EN, AT_DMA_ENABLE); diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 1656fdcdb6c2..a724e6be1b4d 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -37,7 +37,7 @@ struct coh901318_desc { struct list_head node; struct scatterlist *sg; unsigned int sg_len; - struct coh901318_lli *data; + struct coh901318_lli *lli; enum dma_data_direction dir; unsigned long flags; }; @@ -283,7 +283,7 @@ static int coh901318_start(struct coh901318_chan *cohc) } static int coh901318_prep_linked_list(struct coh901318_chan *cohc, - struct coh901318_lli *data) + struct coh901318_lli *lli) { int channel = cohc->id; void __iomem *virtbase = cohc->base->virtbase; @@ -292,18 +292,18 @@ static int coh901318_prep_linked_list(struct coh901318_chan *cohc, COH901318_CX_STAT_SPACING*channel) & COH901318_CX_STAT_ACTIVE); - writel(data->src_addr, + writel(lli->src_addr, virtbase + COH901318_CX_SRC_ADDR + COH901318_CX_SRC_ADDR_SPACING * channel); - writel(data->dst_addr, virtbase + + writel(lli->dst_addr, virtbase + COH901318_CX_DST_ADDR + COH901318_CX_DST_ADDR_SPACING * channel); - writel(data->link_addr, virtbase + COH901318_CX_LNK_ADDR + + writel(lli->link_addr, virtbase + COH901318_CX_LNK_ADDR + COH901318_CX_LNK_ADDR_SPACING * channel); - writel(data->control, virtbase + COH901318_CX_CTRL + + writel(lli->control, virtbase + COH901318_CX_CTRL + COH901318_CX_CTRL_SPACING * channel); return 0; @@ -408,33 +408,107 @@ coh901318_first_queued(struct coh901318_chan *cohc) return d; } +static inline u32 coh901318_get_bytes_in_lli(struct coh901318_lli *in_lli) +{ + struct coh901318_lli *lli = in_lli; + u32 bytes = 0; + + while (lli) { + bytes += lli->control & COH901318_CX_CTRL_TC_VALUE_MASK; + lli = lli->virt_link_addr; + } + return bytes; +} + /* - * DMA start/stop controls + * Get the number of bytes left to transfer on this channel, + * it is unwise to call this before stopping the channel for + * absolute measures, but for a rough guess you can still call + * it. */ -u32 coh901318_get_bytes_left(struct dma_chan *chan) +static u32 coh901318_get_bytes_left(struct dma_chan *chan) { - unsigned long flags; - u32 ret; struct coh901318_chan *cohc = to_coh901318_chan(chan); + struct coh901318_desc *cohd; + struct list_head *pos; + unsigned long flags; + u32 left = 0; + int i = 0; spin_lock_irqsave(&cohc->lock, flags); - /* Read transfer count value */ - ret = readl(cohc->base->virtbase + - COH901318_CX_CTRL+COH901318_CX_CTRL_SPACING * - cohc->id) & COH901318_CX_CTRL_TC_VALUE_MASK; + /* + * If there are many queued jobs, we iterate and add the + * size of them all. We take a special look on the first + * job though, since it is probably active. + */ + list_for_each(pos, &cohc->active) { + /* + * The first job in the list will be working on the + * hardware. The job can be stopped but still active, + * so that the transfer counter is somewhere inside + * the buffer. + */ + cohd = list_entry(pos, struct coh901318_desc, node); + + if (i == 0) { + struct coh901318_lli *lli; + dma_addr_t ladd; + + /* Read current transfer count value */ + left = readl(cohc->base->virtbase + + COH901318_CX_CTRL + + COH901318_CX_CTRL_SPACING * cohc->id) & + COH901318_CX_CTRL_TC_VALUE_MASK; + + /* See if the transfer is linked... */ + ladd = readl(cohc->base->virtbase + + COH901318_CX_LNK_ADDR + + COH901318_CX_LNK_ADDR_SPACING * + cohc->id) & + ~COH901318_CX_LNK_LINK_IMMEDIATE; + /* Single transaction */ + if (!ladd) + continue; + + /* + * Linked transaction, follow the lli, find the + * currently processing lli, and proceed to the next + */ + lli = cohd->lli; + while (lli && lli->link_addr != ladd) + lli = lli->virt_link_addr; + + if (lli) + lli = lli->virt_link_addr; + + /* + * Follow remaining lli links around to count the total + * number of bytes left + */ + left += coh901318_get_bytes_in_lli(lli); + } else { + left += coh901318_get_bytes_in_lli(cohd->lli); + } + i++; + } + + /* Also count bytes in the queued jobs */ + list_for_each(pos, &cohc->queue) { + cohd = list_entry(pos, struct coh901318_desc, node); + left += coh901318_get_bytes_in_lli(cohd->lli); + } spin_unlock_irqrestore(&cohc->lock, flags); - return ret; + return left; } -EXPORT_SYMBOL(coh901318_get_bytes_left); - -/* Stops a transfer without losing data. Enables power save. - Use this function in conjunction with coh901318_continue(..) -*/ -void coh901318_stop(struct dma_chan *chan) +/* + * Pauses a transfer without losing data. Enables power save. + * Use this function in conjunction with coh901318_resume. + */ +static void coh901318_pause(struct dma_chan *chan) { u32 val; unsigned long flags; @@ -475,12 +549,11 @@ void coh901318_stop(struct dma_chan *chan) spin_unlock_irqrestore(&cohc->lock, flags); } -EXPORT_SYMBOL(coh901318_stop); -/* Continues a transfer that has been stopped via 300_dma_stop(..). +/* Resumes a transfer that has been stopped via 300_dma_stop(..). Power save is handled. */ -void coh901318_continue(struct dma_chan *chan) +static void coh901318_resume(struct dma_chan *chan) { u32 val; unsigned long flags; @@ -506,7 +579,6 @@ void coh901318_continue(struct dma_chan *chan) spin_unlock_irqrestore(&cohc->lock, flags); } -EXPORT_SYMBOL(coh901318_continue); bool coh901318_filter_id(struct dma_chan *chan, void *chan_id) { @@ -565,29 +637,30 @@ static int coh901318_config(struct coh901318_chan *cohc, */ static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc) { - struct coh901318_desc *cohd_que; + struct coh901318_desc *cohd; - /* start queued jobs, if any + /* + * start queued jobs, if any * TODO: transmit all queued jobs in one go */ - cohd_que = coh901318_first_queued(cohc); + cohd = coh901318_first_queued(cohc); - if (cohd_que != NULL) { + if (cohd != NULL) { /* Remove from queue */ - coh901318_desc_remove(cohd_que); + coh901318_desc_remove(cohd); /* initiate DMA job */ cohc->busy = 1; - coh901318_desc_submit(cohc, cohd_que); + coh901318_desc_submit(cohc, cohd); - coh901318_prep_linked_list(cohc, cohd_que->data); + coh901318_prep_linked_list(cohc, cohd->lli); - /* start dma job */ + /* start dma job on this channel */ coh901318_start(cohc); } - return cohd_que; + return cohd; } /* @@ -622,7 +695,7 @@ static void dma_tasklet(unsigned long data) cohc->completed = cohd_fin->desc.cookie; /* release the lli allocation and remove the descriptor */ - coh901318_lli_free(&cohc->base->pool, &cohd_fin->data); + coh901318_lli_free(&cohc->base->pool, &cohd_fin->lli); /* return desc to free-list */ coh901318_desc_remove(cohd_fin); @@ -666,23 +739,44 @@ static void dma_tasklet(unsigned long data) /* called from interrupt context */ static void dma_tc_handle(struct coh901318_chan *cohc) { - BUG_ON(!cohc->allocated && (list_empty(&cohc->active) || - list_empty(&cohc->queue))); - - if (!cohc->allocated) + /* + * If the channel is not allocated, then we shouldn't have + * any TC interrupts on it. + */ + if (!cohc->allocated) { + dev_err(COHC_2_DEV(cohc), "spurious interrupt from " + "unallocated channel\n"); return; + } spin_lock(&cohc->lock); + /* + * When we reach this point, at least one queue item + * should have been moved over from cohc->queue to + * cohc->active and run to completion, that is why we're + * getting a terminal count interrupt is it not? + * If you get this BUG() the most probable cause is that + * the individual nodes in the lli chain have IRQ enabled, + * so check your platform config for lli chain ctrl. + */ + BUG_ON(list_empty(&cohc->active)); + cohc->nbr_active_done++; + /* + * This attempt to take a job from cohc->queue, put it + * into cohc->active and start it. + */ if (coh901318_queue_start(cohc) == NULL) cohc->busy = 0; - BUG_ON(list_empty(&cohc->active)); - spin_unlock(&cohc->lock); + /* + * This tasklet will remove items from cohc->active + * and thus terminates them. + */ if (cohc_chan_conf(cohc)->priority_high) tasklet_hi_schedule(&cohc->tasklet); else @@ -809,6 +903,7 @@ static irqreturn_t dma_irq_handler(int irq, void *dev_id) static int coh901318_alloc_chan_resources(struct dma_chan *chan) { struct coh901318_chan *cohc = to_coh901318_chan(chan); + unsigned long flags; dev_vdbg(COHC_2_DEV(cohc), "[%s] DMA channel %d\n", __func__, cohc->id); @@ -816,11 +911,15 @@ static int coh901318_alloc_chan_resources(struct dma_chan *chan) if (chan->client_count > 1) return -EBUSY; + spin_lock_irqsave(&cohc->lock, flags); + coh901318_config(cohc, NULL); cohc->allocated = 1; cohc->completed = chan->cookie = 1; + spin_unlock_irqrestore(&cohc->lock, flags); + return 1; } @@ -843,7 +942,7 @@ coh901318_free_chan_resources(struct dma_chan *chan) spin_unlock_irqrestore(&cohc->lock, flags); - chan->device->device_terminate_all(chan); + chan->device->device_control(chan, DMA_TERMINATE_ALL, 0); } @@ -870,7 +969,7 @@ static struct dma_async_tx_descriptor * coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t size, unsigned long flags) { - struct coh901318_lli *data; + struct coh901318_lli *lli; struct coh901318_desc *cohd; unsigned long flg; struct coh901318_chan *cohc = to_coh901318_chan(chan); @@ -892,23 +991,23 @@ coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if ((lli_len << MAX_DMA_PACKET_SIZE_SHIFT) < size) lli_len++; - data = coh901318_lli_alloc(&cohc->base->pool, lli_len); + lli = coh901318_lli_alloc(&cohc->base->pool, lli_len); - if (data == NULL) + if (lli == NULL) goto err; ret = coh901318_lli_fill_memcpy( - &cohc->base->pool, data, src, size, dest, + &cohc->base->pool, lli, src, size, dest, cohc_chan_param(cohc)->ctrl_lli_chained, ctrl_last); if (ret) goto err; - COH_DBG(coh901318_list_print(cohc, data)); + COH_DBG(coh901318_list_print(cohc, lli)); /* Pick a descriptor to handle this transfer */ cohd = coh901318_desc_get(cohc); - cohd->data = data; + cohd->lli = lli; cohd->flags = flags; cohd->desc.tx_submit = coh901318_tx_submit; @@ -926,7 +1025,7 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned long flags) { struct coh901318_chan *cohc = to_coh901318_chan(chan); - struct coh901318_lli *data; + struct coh901318_lli *lli; struct coh901318_desc *cohd; const struct coh901318_params *params; struct scatterlist *sg; @@ -999,13 +1098,13 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } pr_debug("Allocate %d lli:s for this transfer\n", len); - data = coh901318_lli_alloc(&cohc->base->pool, len); + lli = coh901318_lli_alloc(&cohc->base->pool, len); - if (data == NULL) + if (lli == NULL) goto err_dma_alloc; - /* initiate allocated data list */ - ret = coh901318_lli_fill_sg(&cohc->base->pool, data, sgl, sg_len, + /* initiate allocated lli list */ + ret = coh901318_lli_fill_sg(&cohc->base->pool, lli, sgl, sg_len, cohc_dev_addr(cohc), ctrl_chained, ctrl, @@ -1014,14 +1113,14 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (ret) goto err_lli_fill; - COH_DBG(coh901318_list_print(cohc, data)); + COH_DBG(coh901318_list_print(cohc, lli)); /* Pick a descriptor to handle this transfer */ cohd = coh901318_desc_get(cohc); cohd->dir = direction; cohd->flags = flags; cohd->desc.tx_submit = coh901318_tx_submit; - cohd->data = data; + cohd->lli = lli; spin_unlock_irqrestore(&cohc->lock, flg); @@ -1035,9 +1134,8 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } static enum dma_status -coh901318_is_tx_complete(struct dma_chan *chan, - dma_cookie_t cookie, dma_cookie_t *done, - dma_cookie_t *used) +coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) { struct coh901318_chan *cohc = to_coh901318_chan(chan); dma_cookie_t last_used; @@ -1049,10 +1147,10 @@ coh901318_is_tx_complete(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); - if (done) - *done = last_complete; - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, + coh901318_get_bytes_left(chan)); + if (ret == DMA_IN_PROGRESS && cohc->stopped) + ret = DMA_PAUSED; return ret; } @@ -1065,23 +1163,42 @@ coh901318_issue_pending(struct dma_chan *chan) spin_lock_irqsave(&cohc->lock, flags); - /* Busy means that pending jobs are already being processed */ + /* + * Busy means that pending jobs are already being processed, + * and then there is no point in starting the queue: the + * terminal count interrupt on the channel will take the next + * job on the queue and execute it anyway. + */ if (!cohc->busy) coh901318_queue_start(cohc); spin_unlock_irqrestore(&cohc->lock, flags); } -static void -coh901318_terminate_all(struct dma_chan *chan) +static int +coh901318_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) { unsigned long flags; struct coh901318_chan *cohc = to_coh901318_chan(chan); struct coh901318_desc *cohd; void __iomem *virtbase = cohc->base->virtbase; - coh901318_stop(chan); + if (cmd == DMA_PAUSE) { + coh901318_pause(chan); + return 0; + } + + if (cmd == DMA_RESUME) { + coh901318_resume(chan); + return 0; + } + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + /* The remainder of this function terminates the transfer */ + coh901318_pause(chan); spin_lock_irqsave(&cohc->lock, flags); /* Clear any pending BE or TC interrupt */ @@ -1099,7 +1216,7 @@ coh901318_terminate_all(struct dma_chan *chan) while ((cohd = coh901318_first_active_get(cohc))) { /* release the lli allocation*/ - coh901318_lli_free(&cohc->base->pool, &cohd->data); + coh901318_lli_free(&cohc->base->pool, &cohd->lli); /* return desc to free-list */ coh901318_desc_remove(cohd); @@ -1108,7 +1225,7 @@ coh901318_terminate_all(struct dma_chan *chan) while ((cohd = coh901318_first_queued(cohc))) { /* release the lli allocation*/ - coh901318_lli_free(&cohc->base->pool, &cohd->data); + coh901318_lli_free(&cohc->base->pool, &cohd->lli); /* return desc to free-list */ coh901318_desc_remove(cohd); @@ -1120,6 +1237,8 @@ coh901318_terminate_all(struct dma_chan *chan) cohc->busy = 0; spin_unlock_irqrestore(&cohc->lock, flags); + + return 0; } void coh901318_base_init(struct dma_device *dma, const int *pick_chans, struct coh901318_base *base) @@ -1235,9 +1354,9 @@ static int __init coh901318_probe(struct platform_device *pdev) base->dma_slave.device_alloc_chan_resources = coh901318_alloc_chan_resources; base->dma_slave.device_free_chan_resources = coh901318_free_chan_resources; base->dma_slave.device_prep_slave_sg = coh901318_prep_slave_sg; - base->dma_slave.device_is_tx_complete = coh901318_is_tx_complete; + base->dma_slave.device_tx_status = coh901318_tx_status; base->dma_slave.device_issue_pending = coh901318_issue_pending; - base->dma_slave.device_terminate_all = coh901318_terminate_all; + base->dma_slave.device_control = coh901318_control; base->dma_slave.dev = &pdev->dev; err = dma_async_device_register(&base->dma_slave); @@ -1255,9 +1374,9 @@ static int __init coh901318_probe(struct platform_device *pdev) base->dma_memcpy.device_alloc_chan_resources = coh901318_alloc_chan_resources; base->dma_memcpy.device_free_chan_resources = coh901318_free_chan_resources; base->dma_memcpy.device_prep_dma_memcpy = coh901318_prep_memcpy; - base->dma_memcpy.device_is_tx_complete = coh901318_is_tx_complete; + base->dma_memcpy.device_tx_status = coh901318_tx_status; base->dma_memcpy.device_issue_pending = coh901318_issue_pending; - base->dma_memcpy.device_terminate_all = coh901318_terminate_all; + base->dma_memcpy.device_control = coh901318_control; base->dma_memcpy.dev = &pdev->dev; /* * This controller can only access address at even 32bit boundaries, diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index d18b5d069d7e..9d31d5eb95c1 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -515,7 +515,6 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v break; if (--device->privatecnt == 0) dma_cap_clear(DMA_PRIVATE, device->cap_mask); - chan->private = NULL; chan = NULL; } } @@ -537,7 +536,6 @@ void dma_release_channel(struct dma_chan *chan) /* drop PRIVATE cap enabled by __dma_request_channel() */ if (--chan->device->privatecnt == 0) dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask); - chan->private = NULL; mutex_unlock(&dma_list_mutex); } EXPORT_SYMBOL_GPL(dma_release_channel); @@ -695,11 +693,11 @@ int dma_async_device_register(struct dma_device *device) BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) && !device->device_prep_slave_sg); BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) && - !device->device_terminate_all); + !device->device_control); BUG_ON(!device->device_alloc_chan_resources); BUG_ON(!device->device_free_chan_resources); - BUG_ON(!device->device_is_tx_complete); + BUG_ON(!device->device_tx_status); BUG_ON(!device->device_issue_pending); BUG_ON(!device->dev); @@ -978,7 +976,9 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, struct dma_chan *chan) { tx->chan = chan; + #ifndef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH spin_lock_init(&tx->lock); + #endif } EXPORT_SYMBOL(dma_async_tx_descriptor_init); @@ -1011,7 +1011,7 @@ EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); */ void dma_run_dependencies(struct dma_async_tx_descriptor *tx) { - struct dma_async_tx_descriptor *dep = tx->next; + struct dma_async_tx_descriptor *dep = txd_next(tx); struct dma_async_tx_descriptor *dep_next; struct dma_chan *chan; @@ -1019,7 +1019,7 @@ void dma_run_dependencies(struct dma_async_tx_descriptor *tx) return; /* we'll submit tx->next now, so clear the link */ - tx->next = NULL; + txd_clear_next(tx); chan = dep->chan; /* keep submitting up until a channel switch is detected @@ -1027,14 +1027,14 @@ void dma_run_dependencies(struct dma_async_tx_descriptor *tx) * processing the interrupt from async_tx_channel_switch */ for (; dep; dep = dep_next) { - spin_lock_bh(&dep->lock); - dep->parent = NULL; - dep_next = dep->next; + txd_lock(dep); + txd_clear_parent(dep); + dep_next = txd_next(dep); if (dep_next && dep_next->chan == chan) - dep->next = NULL; /* ->next will be submitted */ + txd_clear_next(dep); /* ->next will be submitted */ else dep_next = NULL; /* submit current dep and terminate */ - spin_unlock_bh(&dep->lock); + txd_unlock(dep); dep->tx_submit(dep); } diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index d28369f7afd2..a3991ab0d67e 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -781,13 +781,18 @@ err_desc_get: return NULL; } -static void dwc_terminate_all(struct dma_chan *chan) +static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); struct dw_desc *desc, *_desc; LIST_HEAD(list); + /* Only supports DMA_TERMINATE_ALL */ + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + /* * This is only called when something went wrong elsewhere, so * we don't really care about the data. Just disable the @@ -810,12 +815,14 @@ static void dwc_terminate_all(struct dma_chan *chan) /* Flush all pending and queued descriptors */ list_for_each_entry_safe(desc, _desc, &list, desc_node) dwc_descriptor_complete(dwc, desc); + + return 0; } static enum dma_status -dwc_is_tx_complete(struct dma_chan *chan, - dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used) +dwc_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); dma_cookie_t last_used; @@ -835,10 +842,7 @@ dwc_is_tx_complete(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); } - if (done) - *done = last_complete; - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); return ret; } @@ -1338,9 +1342,9 @@ static int __init dw_probe(struct platform_device *pdev) dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy; dw->dma.device_prep_slave_sg = dwc_prep_slave_sg; - dw->dma.device_terminate_all = dwc_terminate_all; + dw->dma.device_control = dwc_control; - dw->dma.device_is_tx_complete = dwc_is_tx_complete; + dw->dma.device_tx_status = dwc_tx_status; dw->dma.device_issue_pending = dwc_issue_pending; dma_writel(dw, CFG, DW_CFG_DMA_EN); diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 88f470f0d820..1fdf180cbd67 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -775,13 +775,18 @@ fail: return NULL; } -static void fsl_dma_device_terminate_all(struct dma_chan *dchan) +static int fsl_dma_device_control(struct dma_chan *dchan, + enum dma_ctrl_cmd cmd, unsigned long arg) { struct fsldma_chan *chan; unsigned long flags; + /* Only supports DMA_TERMINATE_ALL */ + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + if (!dchan) - return; + return -EINVAL; chan = to_fsl_chan(dchan); @@ -795,6 +800,8 @@ static void fsl_dma_device_terminate_all(struct dma_chan *dchan) fsldma_free_desc_list(chan, &chan->ld_running); spin_unlock_irqrestore(&chan->desc_lock, flags); + + return 0; } /** @@ -965,13 +972,12 @@ static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan) } /** - * fsl_dma_is_complete - Determine the DMA status + * fsl_tx_status - Determine the DMA status * @chan : Freescale DMA channel */ -static enum dma_status fsl_dma_is_complete(struct dma_chan *dchan, +static enum dma_status fsl_tx_status(struct dma_chan *dchan, dma_cookie_t cookie, - dma_cookie_t *done, - dma_cookie_t *used) + struct dma_tx_state *txstate) { struct fsldma_chan *chan = to_fsl_chan(dchan); dma_cookie_t last_used; @@ -982,11 +988,7 @@ static enum dma_status fsl_dma_is_complete(struct dma_chan *dchan, last_used = dchan->cookie; last_complete = chan->completed_cookie; - if (done) - *done = last_complete; - - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); return dma_async_is_complete(cookie, last_complete, last_used); } @@ -1330,10 +1332,10 @@ static int __devinit fsldma_of_probe(struct of_device *op, fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; - fdev->common.device_is_tx_complete = fsl_dma_is_complete; + fdev->common.device_tx_status = fsl_tx_status; fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg; - fdev->common.device_terminate_all = fsl_dma_device_terminate_all; + fdev->common.device_control = fsl_dma_device_control; fdev->common.dev = &op->dev; dev_set_drvdata(&op->dev, fdev); diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 3e5a8005c62b..c9213ead4a26 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -727,18 +727,18 @@ static void ioat1_timer_event(unsigned long data) } enum dma_status -ioat_is_dma_complete(struct dma_chan *c, dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used) +ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) { struct ioat_chan_common *chan = to_chan_common(c); struct ioatdma_device *device = chan->device; - if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS) return DMA_SUCCESS; device->cleanup_fn((unsigned long) c); - return ioat_is_complete(c, cookie, done, used); + return ioat_tx_status(c, cookie, txstate); } static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) @@ -858,7 +858,7 @@ int __devinit ioat_dma_self_test(struct ioatdma_device *device) tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); if (tmo == 0 || - dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_err(dev, "Self-test copy timed out, disabling\n"); err = -ENODEV; @@ -1199,7 +1199,7 @@ int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; dma->device_free_chan_resources = ioat1_dma_free_chan_resources; - dma->device_is_tx_complete = ioat_is_dma_complete; + dma->device_tx_status = ioat_dma_tx_status; err = ioat_probe(device); if (err) diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 86b97ac8774e..6d3a73b57e54 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -96,6 +96,7 @@ struct ioat_chan_common { #define IOAT_COMPLETION_ACK 1 #define IOAT_RESET_PENDING 2 #define IOAT_KOBJ_INIT_FAIL 3 + #define IOAT_RESHAPE_PENDING 4 struct timer_list timer; #define COMPLETION_TIMEOUT msecs_to_jiffies(100) #define IDLE_TIMEOUT msecs_to_jiffies(2000) @@ -142,15 +143,14 @@ static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) } /** - * ioat_is_complete - poll the status of an ioat transaction + * ioat_tx_status - poll the status of an ioat transaction * @c: channel handle * @cookie: transaction identifier - * @done: if set, updated with last completed transaction - * @used: if set, updated with last used transaction + * @txstate: if set, updated with the transaction state */ static inline enum dma_status -ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used) +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) { struct ioat_chan_common *chan = to_chan_common(c); dma_cookie_t last_used; @@ -159,10 +159,7 @@ ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, last_used = c->cookie; last_complete = chan->completed_cookie; - if (done) - *done = last_complete; - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); return dma_async_is_complete(cookie, last_complete, last_used); } @@ -338,8 +335,8 @@ struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev, unsigned long ioat_get_current_completion(struct ioat_chan_common *chan); void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx); -enum dma_status ioat_is_dma_complete(struct dma_chan *c, dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used); +enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate); void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, size_t len, struct ioat_dma_descriptor *hw); bool ioat_cleanup_preamble(struct ioat_chan_common *chan, diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index b5ae56c211e6..3c8b32a83794 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -56,8 +56,6 @@ void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) ioat->dmacount += ioat2_ring_pending(ioat); ioat->issued = ioat->head; - /* make descriptor updates globally visible before notifying channel */ - wmb(); writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x count: %#x\n", @@ -69,9 +67,9 @@ void ioat2_issue_pending(struct dma_chan *c) struct ioat2_dma_chan *ioat = to_ioat2_chan(c); if (ioat2_ring_pending(ioat)) { - spin_lock_bh(&ioat->ring_lock); + spin_lock_bh(&ioat->prep_lock); __ioat2_issue_pending(ioat); - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&ioat->prep_lock); } } @@ -80,7 +78,7 @@ void ioat2_issue_pending(struct dma_chan *c) * @ioat: ioat2+ channel * * Check if the number of unsubmitted descriptors has exceeded the - * watermark. Called with ring_lock held + * watermark. Called with prep_lock held */ static void ioat2_update_pending(struct ioat2_dma_chan *ioat) { @@ -92,7 +90,6 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) { struct ioat_ring_ent *desc; struct ioat_dma_descriptor *hw; - int idx; if (ioat2_ring_space(ioat) < 1) { dev_err(to_dev(&ioat->base), @@ -102,8 +99,7 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", __func__, ioat->head, ioat->tail, ioat->issued); - idx = ioat2_desc_alloc(ioat, 1); - desc = ioat2_get_ring_ent(ioat, idx); + desc = ioat2_get_ring_ent(ioat, ioat->head); hw = desc->hw; hw->ctl = 0; @@ -117,14 +113,16 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) async_tx_ack(&desc->txd); ioat2_set_chainaddr(ioat, desc->txd.phys); dump_desc_dbg(ioat, desc); + wmb(); + ioat->head += 1; __ioat2_issue_pending(ioat); } static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) { - spin_lock_bh(&ioat->ring_lock); + spin_lock_bh(&ioat->prep_lock); __ioat2_start_null_desc(ioat); - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&ioat->prep_lock); } static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) @@ -134,15 +132,16 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) struct ioat_ring_ent *desc; bool seen_current = false; u16 active; - int i; + int idx = ioat->tail, i; dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", __func__, ioat->head, ioat->tail, ioat->issued); active = ioat2_ring_active(ioat); for (i = 0; i < active && !seen_current; i++) { - prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); - desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + smp_read_barrier_depends(); + prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); + desc = ioat2_get_ring_ent(ioat, idx + i); tx = &desc->txd; dump_desc_dbg(ioat, desc); if (tx->cookie) { @@ -158,11 +157,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) if (tx->phys == phys_complete) seen_current = true; } - ioat->tail += i; + smp_mb(); /* finish all descriptor reads before incrementing tail */ + ioat->tail = idx + i; BUG_ON(active && !seen_current); /* no active descs have written a completion? */ chan->last_completion = phys_complete; - if (ioat->head == ioat->tail) { + if (active - i == 0) { dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", __func__); clear_bit(IOAT_COMPLETION_PENDING, &chan->state); @@ -179,24 +179,9 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat) struct ioat_chan_common *chan = &ioat->base; unsigned long phys_complete; - prefetch(chan->completion); - - if (!spin_trylock_bh(&chan->cleanup_lock)) - return; - - if (!ioat_cleanup_preamble(chan, &phys_complete)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } - - if (!spin_trylock_bh(&ioat->ring_lock)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } - - __cleanup(ioat, phys_complete); - - spin_unlock_bh(&ioat->ring_lock); + spin_lock_bh(&chan->cleanup_lock); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); spin_unlock_bh(&chan->cleanup_lock); } @@ -287,12 +272,10 @@ void ioat2_timer_event(unsigned long data) struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); struct ioat_chan_common *chan = &ioat->base; - spin_lock_bh(&chan->cleanup_lock); if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { unsigned long phys_complete; u64 status; - spin_lock_bh(&ioat->ring_lock); status = ioat_chansts(chan); /* when halted due to errors check for channel @@ -311,26 +294,31 @@ void ioat2_timer_event(unsigned long data) * acknowledged a pending completion once, then be more * forceful with a restart */ - if (ioat_cleanup_preamble(chan, &phys_complete)) + spin_lock_bh(&chan->cleanup_lock); + if (ioat_cleanup_preamble(chan, &phys_complete)) { __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + } else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { + spin_lock_bh(&ioat->prep_lock); ioat2_restart_channel(ioat); - else { + spin_unlock_bh(&ioat->prep_lock); + } else { set_bit(IOAT_COMPLETION_ACK, &chan->state); mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); } - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); } else { u16 active; /* if the ring is idle, empty, and oversized try to step * down the size */ - spin_lock_bh(&ioat->ring_lock); + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); active = ioat2_ring_active(ioat); if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) reshape_ring(ioat, ioat->alloc_order-1); - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); /* keep shrinking until we get back to our minimum * default size @@ -338,7 +326,6 @@ void ioat2_timer_event(unsigned long data) if (ioat->alloc_order > ioat_get_alloc_order()) mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); } - spin_unlock_bh(&chan->cleanup_lock); } static int ioat2_reset_hw(struct ioat_chan_common *chan) @@ -392,7 +379,7 @@ int ioat2_enumerate_channels(struct ioatdma_device *device) ioat_init_channel(device, &ioat->base, i); ioat->xfercap_log = xfercap_log; - spin_lock_init(&ioat->ring_lock); + spin_lock_init(&ioat->prep_lock); if (device->reset_hw(&ioat->base)) { i = 0; break; @@ -418,8 +405,17 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + /* make descriptor updates visible before advancing ioat->head, + * this is purposefully not smp_wmb() since we are also + * publishing the descriptor updates to a dma device + */ + wmb(); + + ioat->head += ioat->produce; + ioat2_update_pending(ioat); - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&ioat->prep_lock); return cookie; } @@ -531,13 +527,15 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) if (!ring) return -ENOMEM; - spin_lock_bh(&ioat->ring_lock); + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); ioat->ring = ring; ioat->head = 0; ioat->issued = 0; ioat->tail = 0; ioat->alloc_order = order; - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); tasklet_enable(&chan->cleanup_task); ioat2_start_null_desc(ioat); @@ -553,7 +551,7 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order) */ struct ioat_chan_common *chan = &ioat->base; struct dma_chan *c = &chan->common; - const u16 curr_size = ioat2_ring_mask(ioat) + 1; + const u16 curr_size = ioat2_ring_size(ioat); const u16 active = ioat2_ring_active(ioat); const u16 new_size = 1 << order; struct ioat_ring_ent **ring; @@ -653,54 +651,61 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order) } /** - * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops - * @idx: gets starting descriptor index on successful allocation + * ioat2_check_space_lock - verify space and grab ring producer lock * @ioat: ioat2,3 channel (ring) to operate on * @num_descs: allocation length */ -int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) +int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs) { struct ioat_chan_common *chan = &ioat->base; + bool retry; - spin_lock_bh(&ioat->ring_lock); + retry: + spin_lock_bh(&ioat->prep_lock); /* never allow the last descriptor to be consumed, we need at * least one free at all times to allow for on-the-fly ring * resizing. */ - while (unlikely(ioat2_ring_space(ioat) <= num_descs)) { - if (reshape_ring(ioat, ioat->alloc_order + 1) && - ioat2_ring_space(ioat) > num_descs) - break; - - if (printk_ratelimit()) - dev_dbg(to_dev(chan), - "%s: ring full! num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, - ioat->issued); - spin_unlock_bh(&ioat->ring_lock); - - /* progress reclaim in the allocation failure case we - * may be called under bh_disabled so we need to trigger - * the timer event directly - */ - spin_lock_bh(&chan->cleanup_lock); - if (jiffies > chan->timer.expires && - timer_pending(&chan->timer)) { - struct ioatdma_device *device = chan->device; - - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - spin_unlock_bh(&chan->cleanup_lock); - device->timer_fn((unsigned long) &chan->common); - } else - spin_unlock_bh(&chan->cleanup_lock); - return -ENOMEM; + if (likely(ioat2_ring_space(ioat) > num_descs)) { + dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, ioat->issued); + ioat->produce = num_descs; + return 0; /* with ioat->prep_lock held */ } + retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state); + spin_unlock_bh(&ioat->prep_lock); - dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, ioat->issued); + /* is another cpu already trying to expand the ring? */ + if (retry) + goto retry; - *idx = ioat2_desc_alloc(ioat, num_descs); - return 0; /* with ioat->ring_lock held */ + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); + retry = reshape_ring(ioat, ioat->alloc_order + 1); + clear_bit(IOAT_RESHAPE_PENDING, &chan->state); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); + + /* if we were able to expand the ring retry the allocation */ + if (retry) + goto retry; + + if (printk_ratelimit()) + dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, ioat->issued); + + /* progress reclaim in the allocation failure case we may be + * called under bh_disabled so we need to trigger the timer + * event directly + */ + if (jiffies > chan->timer.expires && timer_pending(&chan->timer)) { + struct ioatdma_device *device = chan->device; + + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + device->timer_fn((unsigned long) &chan->common); + } + + return -ENOMEM; } struct dma_async_tx_descriptor * @@ -713,14 +718,11 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dst = dma_dest; dma_addr_t src = dma_src; size_t total_len = len; - int num_descs; - u16 idx; - int i; + int num_descs, idx, i; num_descs = ioat2_xferlen_to_descs(ioat, len); - if (likely(num_descs) && - ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) - /* pass */; + if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) + idx = ioat->head; else return NULL; i = 0; @@ -777,7 +779,8 @@ void ioat2_free_chan_resources(struct dma_chan *c) device->cleanup_fn((unsigned long) c); device->reset_hw(chan); - spin_lock_bh(&ioat->ring_lock); + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); descs = ioat2_ring_space(ioat); dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); for (i = 0; i < descs; i++) { @@ -800,7 +803,8 @@ void ioat2_free_chan_resources(struct dma_chan *c) ioat->alloc_order = 0; pci_pool_free(device->completion_pool, chan->completion, chan->completion_dma); - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); chan->last_completion = 0; chan->completion_dma = 0; @@ -855,7 +859,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) dma->device_issue_pending = ioat2_issue_pending; dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; dma->device_free_chan_resources = ioat2_free_chan_resources; - dma->device_is_tx_complete = ioat_is_dma_complete; + dma->device_tx_status = ioat_tx_status; err = ioat_probe(device); if (err) diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index ef2871fd7868..a2c413b2b8d8 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -22,6 +22,7 @@ #define IOATDMA_V2_H #include <linux/dmaengine.h> +#include <linux/circ_buf.h> #include "dma.h" #include "hw.h" @@ -49,8 +50,9 @@ extern int ioat_ring_alloc_order; * @tail: cleanup index * @dmacount: identical to 'head' except for occasionally resetting to zero * @alloc_order: log2 of the number of allocated descriptors + * @produce: number of descriptors to produce at submit time * @ring: software ring buffer implementation of hardware ring - * @ring_lock: protects ring attributes + * @prep_lock: serializes descriptor preparation (producers) */ struct ioat2_dma_chan { struct ioat_chan_common base; @@ -60,8 +62,9 @@ struct ioat2_dma_chan { u16 tail; u16 dmacount; u16 alloc_order; + u16 produce; struct ioat_ring_ent **ring; - spinlock_t ring_lock; + spinlock_t prep_lock; }; static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) @@ -71,38 +74,26 @@ static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) return container_of(chan, struct ioat2_dma_chan, base); } -static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat) +static inline u16 ioat2_ring_size(struct ioat2_dma_chan *ioat) { - return (1 << ioat->alloc_order) - 1; + return 1 << ioat->alloc_order; } /* count of descriptors in flight with the engine */ static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) { - return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat); + return CIRC_CNT(ioat->head, ioat->tail, ioat2_ring_size(ioat)); } /* count of descriptors pending submission to hardware */ static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) { - return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat); + return CIRC_CNT(ioat->head, ioat->issued, ioat2_ring_size(ioat)); } static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat) { - u16 num_descs = ioat2_ring_mask(ioat) + 1; - u16 active = ioat2_ring_active(ioat); - - BUG_ON(active > num_descs); - - return num_descs - active; -} - -/* assumes caller already checked space */ -static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len) -{ - ioat->head += len; - return ioat->head - len; + return ioat2_ring_size(ioat) - ioat2_ring_active(ioat); } static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) @@ -151,7 +142,7 @@ struct ioat_ring_ent { static inline struct ioat_ring_ent * ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) { - return ioat->ring[idx & ioat2_ring_mask(ioat)]; + return ioat->ring[idx & (ioat2_ring_size(ioat) - 1)]; } static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) @@ -168,7 +159,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); -int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs); +int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); int ioat2_enumerate_channels(struct ioatdma_device *device); struct dma_async_tx_descriptor * ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 6740e319c9cf..1cdd22e1051b 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -260,8 +260,8 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) struct ioat_chan_common *chan = &ioat->base; struct ioat_ring_ent *desc; bool seen_current = false; + int idx = ioat->tail, i; u16 active; - int i; dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", __func__, ioat->head, ioat->tail, ioat->issued); @@ -270,13 +270,14 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) for (i = 0; i < active && !seen_current; i++) { struct dma_async_tx_descriptor *tx; - prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); - desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + smp_read_barrier_depends(); + prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); + desc = ioat2_get_ring_ent(ioat, idx + i); dump_desc_dbg(ioat, desc); tx = &desc->txd; if (tx->cookie) { chan->completed_cookie = tx->cookie; - ioat3_dma_unmap(ioat, desc, ioat->tail + i); + ioat3_dma_unmap(ioat, desc, idx + i); tx->cookie = 0; if (tx->callback) { tx->callback(tx->callback_param); @@ -293,69 +294,30 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) i++; } } - ioat->tail += i; + smp_mb(); /* finish all descriptor reads before incrementing tail */ + ioat->tail = idx + i; BUG_ON(active && !seen_current); /* no active descs have written a completion? */ chan->last_completion = phys_complete; - active = ioat2_ring_active(ioat); - if (active == 0) { + if (active - i == 0) { dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", __func__); clear_bit(IOAT_COMPLETION_PENDING, &chan->state); mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); } /* 5 microsecond delay per pending descriptor */ - writew(min((5 * active), IOAT_INTRDELAY_MASK), + writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), chan->device->reg_base + IOAT_INTRDELAY_OFFSET); } -/* try to cleanup, but yield (via spin_trylock) to incoming submissions - * with the expectation that we will immediately poll again shortly - */ -static void ioat3_cleanup_poll(struct ioat2_dma_chan *ioat) +static void ioat3_cleanup(struct ioat2_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; unsigned long phys_complete; - prefetch(chan->completion); - - if (!spin_trylock_bh(&chan->cleanup_lock)) - return; - - if (!ioat_cleanup_preamble(chan, &phys_complete)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } - - if (!spin_trylock_bh(&ioat->ring_lock)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } - - __cleanup(ioat, phys_complete); - - spin_unlock_bh(&ioat->ring_lock); - spin_unlock_bh(&chan->cleanup_lock); -} - -/* run cleanup now because we already delayed the interrupt via INTRDELAY */ -static void ioat3_cleanup_sync(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - unsigned long phys_complete; - - prefetch(chan->completion); - spin_lock_bh(&chan->cleanup_lock); - if (!ioat_cleanup_preamble(chan, &phys_complete)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } - spin_lock_bh(&ioat->ring_lock); - - __cleanup(ioat, phys_complete); - - spin_unlock_bh(&ioat->ring_lock); + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); spin_unlock_bh(&chan->cleanup_lock); } @@ -363,7 +325,7 @@ static void ioat3_cleanup_event(unsigned long data) { struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - ioat3_cleanup_sync(ioat); + ioat3_cleanup(ioat); writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } @@ -384,12 +346,10 @@ static void ioat3_timer_event(unsigned long data) struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); struct ioat_chan_common *chan = &ioat->base; - spin_lock_bh(&chan->cleanup_lock); if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { unsigned long phys_complete; u64 status; - spin_lock_bh(&ioat->ring_lock); status = ioat_chansts(chan); /* when halted due to errors check for channel @@ -408,26 +368,31 @@ static void ioat3_timer_event(unsigned long data) * acknowledged a pending completion once, then be more * forceful with a restart */ + spin_lock_bh(&chan->cleanup_lock); if (ioat_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { + spin_lock_bh(&ioat->prep_lock); ioat3_restart_channel(ioat); - else { + spin_unlock_bh(&ioat->prep_lock); + } else { set_bit(IOAT_COMPLETION_ACK, &chan->state); mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); } - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); } else { u16 active; /* if the ring is idle, empty, and oversized try to step * down the size */ - spin_lock_bh(&ioat->ring_lock); + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->prep_lock); active = ioat2_ring_active(ioat); if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) reshape_ring(ioat, ioat->alloc_order-1); - spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&ioat->prep_lock); + spin_unlock_bh(&chan->cleanup_lock); /* keep shrinking until we get back to our minimum * default size @@ -435,21 +400,20 @@ static void ioat3_timer_event(unsigned long data) if (ioat->alloc_order > ioat_get_alloc_order()) mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); } - spin_unlock_bh(&chan->cleanup_lock); } static enum dma_status -ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used) +ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS) return DMA_SUCCESS; - ioat3_cleanup_poll(ioat); + ioat3_cleanup(ioat); - return ioat_is_complete(c, cookie, done, used); + return ioat_tx_status(c, cookie, txstate); } static struct dma_async_tx_descriptor * @@ -460,15 +424,12 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, struct ioat_ring_ent *desc; size_t total_len = len; struct ioat_fill_descriptor *fill; - int num_descs; u64 src_data = (0x0101010101010101ULL) * (value & 0xff); - u16 idx; - int i; + int num_descs, idx, i; num_descs = ioat2_xferlen_to_descs(ioat, len); - if (likely(num_descs) && - ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) - /* pass */; + if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) + idx = ioat->head; else return NULL; i = 0; @@ -513,11 +474,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, struct ioat_xor_descriptor *xor; struct ioat_xor_ext_descriptor *xor_ex = NULL; struct ioat_dma_descriptor *hw; + int num_descs, with_ext, idx, i; u32 offset = 0; - int num_descs; - int with_ext; - int i; - u16 idx; u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; BUG_ON(src_cnt < 2); @@ -537,9 +495,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, * (legacy) descriptor to ensure all completion writes arrive in * order. */ - if (likely(num_descs) && - ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) - /* pass */; + if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0) + idx = ioat->head; else return NULL; i = 0; @@ -657,11 +614,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, struct ioat_pq_ext_descriptor *pq_ex = NULL; struct ioat_dma_descriptor *hw; u32 offset = 0; - int num_descs; - int with_ext; - int i, s; - u16 idx; u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + int i, s, idx, with_ext, num_descs; dev_dbg(to_dev(chan), "%s\n", __func__); /* the engine requires at least two sources (we provide @@ -687,8 +641,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, * order. */ if (likely(num_descs) && - ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) - /* pass */; + ioat2_check_space_lock(ioat, num_descs+1) == 0) + idx = ioat->head; else return NULL; i = 0; @@ -851,10 +805,9 @@ ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_ring_ent *desc; struct ioat_dma_descriptor *hw; - u16 idx; - if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0) - desc = ioat2_get_ring_ent(ioat, idx); + if (ioat2_check_space_lock(ioat, 1) == 0) + desc = ioat2_get_ring_ent(ioat, ioat->head); else return NULL; @@ -977,7 +930,7 @@ static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_err(dev, "Self-test xor timed out\n"); err = -ENODEV; goto free_resources; @@ -1031,7 +984,7 @@ static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_err(dev, "Self-test validate timed out\n"); err = -ENODEV; goto free_resources; @@ -1072,7 +1025,7 @@ static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_err(dev, "Self-test memset timed out\n"); err = -ENODEV; goto free_resources; @@ -1115,7 +1068,7 @@ static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_err(dev, "Self-test 2nd validate timed out\n"); err = -ENODEV; goto free_resources; @@ -1222,7 +1175,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) if (cap & IOAT_CAP_XOR) { is_raid_device = true; dma->max_xor = 8; - dma->xor_align = 2; + dma->xor_align = 6; dma_cap_set(DMA_XOR, dma->cap_mask); dma->device_prep_dma_xor = ioat3_prep_xor; @@ -1233,7 +1186,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) if (cap & IOAT_CAP_PQ) { is_raid_device = true; dma_set_maxpq(dma, 8, 0); - dma->pq_align = 2; + dma->pq_align = 6; dma_cap_set(DMA_PQ, dma->cap_mask); dma->device_prep_dma_pq = ioat3_prep_pq; @@ -1243,7 +1196,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) if (!(cap & IOAT_CAP_XOR)) { dma->max_xor = 8; - dma->xor_align = 2; + dma->xor_align = 6; dma_cap_set(DMA_XOR, dma->cap_mask); dma->device_prep_dma_xor = ioat3_prep_pqxor; @@ -1259,11 +1212,11 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) if (is_raid_device) { - dma->device_is_tx_complete = ioat3_is_complete; + dma->device_tx_status = ioat3_tx_status; device->cleanup_fn = ioat3_cleanup_event; device->timer_fn = ioat3_timer_event; } else { - dma->device_is_tx_complete = ioat_is_dma_complete; + dma->device_tx_status = ioat_dma_tx_status; device->cleanup_fn = ioat2_cleanup_event; device->timer_fn = ioat2_timer_event; } diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 99ec26725bae..fab37d1cf48d 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -138,15 +138,10 @@ static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_devic if (err) return err; - device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); - if (!device) - return -ENOMEM; - - pci_set_master(pdev); - device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); if (!device) return -ENOMEM; + pci_set_master(pdev); pci_set_drvdata(pdev, device); device->version = readb(device->reg_base + IOAT_VER_OFFSET); diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 1ebc801678b0..161c452923b8 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -894,14 +894,14 @@ static void iop_adma_free_chan_resources(struct dma_chan *chan) } /** - * iop_adma_is_complete - poll the status of an ADMA transaction + * iop_adma_status - poll the status of an ADMA transaction * @chan: ADMA channel handle * @cookie: ADMA transaction identifier + * @txstate: a holder for the current state of the channel or NULL */ -static enum dma_status iop_adma_is_complete(struct dma_chan *chan, +static enum dma_status iop_adma_status(struct dma_chan *chan, dma_cookie_t cookie, - dma_cookie_t *done, - dma_cookie_t *used) + struct dma_tx_state *txstate) { struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); dma_cookie_t last_used; @@ -910,12 +910,7 @@ static enum dma_status iop_adma_is_complete(struct dma_chan *chan, last_used = chan->cookie; last_complete = iop_chan->completed_cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; - + dma_set_tx_state(txstate, last_complete, last_used, 0); ret = dma_async_is_complete(cookie, last_complete, last_used); if (ret == DMA_SUCCESS) return ret; @@ -924,11 +919,7 @@ static enum dma_status iop_adma_is_complete(struct dma_chan *chan, last_used = chan->cookie; last_complete = iop_chan->completed_cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); return dma_async_is_complete(cookie, last_complete, last_used); } @@ -1043,7 +1034,7 @@ static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device) iop_adma_issue_pending(dma_chan); msleep(1); - if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_printk(KERN_ERR, dma_chan->device->dev, "Self-test copy timed out, disabling\n"); @@ -1143,7 +1134,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device) iop_adma_issue_pending(dma_chan); msleep(8); - if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_printk(KERN_ERR, dma_chan->device->dev, "Self-test xor timed out, disabling\n"); @@ -1190,7 +1181,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device) iop_adma_issue_pending(dma_chan); msleep(8); - if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_printk(KERN_ERR, dma_chan->device->dev, "Self-test zero sum timed out, disabling\n"); err = -ENODEV; @@ -1214,7 +1205,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device) iop_adma_issue_pending(dma_chan); msleep(8); - if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_printk(KERN_ERR, dma_chan->device->dev, "Self-test memset timed out, disabling\n"); err = -ENODEV; @@ -1246,7 +1237,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device) iop_adma_issue_pending(dma_chan); msleep(8); - if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_printk(KERN_ERR, dma_chan->device->dev, "Self-test non-zero sum timed out, disabling\n"); err = -ENODEV; @@ -1341,7 +1332,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) iop_adma_issue_pending(dma_chan); msleep(8); - if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_err(dev, "Self-test pq timed out, disabling\n"); err = -ENODEV; @@ -1378,7 +1369,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) iop_adma_issue_pending(dma_chan); msleep(8); - if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n"); err = -ENODEV; @@ -1410,7 +1401,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) iop_adma_issue_pending(dma_chan); msleep(8); - if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n"); err = -ENODEV; @@ -1508,7 +1499,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) /* set base routines */ dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources; dma_dev->device_free_chan_resources = iop_adma_free_chan_resources; - dma_dev->device_is_tx_complete = iop_adma_is_complete; + dma_dev->device_tx_status = iop_adma_status; dma_dev->device_issue_pending = iop_adma_issue_pending; dma_dev->dev = &pdev->dev; diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index 2a446397c884..cb26ee9773d6 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -1472,13 +1472,18 @@ static void idmac_issue_pending(struct dma_chan *chan) */ } -static void __idmac_terminate_all(struct dma_chan *chan) +static int __idmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) { struct idmac_channel *ichan = to_idmac_chan(chan); struct idmac *idmac = to_idmac(chan->device); unsigned long flags; int i; + /* Only supports DMA_TERMINATE_ALL */ + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + ipu_disable_channel(idmac, ichan, ichan->status >= IPU_CHANNEL_ENABLED); @@ -1505,17 +1510,23 @@ static void __idmac_terminate_all(struct dma_chan *chan) tasklet_enable(&to_ipu(idmac)->tasklet); ichan->status = IPU_CHANNEL_INITIALIZED; + + return 0; } -static void idmac_terminate_all(struct dma_chan *chan) +static int idmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) { struct idmac_channel *ichan = to_idmac_chan(chan); + int ret; mutex_lock(&ichan->chan_mutex); - __idmac_terminate_all(chan); + ret = __idmac_control(chan, cmd, arg); mutex_unlock(&ichan->chan_mutex); + + return ret; } #ifdef DEBUG @@ -1607,7 +1618,7 @@ static void idmac_free_chan_resources(struct dma_chan *chan) mutex_lock(&ichan->chan_mutex); - __idmac_terminate_all(chan); + __idmac_control(chan, DMA_TERMINATE_ALL, 0); if (ichan->status > IPU_CHANNEL_FREE) { #ifdef DEBUG @@ -1637,15 +1648,12 @@ static void idmac_free_chan_resources(struct dma_chan *chan) tasklet_schedule(&to_ipu(idmac)->tasklet); } -static enum dma_status idmac_is_tx_complete(struct dma_chan *chan, - dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used) +static enum dma_status idmac_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) { struct idmac_channel *ichan = to_idmac_chan(chan); - if (done) - *done = ichan->completed; - if (used) - *used = chan->cookie; + dma_set_tx_state(txstate, ichan->completed, chan->cookie, 0); if (cookie != chan->cookie) return DMA_ERROR; return DMA_SUCCESS; @@ -1664,12 +1672,12 @@ static int __init ipu_idmac_init(struct ipu *ipu) dma->dev = ipu->dev; dma->device_alloc_chan_resources = idmac_alloc_chan_resources; dma->device_free_chan_resources = idmac_free_chan_resources; - dma->device_is_tx_complete = idmac_is_tx_complete; + dma->device_tx_status = idmac_tx_status; dma->device_issue_pending = idmac_issue_pending; /* Compulsory for DMA_SLAVE fields */ dma->device_prep_slave_sg = idmac_prep_slave_sg; - dma->device_terminate_all = idmac_terminate_all; + dma->device_control = idmac_control; INIT_LIST_HEAD(&dma->channels); for (i = 0; i < IPU_CHANNELS_NUM; i++) { @@ -1703,7 +1711,7 @@ static void __exit ipu_idmac_exit(struct ipu *ipu) for (i = 0; i < IPU_CHANNELS_NUM; i++) { struct idmac_channel *ichan = ipu->channel + i; - idmac_terminate_all(&ichan->dma_chan); + idmac_control(&ichan->dma_chan, DMA_TERMINATE_ALL, 0); idmac_prep_slave_sg(&ichan->dma_chan, NULL, 0, DMA_NONE, 0); } diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index bbbd58566625..201e6e19c344 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -541,8 +541,8 @@ static void mpc_dma_issue_pending(struct dma_chan *chan) /* Check request completion status */ static enum dma_status -mpc_dma_is_tx_complete(struct dma_chan *chan, dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used) +mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) { struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); unsigned long flags; @@ -554,12 +554,7 @@ mpc_dma_is_tx_complete(struct dma_chan *chan, dma_cookie_t cookie, last_complete = mchan->completed_cookie; spin_unlock_irqrestore(&mchan->lock, flags); - if (done) - *done = last_complete; - - if (used) - *used = last_used; - + dma_set_tx_state(txstate, last_complete, last_used, 0); return dma_async_is_complete(cookie, last_complete, last_used); } @@ -663,7 +658,7 @@ static int __devinit mpc_dma_probe(struct of_device *op, } regs_start = res.start; - regs_size = res.end - res.start + 1; + regs_size = resource_size(&res); if (!devm_request_mem_region(dev, regs_start, regs_size, DRV_NAME)) { dev_err(dev, "Error requesting memory region!\n"); @@ -694,7 +689,7 @@ static int __devinit mpc_dma_probe(struct of_device *op, dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources; dma->device_free_chan_resources = mpc_dma_free_chan_resources; dma->device_issue_pending = mpc_dma_issue_pending; - dma->device_is_tx_complete = mpc_dma_is_tx_complete; + dma->device_tx_status = mpc_dma_tx_status; dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy; INIT_LIST_HEAD(&dma->channels); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index e2fd34da64f2..86c5ae9fde34 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -810,14 +810,14 @@ static void mv_xor_free_chan_resources(struct dma_chan *chan) } /** - * mv_xor_is_complete - poll the status of an XOR transaction + * mv_xor_status - poll the status of an XOR transaction * @chan: XOR channel handle * @cookie: XOR transaction identifier + * @txstate: XOR transactions state holder (or NULL) */ -static enum dma_status mv_xor_is_complete(struct dma_chan *chan, +static enum dma_status mv_xor_status(struct dma_chan *chan, dma_cookie_t cookie, - dma_cookie_t *done, - dma_cookie_t *used) + struct dma_tx_state *txstate) { struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); dma_cookie_t last_used; @@ -827,10 +827,7 @@ static enum dma_status mv_xor_is_complete(struct dma_chan *chan, last_used = chan->cookie; last_complete = mv_chan->completed_cookie; mv_chan->is_complete_cookie = cookie; - if (done) - *done = last_complete; - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); ret = dma_async_is_complete(cookie, last_complete, last_used); if (ret == DMA_SUCCESS) { @@ -842,11 +839,7 @@ static enum dma_status mv_xor_is_complete(struct dma_chan *chan, last_used = chan->cookie; last_complete = mv_chan->completed_cookie; - if (done) - *done = last_complete; - if (used) - *used = last_used; - + dma_set_tx_state(txstate, last_complete, last_used, 0); return dma_async_is_complete(cookie, last_complete, last_used); } @@ -975,7 +968,7 @@ static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device) async_tx_ack(tx); msleep(1); - if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) != + if (mv_xor_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_printk(KERN_ERR, dma_chan->device->dev, "Self-test copy timed out, disabling\n"); @@ -1073,7 +1066,7 @@ mv_xor_xor_self_test(struct mv_xor_device *device) async_tx_ack(tx); msleep(8); - if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) != + if (mv_xor_status(dma_chan, cookie, NULL) != DMA_SUCCESS) { dev_printk(KERN_ERR, dma_chan->device->dev, "Self-test xor timed out, disabling\n"); @@ -1168,7 +1161,7 @@ static int __devinit mv_xor_probe(struct platform_device *pdev) /* set base routines */ dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources; dma_dev->device_free_chan_resources = mv_xor_free_chan_resources; - dma_dev->device_is_tx_complete = mv_xor_is_complete; + dma_dev->device_tx_status = mv_xor_status; dma_dev->device_issue_pending = mv_xor_issue_pending; dma_dev->dev = &pdev->dev; diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index d44626fa35ad..c6079fcca13f 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -3935,12 +3935,13 @@ static void ppc440spe_adma_free_chan_resources(struct dma_chan *chan) } /** - * ppc440spe_adma_is_complete - poll the status of an ADMA transaction + * ppc440spe_adma_tx_status - poll the status of an ADMA transaction * @chan: ADMA channel handle * @cookie: ADMA transaction identifier + * @txstate: a holder for the current state of the channel */ -static enum dma_status ppc440spe_adma_is_complete(struct dma_chan *chan, - dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used) +static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) { struct ppc440spe_adma_chan *ppc440spe_chan; dma_cookie_t last_used; @@ -3951,10 +3952,7 @@ static enum dma_status ppc440spe_adma_is_complete(struct dma_chan *chan, last_used = chan->cookie; last_complete = ppc440spe_chan->completed_cookie; - if (done) - *done = last_complete; - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); ret = dma_async_is_complete(cookie, last_complete, last_used); if (ret == DMA_SUCCESS) @@ -3965,10 +3963,7 @@ static enum dma_status ppc440spe_adma_is_complete(struct dma_chan *chan, last_used = chan->cookie; last_complete = ppc440spe_chan->completed_cookie; - if (done) - *done = last_complete; - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); return dma_async_is_complete(cookie, last_complete, last_used); } @@ -4180,7 +4175,7 @@ static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev) ppc440spe_adma_alloc_chan_resources; adev->common.device_free_chan_resources = ppc440spe_adma_free_chan_resources; - adev->common.device_is_tx_complete = ppc440spe_adma_is_complete; + adev->common.device_tx_status = ppc440spe_adma_tx_status; adev->common.device_issue_pending = ppc440spe_adma_issue_pending; /* Set prep routines based on capability */ diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index c2b0172a7589..a2585c90a139 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -597,12 +597,17 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg( direction, flags); } -static void sh_dmae_terminate_all(struct dma_chan *chan) +static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) { struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + /* Only supports DMA_TERMINATE_ALL */ + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + if (!chan) - return; + return -EINVAL; dmae_halt(sh_chan); @@ -618,6 +623,8 @@ static void sh_dmae_terminate_all(struct dma_chan *chan) spin_unlock_bh(&sh_chan->desc_lock); sh_dmae_chan_ld_cleanup(sh_chan, true); + + return 0; } static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) @@ -749,10 +756,9 @@ static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) sh_chan_xfer_ld_queue(sh_chan); } -static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, +static enum dma_status sh_dmae_tx_status(struct dma_chan *chan, dma_cookie_t cookie, - dma_cookie_t *done, - dma_cookie_t *used) + struct dma_tx_state *txstate) { struct sh_dmae_chan *sh_chan = to_sh_chan(chan); dma_cookie_t last_used; @@ -764,12 +770,7 @@ static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, last_used = chan->cookie; last_complete = sh_chan->completed_cookie; BUG_ON(last_complete < 0); - - if (done) - *done = last_complete; - - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); spin_lock_bh(&sh_chan->desc_lock); @@ -1041,12 +1042,12 @@ static int __init sh_dmae_probe(struct platform_device *pdev) = sh_dmae_alloc_chan_resources; shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources; shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy; - shdev->common.device_is_tx_complete = sh_dmae_is_complete; + shdev->common.device_tx_status = sh_dmae_tx_status; shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending; /* Compulsory for DMA_SLAVE fields */ shdev->common.device_prep_slave_sg = sh_dmae_prep_slave_sg; - shdev->common.device_terminate_all = sh_dmae_terminate_all; + shdev->common.device_control = sh_dmae_control; shdev->common.dev = &pdev->dev; /* Default transfer size of 32 bytes requires 32-byte alignment */ diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c new file mode 100644 index 000000000000..c426829f6ab8 --- /dev/null +++ b/drivers/dma/ste_dma40.c @@ -0,0 +1,2657 @@ +/* + * driver/dma/ste_dma40.c + * + * Copyright (C) ST-Ericsson 2007-2010 + * License terms: GNU General Public License (GPL) version 2 + * Author: Per Friden <per.friden@stericsson.com> + * Author: Jonas Aaberg <jonas.aberg@stericsson.com> + * + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/delay.h> + +#include <plat/ste_dma40.h> + +#include "ste_dma40_ll.h" + +#define D40_NAME "dma40" + +#define D40_PHY_CHAN -1 + +/* For masking out/in 2 bit channel positions */ +#define D40_CHAN_POS(chan) (2 * (chan / 2)) +#define D40_CHAN_POS_MASK(chan) (0x3 << D40_CHAN_POS(chan)) + +/* Maximum iterations taken before giving up suspending a channel */ +#define D40_SUSPEND_MAX_IT 500 + +#define D40_ALLOC_FREE (1 << 31) +#define D40_ALLOC_PHY (1 << 30) +#define D40_ALLOC_LOG_FREE 0 + +/* The number of free d40_desc to keep in memory before starting + * to kfree() them */ +#define D40_DESC_CACHE_SIZE 50 + +/* Hardware designer of the block */ +#define D40_PERIPHID2_DESIGNER 0x8 + +/** + * enum 40_command - The different commands and/or statuses. + * + * @D40_DMA_STOP: DMA channel command STOP or status STOPPED, + * @D40_DMA_RUN: The DMA channel is RUNNING of the command RUN. + * @D40_DMA_SUSPEND_REQ: Request the DMA to SUSPEND as soon as possible. + * @D40_DMA_SUSPENDED: The DMA channel is SUSPENDED. + */ +enum d40_command { + D40_DMA_STOP = 0, + D40_DMA_RUN = 1, + D40_DMA_SUSPEND_REQ = 2, + D40_DMA_SUSPENDED = 3 +}; + +/** + * struct d40_lli_pool - Structure for keeping LLIs in memory + * + * @base: Pointer to memory area when the pre_alloc_lli's are not large + * enough, IE bigger than the most common case, 1 dst and 1 src. NULL if + * pre_alloc_lli is used. + * @size: The size in bytes of the memory at base or the size of pre_alloc_lli. + * @pre_alloc_lli: Pre allocated area for the most common case of transfers, + * one buffer to one buffer. + */ +struct d40_lli_pool { + void *base; + int size; + /* Space for dst and src, plus an extra for padding */ + u8 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)]; +}; + +/** + * struct d40_desc - A descriptor is one DMA job. + * + * @lli_phy: LLI settings for physical channel. Both src and dst= + * points into the lli_pool, to base if lli_len > 1 or to pre_alloc_lli if + * lli_len equals one. + * @lli_log: Same as above but for logical channels. + * @lli_pool: The pool with two entries pre-allocated. + * @lli_len: Number of LLI's in lli_pool + * @lli_tcount: Number of LLIs processed in the transfer. When equals lli_len + * then this transfer job is done. + * @txd: DMA engine struct. Used for among other things for communication + * during a transfer. + * @node: List entry. + * @dir: The transfer direction of this job. + * @is_in_client_list: true if the client owns this descriptor. + * + * This descriptor is used for both logical and physical transfers. + */ + +struct d40_desc { + /* LLI physical */ + struct d40_phy_lli_bidir lli_phy; + /* LLI logical */ + struct d40_log_lli_bidir lli_log; + + struct d40_lli_pool lli_pool; + u32 lli_len; + u32 lli_tcount; + + struct dma_async_tx_descriptor txd; + struct list_head node; + + enum dma_data_direction dir; + bool is_in_client_list; +}; + +/** + * struct d40_lcla_pool - LCLA pool settings and data. + * + * @base: The virtual address of LCLA. + * @phy: Physical base address of LCLA. + * @base_size: size of lcla. + * @lock: Lock to protect the content in this struct. + * @alloc_map: Mapping between physical channel and LCLA entries. + * @num_blocks: The number of entries of alloc_map. Equals to the + * number of physical channels. + */ +struct d40_lcla_pool { + void *base; + dma_addr_t phy; + resource_size_t base_size; + spinlock_t lock; + u32 *alloc_map; + int num_blocks; +}; + +/** + * struct d40_phy_res - struct for handling eventlines mapped to physical + * channels. + * + * @lock: A lock protection this entity. + * @num: The physical channel number of this entity. + * @allocated_src: Bit mapped to show which src event line's are mapped to + * this physical channel. Can also be free or physically allocated. + * @allocated_dst: Same as for src but is dst. + * allocated_dst and allocated_src uses the D40_ALLOC* defines as well as + * event line number. Both allocated_src and allocated_dst can not be + * allocated to a physical channel, since the interrupt handler has then + * no way of figure out which one the interrupt belongs to. + */ +struct d40_phy_res { + spinlock_t lock; + int num; + u32 allocated_src; + u32 allocated_dst; +}; + +struct d40_base; + +/** + * struct d40_chan - Struct that describes a channel. + * + * @lock: A spinlock to protect this struct. + * @log_num: The logical number, if any of this channel. + * @completed: Starts with 1, after first interrupt it is set to dma engine's + * current cookie. + * @pending_tx: The number of pending transfers. Used between interrupt handler + * and tasklet. + * @busy: Set to true when transfer is ongoing on this channel. + * @phy_chan: Pointer to physical channel which this instance runs on. + * @chan: DMA engine handle. + * @tasklet: Tasklet that gets scheduled from interrupt context to complete a + * transfer and call client callback. + * @client: Cliented owned descriptor list. + * @active: Active descriptor. + * @queue: Queued jobs. + * @free: List of free descripts, ready to be reused. + * @free_len: Number of descriptors in the free list. + * @dma_cfg: The client configuration of this dma channel. + * @base: Pointer to the device instance struct. + * @src_def_cfg: Default cfg register setting for src. + * @dst_def_cfg: Default cfg register setting for dst. + * @log_def: Default logical channel settings. + * @lcla: Space for one dst src pair for logical channel transfers. + * @lcpa: Pointer to dst and src lcpa settings. + * + * This struct can either "be" a logical or a physical channel. + */ +struct d40_chan { + spinlock_t lock; + int log_num; + /* ID of the most recent completed transfer */ + int completed; + int pending_tx; + bool busy; + struct d40_phy_res *phy_chan; + struct dma_chan chan; + struct tasklet_struct tasklet; + struct list_head client; + struct list_head active; + struct list_head queue; + struct list_head free; + int free_len; + struct stedma40_chan_cfg dma_cfg; + struct d40_base *base; + /* Default register configurations */ + u32 src_def_cfg; + u32 dst_def_cfg; + struct d40_def_lcsp log_def; + struct d40_lcla_elem lcla; + struct d40_log_lli_full *lcpa; +}; + +/** + * struct d40_base - The big global struct, one for each probe'd instance. + * + * @interrupt_lock: Lock used to make sure one interrupt is handle a time. + * @execmd_lock: Lock for execute command usage since several channels share + * the same physical register. + * @dev: The device structure. + * @virtbase: The virtual base address of the DMA's register. + * @clk: Pointer to the DMA clock structure. + * @phy_start: Physical memory start of the DMA registers. + * @phy_size: Size of the DMA register map. + * @irq: The IRQ number. + * @num_phy_chans: The number of physical channels. Read from HW. This + * is the number of available channels for this driver, not counting "Secure + * mode" allocated physical channels. + * @num_log_chans: The number of logical channels. Calculated from + * num_phy_chans. + * @dma_both: dma_device channels that can do both memcpy and slave transfers. + * @dma_slave: dma_device channels that can do only do slave transfers. + * @dma_memcpy: dma_device channels that can do only do memcpy transfers. + * @phy_chans: Room for all possible physical channels in system. + * @log_chans: Room for all possible logical channels in system. + * @lookup_log_chans: Used to map interrupt number to logical channel. Points + * to log_chans entries. + * @lookup_phy_chans: Used to map interrupt number to physical channel. Points + * to phy_chans entries. + * @plat_data: Pointer to provided platform_data which is the driver + * configuration. + * @phy_res: Vector containing all physical channels. + * @lcla_pool: lcla pool settings and data. + * @lcpa_base: The virtual mapped address of LCPA. + * @phy_lcpa: The physical address of the LCPA. + * @lcpa_size: The size of the LCPA area. + */ +struct d40_base { + spinlock_t interrupt_lock; + spinlock_t execmd_lock; + struct device *dev; + void __iomem *virtbase; + struct clk *clk; + phys_addr_t phy_start; + resource_size_t phy_size; + int irq; + int num_phy_chans; + int num_log_chans; + struct dma_device dma_both; + struct dma_device dma_slave; + struct dma_device dma_memcpy; + struct d40_chan *phy_chans; + struct d40_chan *log_chans; + struct d40_chan **lookup_log_chans; + struct d40_chan **lookup_phy_chans; + struct stedma40_platform_data *plat_data; + /* Physical half channels */ + struct d40_phy_res *phy_res; + struct d40_lcla_pool lcla_pool; + void *lcpa_base; + dma_addr_t phy_lcpa; + resource_size_t lcpa_size; +}; + +/** + * struct d40_interrupt_lookup - lookup table for interrupt handler + * + * @src: Interrupt mask register. + * @clr: Interrupt clear register. + * @is_error: true if this is an error interrupt. + * @offset: start delta in the lookup_log_chans in d40_base. If equals to + * D40_PHY_CHAN, the lookup_phy_chans shall be used instead. + */ +struct d40_interrupt_lookup { + u32 src; + u32 clr; + bool is_error; + int offset; +}; + +/** + * struct d40_reg_val - simple lookup struct + * + * @reg: The register. + * @val: The value that belongs to the register in reg. + */ +struct d40_reg_val { + unsigned int reg; + unsigned int val; +}; + +static int d40_pool_lli_alloc(struct d40_desc *d40d, + int lli_len, bool is_log) +{ + u32 align; + void *base; + + if (is_log) + align = sizeof(struct d40_log_lli); + else + align = sizeof(struct d40_phy_lli); + + if (lli_len == 1) { + base = d40d->lli_pool.pre_alloc_lli; + d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli); + d40d->lli_pool.base = NULL; + } else { + d40d->lli_pool.size = ALIGN(lli_len * 2 * align, align); + + base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT); + d40d->lli_pool.base = base; + + if (d40d->lli_pool.base == NULL) + return -ENOMEM; + } + + if (is_log) { + d40d->lli_log.src = PTR_ALIGN((struct d40_log_lli *) base, + align); + d40d->lli_log.dst = PTR_ALIGN(d40d->lli_log.src + lli_len, + align); + } else { + d40d->lli_phy.src = PTR_ALIGN((struct d40_phy_lli *)base, + align); + d40d->lli_phy.dst = PTR_ALIGN(d40d->lli_phy.src + lli_len, + align); + + d40d->lli_phy.src_addr = virt_to_phys(d40d->lli_phy.src); + d40d->lli_phy.dst_addr = virt_to_phys(d40d->lli_phy.dst); + } + + return 0; +} + +static void d40_pool_lli_free(struct d40_desc *d40d) +{ + kfree(d40d->lli_pool.base); + d40d->lli_pool.base = NULL; + d40d->lli_pool.size = 0; + d40d->lli_log.src = NULL; + d40d->lli_log.dst = NULL; + d40d->lli_phy.src = NULL; + d40d->lli_phy.dst = NULL; + d40d->lli_phy.src_addr = 0; + d40d->lli_phy.dst_addr = 0; +} + +static dma_cookie_t d40_assign_cookie(struct d40_chan *d40c, + struct d40_desc *desc) +{ + dma_cookie_t cookie = d40c->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + d40c->chan.cookie = cookie; + desc->txd.cookie = cookie; + + return cookie; +} + +static void d40_desc_reset(struct d40_desc *d40d) +{ + d40d->lli_tcount = 0; +} + +static void d40_desc_remove(struct d40_desc *d40d) +{ + list_del(&d40d->node); +} + +static struct d40_desc *d40_desc_get(struct d40_chan *d40c) +{ + struct d40_desc *desc; + struct d40_desc *d; + struct d40_desc *_d; + + if (!list_empty(&d40c->client)) { + list_for_each_entry_safe(d, _d, &d40c->client, node) + if (async_tx_test_ack(&d->txd)) { + d40_pool_lli_free(d); + d40_desc_remove(d); + desc = d; + goto out; + } + } + + if (list_empty(&d40c->free)) { + /* Alloc new desc because we're out of used ones */ + desc = kzalloc(sizeof(struct d40_desc), GFP_NOWAIT); + if (desc == NULL) + goto out; + INIT_LIST_HEAD(&desc->node); + } else { + /* Reuse an old desc. */ + desc = list_first_entry(&d40c->free, + struct d40_desc, + node); + list_del(&desc->node); + d40c->free_len--; + } +out: + return desc; +} + +static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d) +{ + if (d40c->free_len < D40_DESC_CACHE_SIZE) { + list_add_tail(&d40d->node, &d40c->free); + d40c->free_len++; + } else + kfree(d40d); +} + +static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc) +{ + list_add_tail(&desc->node, &d40c->active); +} + +static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) +{ + struct d40_desc *d; + + if (list_empty(&d40c->active)) + return NULL; + + d = list_first_entry(&d40c->active, + struct d40_desc, + node); + return d; +} + +static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc) +{ + list_add_tail(&desc->node, &d40c->queue); +} + +static struct d40_desc *d40_first_queued(struct d40_chan *d40c) +{ + struct d40_desc *d; + + if (list_empty(&d40c->queue)) + return NULL; + + d = list_first_entry(&d40c->queue, + struct d40_desc, + node); + return d; +} + +/* Support functions for logical channels */ + +static int d40_lcla_id_get(struct d40_chan *d40c, + struct d40_lcla_pool *pool) +{ + int src_id = 0; + int dst_id = 0; + struct d40_log_lli *lcla_lidx_base = + pool->base + d40c->phy_chan->num * 1024; + int i; + int lli_per_log = d40c->base->plat_data->llis_per_log; + + if (d40c->lcla.src_id >= 0 && d40c->lcla.dst_id >= 0) + return 0; + + if (pool->num_blocks > 32) + return -EINVAL; + + spin_lock(&pool->lock); + + for (i = 0; i < pool->num_blocks; i++) { + if (!(pool->alloc_map[d40c->phy_chan->num] & (0x1 << i))) { + pool->alloc_map[d40c->phy_chan->num] |= (0x1 << i); + break; + } + } + src_id = i; + if (src_id >= pool->num_blocks) + goto err; + + for (; i < pool->num_blocks; i++) { + if (!(pool->alloc_map[d40c->phy_chan->num] & (0x1 << i))) { + pool->alloc_map[d40c->phy_chan->num] |= (0x1 << i); + break; + } + } + + dst_id = i; + if (dst_id == src_id) + goto err; + + d40c->lcla.src_id = src_id; + d40c->lcla.dst_id = dst_id; + d40c->lcla.dst = lcla_lidx_base + dst_id * lli_per_log + 1; + d40c->lcla.src = lcla_lidx_base + src_id * lli_per_log + 1; + + + spin_unlock(&pool->lock); + return 0; +err: + spin_unlock(&pool->lock); + return -EINVAL; +} + +static void d40_lcla_id_put(struct d40_chan *d40c, + struct d40_lcla_pool *pool, + int id) +{ + if (id < 0) + return; + + d40c->lcla.src_id = -1; + d40c->lcla.dst_id = -1; + + spin_lock(&pool->lock); + pool->alloc_map[d40c->phy_chan->num] &= (~(0x1 << id)); + spin_unlock(&pool->lock); +} + +static int d40_channel_execute_command(struct d40_chan *d40c, + enum d40_command command) +{ + int status, i; + void __iomem *active_reg; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&d40c->base->execmd_lock, flags); + + if (d40c->phy_chan->num % 2 == 0) + active_reg = d40c->base->virtbase + D40_DREG_ACTIVE; + else + active_reg = d40c->base->virtbase + D40_DREG_ACTIVO; + + if (command == D40_DMA_SUSPEND_REQ) { + status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + + if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP) + goto done; + } + + writel(command << D40_CHAN_POS(d40c->phy_chan->num), active_reg); + + if (command == D40_DMA_SUSPEND_REQ) { + + for (i = 0 ; i < D40_SUSPEND_MAX_IT; i++) { + status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + + cpu_relax(); + /* + * Reduce the number of bus accesses while + * waiting for the DMA to suspend. + */ + udelay(3); + + if (status == D40_DMA_STOP || + status == D40_DMA_SUSPENDED) + break; + } + + if (i == D40_SUSPEND_MAX_IT) { + dev_err(&d40c->chan.dev->device, + "[%s]: unable to suspend the chl %d (log: %d) status %x\n", + __func__, d40c->phy_chan->num, d40c->log_num, + status); + dump_stack(); + ret = -EBUSY; + } + + } +done: + spin_unlock_irqrestore(&d40c->base->execmd_lock, flags); + return ret; +} + +static void d40_term_all(struct d40_chan *d40c) +{ + struct d40_desc *d40d; + struct d40_desc *d; + struct d40_desc *_d; + + /* Release active descriptors */ + while ((d40d = d40_first_active_get(d40c))) { + d40_desc_remove(d40d); + + /* Return desc to free-list */ + d40_desc_free(d40c, d40d); + } + + /* Release queued descriptors waiting for transfer */ + while ((d40d = d40_first_queued(d40c))) { + d40_desc_remove(d40d); + + /* Return desc to free-list */ + d40_desc_free(d40c, d40d); + } + + /* Release client owned descriptors */ + if (!list_empty(&d40c->client)) + list_for_each_entry_safe(d, _d, &d40c->client, node) { + d40_pool_lli_free(d); + d40_desc_remove(d); + /* Return desc to free-list */ + d40_desc_free(d40c, d40d); + } + + d40_lcla_id_put(d40c, &d40c->base->lcla_pool, + d40c->lcla.src_id); + d40_lcla_id_put(d40c, &d40c->base->lcla_pool, + d40c->lcla.dst_id); + + d40c->pending_tx = 0; + d40c->busy = false; +} + +static void d40_config_set_event(struct d40_chan *d40c, bool do_enable) +{ + u32 val; + unsigned long flags; + + if (do_enable) + val = D40_ACTIVATE_EVENTLINE; + else + val = D40_DEACTIVATE_EVENTLINE; + + spin_lock_irqsave(&d40c->phy_chan->lock, flags); + + /* Enable event line connected to device (or memcpy) */ + if ((d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) || + (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) { + u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); + + writel((val << D40_EVENTLINE_POS(event)) | + ~D40_EVENTLINE_MASK(event), + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SSLNK); + } + if (d40c->dma_cfg.dir != STEDMA40_PERIPH_TO_MEM) { + u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); + + writel((val << D40_EVENTLINE_POS(event)) | + ~D40_EVENTLINE_MASK(event), + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDLNK); + } + + spin_unlock_irqrestore(&d40c->phy_chan->lock, flags); +} + +static u32 d40_chan_has_events(struct d40_chan *d40c) +{ + u32 val = 0; + + /* If SSLNK or SDLNK is zero all events are disabled */ + if ((d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) || + (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) + val = readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SSLNK); + + if (d40c->dma_cfg.dir != STEDMA40_PERIPH_TO_MEM) + val = readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDLNK); + return val; +} + +static void d40_config_enable_lidx(struct d40_chan *d40c) +{ + /* Set LIDX for lcla */ + writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) & + D40_SREG_ELEM_LOG_LIDX_MASK, + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + D40_CHAN_REG_SDELT); + + writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) & + D40_SREG_ELEM_LOG_LIDX_MASK, + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + D40_CHAN_REG_SSELT); +} + +static int d40_config_write(struct d40_chan *d40c) +{ + u32 addr_base; + u32 var; + int res; + + res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + if (res) + return res; + + /* Odd addresses are even addresses + 4 */ + addr_base = (d40c->phy_chan->num % 2) * 4; + /* Setup channel mode to logical or physical */ + var = ((u32)(d40c->log_num != D40_PHY_CHAN) + 1) << + D40_CHAN_POS(d40c->phy_chan->num); + writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base); + + /* Setup operational mode option register */ + var = ((d40c->dma_cfg.channel_type >> STEDMA40_INFO_CH_MODE_OPT_POS) & + 0x3) << D40_CHAN_POS(d40c->phy_chan->num); + + writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base); + + if (d40c->log_num != D40_PHY_CHAN) { + /* Set default config for CFG reg */ + writel(d40c->src_def_cfg, + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SSCFG); + writel(d40c->dst_def_cfg, + d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDCFG); + + d40_config_enable_lidx(d40c); + } + return res; +} + +static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) +{ + + if (d40d->lli_phy.dst && d40d->lli_phy.src) { + d40_phy_lli_write(d40c->base->virtbase, + d40c->phy_chan->num, + d40d->lli_phy.dst, + d40d->lli_phy.src); + d40d->lli_tcount = d40d->lli_len; + } else if (d40d->lli_log.dst && d40d->lli_log.src) { + u32 lli_len; + struct d40_log_lli *src = d40d->lli_log.src; + struct d40_log_lli *dst = d40d->lli_log.dst; + + src += d40d->lli_tcount; + dst += d40d->lli_tcount; + + if (d40d->lli_len <= d40c->base->plat_data->llis_per_log) + lli_len = d40d->lli_len; + else + lli_len = d40c->base->plat_data->llis_per_log; + d40d->lli_tcount += lli_len; + d40_log_lli_write(d40c->lcpa, d40c->lcla.src, + d40c->lcla.dst, + dst, src, + d40c->base->plat_data->llis_per_log); + } +} + +static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct d40_chan *d40c = container_of(tx->chan, + struct d40_chan, + chan); + struct d40_desc *d40d = container_of(tx, struct d40_desc, txd); + unsigned long flags; + + spin_lock_irqsave(&d40c->lock, flags); + + tx->cookie = d40_assign_cookie(d40c, d40d); + + d40_desc_queue(d40c, d40d); + + spin_unlock_irqrestore(&d40c->lock, flags); + + return tx->cookie; +} + +static int d40_start(struct d40_chan *d40c) +{ + int err; + + if (d40c->log_num != D40_PHY_CHAN) { + err = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + if (err) + return err; + d40_config_set_event(d40c, true); + } + + err = d40_channel_execute_command(d40c, D40_DMA_RUN); + + return err; +} + +static struct d40_desc *d40_queue_start(struct d40_chan *d40c) +{ + struct d40_desc *d40d; + int err; + + /* Start queued jobs, if any */ + d40d = d40_first_queued(d40c); + + if (d40d != NULL) { + d40c->busy = true; + + /* Remove from queue */ + d40_desc_remove(d40d); + + /* Add to active queue */ + d40_desc_submit(d40c, d40d); + + /* Initiate DMA job */ + d40_desc_load(d40c, d40d); + + /* Start dma job */ + err = d40_start(d40c); + + if (err) + return NULL; + } + + return d40d; +} + +/* called from interrupt context */ +static void dma_tc_handle(struct d40_chan *d40c) +{ + struct d40_desc *d40d; + + if (!d40c->phy_chan) + return; + + /* Get first active entry from list */ + d40d = d40_first_active_get(d40c); + + if (d40d == NULL) + return; + + if (d40d->lli_tcount < d40d->lli_len) { + + d40_desc_load(d40c, d40d); + /* Start dma job */ + (void) d40_start(d40c); + return; + } + + if (d40_queue_start(d40c) == NULL) + d40c->busy = false; + + d40c->pending_tx++; + tasklet_schedule(&d40c->tasklet); + +} + +static void dma_tasklet(unsigned long data) +{ + struct d40_chan *d40c = (struct d40_chan *) data; + struct d40_desc *d40d_fin; + unsigned long flags; + dma_async_tx_callback callback; + void *callback_param; + + spin_lock_irqsave(&d40c->lock, flags); + + /* Get first active entry from list */ + d40d_fin = d40_first_active_get(d40c); + + if (d40d_fin == NULL) + goto err; + + d40c->completed = d40d_fin->txd.cookie; + + /* + * If terminating a channel pending_tx is set to zero. + * This prevents any finished active jobs to return to the client. + */ + if (d40c->pending_tx == 0) { + spin_unlock_irqrestore(&d40c->lock, flags); + return; + } + + /* Callback to client */ + callback = d40d_fin->txd.callback; + callback_param = d40d_fin->txd.callback_param; + + if (async_tx_test_ack(&d40d_fin->txd)) { + d40_pool_lli_free(d40d_fin); + d40_desc_remove(d40d_fin); + /* Return desc to free-list */ + d40_desc_free(d40c, d40d_fin); + } else { + d40_desc_reset(d40d_fin); + if (!d40d_fin->is_in_client_list) { + d40_desc_remove(d40d_fin); + list_add_tail(&d40d_fin->node, &d40c->client); + d40d_fin->is_in_client_list = true; + } + } + + d40c->pending_tx--; + + if (d40c->pending_tx) + tasklet_schedule(&d40c->tasklet); + + spin_unlock_irqrestore(&d40c->lock, flags); + + if (callback) + callback(callback_param); + + return; + + err: + /* Rescue manouver if receiving double interrupts */ + if (d40c->pending_tx > 0) + d40c->pending_tx--; + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static irqreturn_t d40_handle_interrupt(int irq, void *data) +{ + static const struct d40_interrupt_lookup il[] = { + {D40_DREG_LCTIS0, D40_DREG_LCICR0, false, 0}, + {D40_DREG_LCTIS1, D40_DREG_LCICR1, false, 32}, + {D40_DREG_LCTIS2, D40_DREG_LCICR2, false, 64}, + {D40_DREG_LCTIS3, D40_DREG_LCICR3, false, 96}, + {D40_DREG_LCEIS0, D40_DREG_LCICR0, true, 0}, + {D40_DREG_LCEIS1, D40_DREG_LCICR1, true, 32}, + {D40_DREG_LCEIS2, D40_DREG_LCICR2, true, 64}, + {D40_DREG_LCEIS3, D40_DREG_LCICR3, true, 96}, + {D40_DREG_PCTIS, D40_DREG_PCICR, false, D40_PHY_CHAN}, + {D40_DREG_PCEIS, D40_DREG_PCICR, true, D40_PHY_CHAN}, + }; + + int i; + u32 regs[ARRAY_SIZE(il)]; + u32 tmp; + u32 idx; + u32 row; + long chan = -1; + struct d40_chan *d40c; + unsigned long flags; + struct d40_base *base = data; + + spin_lock_irqsave(&base->interrupt_lock, flags); + + /* Read interrupt status of both logical and physical channels */ + for (i = 0; i < ARRAY_SIZE(il); i++) + regs[i] = readl(base->virtbase + il[i].src); + + for (;;) { + + chan = find_next_bit((unsigned long *)regs, + BITS_PER_LONG * ARRAY_SIZE(il), chan + 1); + + /* No more set bits found? */ + if (chan == BITS_PER_LONG * ARRAY_SIZE(il)) + break; + + row = chan / BITS_PER_LONG; + idx = chan & (BITS_PER_LONG - 1); + + /* ACK interrupt */ + tmp = readl(base->virtbase + il[row].clr); + tmp |= 1 << idx; + writel(tmp, base->virtbase + il[row].clr); + + if (il[row].offset == D40_PHY_CHAN) + d40c = base->lookup_phy_chans[idx]; + else + d40c = base->lookup_log_chans[il[row].offset + idx]; + spin_lock(&d40c->lock); + + if (!il[row].is_error) + dma_tc_handle(d40c); + else + dev_err(base->dev, "[%s] IRQ chan: %ld offset %d idx %d\n", + __func__, chan, il[row].offset, idx); + + spin_unlock(&d40c->lock); + } + + spin_unlock_irqrestore(&base->interrupt_lock, flags); + + return IRQ_HANDLED; +} + + +static int d40_validate_conf(struct d40_chan *d40c, + struct stedma40_chan_cfg *conf) +{ + int res = 0; + u32 dst_event_group = D40_TYPE_TO_GROUP(conf->dst_dev_type); + u32 src_event_group = D40_TYPE_TO_GROUP(conf->src_dev_type); + bool is_log = (conf->channel_type & STEDMA40_CHANNEL_IN_OPER_MODE) + == STEDMA40_CHANNEL_IN_LOG_MODE; + + if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH && + dst_event_group == STEDMA40_DEV_DST_MEMORY) { + dev_err(&d40c->chan.dev->device, "[%s] Invalid dst\n", + __func__); + res = -EINVAL; + } + + if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM && + src_event_group == STEDMA40_DEV_SRC_MEMORY) { + dev_err(&d40c->chan.dev->device, "[%s] Invalid src\n", + __func__); + res = -EINVAL; + } + + if (src_event_group == STEDMA40_DEV_SRC_MEMORY && + dst_event_group == STEDMA40_DEV_DST_MEMORY && is_log) { + dev_err(&d40c->chan.dev->device, + "[%s] No event line\n", __func__); + res = -EINVAL; + } + + if (conf->dir == STEDMA40_PERIPH_TO_PERIPH && + (src_event_group != dst_event_group)) { + dev_err(&d40c->chan.dev->device, + "[%s] Invalid event group\n", __func__); + res = -EINVAL; + } + + if (conf->dir == STEDMA40_PERIPH_TO_PERIPH) { + /* + * DMAC HW supports it. Will be added to this driver, + * in case any dma client requires it. + */ + dev_err(&d40c->chan.dev->device, + "[%s] periph to periph not supported\n", + __func__); + res = -EINVAL; + } + + return res; +} + +static bool d40_alloc_mask_set(struct d40_phy_res *phy, bool is_src, + int log_event_line, bool is_log) +{ + unsigned long flags; + spin_lock_irqsave(&phy->lock, flags); + if (!is_log) { + /* Physical interrupts are masked per physical full channel */ + if (phy->allocated_src == D40_ALLOC_FREE && + phy->allocated_dst == D40_ALLOC_FREE) { + phy->allocated_dst = D40_ALLOC_PHY; + phy->allocated_src = D40_ALLOC_PHY; + goto found; + } else + goto not_found; + } + + /* Logical channel */ + if (is_src) { + if (phy->allocated_src == D40_ALLOC_PHY) + goto not_found; + + if (phy->allocated_src == D40_ALLOC_FREE) + phy->allocated_src = D40_ALLOC_LOG_FREE; + + if (!(phy->allocated_src & (1 << log_event_line))) { + phy->allocated_src |= 1 << log_event_line; + goto found; + } else + goto not_found; + } else { + if (phy->allocated_dst == D40_ALLOC_PHY) + goto not_found; + + if (phy->allocated_dst == D40_ALLOC_FREE) + phy->allocated_dst = D40_ALLOC_LOG_FREE; + + if (!(phy->allocated_dst & (1 << log_event_line))) { + phy->allocated_dst |= 1 << log_event_line; + goto found; + } else + goto not_found; + } + +not_found: + spin_unlock_irqrestore(&phy->lock, flags); + return false; +found: + spin_unlock_irqrestore(&phy->lock, flags); + return true; +} + +static bool d40_alloc_mask_free(struct d40_phy_res *phy, bool is_src, + int log_event_line) +{ + unsigned long flags; + bool is_free = false; + + spin_lock_irqsave(&phy->lock, flags); + if (!log_event_line) { + /* Physical interrupts are masked per physical full channel */ + phy->allocated_dst = D40_ALLOC_FREE; + phy->allocated_src = D40_ALLOC_FREE; + is_free = true; + goto out; + } + + /* Logical channel */ + if (is_src) { + phy->allocated_src &= ~(1 << log_event_line); + if (phy->allocated_src == D40_ALLOC_LOG_FREE) + phy->allocated_src = D40_ALLOC_FREE; + } else { + phy->allocated_dst &= ~(1 << log_event_line); + if (phy->allocated_dst == D40_ALLOC_LOG_FREE) + phy->allocated_dst = D40_ALLOC_FREE; + } + + is_free = ((phy->allocated_src | phy->allocated_dst) == + D40_ALLOC_FREE); + +out: + spin_unlock_irqrestore(&phy->lock, flags); + + return is_free; +} + +static int d40_allocate_channel(struct d40_chan *d40c) +{ + int dev_type; + int event_group; + int event_line; + struct d40_phy_res *phys; + int i; + int j; + int log_num; + bool is_src; + bool is_log = (d40c->dma_cfg.channel_type & STEDMA40_CHANNEL_IN_OPER_MODE) + == STEDMA40_CHANNEL_IN_LOG_MODE; + + + phys = d40c->base->phy_res; + + if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) { + dev_type = d40c->dma_cfg.src_dev_type; + log_num = 2 * dev_type; + is_src = true; + } else if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || + d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { + /* dst event lines are used for logical memcpy */ + dev_type = d40c->dma_cfg.dst_dev_type; + log_num = 2 * dev_type + 1; + is_src = false; + } else + return -EINVAL; + + event_group = D40_TYPE_TO_GROUP(dev_type); + event_line = D40_TYPE_TO_EVENT(dev_type); + + if (!is_log) { + if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { + /* Find physical half channel */ + for (i = 0; i < d40c->base->num_phy_chans; i++) { + + if (d40_alloc_mask_set(&phys[i], is_src, + 0, is_log)) + goto found_phy; + } + } else + for (j = 0; j < d40c->base->num_phy_chans; j += 8) { + int phy_num = j + event_group * 2; + for (i = phy_num; i < phy_num + 2; i++) { + if (d40_alloc_mask_set(&phys[i], is_src, + 0, is_log)) + goto found_phy; + } + } + return -EINVAL; +found_phy: + d40c->phy_chan = &phys[i]; + d40c->log_num = D40_PHY_CHAN; + goto out; + } + if (dev_type == -1) + return -EINVAL; + + /* Find logical channel */ + for (j = 0; j < d40c->base->num_phy_chans; j += 8) { + int phy_num = j + event_group * 2; + /* + * Spread logical channels across all available physical rather + * than pack every logical channel at the first available phy + * channels. + */ + if (is_src) { + for (i = phy_num; i < phy_num + 2; i++) { + if (d40_alloc_mask_set(&phys[i], is_src, + event_line, is_log)) + goto found_log; + } + } else { + for (i = phy_num + 1; i >= phy_num; i--) { + if (d40_alloc_mask_set(&phys[i], is_src, + event_line, is_log)) + goto found_log; + } + } + } + return -EINVAL; + +found_log: + d40c->phy_chan = &phys[i]; + d40c->log_num = log_num; +out: + + if (is_log) + d40c->base->lookup_log_chans[d40c->log_num] = d40c; + else + d40c->base->lookup_phy_chans[d40c->phy_chan->num] = d40c; + + return 0; + +} + +static int d40_config_chan(struct d40_chan *d40c, + struct stedma40_chan_cfg *info) +{ + + /* Fill in basic CFG register values */ + d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg, + &d40c->dst_def_cfg, d40c->log_num != D40_PHY_CHAN); + + if (d40c->log_num != D40_PHY_CHAN) { + d40_log_cfg(&d40c->dma_cfg, + &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); + + if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) + d40c->lcpa = d40c->base->lcpa_base + + d40c->dma_cfg.src_dev_type * 32; + else + d40c->lcpa = d40c->base->lcpa_base + + d40c->dma_cfg.dst_dev_type * 32 + 16; + } + + /* Write channel configuration to the DMA */ + return d40_config_write(d40c); +} + +static int d40_config_memcpy(struct d40_chan *d40c) +{ + dma_cap_mask_t cap = d40c->chan.device->cap_mask; + + if (dma_has_cap(DMA_MEMCPY, cap) && !dma_has_cap(DMA_SLAVE, cap)) { + d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_log; + d40c->dma_cfg.src_dev_type = STEDMA40_DEV_SRC_MEMORY; + d40c->dma_cfg.dst_dev_type = d40c->base->plat_data-> + memcpy[d40c->chan.chan_id]; + + } else if (dma_has_cap(DMA_MEMCPY, cap) && + dma_has_cap(DMA_SLAVE, cap)) { + d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_phy; + } else { + dev_err(&d40c->chan.dev->device, "[%s] No memcpy\n", + __func__); + return -EINVAL; + } + + return 0; +} + + +static int d40_free_dma(struct d40_chan *d40c) +{ + + int res = 0; + u32 event, dir; + struct d40_phy_res *phy = d40c->phy_chan; + bool is_src; + + /* Terminate all queued and active transfers */ + d40_term_all(d40c); + + if (phy == NULL) { + dev_err(&d40c->chan.dev->device, "[%s] phy == null\n", + __func__); + return -EINVAL; + } + + if (phy->allocated_src == D40_ALLOC_FREE && + phy->allocated_dst == D40_ALLOC_FREE) { + dev_err(&d40c->chan.dev->device, "[%s] channel already free\n", + __func__); + return -EINVAL; + } + + + res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + if (res) { + dev_err(&d40c->chan.dev->device, "[%s] suspend\n", + __func__); + return res; + } + + if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || + d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { + event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); + dir = D40_CHAN_REG_SDLNK; + is_src = false; + } else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) { + event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); + dir = D40_CHAN_REG_SSLNK; + is_src = true; + } else { + dev_err(&d40c->chan.dev->device, + "[%s] Unknown direction\n", __func__); + return -EINVAL; + } + + if (d40c->log_num != D40_PHY_CHAN) { + /* + * Release logical channel, deactivate the event line during + * the time physical res is suspended. + */ + writel((D40_DEACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event)) & + D40_EVENTLINE_MASK(event), + d40c->base->virtbase + D40_DREG_PCBASE + + phy->num * D40_DREG_PCDELTA + dir); + + d40c->base->lookup_log_chans[d40c->log_num] = NULL; + + /* + * Check if there are more logical allocation + * on this phy channel. + */ + if (!d40_alloc_mask_free(phy, is_src, event)) { + /* Resume the other logical channels if any */ + if (d40_chan_has_events(d40c)) { + res = d40_channel_execute_command(d40c, + D40_DMA_RUN); + if (res) { + dev_err(&d40c->chan.dev->device, + "[%s] Executing RUN command\n", + __func__); + return res; + } + } + return 0; + } + } else + d40_alloc_mask_free(phy, is_src, 0); + + /* Release physical channel */ + res = d40_channel_execute_command(d40c, D40_DMA_STOP); + if (res) { + dev_err(&d40c->chan.dev->device, + "[%s] Failed to stop channel\n", __func__); + return res; + } + d40c->phy_chan = NULL; + /* Invalidate channel type */ + d40c->dma_cfg.channel_type = 0; + d40c->base->lookup_phy_chans[phy->num] = NULL; + + return 0; + + +} + +static int d40_pause(struct dma_chan *chan) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int res; + + unsigned long flags; + + spin_lock_irqsave(&d40c->lock, flags); + + res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + if (res == 0) { + if (d40c->log_num != D40_PHY_CHAN) { + d40_config_set_event(d40c, false); + /* Resume the other logical channels if any */ + if (d40_chan_has_events(d40c)) + res = d40_channel_execute_command(d40c, + D40_DMA_RUN); + } + } + + spin_unlock_irqrestore(&d40c->lock, flags); + return res; +} + +static bool d40_is_paused(struct d40_chan *d40c) +{ + bool is_paused = false; + unsigned long flags; + void __iomem *active_reg; + u32 status; + u32 event; + int res; + + spin_lock_irqsave(&d40c->lock, flags); + + if (d40c->log_num == D40_PHY_CHAN) { + if (d40c->phy_chan->num % 2 == 0) + active_reg = d40c->base->virtbase + D40_DREG_ACTIVE; + else + active_reg = d40c->base->virtbase + D40_DREG_ACTIVO; + + status = (readl(active_reg) & + D40_CHAN_POS_MASK(d40c->phy_chan->num)) >> + D40_CHAN_POS(d40c->phy_chan->num); + if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP) + is_paused = true; + + goto _exit; + } + + res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + if (res != 0) + goto _exit; + + if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || + d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) + event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); + else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) + event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); + else { + dev_err(&d40c->chan.dev->device, + "[%s] Unknown direction\n", __func__); + goto _exit; + } + status = d40_chan_has_events(d40c); + status = (status & D40_EVENTLINE_MASK(event)) >> + D40_EVENTLINE_POS(event); + + if (status != D40_DMA_RUN) + is_paused = true; + + /* Resume the other logical channels if any */ + if (d40_chan_has_events(d40c)) + res = d40_channel_execute_command(d40c, + D40_DMA_RUN); + +_exit: + spin_unlock_irqrestore(&d40c->lock, flags); + return is_paused; + +} + + +static bool d40_tx_is_linked(struct d40_chan *d40c) +{ + bool is_link; + + if (d40c->log_num != D40_PHY_CHAN) + is_link = readl(&d40c->lcpa->lcsp3) & D40_MEM_LCSP3_DLOS_MASK; + else + is_link = readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDLNK) & + D40_SREG_LNK_PHYS_LNK_MASK; + return is_link; +} + +static u32 d40_residue(struct d40_chan *d40c) +{ + u32 num_elt; + + if (d40c->log_num != D40_PHY_CHAN) + num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK) + >> D40_MEM_LCSP2_ECNT_POS; + else + num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + + D40_CHAN_REG_SDELT) & + D40_SREG_ELEM_PHY_ECNT_MASK) >> D40_SREG_ELEM_PHY_ECNT_POS; + return num_elt * (1 << d40c->dma_cfg.dst_info.data_width); +} + +static int d40_resume(struct dma_chan *chan) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int res = 0; + unsigned long flags; + + spin_lock_irqsave(&d40c->lock, flags); + + if (d40c->log_num != D40_PHY_CHAN) { + res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); + if (res) + goto out; + + /* If bytes left to transfer or linked tx resume job */ + if (d40_residue(d40c) || d40_tx_is_linked(d40c)) { + d40_config_set_event(d40c, true); + res = d40_channel_execute_command(d40c, D40_DMA_RUN); + } + } else if (d40_residue(d40c) || d40_tx_is_linked(d40c)) + res = d40_channel_execute_command(d40c, D40_DMA_RUN); + +out: + spin_unlock_irqrestore(&d40c->lock, flags); + return res; +} + +static u32 stedma40_residue(struct dma_chan *chan) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + u32 bytes_left; + unsigned long flags; + + spin_lock_irqsave(&d40c->lock, flags); + bytes_left = d40_residue(d40c); + spin_unlock_irqrestore(&d40c->lock, flags); + + return bytes_left; +} + +/* Public DMA functions in addition to the DMA engine framework */ + +int stedma40_set_psize(struct dma_chan *chan, + int src_psize, + int dst_psize) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + unsigned long flags; + + spin_lock_irqsave(&d40c->lock, flags); + + if (d40c->log_num != D40_PHY_CHAN) { + d40c->log_def.lcsp1 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK; + d40c->log_def.lcsp3 &= ~D40_MEM_LCSP1_SCFG_PSIZE_MASK; + d40c->log_def.lcsp1 |= src_psize << D40_MEM_LCSP1_SCFG_PSIZE_POS; + d40c->log_def.lcsp3 |= dst_psize << D40_MEM_LCSP1_SCFG_PSIZE_POS; + goto out; + } + + if (src_psize == STEDMA40_PSIZE_PHY_1) + d40c->src_def_cfg &= ~(1 << D40_SREG_CFG_PHY_PEN_POS); + else { + d40c->src_def_cfg |= 1 << D40_SREG_CFG_PHY_PEN_POS; + d40c->src_def_cfg &= ~(STEDMA40_PSIZE_PHY_16 << + D40_SREG_CFG_PSIZE_POS); + d40c->src_def_cfg |= src_psize << D40_SREG_CFG_PSIZE_POS; + } + + if (dst_psize == STEDMA40_PSIZE_PHY_1) + d40c->dst_def_cfg &= ~(1 << D40_SREG_CFG_PHY_PEN_POS); + else { + d40c->dst_def_cfg |= 1 << D40_SREG_CFG_PHY_PEN_POS; + d40c->dst_def_cfg &= ~(STEDMA40_PSIZE_PHY_16 << + D40_SREG_CFG_PSIZE_POS); + d40c->dst_def_cfg |= dst_psize << D40_SREG_CFG_PSIZE_POS; + } +out: + spin_unlock_irqrestore(&d40c->lock, flags); + return 0; +} +EXPORT_SYMBOL(stedma40_set_psize); + +struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, + struct scatterlist *sgl_dst, + struct scatterlist *sgl_src, + unsigned int sgl_len, + unsigned long flags) +{ + int res; + struct d40_desc *d40d; + struct d40_chan *d40c = container_of(chan, struct d40_chan, + chan); + unsigned long flg; + int lli_max = d40c->base->plat_data->llis_per_log; + + + spin_lock_irqsave(&d40c->lock, flg); + d40d = d40_desc_get(d40c); + + if (d40d == NULL) + goto err; + + memset(d40d, 0, sizeof(struct d40_desc)); + d40d->lli_len = sgl_len; + + d40d->txd.flags = flags; + + if (d40c->log_num != D40_PHY_CHAN) { + if (sgl_len > 1) + /* + * Check if there is space available in lcla. If not, + * split list into 1-length and run only in lcpa + * space. + */ + if (d40_lcla_id_get(d40c, + &d40c->base->lcla_pool) != 0) + lli_max = 1; + + if (d40_pool_lli_alloc(d40d, sgl_len, true) < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Out of memory\n", __func__); + goto err; + } + + (void) d40_log_sg_to_lli(d40c->lcla.src_id, + sgl_src, + sgl_len, + d40d->lli_log.src, + d40c->log_def.lcsp1, + d40c->dma_cfg.src_info.data_width, + flags & DMA_PREP_INTERRUPT, lli_max, + d40c->base->plat_data->llis_per_log); + + (void) d40_log_sg_to_lli(d40c->lcla.dst_id, + sgl_dst, + sgl_len, + d40d->lli_log.dst, + d40c->log_def.lcsp3, + d40c->dma_cfg.dst_info.data_width, + flags & DMA_PREP_INTERRUPT, lli_max, + d40c->base->plat_data->llis_per_log); + + + } else { + if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Out of memory\n", __func__); + goto err; + } + + res = d40_phy_sg_to_lli(sgl_src, + sgl_len, + 0, + d40d->lli_phy.src, + d40d->lli_phy.src_addr, + d40c->src_def_cfg, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.src_info.psize, + true); + + if (res < 0) + goto err; + + res = d40_phy_sg_to_lli(sgl_dst, + sgl_len, + 0, + d40d->lli_phy.dst, + d40d->lli_phy.dst_addr, + d40c->dst_def_cfg, + d40c->dma_cfg.dst_info.data_width, + d40c->dma_cfg.dst_info.psize, + true); + + if (res < 0) + goto err; + + (void) dma_map_single(d40c->base->dev, d40d->lli_phy.src, + d40d->lli_pool.size, DMA_TO_DEVICE); + } + + dma_async_tx_descriptor_init(&d40d->txd, chan); + + d40d->txd.tx_submit = d40_tx_submit; + + spin_unlock_irqrestore(&d40c->lock, flg); + + return &d40d->txd; +err: + spin_unlock_irqrestore(&d40c->lock, flg); + return NULL; +} +EXPORT_SYMBOL(stedma40_memcpy_sg); + +bool stedma40_filter(struct dma_chan *chan, void *data) +{ + struct stedma40_chan_cfg *info = data; + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int err; + + if (data) { + err = d40_validate_conf(d40c, info); + if (!err) + d40c->dma_cfg = *info; + } else + err = d40_config_memcpy(d40c); + + return err == 0; +} +EXPORT_SYMBOL(stedma40_filter); + +/* DMA ENGINE functions */ +static int d40_alloc_chan_resources(struct dma_chan *chan) +{ + int err; + unsigned long flags; + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + + spin_lock_irqsave(&d40c->lock, flags); + + d40c->completed = chan->cookie = 1; + + /* + * If no dma configuration is set (channel_type == 0) + * use default configuration + */ + if (d40c->dma_cfg.channel_type == 0) { + err = d40_config_memcpy(d40c); + if (err) + goto err_alloc; + } + + err = d40_allocate_channel(d40c); + if (err) { + dev_err(&d40c->chan.dev->device, + "[%s] Failed to allocate channel\n", __func__); + goto err_alloc; + } + + err = d40_config_chan(d40c, &d40c->dma_cfg); + if (err) { + dev_err(&d40c->chan.dev->device, + "[%s] Failed to configure channel\n", + __func__); + goto err_config; + } + + spin_unlock_irqrestore(&d40c->lock, flags); + return 0; + + err_config: + (void) d40_free_dma(d40c); + err_alloc: + spin_unlock_irqrestore(&d40c->lock, flags); + dev_err(&d40c->chan.dev->device, + "[%s] Channel allocation failed\n", __func__); + return -EINVAL; +} + +static void d40_free_chan_resources(struct dma_chan *chan) +{ + struct d40_chan *d40c = + container_of(chan, struct d40_chan, chan); + int err; + unsigned long flags; + + spin_lock_irqsave(&d40c->lock, flags); + + err = d40_free_dma(d40c); + + if (err) + dev_err(&d40c->chan.dev->device, + "[%s] Failed to free channel\n", __func__); + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, + dma_addr_t dst, + dma_addr_t src, + size_t size, + unsigned long flags) +{ + struct d40_desc *d40d; + struct d40_chan *d40c = container_of(chan, struct d40_chan, + chan); + unsigned long flg; + int err = 0; + + spin_lock_irqsave(&d40c->lock, flg); + d40d = d40_desc_get(d40c); + + if (d40d == NULL) { + dev_err(&d40c->chan.dev->device, + "[%s] Descriptor is NULL\n", __func__); + goto err; + } + + memset(d40d, 0, sizeof(struct d40_desc)); + + d40d->txd.flags = flags; + + dma_async_tx_descriptor_init(&d40d->txd, chan); + + d40d->txd.tx_submit = d40_tx_submit; + + if (d40c->log_num != D40_PHY_CHAN) { + + if (d40_pool_lli_alloc(d40d, 1, true) < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Out of memory\n", __func__); + goto err; + } + d40d->lli_len = 1; + + d40_log_fill_lli(d40d->lli_log.src, + src, + size, + 0, + d40c->log_def.lcsp1, + d40c->dma_cfg.src_info.data_width, + true, true); + + d40_log_fill_lli(d40d->lli_log.dst, + dst, + size, + 0, + d40c->log_def.lcsp3, + d40c->dma_cfg.dst_info.data_width, + true, true); + + } else { + + if (d40_pool_lli_alloc(d40d, 1, false) < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Out of memory\n", __func__); + goto err; + } + + err = d40_phy_fill_lli(d40d->lli_phy.src, + src, + size, + d40c->dma_cfg.src_info.psize, + 0, + d40c->src_def_cfg, + true, + d40c->dma_cfg.src_info.data_width, + false); + if (err) + goto err_fill_lli; + + err = d40_phy_fill_lli(d40d->lli_phy.dst, + dst, + size, + d40c->dma_cfg.dst_info.psize, + 0, + d40c->dst_def_cfg, + true, + d40c->dma_cfg.dst_info.data_width, + false); + + if (err) + goto err_fill_lli; + + (void) dma_map_single(d40c->base->dev, d40d->lli_phy.src, + d40d->lli_pool.size, DMA_TO_DEVICE); + } + + spin_unlock_irqrestore(&d40c->lock, flg); + return &d40d->txd; + +err_fill_lli: + dev_err(&d40c->chan.dev->device, + "[%s] Failed filling in PHY LLI\n", __func__); + d40_pool_lli_free(d40d); +err: + spin_unlock_irqrestore(&d40c->lock, flg); + return NULL; +} + +static int d40_prep_slave_sg_log(struct d40_desc *d40d, + struct d40_chan *d40c, + struct scatterlist *sgl, + unsigned int sg_len, + enum dma_data_direction direction, + unsigned long flags) +{ + dma_addr_t dev_addr = 0; + int total_size; + int lli_max = d40c->base->plat_data->llis_per_log; + + if (d40_pool_lli_alloc(d40d, sg_len, true) < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Out of memory\n", __func__); + return -ENOMEM; + } + + d40d->lli_len = sg_len; + d40d->lli_tcount = 0; + + if (sg_len > 1) + /* + * Check if there is space available in lcla. + * If not, split list into 1-length and run only + * in lcpa space. + */ + if (d40_lcla_id_get(d40c, &d40c->base->lcla_pool) != 0) + lli_max = 1; + + if (direction == DMA_FROM_DEVICE) { + dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; + total_size = d40_log_sg_to_dev(&d40c->lcla, + sgl, sg_len, + &d40d->lli_log, + &d40c->log_def, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width, + direction, + flags & DMA_PREP_INTERRUPT, + dev_addr, lli_max, + d40c->base->plat_data->llis_per_log); + } else if (direction == DMA_TO_DEVICE) { + dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; + total_size = d40_log_sg_to_dev(&d40c->lcla, + sgl, sg_len, + &d40d->lli_log, + &d40c->log_def, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width, + direction, + flags & DMA_PREP_INTERRUPT, + dev_addr, lli_max, + d40c->base->plat_data->llis_per_log); + } else + return -EINVAL; + if (total_size < 0) + return -EINVAL; + + return 0; +} + +static int d40_prep_slave_sg_phy(struct d40_desc *d40d, + struct d40_chan *d40c, + struct scatterlist *sgl, + unsigned int sgl_len, + enum dma_data_direction direction, + unsigned long flags) +{ + dma_addr_t src_dev_addr; + dma_addr_t dst_dev_addr; + int res; + + if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Out of memory\n", __func__); + return -ENOMEM; + } + + d40d->lli_len = sgl_len; + d40d->lli_tcount = 0; + + if (direction == DMA_FROM_DEVICE) { + dst_dev_addr = 0; + src_dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; + } else if (direction == DMA_TO_DEVICE) { + dst_dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; + src_dev_addr = 0; + } else + return -EINVAL; + + res = d40_phy_sg_to_lli(sgl, + sgl_len, + src_dev_addr, + d40d->lli_phy.src, + d40d->lli_phy.src_addr, + d40c->src_def_cfg, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.src_info.psize, + true); + if (res < 0) + return res; + + res = d40_phy_sg_to_lli(sgl, + sgl_len, + dst_dev_addr, + d40d->lli_phy.dst, + d40d->lli_phy.dst_addr, + d40c->dst_def_cfg, + d40c->dma_cfg.dst_info.data_width, + d40c->dma_cfg.dst_info.psize, + true); + if (res < 0) + return res; + + (void) dma_map_single(d40c->base->dev, d40d->lli_phy.src, + d40d->lli_pool.size, DMA_TO_DEVICE); + return 0; +} + +static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, + unsigned int sg_len, + enum dma_data_direction direction, + unsigned long flags) +{ + struct d40_desc *d40d; + struct d40_chan *d40c = container_of(chan, struct d40_chan, + chan); + unsigned long flg; + int err; + + if (d40c->dma_cfg.pre_transfer) + d40c->dma_cfg.pre_transfer(chan, + d40c->dma_cfg.pre_transfer_data, + sg_dma_len(sgl)); + + spin_lock_irqsave(&d40c->lock, flg); + d40d = d40_desc_get(d40c); + spin_unlock_irqrestore(&d40c->lock, flg); + + if (d40d == NULL) + return NULL; + + memset(d40d, 0, sizeof(struct d40_desc)); + + if (d40c->log_num != D40_PHY_CHAN) + err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len, + direction, flags); + else + err = d40_prep_slave_sg_phy(d40d, d40c, sgl, sg_len, + direction, flags); + if (err) { + dev_err(&d40c->chan.dev->device, + "[%s] Failed to prepare %s slave sg job: %d\n", + __func__, + d40c->log_num != D40_PHY_CHAN ? "log" : "phy", err); + return NULL; + } + + d40d->txd.flags = flags; + + dma_async_tx_descriptor_init(&d40d->txd, chan); + + d40d->txd.tx_submit = d40_tx_submit; + + return &d40d->txd; +} + +static enum dma_status d40_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + int ret; + + last_complete = d40c->completed; + last_used = chan->cookie; + + if (d40_is_paused(d40c)) + ret = DMA_PAUSED; + else + ret = dma_async_is_complete(cookie, last_complete, last_used); + + dma_set_tx_state(txstate, last_complete, last_used, + stedma40_residue(chan)); + + return ret; +} + +static void d40_issue_pending(struct dma_chan *chan) +{ + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + unsigned long flags; + + spin_lock_irqsave(&d40c->lock, flags); + + /* Busy means that pending jobs are already being processed */ + if (!d40c->busy) + (void) d40_queue_start(d40c); + + spin_unlock_irqrestore(&d40c->lock, flags); +} + +static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + unsigned long flags; + struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + + switch (cmd) { + case DMA_TERMINATE_ALL: + spin_lock_irqsave(&d40c->lock, flags); + d40_term_all(d40c); + spin_unlock_irqrestore(&d40c->lock, flags); + return 0; + case DMA_PAUSE: + return d40_pause(chan); + case DMA_RESUME: + return d40_resume(chan); + } + + /* Other commands are unimplemented */ + return -ENXIO; +} + +/* Initialization functions */ + +static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, + struct d40_chan *chans, int offset, + int num_chans) +{ + int i = 0; + struct d40_chan *d40c; + + INIT_LIST_HEAD(&dma->channels); + + for (i = offset; i < offset + num_chans; i++) { + d40c = &chans[i]; + d40c->base = base; + d40c->chan.device = dma; + + /* Invalidate lcla element */ + d40c->lcla.src_id = -1; + d40c->lcla.dst_id = -1; + + spin_lock_init(&d40c->lock); + + d40c->log_num = D40_PHY_CHAN; + + INIT_LIST_HEAD(&d40c->free); + INIT_LIST_HEAD(&d40c->active); + INIT_LIST_HEAD(&d40c->queue); + INIT_LIST_HEAD(&d40c->client); + + d40c->free_len = 0; + + tasklet_init(&d40c->tasklet, dma_tasklet, + (unsigned long) d40c); + + list_add_tail(&d40c->chan.device_node, + &dma->channels); + } +} + +static int __init d40_dmaengine_init(struct d40_base *base, + int num_reserved_chans) +{ + int err ; + + d40_chan_init(base, &base->dma_slave, base->log_chans, + 0, base->num_log_chans); + + dma_cap_zero(base->dma_slave.cap_mask); + dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask); + + base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources; + base->dma_slave.device_free_chan_resources = d40_free_chan_resources; + base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy; + base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg; + base->dma_slave.device_tx_status = d40_tx_status; + base->dma_slave.device_issue_pending = d40_issue_pending; + base->dma_slave.device_control = d40_control; + base->dma_slave.dev = base->dev; + + err = dma_async_device_register(&base->dma_slave); + + if (err) { + dev_err(base->dev, + "[%s] Failed to register slave channels\n", + __func__); + goto failure1; + } + + d40_chan_init(base, &base->dma_memcpy, base->log_chans, + base->num_log_chans, base->plat_data->memcpy_len); + + dma_cap_zero(base->dma_memcpy.cap_mask); + dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask); + + base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources; + base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources; + base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy; + base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg; + base->dma_memcpy.device_tx_status = d40_tx_status; + base->dma_memcpy.device_issue_pending = d40_issue_pending; + base->dma_memcpy.device_control = d40_control; + base->dma_memcpy.dev = base->dev; + /* + * This controller can only access address at even + * 32bit boundaries, i.e. 2^2 + */ + base->dma_memcpy.copy_align = 2; + + err = dma_async_device_register(&base->dma_memcpy); + + if (err) { + dev_err(base->dev, + "[%s] Failed to regsiter memcpy only channels\n", + __func__); + goto failure2; + } + + d40_chan_init(base, &base->dma_both, base->phy_chans, + 0, num_reserved_chans); + + dma_cap_zero(base->dma_both.cap_mask); + dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask); + dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask); + + base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources; + base->dma_both.device_free_chan_resources = d40_free_chan_resources; + base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy; + base->dma_both.device_prep_slave_sg = d40_prep_slave_sg; + base->dma_both.device_tx_status = d40_tx_status; + base->dma_both.device_issue_pending = d40_issue_pending; + base->dma_both.device_control = d40_control; + base->dma_both.dev = base->dev; + base->dma_both.copy_align = 2; + err = dma_async_device_register(&base->dma_both); + + if (err) { + dev_err(base->dev, + "[%s] Failed to register logical and physical capable channels\n", + __func__); + goto failure3; + } + return 0; +failure3: + dma_async_device_unregister(&base->dma_memcpy); +failure2: + dma_async_device_unregister(&base->dma_slave); +failure1: + return err; +} + +/* Initialization functions. */ + +static int __init d40_phy_res_init(struct d40_base *base) +{ + int i; + int num_phy_chans_avail = 0; + u32 val[2]; + int odd_even_bit = -2; + + val[0] = readl(base->virtbase + D40_DREG_PRSME); + val[1] = readl(base->virtbase + D40_DREG_PRSMO); + + for (i = 0; i < base->num_phy_chans; i++) { + base->phy_res[i].num = i; + odd_even_bit += 2 * ((i % 2) == 0); + if (((val[i % 2] >> odd_even_bit) & 3) == 1) { + /* Mark security only channels as occupied */ + base->phy_res[i].allocated_src = D40_ALLOC_PHY; + base->phy_res[i].allocated_dst = D40_ALLOC_PHY; + } else { + base->phy_res[i].allocated_src = D40_ALLOC_FREE; + base->phy_res[i].allocated_dst = D40_ALLOC_FREE; + num_phy_chans_avail++; + } + spin_lock_init(&base->phy_res[i].lock); + } + dev_info(base->dev, "%d of %d physical DMA channels available\n", + num_phy_chans_avail, base->num_phy_chans); + + /* Verify settings extended vs standard */ + val[0] = readl(base->virtbase + D40_DREG_PRTYP); + + for (i = 0; i < base->num_phy_chans; i++) { + + if (base->phy_res[i].allocated_src == D40_ALLOC_FREE && + (val[0] & 0x3) != 1) + dev_info(base->dev, + "[%s] INFO: channel %d is misconfigured (%d)\n", + __func__, i, val[0] & 0x3); + + val[0] = val[0] >> 2; + } + + return num_phy_chans_avail; +} + +static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) +{ + static const struct d40_reg_val dma_id_regs[] = { + /* Peripheral Id */ + { .reg = D40_DREG_PERIPHID0, .val = 0x0040}, + { .reg = D40_DREG_PERIPHID1, .val = 0x0000}, + /* + * D40_DREG_PERIPHID2 Depends on HW revision: + * MOP500/HREF ED has 0x0008, + * ? has 0x0018, + * HREF V1 has 0x0028 + */ + { .reg = D40_DREG_PERIPHID3, .val = 0x0000}, + + /* PCell Id */ + { .reg = D40_DREG_CELLID0, .val = 0x000d}, + { .reg = D40_DREG_CELLID1, .val = 0x00f0}, + { .reg = D40_DREG_CELLID2, .val = 0x0005}, + { .reg = D40_DREG_CELLID3, .val = 0x00b1} + }; + struct stedma40_platform_data *plat_data; + struct clk *clk = NULL; + void __iomem *virtbase = NULL; + struct resource *res = NULL; + struct d40_base *base = NULL; + int num_log_chans = 0; + int num_phy_chans; + int i; + + clk = clk_get(&pdev->dev, NULL); + + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "[%s] No matching clock found\n", + __func__); + goto failure; + } + + clk_enable(clk); + + /* Get IO for DMAC base address */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base"); + if (!res) + goto failure; + + if (request_mem_region(res->start, resource_size(res), + D40_NAME " I/O base") == NULL) + goto failure; + + virtbase = ioremap(res->start, resource_size(res)); + if (!virtbase) + goto failure; + + /* HW version check */ + for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) { + if (dma_id_regs[i].val != + readl(virtbase + dma_id_regs[i].reg)) { + dev_err(&pdev->dev, + "[%s] Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n", + __func__, + dma_id_regs[i].val, + dma_id_regs[i].reg, + readl(virtbase + dma_id_regs[i].reg)); + goto failure; + } + } + + i = readl(virtbase + D40_DREG_PERIPHID2); + + if ((i & 0xf) != D40_PERIPHID2_DESIGNER) { + dev_err(&pdev->dev, + "[%s] Unknown designer! Got %x wanted %x\n", + __func__, i & 0xf, D40_PERIPHID2_DESIGNER); + goto failure; + } + + /* The number of physical channels on this HW */ + num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; + + dev_info(&pdev->dev, "hardware revision: %d @ 0x%x\n", + (i >> 4) & 0xf, res->start); + + plat_data = pdev->dev.platform_data; + + /* Count the number of logical channels in use */ + for (i = 0; i < plat_data->dev_len; i++) + if (plat_data->dev_rx[i] != 0) + num_log_chans++; + + for (i = 0; i < plat_data->dev_len; i++) + if (plat_data->dev_tx[i] != 0) + num_log_chans++; + + base = kzalloc(ALIGN(sizeof(struct d40_base), 4) + + (num_phy_chans + num_log_chans + plat_data->memcpy_len) * + sizeof(struct d40_chan), GFP_KERNEL); + + if (base == NULL) { + dev_err(&pdev->dev, "[%s] Out of memory\n", __func__); + goto failure; + } + + base->clk = clk; + base->num_phy_chans = num_phy_chans; + base->num_log_chans = num_log_chans; + base->phy_start = res->start; + base->phy_size = resource_size(res); + base->virtbase = virtbase; + base->plat_data = plat_data; + base->dev = &pdev->dev; + base->phy_chans = ((void *)base) + ALIGN(sizeof(struct d40_base), 4); + base->log_chans = &base->phy_chans[num_phy_chans]; + + base->phy_res = kzalloc(num_phy_chans * sizeof(struct d40_phy_res), + GFP_KERNEL); + if (!base->phy_res) + goto failure; + + base->lookup_phy_chans = kzalloc(num_phy_chans * + sizeof(struct d40_chan *), + GFP_KERNEL); + if (!base->lookup_phy_chans) + goto failure; + + if (num_log_chans + plat_data->memcpy_len) { + /* + * The max number of logical channels are event lines for all + * src devices and dst devices + */ + base->lookup_log_chans = kzalloc(plat_data->dev_len * 2 * + sizeof(struct d40_chan *), + GFP_KERNEL); + if (!base->lookup_log_chans) + goto failure; + } + base->lcla_pool.alloc_map = kzalloc(num_phy_chans * sizeof(u32), + GFP_KERNEL); + if (!base->lcla_pool.alloc_map) + goto failure; + + return base; + +failure: + if (clk) { + clk_disable(clk); + clk_put(clk); + } + if (virtbase) + iounmap(virtbase); + if (res) + release_mem_region(res->start, + resource_size(res)); + if (virtbase) + iounmap(virtbase); + + if (base) { + kfree(base->lcla_pool.alloc_map); + kfree(base->lookup_log_chans); + kfree(base->lookup_phy_chans); + kfree(base->phy_res); + kfree(base); + } + + return NULL; +} + +static void __init d40_hw_init(struct d40_base *base) +{ + + static const struct d40_reg_val dma_init_reg[] = { + /* Clock every part of the DMA block from start */ + { .reg = D40_DREG_GCC, .val = 0x0000ff01}, + + /* Interrupts on all logical channels */ + { .reg = D40_DREG_LCMIS0, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCMIS1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCMIS2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCMIS3, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR0, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCICR3, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS0, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS1, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS2, .val = 0xFFFFFFFF}, + { .reg = D40_DREG_LCTIS3, .val = 0xFFFFFFFF} + }; + int i; + u32 prmseo[2] = {0, 0}; + u32 activeo[2] = {0xFFFFFFFF, 0xFFFFFFFF}; + u32 pcmis = 0; + u32 pcicr = 0; + + for (i = 0; i < ARRAY_SIZE(dma_init_reg); i++) + writel(dma_init_reg[i].val, + base->virtbase + dma_init_reg[i].reg); + + /* Configure all our dma channels to default settings */ + for (i = 0; i < base->num_phy_chans; i++) { + + activeo[i % 2] = activeo[i % 2] << 2; + + if (base->phy_res[base->num_phy_chans - i - 1].allocated_src + == D40_ALLOC_PHY) { + activeo[i % 2] |= 3; + continue; + } + + /* Enable interrupt # */ + pcmis = (pcmis << 1) | 1; + + /* Clear interrupt # */ + pcicr = (pcicr << 1) | 1; + + /* Set channel to physical mode */ + prmseo[i % 2] = prmseo[i % 2] << 2; + prmseo[i % 2] |= 1; + + } + + writel(prmseo[1], base->virtbase + D40_DREG_PRMSE); + writel(prmseo[0], base->virtbase + D40_DREG_PRMSO); + writel(activeo[1], base->virtbase + D40_DREG_ACTIVE); + writel(activeo[0], base->virtbase + D40_DREG_ACTIVO); + + /* Write which interrupt to enable */ + writel(pcmis, base->virtbase + D40_DREG_PCMIS); + + /* Write which interrupt to clear */ + writel(pcicr, base->virtbase + D40_DREG_PCICR); + +} + +static int __init d40_probe(struct platform_device *pdev) +{ + int err; + int ret = -ENOENT; + struct d40_base *base; + struct resource *res = NULL; + int num_reserved_chans; + u32 val; + + base = d40_hw_detect_init(pdev); + + if (!base) + goto failure; + + num_reserved_chans = d40_phy_res_init(base); + + platform_set_drvdata(pdev, base); + + spin_lock_init(&base->interrupt_lock); + spin_lock_init(&base->execmd_lock); + + /* Get IO for logical channel parameter address */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcpa"); + if (!res) { + ret = -ENOENT; + dev_err(&pdev->dev, + "[%s] No \"lcpa\" memory resource\n", + __func__); + goto failure; + } + base->lcpa_size = resource_size(res); + base->phy_lcpa = res->start; + + if (request_mem_region(res->start, resource_size(res), + D40_NAME " I/O lcpa") == NULL) { + ret = -EBUSY; + dev_err(&pdev->dev, + "[%s] Failed to request LCPA region 0x%x-0x%x\n", + __func__, res->start, res->end); + goto failure; + } + + /* We make use of ESRAM memory for this. */ + val = readl(base->virtbase + D40_DREG_LCPA); + if (res->start != val && val != 0) { + dev_warn(&pdev->dev, + "[%s] Mismatch LCPA dma 0x%x, def 0x%x\n", + __func__, val, res->start); + } else + writel(res->start, base->virtbase + D40_DREG_LCPA); + + base->lcpa_base = ioremap(res->start, resource_size(res)); + if (!base->lcpa_base) { + ret = -ENOMEM; + dev_err(&pdev->dev, + "[%s] Failed to ioremap LCPA region\n", + __func__); + goto failure; + } + /* Get IO for logical channel link address */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcla"); + if (!res) { + ret = -ENOENT; + dev_err(&pdev->dev, + "[%s] No \"lcla\" resource defined\n", + __func__); + goto failure; + } + + base->lcla_pool.base_size = resource_size(res); + base->lcla_pool.phy = res->start; + + if (request_mem_region(res->start, resource_size(res), + D40_NAME " I/O lcla") == NULL) { + ret = -EBUSY; + dev_err(&pdev->dev, + "[%s] Failed to request LCLA region 0x%x-0x%x\n", + __func__, res->start, res->end); + goto failure; + } + val = readl(base->virtbase + D40_DREG_LCLA); + if (res->start != val && val != 0) { + dev_warn(&pdev->dev, + "[%s] Mismatch LCLA dma 0x%x, def 0x%x\n", + __func__, val, res->start); + } else + writel(res->start, base->virtbase + D40_DREG_LCLA); + + base->lcla_pool.base = ioremap(res->start, resource_size(res)); + if (!base->lcla_pool.base) { + ret = -ENOMEM; + dev_err(&pdev->dev, + "[%s] Failed to ioremap LCLA 0x%x-0x%x\n", + __func__, res->start, res->end); + goto failure; + } + + spin_lock_init(&base->lcla_pool.lock); + + base->lcla_pool.num_blocks = base->num_phy_chans; + + base->irq = platform_get_irq(pdev, 0); + + ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base); + + if (ret) { + dev_err(&pdev->dev, "[%s] No IRQ defined\n", __func__); + goto failure; + } + + err = d40_dmaengine_init(base, num_reserved_chans); + if (err) + goto failure; + + d40_hw_init(base); + + dev_info(base->dev, "initialized\n"); + return 0; + +failure: + if (base) { + if (base->virtbase) + iounmap(base->virtbase); + if (base->lcla_pool.phy) + release_mem_region(base->lcla_pool.phy, + base->lcla_pool.base_size); + if (base->phy_lcpa) + release_mem_region(base->phy_lcpa, + base->lcpa_size); + if (base->phy_start) + release_mem_region(base->phy_start, + base->phy_size); + if (base->clk) { + clk_disable(base->clk); + clk_put(base->clk); + } + + kfree(base->lcla_pool.alloc_map); + kfree(base->lookup_log_chans); + kfree(base->lookup_phy_chans); + kfree(base->phy_res); + kfree(base); + } + + dev_err(&pdev->dev, "[%s] probe failed\n", __func__); + return ret; +} + +static struct platform_driver d40_driver = { + .driver = { + .owner = THIS_MODULE, + .name = D40_NAME, + }, +}; + +int __init stedma40_init(void) +{ + return platform_driver_probe(&d40_driver, d40_probe); +} +arch_initcall(stedma40_init); diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c new file mode 100644 index 000000000000..561fdd8a80c1 --- /dev/null +++ b/drivers/dma/ste_dma40_ll.c @@ -0,0 +1,454 @@ +/* + * driver/dma/ste_dma40_ll.c + * + * Copyright (C) ST-Ericsson 2007-2010 + * License terms: GNU General Public License (GPL) version 2 + * Author: Per Friden <per.friden@stericsson.com> + * Author: Jonas Aaberg <jonas.aberg@stericsson.com> + */ + +#include <linux/kernel.h> +#include <plat/ste_dma40.h> + +#include "ste_dma40_ll.h" + +/* Sets up proper LCSP1 and LCSP3 register for a logical channel */ +void d40_log_cfg(struct stedma40_chan_cfg *cfg, + u32 *lcsp1, u32 *lcsp3) +{ + u32 l3 = 0; /* dst */ + u32 l1 = 0; /* src */ + + /* src is mem? -> increase address pos */ + if (cfg->dir == STEDMA40_MEM_TO_PERIPH || + cfg->dir == STEDMA40_MEM_TO_MEM) + l1 |= 1 << D40_MEM_LCSP1_SCFG_INCR_POS; + + /* dst is mem? -> increase address pos */ + if (cfg->dir == STEDMA40_PERIPH_TO_MEM || + cfg->dir == STEDMA40_MEM_TO_MEM) + l3 |= 1 << D40_MEM_LCSP3_DCFG_INCR_POS; + + /* src is hw? -> master port 1 */ + if (cfg->dir == STEDMA40_PERIPH_TO_MEM || + cfg->dir == STEDMA40_PERIPH_TO_PERIPH) + l1 |= 1 << D40_MEM_LCSP1_SCFG_MST_POS; + + /* dst is hw? -> master port 1 */ + if (cfg->dir == STEDMA40_MEM_TO_PERIPH || + cfg->dir == STEDMA40_PERIPH_TO_PERIPH) + l3 |= 1 << D40_MEM_LCSP3_DCFG_MST_POS; + + l3 |= 1 << D40_MEM_LCSP3_DCFG_TIM_POS; + l3 |= 1 << D40_MEM_LCSP3_DCFG_EIM_POS; + l3 |= cfg->dst_info.psize << D40_MEM_LCSP3_DCFG_PSIZE_POS; + l3 |= cfg->dst_info.data_width << D40_MEM_LCSP3_DCFG_ESIZE_POS; + l3 |= 1 << D40_MEM_LCSP3_DTCP_POS; + + l1 |= 1 << D40_MEM_LCSP1_SCFG_EIM_POS; + l1 |= cfg->src_info.psize << D40_MEM_LCSP1_SCFG_PSIZE_POS; + l1 |= cfg->src_info.data_width << D40_MEM_LCSP1_SCFG_ESIZE_POS; + l1 |= 1 << D40_MEM_LCSP1_STCP_POS; + + *lcsp1 = l1; + *lcsp3 = l3; + +} + +/* Sets up SRC and DST CFG register for both logical and physical channels */ +void d40_phy_cfg(struct stedma40_chan_cfg *cfg, + u32 *src_cfg, u32 *dst_cfg, bool is_log) +{ + u32 src = 0; + u32 dst = 0; + + if (!is_log) { + /* Physical channel */ + if ((cfg->dir == STEDMA40_PERIPH_TO_MEM) || + (cfg->dir == STEDMA40_PERIPH_TO_PERIPH)) { + /* Set master port to 1 */ + src |= 1 << D40_SREG_CFG_MST_POS; + src |= D40_TYPE_TO_EVENT(cfg->src_dev_type); + + if (cfg->src_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL) + src |= 1 << D40_SREG_CFG_PHY_TM_POS; + else + src |= 3 << D40_SREG_CFG_PHY_TM_POS; + } + if ((cfg->dir == STEDMA40_MEM_TO_PERIPH) || + (cfg->dir == STEDMA40_PERIPH_TO_PERIPH)) { + /* Set master port to 1 */ + dst |= 1 << D40_SREG_CFG_MST_POS; + dst |= D40_TYPE_TO_EVENT(cfg->dst_dev_type); + + if (cfg->dst_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL) + dst |= 1 << D40_SREG_CFG_PHY_TM_POS; + else + dst |= 3 << D40_SREG_CFG_PHY_TM_POS; + } + /* Interrupt on end of transfer for destination */ + dst |= 1 << D40_SREG_CFG_TIM_POS; + + /* Generate interrupt on error */ + src |= 1 << D40_SREG_CFG_EIM_POS; + dst |= 1 << D40_SREG_CFG_EIM_POS; + + /* PSIZE */ + if (cfg->src_info.psize != STEDMA40_PSIZE_PHY_1) { + src |= 1 << D40_SREG_CFG_PHY_PEN_POS; + src |= cfg->src_info.psize << D40_SREG_CFG_PSIZE_POS; + } + if (cfg->dst_info.psize != STEDMA40_PSIZE_PHY_1) { + dst |= 1 << D40_SREG_CFG_PHY_PEN_POS; + dst |= cfg->dst_info.psize << D40_SREG_CFG_PSIZE_POS; + } + + /* Element size */ + src |= cfg->src_info.data_width << D40_SREG_CFG_ESIZE_POS; + dst |= cfg->dst_info.data_width << D40_SREG_CFG_ESIZE_POS; + + } else { + /* Logical channel */ + dst |= 1 << D40_SREG_CFG_LOG_GIM_POS; + src |= 1 << D40_SREG_CFG_LOG_GIM_POS; + } + + if (cfg->channel_type & STEDMA40_HIGH_PRIORITY_CHANNEL) { + src |= 1 << D40_SREG_CFG_PRI_POS; + dst |= 1 << D40_SREG_CFG_PRI_POS; + } + + src |= cfg->src_info.endianess << D40_SREG_CFG_LBE_POS; + dst |= cfg->dst_info.endianess << D40_SREG_CFG_LBE_POS; + + *src_cfg = src; + *dst_cfg = dst; +} + +int d40_phy_fill_lli(struct d40_phy_lli *lli, + dma_addr_t data, + u32 data_size, + int psize, + dma_addr_t next_lli, + u32 reg_cfg, + bool term_int, + u32 data_width, + bool is_device) +{ + int num_elems; + + if (psize == STEDMA40_PSIZE_PHY_1) + num_elems = 1; + else + num_elems = 2 << psize; + + /* + * Size is 16bit. data_width is 8, 16, 32 or 64 bit + * Block large than 64 KiB must be split. + */ + if (data_size > (0xffff << data_width)) + return -EINVAL; + + /* Must be aligned */ + if (!IS_ALIGNED(data, 0x1 << data_width)) + return -EINVAL; + + /* Transfer size can't be smaller than (num_elms * elem_size) */ + if (data_size < num_elems * (0x1 << data_width)) + return -EINVAL; + + /* The number of elements. IE now many chunks */ + lli->reg_elt = (data_size >> data_width) << D40_SREG_ELEM_PHY_ECNT_POS; + + /* + * Distance to next element sized entry. + * Usually the size of the element unless you want gaps. + */ + if (!is_device) + lli->reg_elt |= (0x1 << data_width) << + D40_SREG_ELEM_PHY_EIDX_POS; + + /* Where the data is */ + lli->reg_ptr = data; + lli->reg_cfg = reg_cfg; + + /* If this scatter list entry is the last one, no next link */ + if (next_lli == 0) + lli->reg_lnk = 0x1 << D40_SREG_LNK_PHY_TCP_POS; + else + lli->reg_lnk = next_lli; + + /* Set/clear interrupt generation on this link item.*/ + if (term_int) + lli->reg_cfg |= 0x1 << D40_SREG_CFG_TIM_POS; + else + lli->reg_cfg &= ~(0x1 << D40_SREG_CFG_TIM_POS); + + /* Post link */ + lli->reg_lnk |= 0 << D40_SREG_LNK_PHY_PRE_POS; + + return 0; +} + +int d40_phy_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t target, + struct d40_phy_lli *lli, + dma_addr_t lli_phys, + u32 reg_cfg, + u32 data_width, + int psize, + bool term_int) +{ + int total_size = 0; + int i; + struct scatterlist *current_sg = sg; + dma_addr_t next_lli_phys; + dma_addr_t dst; + int err = 0; + + for_each_sg(sg, current_sg, sg_len, i) { + + total_size += sg_dma_len(current_sg); + + /* If this scatter list entry is the last one, no next link */ + if (sg_len - 1 == i) + next_lli_phys = 0; + else + next_lli_phys = ALIGN(lli_phys + (i + 1) * + sizeof(struct d40_phy_lli), + D40_LLI_ALIGN); + + if (target) + dst = target; + else + dst = sg_phys(current_sg); + + err = d40_phy_fill_lli(&lli[i], + dst, + sg_dma_len(current_sg), + psize, + next_lli_phys, + reg_cfg, + !next_lli_phys, + data_width, + target == dst); + if (err) + goto err; + } + + return total_size; + err: + return err; +} + + +void d40_phy_lli_write(void __iomem *virtbase, + u32 phy_chan_num, + struct d40_phy_lli *lli_dst, + struct d40_phy_lli *lli_src) +{ + + writel(lli_src->reg_cfg, virtbase + D40_DREG_PCBASE + + phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSCFG); + writel(lli_src->reg_elt, virtbase + D40_DREG_PCBASE + + phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSELT); + writel(lli_src->reg_ptr, virtbase + D40_DREG_PCBASE + + phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSPTR); + writel(lli_src->reg_lnk, virtbase + D40_DREG_PCBASE + + phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSLNK); + + writel(lli_dst->reg_cfg, virtbase + D40_DREG_PCBASE + + phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDCFG); + writel(lli_dst->reg_elt, virtbase + D40_DREG_PCBASE + + phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDELT); + writel(lli_dst->reg_ptr, virtbase + D40_DREG_PCBASE + + phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDPTR); + writel(lli_dst->reg_lnk, virtbase + D40_DREG_PCBASE + + phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDLNK); + +} + +/* DMA logical lli operations */ + +void d40_log_fill_lli(struct d40_log_lli *lli, + dma_addr_t data, u32 data_size, + u32 lli_next_off, u32 reg_cfg, + u32 data_width, + bool term_int, bool addr_inc) +{ + lli->lcsp13 = reg_cfg; + + /* The number of elements to transfer */ + lli->lcsp02 = ((data_size >> data_width) << + D40_MEM_LCSP0_ECNT_POS) & D40_MEM_LCSP0_ECNT_MASK; + /* 16 LSBs address of the current element */ + lli->lcsp02 |= data & D40_MEM_LCSP0_SPTR_MASK; + /* 16 MSBs address of the current element */ + lli->lcsp13 |= data & D40_MEM_LCSP1_SPTR_MASK; + + if (addr_inc) + lli->lcsp13 |= D40_MEM_LCSP1_SCFG_INCR_MASK; + + lli->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK; + /* If this scatter list entry is the last one, no next link */ + lli->lcsp13 |= (lli_next_off << D40_MEM_LCSP1_SLOS_POS) & + D40_MEM_LCSP1_SLOS_MASK; + + if (term_int) + lli->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK; + else + lli->lcsp13 &= ~D40_MEM_LCSP1_SCFG_TIM_MASK; +} + +int d40_log_sg_to_dev(struct d40_lcla_elem *lcla, + struct scatterlist *sg, + int sg_len, + struct d40_log_lli_bidir *lli, + struct d40_def_lcsp *lcsp, + u32 src_data_width, + u32 dst_data_width, + enum dma_data_direction direction, + bool term_int, dma_addr_t dev_addr, int max_len, + int llis_per_log) +{ + int total_size = 0; + struct scatterlist *current_sg = sg; + int i; + u32 next_lli_off_dst; + u32 next_lli_off_src; + + next_lli_off_src = 0; + next_lli_off_dst = 0; + + for_each_sg(sg, current_sg, sg_len, i) { + total_size += sg_dma_len(current_sg); + + /* + * If this scatter list entry is the last one or + * max length, terminate link. + */ + if (sg_len - 1 == i || ((i+1) % max_len == 0)) { + next_lli_off_src = 0; + next_lli_off_dst = 0; + } else { + if (next_lli_off_dst == 0 && + next_lli_off_src == 0) { + /* The first lli will be at next_lli_off */ + next_lli_off_dst = (lcla->dst_id * + llis_per_log + 1); + next_lli_off_src = (lcla->src_id * + llis_per_log + 1); + } else { + next_lli_off_dst++; + next_lli_off_src++; + } + } + + if (direction == DMA_TO_DEVICE) { + d40_log_fill_lli(&lli->src[i], + sg_phys(current_sg), + sg_dma_len(current_sg), + next_lli_off_src, + lcsp->lcsp1, src_data_width, + term_int && !next_lli_off_src, + true); + d40_log_fill_lli(&lli->dst[i], + dev_addr, + sg_dma_len(current_sg), + next_lli_off_dst, + lcsp->lcsp3, dst_data_width, + /* No next == terminal interrupt */ + term_int && !next_lli_off_dst, + false); + } else { + d40_log_fill_lli(&lli->dst[i], + sg_phys(current_sg), + sg_dma_len(current_sg), + next_lli_off_dst, + lcsp->lcsp3, dst_data_width, + /* No next == terminal interrupt */ + term_int && !next_lli_off_dst, + true); + d40_log_fill_lli(&lli->src[i], + dev_addr, + sg_dma_len(current_sg), + next_lli_off_src, + lcsp->lcsp1, src_data_width, + term_int && !next_lli_off_src, + false); + } + } + return total_size; +} + +int d40_log_sg_to_lli(int lcla_id, + struct scatterlist *sg, + int sg_len, + struct d40_log_lli *lli_sg, + u32 lcsp13, /* src or dst*/ + u32 data_width, + bool term_int, int max_len, int llis_per_log) +{ + int total_size = 0; + struct scatterlist *current_sg = sg; + int i; + u32 next_lli_off = 0; + + for_each_sg(sg, current_sg, sg_len, i) { + total_size += sg_dma_len(current_sg); + + /* + * If this scatter list entry is the last one or + * max length, terminate link. + */ + if (sg_len - 1 == i || ((i+1) % max_len == 0)) + next_lli_off = 0; + else { + if (next_lli_off == 0) + /* The first lli will be at next_lli_off */ + next_lli_off = lcla_id * llis_per_log + 1; + else + next_lli_off++; + } + + d40_log_fill_lli(&lli_sg[i], + sg_phys(current_sg), + sg_dma_len(current_sg), + next_lli_off, + lcsp13, data_width, + term_int && !next_lli_off, + true); + } + return total_size; +} + +void d40_log_lli_write(struct d40_log_lli_full *lcpa, + struct d40_log_lli *lcla_src, + struct d40_log_lli *lcla_dst, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int llis_per_log) +{ + u32 slos = 0; + u32 dlos = 0; + int i; + + lcpa->lcsp0 = lli_src->lcsp02; + lcpa->lcsp1 = lli_src->lcsp13; + lcpa->lcsp2 = lli_dst->lcsp02; + lcpa->lcsp3 = lli_dst->lcsp13; + + slos = lli_src->lcsp13 & D40_MEM_LCSP1_SLOS_MASK; + dlos = lli_dst->lcsp13 & D40_MEM_LCSP3_DLOS_MASK; + + for (i = 0; (i < llis_per_log) && slos && dlos; i++) { + writel(lli_src[i+1].lcsp02, &lcla_src[i].lcsp02); + writel(lli_src[i+1].lcsp13, &lcla_src[i].lcsp13); + writel(lli_dst[i+1].lcsp02, &lcla_dst[i].lcsp02); + writel(lli_dst[i+1].lcsp13, &lcla_dst[i].lcsp13); + + slos = lli_src[i+1].lcsp13 & D40_MEM_LCSP1_SLOS_MASK; + dlos = lli_dst[i+1].lcsp13 & D40_MEM_LCSP3_DLOS_MASK; + } +} diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h new file mode 100644 index 000000000000..2029280cb332 --- /dev/null +++ b/drivers/dma/ste_dma40_ll.h @@ -0,0 +1,354 @@ +/* + * driver/dma/ste_dma40_ll.h + * + * Copyright (C) ST-Ericsson 2007-2010 + * License terms: GNU General Public License (GPL) version 2 + * Author: Per Friden <per.friden@stericsson.com> + * Author: Jonas Aaberg <jonas.aberg@stericsson.com> + */ +#ifndef STE_DMA40_LL_H +#define STE_DMA40_LL_H + +#define D40_DREG_PCBASE 0x400 +#define D40_DREG_PCDELTA (8 * 4) +#define D40_LLI_ALIGN 16 /* LLI alignment must be 16 bytes. */ + +#define D40_TYPE_TO_GROUP(type) (type / 16) +#define D40_TYPE_TO_EVENT(type) (type % 16) + +/* Most bits of the CFG register are the same in log as in phy mode */ +#define D40_SREG_CFG_MST_POS 15 +#define D40_SREG_CFG_TIM_POS 14 +#define D40_SREG_CFG_EIM_POS 13 +#define D40_SREG_CFG_LOG_INCR_POS 12 +#define D40_SREG_CFG_PHY_PEN_POS 12 +#define D40_SREG_CFG_PSIZE_POS 10 +#define D40_SREG_CFG_ESIZE_POS 8 +#define D40_SREG_CFG_PRI_POS 7 +#define D40_SREG_CFG_LBE_POS 6 +#define D40_SREG_CFG_LOG_GIM_POS 5 +#define D40_SREG_CFG_LOG_MFU_POS 4 +#define D40_SREG_CFG_PHY_TM_POS 4 +#define D40_SREG_CFG_PHY_EVTL_POS 0 + + +/* Standard channel parameters - basic mode (element register) */ +#define D40_SREG_ELEM_PHY_ECNT_POS 16 +#define D40_SREG_ELEM_PHY_EIDX_POS 0 + +#define D40_SREG_ELEM_PHY_ECNT_MASK (0xFFFF << D40_SREG_ELEM_PHY_ECNT_POS) + +/* Standard channel parameters - basic mode (Link register) */ +#define D40_SREG_LNK_PHY_TCP_POS 0 +#define D40_SREG_LNK_PHY_LMP_POS 1 +#define D40_SREG_LNK_PHY_PRE_POS 2 +/* + * Source destination link address. Contains the + * 29-bit byte word aligned address of the reload area. + */ +#define D40_SREG_LNK_PHYS_LNK_MASK 0xFFFFFFF8UL + +/* Standard basic channel logical mode */ + +/* Element register */ +#define D40_SREG_ELEM_LOG_ECNT_POS 16 +#define D40_SREG_ELEM_LOG_LIDX_POS 8 +#define D40_SREG_ELEM_LOG_LOS_POS 1 +#define D40_SREG_ELEM_LOG_TCP_POS 0 + +#define D40_SREG_ELEM_LOG_LIDX_MASK (0xFF << D40_SREG_ELEM_LOG_LIDX_POS) + +/* Link register */ +#define D40_DEACTIVATE_EVENTLINE 0x0 +#define D40_ACTIVATE_EVENTLINE 0x1 +#define D40_EVENTLINE_POS(i) (2 * i) +#define D40_EVENTLINE_MASK(i) (0x3 << D40_EVENTLINE_POS(i)) + +/* Standard basic channel logical params in memory */ + +/* LCSP0 */ +#define D40_MEM_LCSP0_ECNT_POS 16 +#define D40_MEM_LCSP0_SPTR_POS 0 + +#define D40_MEM_LCSP0_ECNT_MASK (0xFFFF << D40_MEM_LCSP0_ECNT_POS) +#define D40_MEM_LCSP0_SPTR_MASK (0xFFFF << D40_MEM_LCSP0_SPTR_POS) + +/* LCSP1 */ +#define D40_MEM_LCSP1_SPTR_POS 16 +#define D40_MEM_LCSP1_SCFG_MST_POS 15 +#define D40_MEM_LCSP1_SCFG_TIM_POS 14 +#define D40_MEM_LCSP1_SCFG_EIM_POS 13 +#define D40_MEM_LCSP1_SCFG_INCR_POS 12 +#define D40_MEM_LCSP1_SCFG_PSIZE_POS 10 +#define D40_MEM_LCSP1_SCFG_ESIZE_POS 8 +#define D40_MEM_LCSP1_SLOS_POS 1 +#define D40_MEM_LCSP1_STCP_POS 0 + +#define D40_MEM_LCSP1_SPTR_MASK (0xFFFF << D40_MEM_LCSP1_SPTR_POS) +#define D40_MEM_LCSP1_SCFG_TIM_MASK (0x1 << D40_MEM_LCSP1_SCFG_TIM_POS) +#define D40_MEM_LCSP1_SCFG_INCR_MASK (0x1 << D40_MEM_LCSP1_SCFG_INCR_POS) +#define D40_MEM_LCSP1_SCFG_PSIZE_MASK (0x3 << D40_MEM_LCSP1_SCFG_PSIZE_POS) +#define D40_MEM_LCSP1_SLOS_MASK (0x7F << D40_MEM_LCSP1_SLOS_POS) +#define D40_MEM_LCSP1_STCP_MASK (0x1 << D40_MEM_LCSP1_STCP_POS) + +/* LCSP2 */ +#define D40_MEM_LCSP2_ECNT_POS 16 + +#define D40_MEM_LCSP2_ECNT_MASK (0xFFFF << D40_MEM_LCSP2_ECNT_POS) + +/* LCSP3 */ +#define D40_MEM_LCSP3_DCFG_MST_POS 15 +#define D40_MEM_LCSP3_DCFG_TIM_POS 14 +#define D40_MEM_LCSP3_DCFG_EIM_POS 13 +#define D40_MEM_LCSP3_DCFG_INCR_POS 12 +#define D40_MEM_LCSP3_DCFG_PSIZE_POS 10 +#define D40_MEM_LCSP3_DCFG_ESIZE_POS 8 +#define D40_MEM_LCSP3_DLOS_POS 1 +#define D40_MEM_LCSP3_DTCP_POS 0 + +#define D40_MEM_LCSP3_DLOS_MASK (0x7F << D40_MEM_LCSP3_DLOS_POS) +#define D40_MEM_LCSP3_DTCP_MASK (0x1 << D40_MEM_LCSP3_DTCP_POS) + + +/* Standard channel parameter register offsets */ +#define D40_CHAN_REG_SSCFG 0x00 +#define D40_CHAN_REG_SSELT 0x04 +#define D40_CHAN_REG_SSPTR 0x08 +#define D40_CHAN_REG_SSLNK 0x0C +#define D40_CHAN_REG_SDCFG 0x10 +#define D40_CHAN_REG_SDELT 0x14 +#define D40_CHAN_REG_SDPTR 0x18 +#define D40_CHAN_REG_SDLNK 0x1C + +/* DMA Register Offsets */ +#define D40_DREG_GCC 0x000 +#define D40_DREG_PRTYP 0x004 +#define D40_DREG_PRSME 0x008 +#define D40_DREG_PRSMO 0x00C +#define D40_DREG_PRMSE 0x010 +#define D40_DREG_PRMSO 0x014 +#define D40_DREG_PRMOE 0x018 +#define D40_DREG_PRMOO 0x01C +#define D40_DREG_LCPA 0x020 +#define D40_DREG_LCLA 0x024 +#define D40_DREG_ACTIVE 0x050 +#define D40_DREG_ACTIVO 0x054 +#define D40_DREG_FSEB1 0x058 +#define D40_DREG_FSEB2 0x05C +#define D40_DREG_PCMIS 0x060 +#define D40_DREG_PCICR 0x064 +#define D40_DREG_PCTIS 0x068 +#define D40_DREG_PCEIS 0x06C +#define D40_DREG_LCMIS0 0x080 +#define D40_DREG_LCMIS1 0x084 +#define D40_DREG_LCMIS2 0x088 +#define D40_DREG_LCMIS3 0x08C +#define D40_DREG_LCICR0 0x090 +#define D40_DREG_LCICR1 0x094 +#define D40_DREG_LCICR2 0x098 +#define D40_DREG_LCICR3 0x09C +#define D40_DREG_LCTIS0 0x0A0 +#define D40_DREG_LCTIS1 0x0A4 +#define D40_DREG_LCTIS2 0x0A8 +#define D40_DREG_LCTIS3 0x0AC +#define D40_DREG_LCEIS0 0x0B0 +#define D40_DREG_LCEIS1 0x0B4 +#define D40_DREG_LCEIS2 0x0B8 +#define D40_DREG_LCEIS3 0x0BC +#define D40_DREG_STFU 0xFC8 +#define D40_DREG_ICFG 0xFCC +#define D40_DREG_PERIPHID0 0xFE0 +#define D40_DREG_PERIPHID1 0xFE4 +#define D40_DREG_PERIPHID2 0xFE8 +#define D40_DREG_PERIPHID3 0xFEC +#define D40_DREG_CELLID0 0xFF0 +#define D40_DREG_CELLID1 0xFF4 +#define D40_DREG_CELLID2 0xFF8 +#define D40_DREG_CELLID3 0xFFC + +/* LLI related structures */ + +/** + * struct d40_phy_lli - The basic configration register for each physical + * channel. + * + * @reg_cfg: The configuration register. + * @reg_elt: The element register. + * @reg_ptr: The pointer register. + * @reg_lnk: The link register. + * + * These registers are set up for both physical and logical transfers + * Note that the bit in each register means differently in logical and + * physical(standard) mode. + * + * This struct must be 16 bytes aligned, and only contain physical registers + * since it will be directly accessed by the DMA. + */ +struct d40_phy_lli { + u32 reg_cfg; + u32 reg_elt; + u32 reg_ptr; + u32 reg_lnk; +}; + +/** + * struct d40_phy_lli_bidir - struct for a transfer. + * + * @src: Register settings for src channel. + * @dst: Register settings for dst channel. + * @dst_addr: Physical destination address. + * @src_addr: Physical source address. + * + * All DMA transfers have a source and a destination. + */ + +struct d40_phy_lli_bidir { + struct d40_phy_lli *src; + struct d40_phy_lli *dst; + dma_addr_t dst_addr; + dma_addr_t src_addr; +}; + + +/** + * struct d40_log_lli - logical lli configuration + * + * @lcsp02: Either maps to register lcsp0 if src or lcsp2 if dst. + * @lcsp13: Either maps to register lcsp1 if src or lcsp3 if dst. + * + * This struct must be 8 bytes aligned since it will be accessed directy by + * the DMA. Never add any none hw mapped registers to this struct. + */ + +struct d40_log_lli { + u32 lcsp02; + u32 lcsp13; +}; + +/** + * struct d40_log_lli_bidir - For both src and dst + * + * @src: pointer to src lli configuration. + * @dst: pointer to dst lli configuration. + * + * You always have a src and a dst when doing DMA transfers. + */ + +struct d40_log_lli_bidir { + struct d40_log_lli *src; + struct d40_log_lli *dst; +}; + +/** + * struct d40_log_lli_full - LCPA layout + * + * @lcsp0: Logical Channel Standard Param 0 - Src. + * @lcsp1: Logical Channel Standard Param 1 - Src. + * @lcsp2: Logical Channel Standard Param 2 - Dst. + * @lcsp3: Logical Channel Standard Param 3 - Dst. + * + * This struct maps to LCPA physical memory layout. Must map to + * the hw. + */ +struct d40_log_lli_full { + u32 lcsp0; + u32 lcsp1; + u32 lcsp2; + u32 lcsp3; +}; + +/** + * struct d40_def_lcsp - Default LCSP1 and LCSP3 settings + * + * @lcsp3: The default configuration for dst. + * @lcsp1: The default configuration for src. + */ +struct d40_def_lcsp { + u32 lcsp3; + u32 lcsp1; +}; + +/** + * struct d40_lcla_elem - Info for one LCA element. + * + * @src_id: logical channel src id + * @dst_id: logical channel dst id + * @src: LCPA formated src parameters + * @dst: LCPA formated dst parameters + * + */ +struct d40_lcla_elem { + int src_id; + int dst_id; + struct d40_log_lli *src; + struct d40_log_lli *dst; +}; + +/* Physical channels */ + +void d40_phy_cfg(struct stedma40_chan_cfg *cfg, + u32 *src_cfg, u32 *dst_cfg, bool is_log); + +void d40_log_cfg(struct stedma40_chan_cfg *cfg, + u32 *lcsp1, u32 *lcsp2); + +int d40_phy_sg_to_lli(struct scatterlist *sg, + int sg_len, + dma_addr_t target, + struct d40_phy_lli *lli, + dma_addr_t lli_phys, + u32 reg_cfg, + u32 data_width, + int psize, + bool term_int); + +int d40_phy_fill_lli(struct d40_phy_lli *lli, + dma_addr_t data, + u32 data_size, + int psize, + dma_addr_t next_lli, + u32 reg_cfg, + bool term_int, + u32 data_width, + bool is_device); + +void d40_phy_lli_write(void __iomem *virtbase, + u32 phy_chan_num, + struct d40_phy_lli *lli_dst, + struct d40_phy_lli *lli_src); + +/* Logical channels */ + +void d40_log_fill_lli(struct d40_log_lli *lli, + dma_addr_t data, u32 data_size, + u32 lli_next_off, u32 reg_cfg, + u32 data_width, + bool term_int, bool addr_inc); + +int d40_log_sg_to_dev(struct d40_lcla_elem *lcla, + struct scatterlist *sg, + int sg_len, + struct d40_log_lli_bidir *lli, + struct d40_def_lcsp *lcsp, + u32 src_data_width, + u32 dst_data_width, + enum dma_data_direction direction, + bool term_int, dma_addr_t dev_addr, int max_len, + int llis_per_log); + +void d40_log_lli_write(struct d40_log_lli_full *lcpa, + struct d40_log_lli *lcla_src, + struct d40_log_lli *lcla_dst, + struct d40_log_lli *lli_dst, + struct d40_log_lli *lli_src, + int llis_per_log); + +int d40_log_sg_to_lli(int lcla_id, + struct scatterlist *sg, + int sg_len, + struct d40_log_lli *lli_sg, + u32 lcsp13, /* src or dst*/ + u32 data_width, + bool term_int, int max_len, int llis_per_log); + +#endif /* STE_DMA40_LLI_H */ diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c new file mode 100644 index 000000000000..0172fa3c7a2b --- /dev/null +++ b/drivers/dma/timb_dma.c @@ -0,0 +1,860 @@ +/* + * timb_dma.c timberdale FPGA DMA driver + * Copyright (c) 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Supports: + * Timberdale FPGA DMA engine + */ + +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <linux/timb_dma.h> + +#define DRIVER_NAME "timb-dma" + +/* Global DMA registers */ +#define TIMBDMA_ACR 0x34 +#define TIMBDMA_32BIT_ADDR 0x01 + +#define TIMBDMA_ISR 0x080000 +#define TIMBDMA_IPR 0x080004 +#define TIMBDMA_IER 0x080008 + +/* Channel specific registers */ +/* RX instances base addresses are 0x00, 0x40, 0x80 ... + * TX instances base addresses are 0x18, 0x58, 0x98 ... + */ +#define TIMBDMA_INSTANCE_OFFSET 0x40 +#define TIMBDMA_INSTANCE_TX_OFFSET 0x18 + +/* RX registers, relative the instance base */ +#define TIMBDMA_OFFS_RX_DHAR 0x00 +#define TIMBDMA_OFFS_RX_DLAR 0x04 +#define TIMBDMA_OFFS_RX_LR 0x0C +#define TIMBDMA_OFFS_RX_BLR 0x10 +#define TIMBDMA_OFFS_RX_ER 0x14 +#define TIMBDMA_RX_EN 0x01 +/* bytes per Row, video specific register + * which is placed after the TX registers... + */ +#define TIMBDMA_OFFS_RX_BPRR 0x30 + +/* TX registers, relative the instance base */ +#define TIMBDMA_OFFS_TX_DHAR 0x00 +#define TIMBDMA_OFFS_TX_DLAR 0x04 +#define TIMBDMA_OFFS_TX_BLR 0x0C +#define TIMBDMA_OFFS_TX_LR 0x14 + + +#define TIMB_DMA_DESC_SIZE 8 + +struct timb_dma_desc { + struct list_head desc_node; + struct dma_async_tx_descriptor txd; + u8 *desc_list; + unsigned int desc_list_len; + bool interrupt; +}; + +struct timb_dma_chan { + struct dma_chan chan; + void __iomem *membase; + spinlock_t lock; /* Used to protect data structures, + especially the lists and descriptors, + from races between the tasklet and calls + from above */ + dma_cookie_t last_completed_cookie; + bool ongoing; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + unsigned int bytes_per_line; + enum dma_data_direction direction; + unsigned int descs; /* Descriptors to allocate */ + unsigned int desc_elems; /* number of elems per descriptor */ +}; + +struct timb_dma { + struct dma_device dma; + void __iomem *membase; + struct tasklet_struct tasklet; + struct timb_dma_chan channels[0]; +}; + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} +static struct device *chan2dmadev(struct dma_chan *chan) +{ + return chan2dev(chan)->parent->parent; +} + +static struct timb_dma *tdchantotd(struct timb_dma_chan *td_chan) +{ + int id = td_chan->chan.chan_id; + return (struct timb_dma *)((u8 *)td_chan - + id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma)); +} + +/* Must be called with the spinlock held */ +static void __td_enable_chan_irq(struct timb_dma_chan *td_chan) +{ + int id = td_chan->chan.chan_id; + struct timb_dma *td = tdchantotd(td_chan); + u32 ier; + + /* enable interrupt for this channel */ + ier = ioread32(td->membase + TIMBDMA_IER); + ier |= 1 << id; + dev_dbg(chan2dev(&td_chan->chan), "Enabling irq: %d, IER: 0x%x\n", id, + ier); + iowrite32(ier, td->membase + TIMBDMA_IER); +} + +/* Should be called with the spinlock held */ +static bool __td_dma_done_ack(struct timb_dma_chan *td_chan) +{ + int id = td_chan->chan.chan_id; + struct timb_dma *td = (struct timb_dma *)((u8 *)td_chan - + id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma)); + u32 isr; + bool done = false; + + dev_dbg(chan2dev(&td_chan->chan), "Checking irq: %d, td: %p\n", id, td); + + isr = ioread32(td->membase + TIMBDMA_ISR) & (1 << id); + if (isr) { + iowrite32(isr, td->membase + TIMBDMA_ISR); + done = true; + } + + return done; +} + +static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc, + bool single) +{ + dma_addr_t addr; + int len; + + addr = (dma_desc[7] << 24) | (dma_desc[6] << 16) | (dma_desc[5] << 8) | + dma_desc[4]; + + len = (dma_desc[3] << 8) | dma_desc[2]; + + if (single) + dma_unmap_single(chan2dev(&td_chan->chan), addr, len, + td_chan->direction); + else + dma_unmap_page(chan2dev(&td_chan->chan), addr, len, + td_chan->direction); +} + +static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single) +{ + struct timb_dma_chan *td_chan = container_of(td_desc->txd.chan, + struct timb_dma_chan, chan); + u8 *descs; + + for (descs = td_desc->desc_list; ; descs += TIMB_DMA_DESC_SIZE) { + __td_unmap_desc(td_chan, descs, single); + if (descs[0] & 0x02) + break; + } +} + +static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc, + struct scatterlist *sg, bool last) +{ + if (sg_dma_len(sg) > USHORT_MAX) { + dev_err(chan2dev(&td_chan->chan), "Too big sg element\n"); + return -EINVAL; + } + + /* length must be word aligned */ + if (sg_dma_len(sg) % sizeof(u32)) { + dev_err(chan2dev(&td_chan->chan), "Incorrect length: %d\n", + sg_dma_len(sg)); + return -EINVAL; + } + + dev_dbg(chan2dev(&td_chan->chan), "desc: %p, addr: %p\n", + dma_desc, (void *)sg_dma_address(sg)); + + dma_desc[7] = (sg_dma_address(sg) >> 24) & 0xff; + dma_desc[6] = (sg_dma_address(sg) >> 16) & 0xff; + dma_desc[5] = (sg_dma_address(sg) >> 8) & 0xff; + dma_desc[4] = (sg_dma_address(sg) >> 0) & 0xff; + + dma_desc[3] = (sg_dma_len(sg) >> 8) & 0xff; + dma_desc[2] = (sg_dma_len(sg) >> 0) & 0xff; + + dma_desc[1] = 0x00; + dma_desc[0] = 0x21 | (last ? 0x02 : 0); /* tran, valid */ + + return 0; +} + +/* Must be called with the spinlock held */ +static void __td_start_dma(struct timb_dma_chan *td_chan) +{ + struct timb_dma_desc *td_desc; + + if (td_chan->ongoing) { + dev_err(chan2dev(&td_chan->chan), + "Transfer already ongoing\n"); + return; + } + + td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc, + desc_node); + + dev_dbg(chan2dev(&td_chan->chan), + "td_chan: %p, chan: %d, membase: %p\n", + td_chan, td_chan->chan.chan_id, td_chan->membase); + + if (td_chan->direction == DMA_FROM_DEVICE) { + + /* descriptor address */ + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR); + iowrite32(td_desc->txd.phys, td_chan->membase + + TIMBDMA_OFFS_RX_DLAR); + /* Bytes per line */ + iowrite32(td_chan->bytes_per_line, td_chan->membase + + TIMBDMA_OFFS_RX_BPRR); + /* enable RX */ + iowrite32(TIMBDMA_RX_EN, td_chan->membase + TIMBDMA_OFFS_RX_ER); + } else { + /* address high */ + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DHAR); + iowrite32(td_desc->txd.phys, td_chan->membase + + TIMBDMA_OFFS_TX_DLAR); + } + + td_chan->ongoing = true; + + if (td_desc->interrupt) + __td_enable_chan_irq(td_chan); +} + +static void __td_finish(struct timb_dma_chan *td_chan) +{ + dma_async_tx_callback callback; + void *param; + struct dma_async_tx_descriptor *txd; + struct timb_dma_desc *td_desc; + + /* can happen if the descriptor is canceled */ + if (list_empty(&td_chan->active_list)) + return; + + td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc, + desc_node); + txd = &td_desc->txd; + + dev_dbg(chan2dev(&td_chan->chan), "descriptor %u complete\n", + txd->cookie); + + /* make sure to stop the transfer */ + if (td_chan->direction == DMA_FROM_DEVICE) + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER); +/* Currently no support for stopping DMA transfers + else + iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DLAR); +*/ + td_chan->last_completed_cookie = txd->cookie; + td_chan->ongoing = false; + + callback = txd->callback; + param = txd->callback_param; + + list_move(&td_desc->desc_node, &td_chan->free_list); + + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) + __td_unmap_descs(td_desc, + txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE); + + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + if (callback) + callback(param); +} + +static u32 __td_ier_mask(struct timb_dma *td) +{ + int i; + u32 ret = 0; + + for (i = 0; i < td->dma.chancnt; i++) { + struct timb_dma_chan *td_chan = td->channels + i; + if (td_chan->ongoing) { + struct timb_dma_desc *td_desc = + list_entry(td_chan->active_list.next, + struct timb_dma_desc, desc_node); + if (td_desc->interrupt) + ret |= 1 << i; + } + } + + return ret; +} + +static void __td_start_next(struct timb_dma_chan *td_chan) +{ + struct timb_dma_desc *td_desc; + + BUG_ON(list_empty(&td_chan->queue)); + BUG_ON(td_chan->ongoing); + + td_desc = list_entry(td_chan->queue.next, struct timb_dma_desc, + desc_node); + + dev_dbg(chan2dev(&td_chan->chan), "%s: started %u\n", + __func__, td_desc->txd.cookie); + + list_move(&td_desc->desc_node, &td_chan->active_list); + __td_start_dma(td_chan); +} + +static dma_cookie_t td_tx_submit(struct dma_async_tx_descriptor *txd) +{ + struct timb_dma_desc *td_desc = container_of(txd, struct timb_dma_desc, + txd); + struct timb_dma_chan *td_chan = container_of(txd->chan, + struct timb_dma_chan, chan); + dma_cookie_t cookie; + + spin_lock_bh(&td_chan->lock); + + cookie = txd->chan->cookie; + if (++cookie < 0) + cookie = 1; + txd->chan->cookie = cookie; + txd->cookie = cookie; + + if (list_empty(&td_chan->active_list)) { + dev_dbg(chan2dev(txd->chan), "%s: started %u\n", __func__, + txd->cookie); + list_add_tail(&td_desc->desc_node, &td_chan->active_list); + __td_start_dma(td_chan); + } else { + dev_dbg(chan2dev(txd->chan), "tx_submit: queued %u\n", + txd->cookie); + + list_add_tail(&td_desc->desc_node, &td_chan->queue); + } + + spin_unlock_bh(&td_chan->lock); + + return cookie; +} + +static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan) +{ + struct dma_chan *chan = &td_chan->chan; + struct timb_dma_desc *td_desc; + int err; + + td_desc = kzalloc(sizeof(struct timb_dma_desc), GFP_KERNEL); + if (!td_desc) { + dev_err(chan2dev(chan), "Failed to alloc descriptor\n"); + goto err; + } + + td_desc->desc_list_len = td_chan->desc_elems * TIMB_DMA_DESC_SIZE; + + td_desc->desc_list = kzalloc(td_desc->desc_list_len, GFP_KERNEL); + if (!td_desc->desc_list) { + dev_err(chan2dev(chan), "Failed to alloc descriptor\n"); + goto err; + } + + dma_async_tx_descriptor_init(&td_desc->txd, chan); + td_desc->txd.tx_submit = td_tx_submit; + td_desc->txd.flags = DMA_CTRL_ACK; + + td_desc->txd.phys = dma_map_single(chan2dmadev(chan), + td_desc->desc_list, td_desc->desc_list_len, DMA_TO_DEVICE); + + err = dma_mapping_error(chan2dmadev(chan), td_desc->txd.phys); + if (err) { + dev_err(chan2dev(chan), "DMA mapping error: %d\n", err); + goto err; + } + + return td_desc; +err: + kfree(td_desc->desc_list); + kfree(td_desc); + + return NULL; + +} + +static void td_free_desc(struct timb_dma_desc *td_desc) +{ + dev_dbg(chan2dev(td_desc->txd.chan), "Freeing desc: %p\n", td_desc); + dma_unmap_single(chan2dmadev(td_desc->txd.chan), td_desc->txd.phys, + td_desc->desc_list_len, DMA_TO_DEVICE); + + kfree(td_desc->desc_list); + kfree(td_desc); +} + +static void td_desc_put(struct timb_dma_chan *td_chan, + struct timb_dma_desc *td_desc) +{ + dev_dbg(chan2dev(&td_chan->chan), "Putting desc: %p\n", td_desc); + + spin_lock_bh(&td_chan->lock); + list_add(&td_desc->desc_node, &td_chan->free_list); + spin_unlock_bh(&td_chan->lock); +} + +static struct timb_dma_desc *td_desc_get(struct timb_dma_chan *td_chan) +{ + struct timb_dma_desc *td_desc, *_td_desc; + struct timb_dma_desc *ret = NULL; + + spin_lock_bh(&td_chan->lock); + list_for_each_entry_safe(td_desc, _td_desc, &td_chan->free_list, + desc_node) { + if (async_tx_test_ack(&td_desc->txd)) { + list_del(&td_desc->desc_node); + ret = td_desc; + break; + } + dev_dbg(chan2dev(&td_chan->chan), "desc %p not ACKed\n", + td_desc); + } + spin_unlock_bh(&td_chan->lock); + + return ret; +} + +static int td_alloc_chan_resources(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + int i; + + dev_dbg(chan2dev(chan), "%s: entry\n", __func__); + + BUG_ON(!list_empty(&td_chan->free_list)); + for (i = 0; i < td_chan->descs; i++) { + struct timb_dma_desc *td_desc = td_alloc_init_desc(td_chan); + if (!td_desc) { + if (i) + break; + else { + dev_err(chan2dev(chan), + "Couldnt allocate any descriptors\n"); + return -ENOMEM; + } + } + + td_desc_put(td_chan, td_desc); + } + + spin_lock_bh(&td_chan->lock); + td_chan->last_completed_cookie = 1; + chan->cookie = 1; + spin_unlock_bh(&td_chan->lock); + + return 0; +} + +static void td_free_chan_resources(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + struct timb_dma_desc *td_desc, *_td_desc; + LIST_HEAD(list); + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + + /* check that all descriptors are free */ + BUG_ON(!list_empty(&td_chan->active_list)); + BUG_ON(!list_empty(&td_chan->queue)); + + spin_lock_bh(&td_chan->lock); + list_splice_init(&td_chan->free_list, &list); + spin_unlock_bh(&td_chan->lock); + + list_for_each_entry_safe(td_desc, _td_desc, &list, desc_node) { + dev_dbg(chan2dev(chan), "%s: Freeing desc: %p\n", __func__, + td_desc); + td_free_desc(td_desc); + } +} + +static enum dma_status td_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + int ret; + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + + last_complete = td_chan->last_completed_cookie; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + + dma_set_tx_state(txstate, last_complete, last_used, 0); + + dev_dbg(chan2dev(chan), + "%s: exit, ret: %d, last_complete: %d, last_used: %d\n", + __func__, ret, last_complete, last_used); + + return ret; +} + +static void td_issue_pending(struct dma_chan *chan) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + spin_lock_bh(&td_chan->lock); + + if (!list_empty(&td_chan->active_list)) + /* transfer ongoing */ + if (__td_dma_done_ack(td_chan)) + __td_finish(td_chan); + + if (list_empty(&td_chan->active_list) && !list_empty(&td_chan->queue)) + __td_start_next(td_chan); + + spin_unlock_bh(&td_chan->lock); +} + +static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan, + struct scatterlist *sgl, unsigned int sg_len, + enum dma_data_direction direction, unsigned long flags) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + struct timb_dma_desc *td_desc; + struct scatterlist *sg; + unsigned int i; + unsigned int desc_usage = 0; + + if (!sgl || !sg_len) { + dev_err(chan2dev(chan), "%s: No SG list\n", __func__); + return NULL; + } + + /* even channels are for RX, odd for TX */ + if (td_chan->direction != direction) { + dev_err(chan2dev(chan), + "Requesting channel in wrong direction\n"); + return NULL; + } + + td_desc = td_desc_get(td_chan); + if (!td_desc) { + dev_err(chan2dev(chan), "Not enough descriptors available\n"); + return NULL; + } + + td_desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0; + + for_each_sg(sgl, sg, sg_len, i) { + int err; + if (desc_usage > td_desc->desc_list_len) { + dev_err(chan2dev(chan), "No descriptor space\n"); + return NULL; + } + + err = td_fill_desc(td_chan, td_desc->desc_list + desc_usage, sg, + i == (sg_len - 1)); + if (err) { + dev_err(chan2dev(chan), "Failed to update desc: %d\n", + err); + td_desc_put(td_chan, td_desc); + return NULL; + } + desc_usage += TIMB_DMA_DESC_SIZE; + } + + dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys, + td_desc->desc_list_len, DMA_TO_DEVICE); + + return &td_desc->txd; +} + +static int td_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct timb_dma_chan *td_chan = + container_of(chan, struct timb_dma_chan, chan); + struct timb_dma_desc *td_desc, *_td_desc; + + dev_dbg(chan2dev(chan), "%s: Entry\n", __func__); + + if (cmd != DMA_TERMINATE_ALL) + return -ENXIO; + + /* first the easy part, put the queue into the free list */ + spin_lock_bh(&td_chan->lock); + list_for_each_entry_safe(td_desc, _td_desc, &td_chan->queue, + desc_node) + list_move(&td_desc->desc_node, &td_chan->free_list); + + /* now tear down the runnning */ + __td_finish(td_chan); + spin_unlock_bh(&td_chan->lock); + + return 0; +} + +static void td_tasklet(unsigned long data) +{ + struct timb_dma *td = (struct timb_dma *)data; + u32 isr; + u32 ipr; + u32 ier; + int i; + + isr = ioread32(td->membase + TIMBDMA_ISR); + ipr = isr & __td_ier_mask(td); + + /* ack the interrupts */ + iowrite32(ipr, td->membase + TIMBDMA_ISR); + + for (i = 0; i < td->dma.chancnt; i++) + if (ipr & (1 << i)) { + struct timb_dma_chan *td_chan = td->channels + i; + spin_lock(&td_chan->lock); + __td_finish(td_chan); + if (!list_empty(&td_chan->queue)) + __td_start_next(td_chan); + spin_unlock(&td_chan->lock); + } + + ier = __td_ier_mask(td); + iowrite32(ier, td->membase + TIMBDMA_IER); +} + + +static irqreturn_t td_irq(int irq, void *devid) +{ + struct timb_dma *td = devid; + u32 ipr = ioread32(td->membase + TIMBDMA_IPR); + + if (ipr) { + /* disable interrupts, will be re-enabled in tasklet */ + iowrite32(0, td->membase + TIMBDMA_IER); + + tasklet_schedule(&td->tasklet); + + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + + +static int __devinit td_probe(struct platform_device *pdev) +{ + struct timb_dma_platform_data *pdata = pdev->dev.platform_data; + struct timb_dma *td; + struct resource *iomem; + int irq; + int err; + int i; + + if (!pdata) { + dev_err(&pdev->dev, "No platform data\n"); + return -EINVAL; + } + + iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!iomem) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + if (!request_mem_region(iomem->start, resource_size(iomem), + DRIVER_NAME)) + return -EBUSY; + + td = kzalloc(sizeof(struct timb_dma) + + sizeof(struct timb_dma_chan) * pdata->nr_channels, GFP_KERNEL); + if (!td) { + err = -ENOMEM; + goto err_release_region; + } + + dev_dbg(&pdev->dev, "Allocated TD: %p\n", td); + + td->membase = ioremap(iomem->start, resource_size(iomem)); + if (!td->membase) { + dev_err(&pdev->dev, "Failed to remap I/O memory\n"); + err = -ENOMEM; + goto err_free_mem; + } + + /* 32bit addressing */ + iowrite32(TIMBDMA_32BIT_ADDR, td->membase + TIMBDMA_ACR); + + /* disable and clear any interrupts */ + iowrite32(0x0, td->membase + TIMBDMA_IER); + iowrite32(0xFFFFFFFF, td->membase + TIMBDMA_ISR); + + tasklet_init(&td->tasklet, td_tasklet, (unsigned long)td); + + err = request_irq(irq, td_irq, IRQF_SHARED, DRIVER_NAME, td); + if (err) { + dev_err(&pdev->dev, "Failed to request IRQ\n"); + goto err_tasklet_kill; + } + + td->dma.device_alloc_chan_resources = td_alloc_chan_resources; + td->dma.device_free_chan_resources = td_free_chan_resources; + td->dma.device_tx_status = td_tx_status; + td->dma.device_issue_pending = td_issue_pending; + + dma_cap_set(DMA_SLAVE, td->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, td->dma.cap_mask); + td->dma.device_prep_slave_sg = td_prep_slave_sg; + td->dma.device_control = td_control; + + td->dma.dev = &pdev->dev; + + INIT_LIST_HEAD(&td->dma.channels); + + for (i = 0; i < pdata->nr_channels; i++, td->dma.chancnt++) { + struct timb_dma_chan *td_chan = &td->channels[i]; + struct timb_dma_platform_data_channel *pchan = + pdata->channels + i; + + /* even channels are RX, odd are TX */ + if (((i % 2) && pchan->rx) || (!(i % 2) && !pchan->rx)) { + dev_err(&pdev->dev, "Wrong channel configuration\n"); + err = -EINVAL; + goto err_tasklet_kill; + } + + td_chan->chan.device = &td->dma; + td_chan->chan.cookie = 1; + td_chan->chan.chan_id = i; + spin_lock_init(&td_chan->lock); + INIT_LIST_HEAD(&td_chan->active_list); + INIT_LIST_HEAD(&td_chan->queue); + INIT_LIST_HEAD(&td_chan->free_list); + + td_chan->descs = pchan->descriptors; + td_chan->desc_elems = pchan->descriptor_elements; + td_chan->bytes_per_line = pchan->bytes_per_line; + td_chan->direction = pchan->rx ? DMA_FROM_DEVICE : + DMA_TO_DEVICE; + + td_chan->membase = td->membase + + (i / 2) * TIMBDMA_INSTANCE_OFFSET + + (pchan->rx ? 0 : TIMBDMA_INSTANCE_TX_OFFSET); + + dev_dbg(&pdev->dev, "Chan: %d, membase: %p\n", + i, td_chan->membase); + + list_add_tail(&td_chan->chan.device_node, &td->dma.channels); + } + + err = dma_async_device_register(&td->dma); + if (err) { + dev_err(&pdev->dev, "Failed to register async device\n"); + goto err_free_irq; + } + + platform_set_drvdata(pdev, td); + + dev_dbg(&pdev->dev, "Probe result: %d\n", err); + return err; + +err_free_irq: + free_irq(irq, td); +err_tasklet_kill: + tasklet_kill(&td->tasklet); + iounmap(td->membase); +err_free_mem: + kfree(td); +err_release_region: + release_mem_region(iomem->start, resource_size(iomem)); + + return err; + +} + +static int __devexit td_remove(struct platform_device *pdev) +{ + struct timb_dma *td = platform_get_drvdata(pdev); + struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + int irq = platform_get_irq(pdev, 0); + + dma_async_device_unregister(&td->dma); + free_irq(irq, td); + tasklet_kill(&td->tasklet); + iounmap(td->membase); + kfree(td); + release_mem_region(iomem->start, resource_size(iomem)); + + platform_set_drvdata(pdev, NULL); + + dev_dbg(&pdev->dev, "Removed...\n"); + return 0; +} + +static struct platform_driver td_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = td_probe, + .remove = __exit_p(td_remove), +}; + +static int __init td_init(void) +{ + return platform_driver_register(&td_driver); +} +module_init(td_init); + +static void __exit td_exit(void) +{ + platform_driver_unregister(&td_driver); +} +module_exit(td_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Timberdale DMA controller driver"); +MODULE_AUTHOR("Pelagicore AB <info@pelagicore.com>"); +MODULE_ALIAS("platform:"DRIVER_NAME); diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c index 75fcf1ac8bb7..cbd83e362b5e 100644 --- a/drivers/dma/txx9dmac.c +++ b/drivers/dma/txx9dmac.c @@ -938,12 +938,17 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, return &first->txd; } -static void txx9dmac_terminate_all(struct dma_chan *chan) +static int txx9dmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) { struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); struct txx9dmac_desc *desc, *_desc; LIST_HEAD(list); + /* Only supports DMA_TERMINATE_ALL */ + if (cmd != DMA_TERMINATE_ALL) + return -EINVAL; + dev_vdbg(chan2dev(chan), "terminate_all\n"); spin_lock_bh(&dc->lock); @@ -958,12 +963,13 @@ static void txx9dmac_terminate_all(struct dma_chan *chan) /* Flush all pending and queued descriptors */ list_for_each_entry_safe(desc, _desc, &list, desc_node) txx9dmac_descriptor_complete(dc, desc); + + return 0; } static enum dma_status -txx9dmac_is_tx_complete(struct dma_chan *chan, - dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used) +txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, + struct dma_tx_state *txstate) { struct txx9dmac_chan *dc = to_txx9dmac_chan(chan); dma_cookie_t last_used; @@ -985,10 +991,7 @@ txx9dmac_is_tx_complete(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); } - if (done) - *done = last_complete; - if (used) - *used = last_used; + dma_set_tx_state(txstate, last_complete, last_used, 0); return ret; } @@ -1153,8 +1156,8 @@ static int __init txx9dmac_chan_probe(struct platform_device *pdev) dc->dma.dev = &pdev->dev; dc->dma.device_alloc_chan_resources = txx9dmac_alloc_chan_resources; dc->dma.device_free_chan_resources = txx9dmac_free_chan_resources; - dc->dma.device_terminate_all = txx9dmac_terminate_all; - dc->dma.device_is_tx_complete = txx9dmac_is_tx_complete; + dc->dma.device_control = txx9dmac_control; + dc->dma.device_tx_status = txx9dmac_tx_status; dc->dma.device_issue_pending = txx9dmac_issue_pending; if (pdata && pdata->memcpy_chan == ch) { dc->dma.device_prep_dma_memcpy = txx9dmac_prep_dma_memcpy; |