From 61f135b92f4758bc4d4767cd0a5d2da954e27f14 Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Thu, 19 Nov 2009 19:49:17 +0100
Subject: Add COH 901 318 DMA block driver v5

This patch adds support for the ST-Ericsson COH 901 318 DMA block,
found in the U300 series platforms. It registers a DMA slave for
device I/O and also a memcpy engine for memory-to-memory transfers.

Signed-off-by: Linus Walleij
Acked-by: Maciej Sosnowski
Signed-off-by: Dan Williams
---
 drivers/dma/Kconfig         |    7 +
 drivers/dma/Makefile        |    1 +
 drivers/dma/coh901318.c     | 1325 +++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/coh901318_lli.c |  318 +++++++++++
 drivers/dma/coh901318_lli.h |  124 ++++
 5 files changed, 1775 insertions(+)
 create mode 100644 drivers/dma/coh901318.c
 create mode 100644 drivers/dma/coh901318_lli.c
 create mode 100644 drivers/dma/coh901318_lli.h
(limited to 'drivers/dma')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 5903a88351bf..24cdd20fe462 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -109,6 +109,13 @@ config SH_DMAE
 	help
 	  Enable support for the Renesas SuperH DMA controllers.
 
+config COH901318
+	bool "ST-Ericsson COH901318 DMA support"
+	select DMA_ENGINE
+	depends on ARCH_U300
+	help
+	  Enable support for ST-Ericsson COH 901 318 DMA.
+
 config DMA_ENGINE
 	bool

diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index eca71ba78ae9..4db768e09cf3 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
 obj-$(CONFIG_MX3_IPU) += ipu/
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
 obj-$(CONFIG_SH_DMAE) += shdma.o
+obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o

diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
new file mode 100644
index 000000000000..4a99cd94536b
--- /dev/null
+++ b/drivers/dma/coh901318.c
@@ -0,0 +1,1325 @@
+/*
+ * drivers/dma/coh901318.c
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * DMA driver for COH 901 318
+ * Author: Per Friden
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>	/* printk() */
+#include <linux/fs.h>		/* everything... */
+#include <linux/slab.h>		/* kmalloc() */
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/irqreturn.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <mach/coh901318.h>
+
+#include "coh901318_lli.h"
+
+#define COHC_2_DEV(cohc) (&cohc->chan.dev->device)
+
+#ifdef VERBOSE_DEBUG
+#define COH_DBG(x) ({ if (1) x; 0; })
+#else
+#define COH_DBG(x) ({ if (0) x; 0; })
+#endif
+
+struct coh901318_desc {
+	struct dma_async_tx_descriptor desc;
+	struct list_head node;
+	struct scatterlist *sg;
+	unsigned int sg_len;
+	struct coh901318_lli *data;
+	enum dma_data_direction dir;
+	int pending_irqs;
+	unsigned long flags;
+};
+
+struct coh901318_base {
+	struct device *dev;
+	void __iomem *virtbase;
+	struct coh901318_pool pool;
+	struct powersave pm;
+	struct dma_device dma_slave;
+	struct dma_device dma_memcpy;
+	struct coh901318_chan *chans;
+	struct coh901318_platform *platform;
+};
+
+struct coh901318_chan {
+	spinlock_t lock;
+	int allocated;
+	int completed;
+	int id;
+	int stopped;
+
+	struct work_struct free_work;
+	struct dma_chan chan;
+
+	struct tasklet_struct tasklet;
+
+	struct list_head active;
+	struct list_head queue;
+	struct list_head free;
+
+	unsigned long nbr_active_done;
+	unsigned long busy;
+	int pending_irqs;
+
+	struct coh901318_base *base;
+};
+
+static void coh901318_list_print(struct coh901318_chan *cohc,
+				 struct coh901318_lli *lli)
+{
+	struct coh901318_lli *l;
+	dma_addr_t addr = virt_to_phys(lli);
+	int i = 0;
+
+	while (addr) {
+		l = phys_to_virt(addr);
+		dev_vdbg(COHC_2_DEV(cohc), "i %d, lli %p, ctrl 0x%x, src 0x%x"
+			 ", dst 0x%x, link 0x%x link_virt 0x%p\n",
+			 i, l, l->control, l->src_addr, l->dst_addr,
+			 l->link_addr, phys_to_virt(l->link_addr));
+		i++;
+		addr = l->link_addr;
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define COH901318_DEBUGFS_ASSIGN(x, y) (x = y)
+
+static struct coh901318_base *debugfs_dma_base;
+static struct dentry *dma_dentry;
+
+static int coh901318_debugfs_open(struct inode *inode, struct file *file)
+{
+
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static int coh901318_debugfs_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *f_pos)
+{
+	u64 started_channels = debugfs_dma_base->pm.started_channels;
+	int pool_count = debugfs_dma_base->pool.debugfs_pool_counter;
+	int i;
+	int ret = 0;
+	char *dev_buf;
+	char *tmp;
+	int dev_size;
+
+	dev_buf = kmalloc(4*1024, GFP_KERNEL);
+	if (dev_buf == NULL)
+		goto err_kmalloc;
+	tmp = dev_buf;
+
+	tmp += sprintf(tmp, "DMA -- enable dma channels\n");
+
+	for (i = 0; i < debugfs_dma_base->platform->max_channels; i++)
+		if (started_channels & (1 << i))
+			tmp += sprintf(tmp, "channel %d\n", i);
+
+	tmp += sprintf(tmp, "Pool alloc nbr %d\n", pool_count);
+	dev_size = tmp - dev_buf;
+
+	/* No more to read past the end of the buffer */
+	if (*f_pos > dev_size)
+		goto out;
+
+	if (count > dev_size - *f_pos)
+		count = dev_size - *f_pos;
+
+	if (copy_to_user(buf, dev_buf + *f_pos, count)) {
+		ret = -EINVAL;
+		goto out;
+	}
+	ret = count;
+	*f_pos += count;
+
+ out:
+	kfree(dev_buf);
+	return ret;
+
+ err_kmalloc:
+	return 0;
+}
+
+static const struct file_operations coh901318_debugfs_status_operations = {
+	.owner		= THIS_MODULE,
+	.open		= coh901318_debugfs_open,
+	.read		= coh901318_debugfs_read,
+};
+
+
+static int __init init_coh901318_debugfs(void)
+{
+
+	dma_dentry = debugfs_create_dir("dma", NULL);
+
+	(void) debugfs_create_file("status",
+				   S_IFREG | S_IRUGO,
+				   dma_dentry, NULL,
+				   &coh901318_debugfs_status_operations);
+	return 0;
+}
+
+static void __exit exit_coh901318_debugfs(void)
+{
+	debugfs_remove_recursive(dma_dentry);
+}
+
+module_init(init_coh901318_debugfs);
+module_exit(exit_coh901318_debugfs); +#else + +#define COH901318_DEBUGFS_ASSIGN(x, y) + +#endif /* CONFIG_DEBUG_FS */ + +static inline struct coh901318_chan *to_coh901318_chan(struct dma_chan *chan) +{ + return container_of(chan, struct coh901318_chan, chan); +} + +static inline dma_addr_t +cohc_dev_addr(struct coh901318_chan *cohc) +{ + return cohc->base->platform->chan_conf[cohc->id].dev_addr; +} + +static inline const struct coh901318_params * +cohc_chan_param(struct coh901318_chan *cohc) +{ + return &cohc->base->platform->chan_conf[cohc->id].param; +} + +static inline const struct coh_dma_channel * +cohc_chan_conf(struct coh901318_chan *cohc) +{ + return &cohc->base->platform->chan_conf[cohc->id]; +} + +static void enable_powersave(struct coh901318_chan *cohc) +{ + unsigned long flags; + struct powersave *pm = &cohc->base->pm; + + spin_lock_irqsave(&pm->lock, flags); + + pm->started_channels &= ~(1ULL << cohc->id); + + if (!pm->started_channels) { + /* DMA no longer intends to access memory */ + cohc->base->platform->access_memory_state(cohc->base->dev, + false); + } + + spin_unlock_irqrestore(&pm->lock, flags); +} +static void disable_powersave(struct coh901318_chan *cohc) +{ + unsigned long flags; + struct powersave *pm = &cohc->base->pm; + + spin_lock_irqsave(&pm->lock, flags); + + if (!pm->started_channels) { + /* DMA intends to access memory */ + cohc->base->platform->access_memory_state(cohc->base->dev, + true); + } + + pm->started_channels |= (1ULL << cohc->id); + + spin_unlock_irqrestore(&pm->lock, flags); +} + +static inline int coh901318_set_ctrl(struct coh901318_chan *cohc, u32 control) +{ + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + writel(control, + virtbase + COH901318_CX_CTRL + + COH901318_CX_CTRL_SPACING * channel); + return 0; +} + +static inline int coh901318_set_conf(struct coh901318_chan *cohc, u32 conf) +{ + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + writel(conf, + virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING*channel); + return 0; +} + + +static int coh901318_start(struct coh901318_chan *cohc) +{ + u32 val; + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + disable_powersave(cohc); + + val = readl(virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + /* Enable channel */ + val |= COH901318_CX_CFG_CH_ENABLE; + writel(val, virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + return 0; +} + +static int coh901318_prep_linked_list(struct coh901318_chan *cohc, + struct coh901318_lli *data) +{ + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + BUG_ON(readl(virtbase + COH901318_CX_STAT + + COH901318_CX_STAT_SPACING*channel) & + COH901318_CX_STAT_ACTIVE); + + writel(data->src_addr, + virtbase + COH901318_CX_SRC_ADDR + + COH901318_CX_SRC_ADDR_SPACING * channel); + + writel(data->dst_addr, virtbase + + COH901318_CX_DST_ADDR + + COH901318_CX_DST_ADDR_SPACING * channel); + + writel(data->link_addr, virtbase + COH901318_CX_LNK_ADDR + + COH901318_CX_LNK_ADDR_SPACING * channel); + + writel(data->control, virtbase + COH901318_CX_CTRL + + COH901318_CX_CTRL_SPACING * channel); + + return 0; +} +static dma_cookie_t +coh901318_assign_cookie(struct coh901318_chan *cohc, + struct coh901318_desc *cohd) +{ + dma_cookie_t cookie = cohc->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + cohc->chan.cookie = cookie; + cohd->desc.cookie = cookie; + + return cookie; +} + +static struct coh901318_desc * 
+coh901318_desc_get(struct coh901318_chan *cohc) +{ + struct coh901318_desc *desc; + + if (list_empty(&cohc->free)) { + /* alloc new desc because we're out of used ones + * TODO: alloc a pile of descs instead of just one, + * avoid many small allocations. + */ + desc = kmalloc(sizeof(struct coh901318_desc), GFP_NOWAIT); + if (desc == NULL) + goto out; + INIT_LIST_HEAD(&desc->node); + } else { + /* Reuse an old desc. */ + desc = list_first_entry(&cohc->free, + struct coh901318_desc, + node); + list_del(&desc->node); + } + + out: + return desc; +} + +static void +coh901318_desc_free(struct coh901318_chan *cohc, struct coh901318_desc *cohd) +{ + list_add_tail(&cohd->node, &cohc->free); +} + +/* call with irq lock held */ +static void +coh901318_desc_submit(struct coh901318_chan *cohc, struct coh901318_desc *desc) +{ + list_add_tail(&desc->node, &cohc->active); + + BUG_ON(cohc->pending_irqs != 0); + + cohc->pending_irqs = desc->pending_irqs; +} + +static struct coh901318_desc * +coh901318_first_active_get(struct coh901318_chan *cohc) +{ + struct coh901318_desc *d; + + if (list_empty(&cohc->active)) + return NULL; + + d = list_first_entry(&cohc->active, + struct coh901318_desc, + node); + return d; +} + +static void +coh901318_desc_remove(struct coh901318_desc *cohd) +{ + list_del(&cohd->node); +} + +static void +coh901318_desc_queue(struct coh901318_chan *cohc, struct coh901318_desc *desc) +{ + list_add_tail(&desc->node, &cohc->queue); +} + +static struct coh901318_desc * +coh901318_first_queued(struct coh901318_chan *cohc) +{ + struct coh901318_desc *d; + + if (list_empty(&cohc->queue)) + return NULL; + + d = list_first_entry(&cohc->queue, + struct coh901318_desc, + node); + return d; +} + +/* + * DMA start/stop controls + */ +u32 coh901318_get_bytes_left(struct dma_chan *chan) +{ + unsigned long flags; + u32 ret; + struct coh901318_chan *cohc = to_coh901318_chan(chan); + + spin_lock_irqsave(&cohc->lock, flags); + + /* Read transfer count value */ + ret = readl(cohc->base->virtbase + + COH901318_CX_CTRL+COH901318_CX_CTRL_SPACING * + cohc->id) & COH901318_CX_CTRL_TC_VALUE_MASK; + + spin_unlock_irqrestore(&cohc->lock, flags); + + return ret; +} +EXPORT_SYMBOL(coh901318_get_bytes_left); + + +/* Stops a transfer without losing data. Enables power save. + Use this function in conjunction with coh901318_continue(..) 
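   A minimal pause/resume sketch (the client code around the two calls is
   hypothetical, only the two exported functions come from this driver):

     coh901318_stop(chan);       [pause; channel state is retained]
     ... reconfigure the peripheral ...
     coh901318_continue(chan);   [resume the stopped transfer]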
+*/ +void coh901318_stop(struct dma_chan *chan) +{ + u32 val; + unsigned long flags; + struct coh901318_chan *cohc = to_coh901318_chan(chan); + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + spin_lock_irqsave(&cohc->lock, flags); + + /* Disable channel in HW */ + val = readl(virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + /* Stopping infinit transfer */ + if ((val & COH901318_CX_CTRL_TC_ENABLE) == 0 && + (val & COH901318_CX_CFG_CH_ENABLE)) + cohc->stopped = 1; + + + val &= ~COH901318_CX_CFG_CH_ENABLE; + /* Enable twice, HW bug work around */ + writel(val, virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + writel(val, virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + /* Spin-wait for it to actually go inactive */ + while (readl(virtbase + COH901318_CX_STAT+COH901318_CX_STAT_SPACING * + channel) & COH901318_CX_STAT_ACTIVE) + cpu_relax(); + + /* Check if we stopped an active job */ + if ((readl(virtbase + COH901318_CX_CTRL+COH901318_CX_CTRL_SPACING * + channel) & COH901318_CX_CTRL_TC_VALUE_MASK) > 0) + cohc->stopped = 1; + + enable_powersave(cohc); + + spin_unlock_irqrestore(&cohc->lock, flags); +} +EXPORT_SYMBOL(coh901318_stop); + +/* Continues a transfer that has been stopped via 300_dma_stop(..). + Power save is handled. +*/ +void coh901318_continue(struct dma_chan *chan) +{ + u32 val; + unsigned long flags; + struct coh901318_chan *cohc = to_coh901318_chan(chan); + int channel = cohc->id; + + spin_lock_irqsave(&cohc->lock, flags); + + disable_powersave(cohc); + + if (cohc->stopped) { + /* Enable channel in HW */ + val = readl(cohc->base->virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING * channel); + + val |= COH901318_CX_CFG_CH_ENABLE; + + writel(val, cohc->base->virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING*channel); + + cohc->stopped = 0; + } + + spin_unlock_irqrestore(&cohc->lock, flags); +} +EXPORT_SYMBOL(coh901318_continue); + +bool coh901318_filter_id(struct dma_chan *chan, void *chan_id) +{ + unsigned int ch_nr = (unsigned int) chan_id; + + if (ch_nr == to_coh901318_chan(chan)->id) + return true; + + return false; +} +EXPORT_SYMBOL(coh901318_filter_id); + +/* + * DMA channel allocation + */ +static int coh901318_config(struct coh901318_chan *cohc, + struct coh901318_params *param) +{ + unsigned long flags; + const struct coh901318_params *p; + int channel = cohc->id; + void __iomem *virtbase = cohc->base->virtbase; + + spin_lock_irqsave(&cohc->lock, flags); + + if (param) + p = param; + else + p = &cohc->base->platform->chan_conf[channel].param; + + /* Clear any pending BE or TC interrupt */ + if (channel < 32) { + writel(1 << channel, virtbase + COH901318_BE_INT_CLEAR1); + writel(1 << channel, virtbase + COH901318_TC_INT_CLEAR1); + } else { + writel(1 << (channel - 32), virtbase + + COH901318_BE_INT_CLEAR2); + writel(1 << (channel - 32), virtbase + + COH901318_TC_INT_CLEAR2); + } + + coh901318_set_conf(cohc, p->config); + coh901318_set_ctrl(cohc, p->ctrl_lli_last); + + spin_unlock_irqrestore(&cohc->lock, flags); + + return 0; +} + +/* must lock when calling this function + * start queued jobs, if any + * TODO: start all queued jobs in one go + * + * Returns descriptor if queued job is started otherwise NULL. + * If the queue is empty NULL is returned. 
+ */ +static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc) +{ + struct coh901318_desc *cohd_que; + + /* start queued jobs, if any + * TODO: transmit all queued jobs in one go + */ + cohd_que = coh901318_first_queued(cohc); + + if (cohd_que != NULL) { + /* Remove from queue */ + coh901318_desc_remove(cohd_que); + /* initiate DMA job */ + cohc->busy = 1; + + coh901318_desc_submit(cohc, cohd_que); + + coh901318_prep_linked_list(cohc, cohd_que->data); + + /* start dma job */ + coh901318_start(cohc); + + } + + return cohd_que; +} + +static void dma_tasklet(unsigned long data) +{ + struct coh901318_chan *cohc = (struct coh901318_chan *) data; + struct coh901318_desc *cohd_fin; + unsigned long flags; + dma_async_tx_callback callback; + void *callback_param; + + spin_lock_irqsave(&cohc->lock, flags); + + /* get first active entry from list */ + cohd_fin = coh901318_first_active_get(cohc); + + BUG_ON(cohd_fin->pending_irqs == 0); + + if (cohd_fin == NULL) + goto err; + + cohd_fin->pending_irqs--; + cohc->completed = cohd_fin->desc.cookie; + + BUG_ON(cohc->nbr_active_done && cohd_fin == NULL); + + if (cohc->nbr_active_done == 0) + return; + + if (!cohd_fin->pending_irqs) { + /* release the lli allocation*/ + coh901318_lli_free(&cohc->base->pool, &cohd_fin->data); + } + + dev_vdbg(COHC_2_DEV(cohc), "[%s] chan_id %d pending_irqs %d" + " nbr_active_done %ld\n", __func__, + cohc->id, cohc->pending_irqs, cohc->nbr_active_done); + + /* callback to client */ + callback = cohd_fin->desc.callback; + callback_param = cohd_fin->desc.callback_param; + + if (!cohd_fin->pending_irqs) { + coh901318_desc_remove(cohd_fin); + + /* return desc to free-list */ + coh901318_desc_free(cohc, cohd_fin); + } + + if (cohc->nbr_active_done) + cohc->nbr_active_done--; + + if (cohc->nbr_active_done) { + if (cohc_chan_conf(cohc)->priority_high) + tasklet_hi_schedule(&cohc->tasklet); + else + tasklet_schedule(&cohc->tasklet); + } + spin_unlock_irqrestore(&cohc->lock, flags); + + if (callback) + callback(callback_param); + + return; + + err: + spin_unlock_irqrestore(&cohc->lock, flags); + dev_err(COHC_2_DEV(cohc), "[%s] No active dma desc\n", __func__); +} + + +/* called from interrupt context */ +static void dma_tc_handle(struct coh901318_chan *cohc) +{ + BUG_ON(!cohc->allocated && (list_empty(&cohc->active) || + list_empty(&cohc->queue))); + + if (!cohc->allocated) + return; + + BUG_ON(cohc->pending_irqs == 0); + + cohc->pending_irqs--; + cohc->nbr_active_done++; + + if (cohc->pending_irqs == 0 && coh901318_queue_start(cohc) == NULL) + cohc->busy = 0; + + BUG_ON(list_empty(&cohc->active)); + + if (cohc_chan_conf(cohc)->priority_high) + tasklet_hi_schedule(&cohc->tasklet); + else + tasklet_schedule(&cohc->tasklet); +} + + +static irqreturn_t dma_irq_handler(int irq, void *dev_id) +{ + u32 status1; + u32 status2; + int i; + int ch; + struct coh901318_base *base = dev_id; + struct coh901318_chan *cohc; + void __iomem *virtbase = base->virtbase; + + status1 = readl(virtbase + COH901318_INT_STATUS1); + status2 = readl(virtbase + COH901318_INT_STATUS2); + + if (unlikely(status1 == 0 && status2 == 0)) { + dev_warn(base->dev, "spurious DMA IRQ from no channel!\n"); + return IRQ_HANDLED; + } + + /* TODO: consider handle IRQ in tasklet here to + * minimize interrupt latency */ + + /* Check the first 32 DMA channels for IRQ */ + while (status1) { + /* Find first bit set, return as a number. 
*/ + i = ffs(status1) - 1; + ch = i; + + cohc = &base->chans[ch]; + spin_lock(&cohc->lock); + + /* Mask off this bit */ + status1 &= ~(1 << i); + /* Check the individual channel bits */ + if (test_bit(i, virtbase + COH901318_BE_INT_STATUS1)) { + dev_crit(COHC_2_DEV(cohc), + "DMA bus error on channel %d!\n", ch); + BUG_ON(1); + /* Clear BE interrupt */ + __set_bit(i, virtbase + COH901318_BE_INT_CLEAR1); + } else { + /* Caused by TC, really? */ + if (unlikely(!test_bit(i, virtbase + + COH901318_TC_INT_STATUS1))) { + dev_warn(COHC_2_DEV(cohc), + "ignoring interrupt not caused by terminal count on channel %d\n", ch); + /* Clear TC interrupt */ + BUG_ON(1); + __set_bit(i, virtbase + COH901318_TC_INT_CLEAR1); + } else { + /* Enable powersave if transfer has finished */ + if (!(readl(virtbase + COH901318_CX_STAT + + COH901318_CX_STAT_SPACING*ch) & + COH901318_CX_STAT_ENABLED)) { + enable_powersave(cohc); + } + + /* Must clear TC interrupt before calling + * dma_tc_handle + * in case tc_handle initate a new dma job + */ + __set_bit(i, virtbase + COH901318_TC_INT_CLEAR1); + + dma_tc_handle(cohc); + } + } + spin_unlock(&cohc->lock); + } + + /* Check the remaining 32 DMA channels for IRQ */ + while (status2) { + /* Find first bit set, return as a number. */ + i = ffs(status2) - 1; + ch = i + 32; + cohc = &base->chans[ch]; + spin_lock(&cohc->lock); + + /* Mask off this bit */ + status2 &= ~(1 << i); + /* Check the individual channel bits */ + if (test_bit(i, virtbase + COH901318_BE_INT_STATUS2)) { + dev_crit(COHC_2_DEV(cohc), + "DMA bus error on channel %d!\n", ch); + /* Clear BE interrupt */ + BUG_ON(1); + __set_bit(i, virtbase + COH901318_BE_INT_CLEAR2); + } else { + /* Caused by TC, really? */ + if (unlikely(!test_bit(i, virtbase + + COH901318_TC_INT_STATUS2))) { + dev_warn(COHC_2_DEV(cohc), + "ignoring interrupt not caused by terminal count on channel %d\n", ch); + /* Clear TC interrupt */ + __set_bit(i, virtbase + COH901318_TC_INT_CLEAR2); + BUG_ON(1); + } else { + /* Enable powersave if transfer has finished */ + if (!(readl(virtbase + COH901318_CX_STAT + + COH901318_CX_STAT_SPACING*ch) & + COH901318_CX_STAT_ENABLED)) { + enable_powersave(cohc); + } + /* Must clear TC interrupt before calling + * dma_tc_handle + * in case tc_handle initate a new dma job + */ + __set_bit(i, virtbase + COH901318_TC_INT_CLEAR2); + + dma_tc_handle(cohc); + } + } + spin_unlock(&cohc->lock); + } + + return IRQ_HANDLED; +} + +static int coh901318_alloc_chan_resources(struct dma_chan *chan) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + + dev_vdbg(COHC_2_DEV(cohc), "[%s] DMA channel %d\n", + __func__, cohc->id); + + if (chan->client_count > 1) + return -EBUSY; + + coh901318_config(cohc, NULL); + + cohc->allocated = 1; + cohc->completed = chan->cookie = 1; + + return 1; +} + +static void +coh901318_free_chan_resources(struct dma_chan *chan) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + int channel = cohc->id; + unsigned long flags; + + spin_lock_irqsave(&cohc->lock, flags); + + /* Disable HW */ + writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CFG + + COH901318_CX_CFG_SPACING*channel); + writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CTRL + + COH901318_CX_CTRL_SPACING*channel); + + cohc->allocated = 0; + + spin_unlock_irqrestore(&cohc->lock, flags); + + chan->device->device_terminate_all(chan); +} + + +static dma_cookie_t +coh901318_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct coh901318_desc *cohd = container_of(tx, struct coh901318_desc, + desc); + struct 
coh901318_chan *cohc = to_coh901318_chan(tx->chan); + unsigned long flags; + + spin_lock_irqsave(&cohc->lock, flags); + + tx->cookie = coh901318_assign_cookie(cohc, cohd); + + coh901318_desc_queue(cohc, cohd); + + spin_unlock_irqrestore(&cohc->lock, flags); + + return tx->cookie; +} + +static struct dma_async_tx_descriptor * +coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t size, unsigned long flags) +{ + struct coh901318_lli *data; + struct coh901318_desc *cohd; + unsigned long flg; + struct coh901318_chan *cohc = to_coh901318_chan(chan); + int lli_len; + u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last; + + spin_lock_irqsave(&cohc->lock, flg); + + dev_vdbg(COHC_2_DEV(cohc), + "[%s] channel %d src 0x%x dest 0x%x size %d\n", + __func__, cohc->id, src, dest, size); + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last lli */ + ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE; + + lli_len = size >> MAX_DMA_PACKET_SIZE_SHIFT; + if ((lli_len << MAX_DMA_PACKET_SIZE_SHIFT) < size) + lli_len++; + + data = coh901318_lli_alloc(&cohc->base->pool, lli_len); + + if (data == NULL) + goto err; + + cohd = coh901318_desc_get(cohc); + cohd->sg = NULL; + cohd->sg_len = 0; + cohd->data = data; + + cohd->pending_irqs = + coh901318_lli_fill_memcpy( + &cohc->base->pool, data, src, size, dest, + cohc_chan_param(cohc)->ctrl_lli_chained, + ctrl_last); + cohd->flags = flags; + + COH_DBG(coh901318_list_print(cohc, data)); + + dma_async_tx_descriptor_init(&cohd->desc, chan); + + cohd->desc.tx_submit = coh901318_tx_submit; + + spin_unlock_irqrestore(&cohc->lock, flg); + + return &cohd->desc; + err: + spin_unlock_irqrestore(&cohc->lock, flg); + return NULL; +} + +static struct dma_async_tx_descriptor * +coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + struct coh901318_lli *data; + struct coh901318_desc *cohd; + struct scatterlist *sg; + int len = 0; + int size; + int i; + u32 ctrl_chained = cohc_chan_param(cohc)->ctrl_lli_chained; + u32 ctrl = cohc_chan_param(cohc)->ctrl_lli; + u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last; + unsigned long flg; + + if (!sgl) + goto out; + if (sgl->length == 0) + goto out; + + spin_lock_irqsave(&cohc->lock, flg); + + dev_vdbg(COHC_2_DEV(cohc), "[%s] sg_len %d dir %d\n", + __func__, sg_len, direction); + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last lli */ + ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE; + + cohd = coh901318_desc_get(cohc); + cohd->sg = NULL; + cohd->sg_len = 0; + cohd->dir = direction; + + if (direction == DMA_TO_DEVICE) { + u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE | + COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE; + + ctrl_chained |= tx_flags; + ctrl_last |= tx_flags; + ctrl |= tx_flags; + } else if (direction == DMA_FROM_DEVICE) { + u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST | + COH901318_CX_CTRL_DST_ADDR_INC_ENABLE; + + ctrl_chained |= rx_flags; + ctrl_last |= rx_flags; + ctrl |= rx_flags; + } else + goto err_direction; + + dma_async_tx_descriptor_init(&cohd->desc, chan); + + cohd->desc.tx_submit = coh901318_tx_submit; + + + /* The dma only supports transmitting packages up to + * MAX_DMA_PACKET_SIZE. 
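 (Each element thus contributes DIV_ROUND_UP(sg_dma_len(sg), 1 << MAX_DMA_PACKET_SIZE_SHIFT) llis.)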
Calculate to total number of + * dma elemts required to send the entire sg list + */ + for_each_sg(sgl, sg, sg_len, i) { + unsigned int factor; + size = sg_dma_len(sg); + + if (size <= MAX_DMA_PACKET_SIZE) { + len++; + continue; + } + + factor = size >> MAX_DMA_PACKET_SIZE_SHIFT; + if ((factor << MAX_DMA_PACKET_SIZE_SHIFT) < size) + factor++; + + len += factor; + } + + data = coh901318_lli_alloc(&cohc->base->pool, len); + + if (data == NULL) + goto err_dma_alloc; + + /* initiate allocated data list */ + cohd->pending_irqs = + coh901318_lli_fill_sg(&cohc->base->pool, data, sgl, sg_len, + cohc_dev_addr(cohc), + ctrl_chained, + ctrl, + ctrl_last, + direction, COH901318_CX_CTRL_TC_IRQ_ENABLE); + cohd->data = data; + + cohd->flags = flags; + + COH_DBG(coh901318_list_print(cohc, data)); + + spin_unlock_irqrestore(&cohc->lock, flg); + + return &cohd->desc; + err_dma_alloc: + err_direction: + coh901318_desc_remove(cohd); + coh901318_desc_free(cohc, cohd); + spin_unlock_irqrestore(&cohc->lock, flg); + out: + return NULL; +} + +static enum dma_status +coh901318_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *done, + dma_cookie_t *used) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + int ret; + + last_complete = cohc->completed; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return ret; +} + +static void +coh901318_issue_pending(struct dma_chan *chan) +{ + struct coh901318_chan *cohc = to_coh901318_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&cohc->lock, flags); + + /* Busy means that pending jobs are already being processed */ + if (!cohc->busy) + coh901318_queue_start(cohc); + + spin_unlock_irqrestore(&cohc->lock, flags); +} + +static void +coh901318_terminate_all(struct dma_chan *chan) +{ + unsigned long flags; + struct coh901318_chan *cohc = to_coh901318_chan(chan); + struct coh901318_desc *cohd; + void __iomem *virtbase = cohc->base->virtbase; + + coh901318_stop(chan); + + spin_lock_irqsave(&cohc->lock, flags); + + /* Clear any pending BE or TC interrupt */ + if (cohc->id < 32) { + writel(1 << cohc->id, virtbase + COH901318_BE_INT_CLEAR1); + writel(1 << cohc->id, virtbase + COH901318_TC_INT_CLEAR1); + } else { + writel(1 << (cohc->id - 32), virtbase + + COH901318_BE_INT_CLEAR2); + writel(1 << (cohc->id - 32), virtbase + + COH901318_TC_INT_CLEAR2); + } + + enable_powersave(cohc); + + while ((cohd = coh901318_first_active_get(cohc))) { + /* release the lli allocation*/ + coh901318_lli_free(&cohc->base->pool, &cohd->data); + + coh901318_desc_remove(cohd); + + /* return desc to free-list */ + coh901318_desc_free(cohc, cohd); + } + + while ((cohd = coh901318_first_queued(cohc))) { + /* release the lli allocation*/ + coh901318_lli_free(&cohc->base->pool, &cohd->data); + + coh901318_desc_remove(cohd); + + /* return desc to free-list */ + coh901318_desc_free(cohc, cohd); + } + + + cohc->nbr_active_done = 0; + cohc->busy = 0; + cohc->pending_irqs = 0; + + spin_unlock_irqrestore(&cohc->lock, flags); +} +void coh901318_base_init(struct dma_device *dma, const int *pick_chans, + struct coh901318_base *base) +{ + int chans_i; + int i = 0; + struct coh901318_chan *cohc; + + INIT_LIST_HEAD(&dma->channels); + + for (chans_i = 0; pick_chans[chans_i] != -1; chans_i += 2) { + for (i = pick_chans[chans_i]; i <= pick_chans[chans_i+1]; i++) { + cohc = &base->chans[i]; + + cohc->base = 
base; + cohc->chan.device = dma; + cohc->id = i; + + /* TODO: do we really need this lock if only one + * client is connected to each channel? + */ + + spin_lock_init(&cohc->lock); + + cohc->pending_irqs = 0; + cohc->nbr_active_done = 0; + cohc->busy = 0; + INIT_LIST_HEAD(&cohc->free); + INIT_LIST_HEAD(&cohc->active); + INIT_LIST_HEAD(&cohc->queue); + + tasklet_init(&cohc->tasklet, dma_tasklet, + (unsigned long) cohc); + + list_add_tail(&cohc->chan.device_node, + &dma->channels); + } + } +} + +static int __init coh901318_probe(struct platform_device *pdev) +{ + int err = 0; + struct coh901318_platform *pdata; + struct coh901318_base *base; + int irq; + struct resource *io; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + goto err_get_resource; + + /* Map DMA controller registers to virtual memory */ + if (request_mem_region(io->start, + resource_size(io), + pdev->dev.driver->name) == NULL) { + err = -EBUSY; + goto err_request_mem; + } + + pdata = pdev->dev.platform_data; + if (!pdata) + goto err_no_platformdata; + + base = kmalloc(ALIGN(sizeof(struct coh901318_base), 4) + + pdata->max_channels * + sizeof(struct coh901318_chan), + GFP_KERNEL); + if (!base) + goto err_alloc_coh_dma_channels; + + base->chans = ((void *)base) + ALIGN(sizeof(struct coh901318_base), 4); + + base->virtbase = ioremap(io->start, resource_size(io)); + if (!base->virtbase) { + err = -ENOMEM; + goto err_no_ioremap; + } + + base->dev = &pdev->dev; + base->platform = pdata; + spin_lock_init(&base->pm.lock); + base->pm.started_channels = 0; + + COH901318_DEBUGFS_ASSIGN(debugfs_dma_base, base); + + platform_set_drvdata(pdev, base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + goto err_no_irq; + + err = request_irq(irq, dma_irq_handler, IRQF_DISABLED, + "coh901318", base); + if (err) { + dev_crit(&pdev->dev, + "Cannot allocate IRQ for DMA controller!\n"); + goto err_request_irq; + } + + err = coh901318_pool_create(&base->pool, &pdev->dev, + sizeof(struct coh901318_lli), + 32); + if (err) + goto err_pool_create; + + /* init channels for device transfers */ + coh901318_base_init(&base->dma_slave, base->platform->chans_slave, + base); + + dma_cap_zero(base->dma_slave.cap_mask); + dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask); + + base->dma_slave.device_alloc_chan_resources = coh901318_alloc_chan_resources; + base->dma_slave.device_free_chan_resources = coh901318_free_chan_resources; + base->dma_slave.device_prep_slave_sg = coh901318_prep_slave_sg; + base->dma_slave.device_is_tx_complete = coh901318_is_tx_complete; + base->dma_slave.device_issue_pending = coh901318_issue_pending; + base->dma_slave.device_terminate_all = coh901318_terminate_all; + base->dma_slave.dev = &pdev->dev; + + err = dma_async_device_register(&base->dma_slave); + + if (err) + goto err_register_slave; + + /* init channels for memcpy */ + coh901318_base_init(&base->dma_memcpy, base->platform->chans_memcpy, + base); + + dma_cap_zero(base->dma_memcpy.cap_mask); + dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask); + + base->dma_memcpy.device_alloc_chan_resources = coh901318_alloc_chan_resources; + base->dma_memcpy.device_free_chan_resources = coh901318_free_chan_resources; + base->dma_memcpy.device_prep_dma_memcpy = coh901318_prep_memcpy; + base->dma_memcpy.device_is_tx_complete = coh901318_is_tx_complete; + base->dma_memcpy.device_issue_pending = coh901318_issue_pending; + base->dma_memcpy.device_terminate_all = coh901318_terminate_all; + base->dma_memcpy.dev = &pdev->dev; + err = 
dma_async_device_register(&base->dma_memcpy); + + if (err) + goto err_register_memcpy; + + dev_dbg(&pdev->dev, "Initialized COH901318 DMA on virtual base 0x%08x\n", + (u32) base->virtbase); + + return err; + + err_register_memcpy: + dma_async_device_unregister(&base->dma_slave); + err_register_slave: + coh901318_pool_destroy(&base->pool); + err_pool_create: + free_irq(platform_get_irq(pdev, 0), base); + err_request_irq: + err_no_irq: + iounmap(base->virtbase); + err_no_ioremap: + kfree(base); + err_alloc_coh_dma_channels: + err_no_platformdata: + release_mem_region(pdev->resource->start, + resource_size(pdev->resource)); + err_request_mem: + err_get_resource: + return err; +} + +static int __exit coh901318_remove(struct platform_device *pdev) +{ + struct coh901318_base *base = platform_get_drvdata(pdev); + + dma_async_device_unregister(&base->dma_memcpy); + dma_async_device_unregister(&base->dma_slave); + coh901318_pool_destroy(&base->pool); + free_irq(platform_get_irq(pdev, 0), base); + kfree(base); + iounmap(base->virtbase); + release_mem_region(pdev->resource->start, + resource_size(pdev->resource)); + return 0; +} + + +static struct platform_driver coh901318_driver = { + .remove = __exit_p(coh901318_remove), + .driver = { + .name = "coh901318", + }, +}; + +int __init coh901318_init(void) +{ + return platform_driver_probe(&coh901318_driver, coh901318_probe); +} +subsys_initcall(coh901318_init); + +void __exit coh901318_exit(void) +{ + platform_driver_unregister(&coh901318_driver); +} +module_exit(coh901318_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Per Friden"); diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c new file mode 100644 index 000000000000..f5120f238a4d --- /dev/null +++ b/drivers/dma/coh901318_lli.c @@ -0,0 +1,318 @@ +/* + * driver/dma/coh901318_lli.c + * + * Copyright (C) 2007-2009 ST-Ericsson + * License terms: GNU General Public License (GPL) version 2 + * Support functions for handling lli for dma + * Author: Per Friden + */ + +#include +#include +#include +#include +#include + +#include "coh901318_lli.h" + +#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG)) +#define DEBUGFS_POOL_COUNTER_RESET(pool) (pool->debugfs_pool_counter = 0) +#define DEBUGFS_POOL_COUNTER_ADD(pool, add) (pool->debugfs_pool_counter += add) +#else +#define DEBUGFS_POOL_COUNTER_RESET(pool) +#define DEBUGFS_POOL_COUNTER_ADD(pool, add) +#endif + +static struct coh901318_lli * +coh901318_lli_next(struct coh901318_lli *data) +{ + if (data == NULL || data->link_addr == 0) + return NULL; + + return (struct coh901318_lli *) data->virt_link_addr; +} + +int coh901318_pool_create(struct coh901318_pool *pool, + struct device *dev, + size_t size, size_t align) +{ + spin_lock_init(&pool->lock); + pool->dev = dev; + pool->dmapool = dma_pool_create("lli_pool", dev, size, align, 0); + + DEBUGFS_POOL_COUNTER_RESET(pool); + return 0; +} + +int coh901318_pool_destroy(struct coh901318_pool *pool) +{ + + dma_pool_destroy(pool->dmapool); + return 0; +} + +struct coh901318_lli * +coh901318_lli_alloc(struct coh901318_pool *pool, unsigned int len) +{ + int i; + struct coh901318_lli *head; + struct coh901318_lli *lli; + struct coh901318_lli *lli_prev; + dma_addr_t phy; + + if (len == 0) + goto err; + + spin_lock(&pool->lock); + + head = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy); + + if (head == NULL) + goto err; + + DEBUGFS_POOL_COUNTER_ADD(pool, 1); + + lli = head; + lli->phy_this = phy; + + for (i = 1; i < len; i++) { + lli_prev = lli; + + lli = dma_pool_alloc(pool->dmapool, 
GFP_NOWAIT, &phy); + + if (lli == NULL) + goto err_clean_up; + + DEBUGFS_POOL_COUNTER_ADD(pool, 1); + lli->phy_this = phy; + + lli_prev->link_addr = phy; + lli_prev->virt_link_addr = lli; + } + + lli->link_addr = 0x00000000U; + + spin_unlock(&pool->lock); + + return head; + + err: + spin_unlock(&pool->lock); + return NULL; + + err_clean_up: + lli_prev->link_addr = 0x00000000U; + spin_unlock(&pool->lock); + coh901318_lli_free(pool, &head); + return NULL; +} + +void coh901318_lli_free(struct coh901318_pool *pool, + struct coh901318_lli **lli) +{ + struct coh901318_lli *l; + struct coh901318_lli *next; + + if (lli == NULL) + return; + + l = *lli; + + if (l == NULL) + return; + + spin_lock(&pool->lock); + + while (l->link_addr) { + next = l->virt_link_addr; + dma_pool_free(pool->dmapool, l, l->phy_this); + DEBUGFS_POOL_COUNTER_ADD(pool, -1); + l = next; + } + dma_pool_free(pool->dmapool, l, l->phy_this); + DEBUGFS_POOL_COUNTER_ADD(pool, -1); + + spin_unlock(&pool->lock); + *lli = NULL; +} + +int +coh901318_lli_fill_memcpy(struct coh901318_pool *pool, + struct coh901318_lli *lli, + dma_addr_t source, unsigned int size, + dma_addr_t destination, u32 ctrl_chained, + u32 ctrl_eom) +{ + int s = size; + dma_addr_t src = source; + dma_addr_t dst = destination; + + lli->src_addr = src; + lli->dst_addr = dst; + + while (lli->link_addr) { + lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE; + lli->src_addr = src; + lli->dst_addr = dst; + + s -= MAX_DMA_PACKET_SIZE; + lli = coh901318_lli_next(lli); + + src += MAX_DMA_PACKET_SIZE; + dst += MAX_DMA_PACKET_SIZE; + } + + lli->control = ctrl_eom | s; + lli->src_addr = src; + lli->dst_addr = dst; + + /* One irq per single transfer */ + return 1; +} + +int +coh901318_lli_fill_single(struct coh901318_pool *pool, + struct coh901318_lli *lli, + dma_addr_t buf, unsigned int size, + dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom, + enum dma_data_direction dir) +{ + int s = size; + dma_addr_t src; + dma_addr_t dst; + + + if (dir == DMA_TO_DEVICE) { + src = buf; + dst = dev_addr; + + } else if (dir == DMA_FROM_DEVICE) { + + src = dev_addr; + dst = buf; + } else { + return -EINVAL; + } + + while (lli->link_addr) { + size_t block_size = MAX_DMA_PACKET_SIZE; + lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE; + + /* If we are on the next-to-final block and there will + * be less than half a DMA packet left for the last + * block, then we want to make this block a little + * smaller to balance the sizes. 
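 * (For example, with size = MAX_DMA_PACKET_SIZE + 1 the final two blocks
 * come out as MAX_DMA_PACKET_SIZE/2 and MAX_DMA_PACKET_SIZE/2 + 1 rather
 * than MAX_DMA_PACKET_SIZE and 1.)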
This is meant to
+		 * avoid too small transfers if the buffer size is
+		 * (MAX_DMA_PACKET_SIZE*N + 1) */
+		if (s < (MAX_DMA_PACKET_SIZE + MAX_DMA_PACKET_SIZE/2))
+			block_size = MAX_DMA_PACKET_SIZE/2;
+
+		s -= block_size;
+		lli->src_addr = src;
+		lli->dst_addr = dst;
+
+		lli = coh901318_lli_next(lli);
+
+		if (dir == DMA_TO_DEVICE)
+			src += block_size;
+		else if (dir == DMA_FROM_DEVICE)
+			dst += block_size;
+	}
+
+	lli->control = ctrl_eom | s;
+	lli->src_addr = src;
+	lli->dst_addr = dst;
+
+	/* One irq per single transfer */
+	return 1;
+}
+
+int
+coh901318_lli_fill_sg(struct coh901318_pool *pool,
+		      struct coh901318_lli *lli,
+		      struct scatterlist *sgl, unsigned int nents,
+		      dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl,
+		      u32 ctrl_last,
+		      enum dma_data_direction dir, u32 ctrl_irq_mask)
+{
+	int i;
+	struct scatterlist *sg;
+	u32 ctrl_sg;
+	dma_addr_t src = 0;
+	dma_addr_t dst = 0;
+	int nbr_of_irq = 0;
+	u32 bytes_to_transfer;
+	u32 elem_size;
+
+	if (lli == NULL)
+		goto err;
+
+	spin_lock(&pool->lock);
+
+	if (dir == DMA_TO_DEVICE)
+		dst = dev_addr;
+	else if (dir == DMA_FROM_DEVICE)
+		src = dev_addr;
+	else
+		goto err;
+
+	for_each_sg(sgl, sg, nents, i) {
+		if (sg_is_chain(sg)) {
+			/* sg continues to the next sg-element; don't
+			 * send ctrl_finish until the last
+			 * sg-element in the chain
+			 */
+			ctrl_sg = ctrl_chained;
+		} else if (i == nents - 1)
+			ctrl_sg = ctrl_last;
+		else
+			ctrl_sg = ctrl ? ctrl : ctrl_last;
+
+
+		if ((ctrl_sg & ctrl_irq_mask))
+			nbr_of_irq++;
+
+		if (dir == DMA_TO_DEVICE)
+			/* increment source address */
+			src = sg_dma_address(sg);
+		else
+			/* increment destination address */
+			dst = sg_dma_address(sg);
+
+		bytes_to_transfer = sg_dma_len(sg);
+
+		while (bytes_to_transfer) {
+			u32 val;
+
+			if (bytes_to_transfer > MAX_DMA_PACKET_SIZE) {
+				elem_size = MAX_DMA_PACKET_SIZE;
+				val = ctrl_chained;
+			} else {
+				elem_size = bytes_to_transfer;
+				val = ctrl_sg;
+			}
+
+			lli->control = val | elem_size;
+			lli->src_addr = src;
+			lli->dst_addr = dst;
+
+			if (dir == DMA_FROM_DEVICE)
+				dst += elem_size;
+			else
+				src += elem_size;
+
+			BUG_ON(lli->link_addr & 3);
+
+			bytes_to_transfer -= elem_size;
+			lli = coh901318_lli_next(lli);
+		}
+
+	}
+	spin_unlock(&pool->lock);
+
+	/* There can be many IRQs per sg transfer */
+	return nbr_of_irq;
+ err:
+	spin_unlock(&pool->lock);
+	return -EINVAL;
+}
diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h
new file mode 100644
index 000000000000..7bf713b79c6b
--- /dev/null
+++ b/drivers/dma/coh901318_lli.h
@@ -0,0 +1,124 @@
+/*
+ * drivers/dma/coh901318_lli.h
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Support functions for handling lli for coh901318
+ * Author: Per Friden
+ */
+
+#ifndef COH901318_LLI_H
+#define COH901318_LLI_H
+
+#include <mach/coh901318.h>
+
+struct device;
+
+struct coh901318_pool {
+	spinlock_t lock;
+	struct dma_pool *dmapool;
+	struct device *dev;
+
+#ifdef CONFIG_DEBUG_FS
+	int debugfs_pool_counter;
+#endif
+};
+
+struct device;
+/**
+ * coh901318_pool_create() - Creates a dma pool for lli:s
+ * @pool: pool handle
+ * @dev: dma device
+ * @size: size of one lli element
+ * @align: address alignment of the lli:s
+ * returns 0 on success, otherwise nonzero
+ */
+int coh901318_pool_create(struct coh901318_pool *pool,
+			  struct device *dev,
+			  size_t size, size_t align);
+
+/**
+ * coh901318_pool_destroy() - Destroys the dma pool
+ * @pool: pool handle
+ * returns 0 on success, otherwise nonzero
+ */
+int coh901318_pool_destroy(struct coh901318_pool *pool);
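A minimal usage sketch of this pool API, modeled on the driver's own probe
path; the platform device pointer and the chain length of 8 are illustrative
assumptions, not values taken from this patch:

	struct coh901318_pool pool;
	struct coh901318_lli *chain;

	/* one lli-sized, 32-byte aligned pool, as in coh901318_probe() */
	coh901318_pool_create(&pool, &pdev->dev,
			      sizeof(struct coh901318_lli), 32);

	chain = coh901318_lli_alloc(&pool, 8);	/* chain of 8 linked items */
	if (chain) {
		/* fill via coh901318_lli_fill_sg()/_memcpy()/_single() */
		coh901318_lli_free(&pool, &chain);	/* NULLs the pointer */
	}
	coh901318_pool_destroy(&pool);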
+
+/**
+ * coh901318_lli_alloc() - Allocates a linked list
+ *
+ * @pool: pool handle
+ * @len: number of lli:s in the list
+ * return: non-NULL on success, otherwise NULL
+ */
+struct coh901318_lli *
+coh901318_lli_alloc(struct coh901318_pool *pool,
+		    unsigned int len);
+
+/**
+ * coh901318_lli_free() - Returns the linked list items to the pool
+ * @pool: pool handle
+ * @lli: reference to lli pointer to be freed
+ */
+void coh901318_lli_free(struct coh901318_pool *pool,
+			struct coh901318_lli **lli);
+
+/**
+ * coh901318_lli_fill_memcpy() - Prepares the lli:s for dma memcpy
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @src: src address
+ * @size: transfer size
+ * @dst: destination address
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl_last: ctrl for the last lli
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_memcpy(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t src, unsigned int size,
+			  dma_addr_t dst, u32 ctrl_chained, u32 ctrl_last);
+
+/**
+ * coh901318_lli_fill_single() - Prepares the lli:s for dma single transfer
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @buf: transfer buffer
+ * @size: transfer size
+ * @dev_addr: address of peripheral
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl_last: ctrl for the last lli
+ * @dir: direction of transfer (to or from device)
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_single(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t buf, unsigned int size,
+			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last,
+			  enum dma_data_direction dir);
+
+/**
+ * coh901318_lli_fill_sg() - Prepares the lli:s for dma scatter list transfer
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @sg: scatter gather list
+ * @nents: number of entries in sg
+ * @dev_addr: address of peripheral
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl: ctrl of middle lli
+ * @ctrl_last: ctrl for the last lli
+ * @dir: direction of transfer (to or from device)
+ * @ctrl_irq_mask: ctrl mask for CPU interrupt
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_sg(struct coh901318_pool *pool,
+		      struct coh901318_lli *lli,
+		      struct scatterlist *sg, unsigned int nents,
+		      dma_addr_t dev_addr, u32 ctrl_chained,
+		      u32 ctrl, u32 ctrl_last,
+		      enum dma_data_direction dir, u32 ctrl_irq_mask);
+
+#endif /* COH901318_LLI_H */
--
cgit v1.2.3

From ddb4f0f0e05871c7ac540cc778993c06ff53b765 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski
Date: Fri, 4 Dec 2009 19:44:41 +0100
Subject: sh: DMA driver has to specify its alignment requirements

The SH DMA driver by default uses 32-byte transfers; in this mode,
buffers and sizes have to be 32-byte aligned. Specifying this
requirement also fixes Oopses with dmatest.
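For reference, copy_align is a power-of-two shift, so the value 5 set in the
hunk below encodes 32-byte alignment. A submitter honoring it rounds lengths
the same way the dmatest fix further down does (a sketch; the "requested"
variable is hypothetical):

	unsigned int align = chan->device->copy_align;	/* log2: 5 -> 32 bytes */
	size_t len = (requested >> align) << align;	/* round down to multiple */
	if (!len)
		len = 1 << align;			/* enforce the minimum */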
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Dan Williams --- drivers/dma/shdma.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/dma') diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index b3b065c4e5c1..f5fae1258b54 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -677,6 +677,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev) shdev->common.device_is_tx_complete = sh_dmae_is_complete; shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending; shdev->common.dev = &pdev->dev; + /* Default transfer size of 32 bytes requires 32-byte alignment */ + shdev->common.copy_align = 5; #if defined(CONFIG_CPU_SH4) /* Non Mix IRQ mode SH7722/SH7730 etc... */ -- cgit v1.2.3 From cfe4f2751ef1a5390b56c5d263f90b6ff138ba31 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 4 Dec 2009 19:44:48 +0100 Subject: dmaengine: fix dmatest to verify minimum transfer length and test buffer size Transfers and the test buffer have to be at least align bytes long. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Dan Williams --- drivers/dma/dmatest.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index a32a4cf7b1e0..8b905161fbf4 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -298,10 +298,6 @@ static int dmatest_func(void *data) total_tests++; - len = dmatest_random() % test_buf_size + 1; - src_off = dmatest_random() % (test_buf_size - len + 1); - dst_off = dmatest_random() % (test_buf_size - len + 1); - /* honor alignment restrictions */ if (thread->type == DMA_MEMCPY) align = dev->copy_align; @@ -310,7 +306,19 @@ static int dmatest_func(void *data) else if (thread->type == DMA_PQ) align = dev->pq_align; + if (1 << align > test_buf_size) { + pr_err("%u-byte buffer too small for %d-byte alignment\n", + test_buf_size, 1 << align); + break; + } + + len = dmatest_random() % test_buf_size + 1; len = (len >> align) << align; + if (!len) + len = 1 << align; + src_off = dmatest_random() % (test_buf_size - len + 1); + dst_off = dmatest_random() % (test_buf_size - len + 1); + src_off = (src_off >> align) << align; dst_off = (dst_off >> align) << align; -- cgit v1.2.3 From 86d61b33e48f1da5a6b310d3de93187db62ab72a Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 10 Dec 2009 18:35:07 +0100 Subject: sh: stylistic improvements for the DMA driver Signed-off-by: Guennadi Liakhovetski Signed-off-by: Dan Williams --- drivers/dma/shdma.c | 34 +++++++++++++++++----------------- drivers/dma/shdma.h | 14 +++++++------- 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index f5fae1258b54..8d9acd751ed6 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -80,17 +80,17 @@ static int sh_dmae_rst(int id) unsigned short dmaor; sh_dmae_ctl_stop(id); - dmaor = (dmaor_read_reg(id)|DMAOR_INIT); + dmaor = dmaor_read_reg(id) | DMAOR_INIT; dmaor_write_reg(id, dmaor); - if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) { + if (dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF)) { pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n"); return -EINVAL; } return 0; } -static int dmae_is_idle(struct sh_dmae_chan *sh_chan) +static int dmae_is_busy(struct sh_dmae_chan *sh_chan) { u32 chcr = sh_dmae_readl(sh_chan, CHCR); if (chcr & CHCR_DE) { @@ -110,15 +110,14 @@ static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw) { 
sh_dmae_writel(sh_chan, hw.sar, SAR); sh_dmae_writel(sh_chan, hw.dar, DAR); - sh_dmae_writel(sh_chan, - (hw.tcr >> calc_xmit_shift(sh_chan)), TCR); + sh_dmae_writel(sh_chan, hw.tcr >> calc_xmit_shift(sh_chan), TCR); } static void dmae_start(struct sh_dmae_chan *sh_chan) { u32 chcr = sh_dmae_readl(sh_chan, CHCR); - chcr |= (CHCR_DE|CHCR_IE); + chcr |= CHCR_DE | CHCR_IE; sh_dmae_writel(sh_chan, chcr, CHCR); } @@ -132,7 +131,7 @@ static void dmae_halt(struct sh_dmae_chan *sh_chan) static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val) { - int ret = dmae_is_idle(sh_chan); + int ret = dmae_is_busy(sh_chan); /* When DMA was working, can not set data to CHCR */ if (ret) return ret; @@ -149,7 +148,7 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) { u32 addr; int shift = 0; - int ret = dmae_is_idle(sh_chan); + int ret = dmae_is_busy(sh_chan); if (ret) return ret; @@ -307,7 +306,7 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( new = sh_dmae_get_desc(sh_chan); if (!new) { dev_err(sh_chan->dev, - "No free memory for link descriptor\n"); + "No free memory for link descriptor\n"); goto err_get_desc; } @@ -388,7 +387,7 @@ static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) struct sh_dmae_regs hw; /* DMA work check */ - if (dmae_is_idle(sh_chan)) + if (dmae_is_busy(sh_chan)) return; /* Find the first un-transfer desciptor */ @@ -497,8 +496,9 @@ static void dmae_do_tasklet(unsigned long data) struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; struct sh_desc *desc, *_desc, *cur_desc = NULL; u32 sar_buf = sh_dmae_readl(sh_chan, SAR); + list_for_each_entry_safe(desc, _desc, - &sh_chan->ld_queue, node) { + &sh_chan->ld_queue, node) { if ((desc->hw.sar + desc->hw.tcr) == sar_buf) { cur_desc = desc; break; @@ -543,8 +543,8 @@ static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id) /* alloc channel */ new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL); if (!new_sh_chan) { - dev_err(shdev->common.dev, "No free memory for allocating " - "dma channels!\n"); + dev_err(shdev->common.dev, + "No free memory for allocating dma channels!\n"); return -ENOMEM; } @@ -586,8 +586,8 @@ static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id) "sh-dmae%d", new_sh_chan->id); /* set up channel irq */ - err = request_irq(irq, &sh_dmae_interrupt, - irqflags, new_sh_chan->dev_id, new_sh_chan); + err = request_irq(irq, &sh_dmae_interrupt, irqflags, + new_sh_chan->dev_id, new_sh_chan); if (err) { dev_err(shdev->common.dev, "DMA channel %d request_irq error " "with return %d\n", id, err); @@ -691,8 +691,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev) } for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) { - err = request_irq(eirq[ecnt], sh_dmae_err, - irqflags, "DMAC Address Error", shdev); + err = request_irq(eirq[ecnt], sh_dmae_err, irqflags, + "DMAC Address Error", shdev); if (err) { dev_err(&pdev->dev, "DMA device request_irq" "error (irq %d) with return %d\n", diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h index 2b4bc15a2c0a..60b81e529b42 100644 --- a/drivers/dma/shdma.h +++ b/drivers/dma/shdma.h @@ -35,15 +35,15 @@ struct sh_desc { struct sh_dmae_chan { dma_cookie_t completed_cookie; /* The maximum cookie completed */ - spinlock_t desc_lock; /* Descriptor operation lock */ - struct list_head ld_queue; /* Link descriptors queue */ - struct list_head ld_free; /* Link descriptors free */ - struct dma_chan common; /* DMA common channel */ - struct device *dev; /* Channel device */ + spinlock_t 
desc_lock; /* Descriptor operation lock */ + struct list_head ld_queue; /* Link descriptors queue */ + struct list_head ld_free; /* Link descriptors free */ + struct dma_chan common; /* DMA common channel */ + struct device *dev; /* Channel device */ struct tasklet_struct tasklet; /* Tasklet */ - int descs_allocated; /* desc count */ + int descs_allocated; /* desc count */ int id; /* Raw id of this channel */ - char dev_id[16]; /* unique name per DMAC of channel */ + char dev_id[16]; /* unique name per DMAC of channel */ /* Set chcr */ int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs); -- cgit v1.2.3 From 2e032b62c4c8560d6416ad3cc925cfc2a5eafb07 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 11 Dec 2009 21:24:33 -0700 Subject: iop-adma.c: use resource_size() The size of the requested and ioremaped memory is off by 1. Use resource_size() to get the correct value. Signed-off-by: H Hartley Sweeten Signed-off-by: Dan Williams --- drivers/dma/iop-adma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 645ca8d54ec4..ca6e6a0cb793 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1470,7 +1470,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) return -ENODEV; if (!devm_request_mem_region(&pdev->dev, res->start, - res->end - res->start, pdev->name)) + resource_size(res), pdev->name)) return -EBUSY; adev = kzalloc(sizeof(*adev), GFP_KERNEL); @@ -1542,7 +1542,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) iop_chan->device = adev; iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start, - res->end - res->start); + resource_size(res)); if (!iop_chan->mmr_base) { ret = -ENOMEM; goto err_free_iop_chan; -- cgit v1.2.3 From 12458ea06efd7b44281e68fe59c950ec7d59c649 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Fri, 11 Dec 2009 21:24:44 -0700 Subject: ppc440spe-adma: adds updated ppc440spe adma driver This patch adds new version of the PPC440SPe ADMA driver. Signed-off-by: Yuri Tikhonov Signed-off-by: Anatolij Gustschin Signed-off-by: Dan Williams --- drivers/dma/Kconfig | 11 + drivers/dma/Makefile | 1 + drivers/dma/ppc4xx/Makefile | 1 + drivers/dma/ppc4xx/adma.c | 5027 +++++++++++++++++++++++++++++++++++++++++++ drivers/dma/ppc4xx/adma.h | 195 ++ drivers/dma/ppc4xx/dma.h | 223 ++ drivers/dma/ppc4xx/xor.h | 110 + 7 files changed, 5568 insertions(+) create mode 100644 drivers/dma/ppc4xx/Makefile create mode 100644 drivers/dma/ppc4xx/adma.c create mode 100644 drivers/dma/ppc4xx/adma.h create mode 100644 drivers/dma/ppc4xx/dma.h create mode 100644 drivers/dma/ppc4xx/xor.h (limited to 'drivers/dma') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 24cdd20fe462..fe93d70f2e37 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -116,6 +116,17 @@ config COH901318 help Enable support for ST-Ericsson COH 901 318 DMA. +config AMCC_PPC440SPE_ADMA + tristate "AMCC PPC440SPe ADMA support" + depends on 440SPe || 440SP + select DMA_ENGINE + select ARCH_HAS_ASYNC_TX_FIND_CHANNEL + help + Enable support for the AMCC PPC440SPe RAID engines. 
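The ARCH_HAS_ASYNC_TX_FIND_CHANNEL symbol this entry selects (defined just
below) tells the async_tx core that the architecture supplies its own
channel-selection hook rather than the generic one. Its prototype in kernels
of this era was roughly the following -- quoted from memory, so treat the
exact signature as an assumption:

	struct dma_chan *
	async_tx_find_channel(struct async_submit_ctl *submit,
			      enum dma_transaction_type tx_type,
			      struct page **dst, int dst_count,
			      struct page **src, int src_count,
			      size_t len);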
+ +config ARCH_HAS_ASYNC_TX_FIND_CHANNEL + bool + config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 4db768e09cf3..807053d48232 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_MX3_IPU) += ipu/ obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o obj-$(CONFIG_SH_DMAE) += shdma.o obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o +obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ diff --git a/drivers/dma/ppc4xx/Makefile b/drivers/dma/ppc4xx/Makefile new file mode 100644 index 000000000000..b3d259b3e52a --- /dev/null +++ b/drivers/dma/ppc4xx/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += adma.o diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c new file mode 100644 index 000000000000..0a3478e910f0 --- /dev/null +++ b/drivers/dma/ppc4xx/adma.c @@ -0,0 +1,5027 @@ +/* + * Copyright (C) 2006-2009 DENX Software Engineering. + * + * Author: Yuri Tikhonov + * + * Further porting to arch/powerpc by + * Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ + +/* + * This driver supports the asynchrounous DMA copy and RAID engines available + * on the AMCC PPC440SPe Processors. + * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x) + * ADMA driver written by D.Williams. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "adma.h" + +enum ppc_adma_init_code { + PPC_ADMA_INIT_OK = 0, + PPC_ADMA_INIT_MEMRES, + PPC_ADMA_INIT_MEMREG, + PPC_ADMA_INIT_ALLOC, + PPC_ADMA_INIT_COHERENT, + PPC_ADMA_INIT_CHANNEL, + PPC_ADMA_INIT_IRQ1, + PPC_ADMA_INIT_IRQ2, + PPC_ADMA_INIT_REGISTER +}; + +static char *ppc_adma_errors[] = { + [PPC_ADMA_INIT_OK] = "ok", + [PPC_ADMA_INIT_MEMRES] = "failed to get memory resource", + [PPC_ADMA_INIT_MEMREG] = "failed to request memory region", + [PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev " + "structure", + [PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for " + "hardware descriptors", + [PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel", + [PPC_ADMA_INIT_IRQ1] = "failed to request first irq", + [PPC_ADMA_INIT_IRQ2] = "failed to request second irq", + [PPC_ADMA_INIT_REGISTER] = "failed to register dma async device", +}; + +static enum ppc_adma_init_code +ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM]; + +struct ppc_dma_chan_ref { + struct dma_chan *chan; + struct list_head node; +}; + +/* The list of channels exported by ppc440spe ADMA */ +struct list_head +ppc440spe_adma_chan_list = LIST_HEAD_INIT(ppc440spe_adma_chan_list); + +/* This flag is set when we want to refetch the xor chain in the interrupt + * handler + */ +static u32 do_xor_refetch; + +/* Pointer to DMA0, DMA1 CP/CS FIFO */ +static void *ppc440spe_dma_fifo_buf; + +/* Pointers to the CDBs last submitted to DMA0, DMA1 */ +static struct ppc440spe_adma_desc_slot *chan_last_sub[3]; +static struct ppc440spe_adma_desc_slot *chan_first_cdb[3]; + +/* Pointer to last linked and submitted xor CB */ +static struct ppc440spe_adma_desc_slot *xor_last_linked; +static struct ppc440spe_adma_desc_slot *xor_last_submit; + +/* This array is used in data-check operations for storing a pattern */ +static char ppc440spe_qword[16]; + +static atomic_t ppc440spe_adma_err_irq_ref; +static dcr_host_t ppc440spe_mq_dcr_host; +static unsigned int ppc440spe_mq_dcr_len; + +/* Since RXOR operations use the common register (MQ0_CF2H) to set up + * the transaction block size, we do not allow more than one RXOR + * transaction to be active simultaneously. This variable records whether + * RXOR is currently active (PPC440SPE_RXOR_RUN bit is set) or not + * (PPC440SPE_RXOR_RUN is clear).
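 + * The bit is cleared from the interrupt handler once a completed RXOR + * CDB is detected there.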
+ */ +static unsigned long ppc440spe_rxor_state; + +/* These are used in enable & check routines + */ +static u32 ppc440spe_r6_enabled; +static struct ppc440spe_adma_chan *ppc440spe_r6_tchan; +static struct completion ppc440spe_r6_test_comp; + +static int ppc440spe_adma_dma2rxor_prep_src( + struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_rxor *cursor, int index, + int src_cnt, u32 addr); +static void ppc440spe_adma_dma2rxor_set_src( + struct ppc440spe_adma_desc_slot *desc, + int index, dma_addr_t addr); +static void ppc440spe_adma_dma2rxor_set_mult( + struct ppc440spe_adma_desc_slot *desc, + int index, u8 mult); + +#ifdef ADMA_LL_DEBUG +#define ADMA_LL_DBG(x) ({ if (1) x; 0; }) +#else +#define ADMA_LL_DBG(x) ({ if (0) x; 0; }) +#endif + +static void print_cb(struct ppc440spe_adma_chan *chan, void *block) +{ + struct dma_cdb *cdb; + struct xor_cb *cb; + int i; + + switch (chan->device->id) { + case 0: + case 1: + cdb = block; + + pr_debug("CDB at %p [%d]:\n" + "\t attr 0x%02x opc 0x%02x cnt 0x%08x\n" + "\t sg1u 0x%08x sg1l 0x%08x\n" + "\t sg2u 0x%08x sg2l 0x%08x\n" + "\t sg3u 0x%08x sg3l 0x%08x\n", + cdb, chan->device->id, + cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt), + le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l), + le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l), + le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l) + ); + break; + case 2: + cb = block; + + pr_debug("CB at %p [%d]:\n" + "\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n" + "\t cbtah 0x%08x cbtal 0x%08x\n" + "\t cblah 0x%08x cblal 0x%08x\n", + cb, chan->device->id, + cb->cbc, cb->cbbc, cb->cbs, + cb->cbtah, cb->cbtal, + cb->cblah, cb->cblal); + for (i = 0; i < 16; i++) { + if (i && !cb->ops[i].h && !cb->ops[i].l) + continue; + pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n", + i, cb->ops[i].h, cb->ops[i].l); + } + break; + } +} + +static void print_cb_list(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *iter) +{ + for (; iter; iter = iter->hw_next) + print_cb(chan, iter->hw_desc); +} + +static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt) +{ + int i; + + pr_debug("\n%s(%d):\nsrc: ", __func__, id); + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx ", src[i]); + pr_debug("dst:\n\t0x%016llx\n", dst); +} + +static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt) +{ + int i; + + pr_debug("\n%s(%d):\nsrc: ", __func__, id); + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx ", src[i]); + pr_debug("dst: "); + for (i = 0; i < 2; i++) + pr_debug("\t0x%016llx ", dst[i]); +} + +static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src, + unsigned int src_cnt, + const unsigned char *scf) +{ + int i; + + pr_debug("\n%s(%d):\nsrc(coef): ", __func__, id); + if (scf) { + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx(0x%02x) ", src[i], scf[i]); + } else { + for (i = 0; i < src_cnt; i++) + pr_debug("\t0x%016llx(no) ", src[i]); + } + + pr_debug("dst: "); + for (i = 0; i < 2; i++) + pr_debug("\t0x%016llx ", src[src_cnt + i]); +} + +/****************************************************************************** + * Command (Descriptor) Blocks low-level routines + ******************************************************************************/ +/** + * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT + * pseudo operation + */ +static void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + struct xor_cb *p; + + switch (chan->device->id) { + case 
PPC440SPE_XOR_ID: + p = desc->hw_desc; + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + /* NOP with Command Block Complete Enable */ + p->cbc = XOR_CBCR_CBCE_BIT; + break; + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + memset(desc->hw_desc, 0, sizeof(struct dma_cdb)); + /* NOP with interrupt */ + set_bit(PPC440SPE_DESC_INT, &desc->flags); + break; + default: + printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id, + __func__); + break; + } +} + +/** + * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR + * pseudo operation + */ +static void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc) +{ + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + desc->hw_next = NULL; + desc->src_cnt = 0; + desc->dst_cnt = 1; +} + +/** + * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation + */ +static void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc, + int src_cnt, unsigned long flags) +{ + struct xor_cb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + desc->hw_next = NULL; + desc->src_cnt = src_cnt; + desc->dst_cnt = 1; + + hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt; + if (flags & DMA_PREP_INTERRUPT) + /* Enable interrupt on completion */ + hw_desc->cbc |= XOR_CBCR_CBCE_BIT; +} + +/** + * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ + * operation in DMA2 controller + */ +static void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc, + int dst_cnt, int src_cnt, unsigned long flags) +{ + struct xor_cb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct xor_cb)); + desc->hw_next = NULL; + desc->src_cnt = src_cnt; + desc->dst_cnt = dst_cnt; + memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags)); + desc->descs_per_op = 0; + + hw_desc->cbc = XOR_CBCR_TGT_BIT; + if (flags & DMA_PREP_INTERRUPT) + /* Enable interrupt on completion */ + hw_desc->cbc |= XOR_CBCR_CBCE_BIT; +} + +#define DMA_CTRL_FLAGS_LAST DMA_PREP_FENCE +#define DMA_PREP_ZERO_P (DMA_CTRL_FLAGS_LAST << 1) +#define DMA_PREP_ZERO_Q (DMA_PREP_ZERO_P << 1) + +/** + * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation + * with DMA0/1 + */ +static void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc, + int dst_cnt, int src_cnt, unsigned long flags, + unsigned long op) +{ + struct dma_cdb *hw_desc; + struct ppc440spe_adma_desc_slot *iter; + u8 dopc; + + /* Common initialization of a PQ descriptors chain */ + set_bits(op, &desc->flags); + desc->src_cnt = src_cnt; + desc->dst_cnt = dst_cnt; + + /* WXOR MULTICAST if both P and Q are being computed + * MV_SG1_SG2 if Q only + */ + dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ? + DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2; + + list_for_each_entry(iter, &desc->group_list, chain_node) { + hw_desc = iter->hw_desc; + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + + if (likely(!list_is_last(&iter->chain_node, + &desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + } else { + /* this is the last descriptor. + * this slot will be pasted from ADMA level + * each time it wants to configure parameters + * of the transaction (src, dst, ...) 
+ */ + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + } + } + + /* Set OPS depending on WXOR/RXOR type of operation */ + if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) { + /* This is a WXOR only chain: + * - first descriptors are for zeroing destinations + * if PPC440SPE_ZERO_P/Q set; + * - the remaining descriptors are for GF-XOR operations. + */ + iter = list_first_entry(&desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + + if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) { + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) { + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + list_for_each_entry_from(iter, &desc->group_list, chain_node) { + hw_desc = iter->hw_desc; + hw_desc->opc = dopc; + } + } else { + /* This is either RXOR-only or mixed RXOR/WXOR */ + + /* The first 1 or 2 slots in the chain are always RXOR: + * if we need to calculate both P and Q, there are two + * RXOR slots; if only P or only Q, there is one + */ + iter = list_first_entry(&desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + + if (desc->dst_cnt == DMA_DEST_MAX_NUM) { + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + } + + /* The remaining descs (if any) are WXORs */ + if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) { + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + list_for_each_entry_from(iter, &desc->group_list, + chain_node) { + hw_desc = iter->hw_desc; + hw_desc->opc = dopc; + } + } + } +} + +/** + * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor + * for PQ_ZERO_SUM operation + */ +static void ppc440spe_desc_init_dma01pqzero_sum( + struct ppc440spe_adma_desc_slot *desc, + int dst_cnt, int src_cnt) +{ + struct dma_cdb *hw_desc; + struct ppc440spe_adma_desc_slot *iter; + int i = 0; + u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST : + DMA_CDB_OPC_MV_SG1_SG2; + /* + * Initialize starting from 2nd or 3rd descriptor depending + * on dst_cnt. First one or two slots are for cloning P + * and/or Q to chan->pdest and/or chan->qdest as we have + * to preserve original P/Q.
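 * The chain ends with DCHECK128 descriptors, whose pass/fail status + * is collected from the completion interrupt handler.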
+ */ + iter = list_first_entry(&desc->group_list, + struct ppc440spe_adma_desc_slot, chain_node); + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + + if (dst_cnt > 1) { + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + } + /* initialize each source descriptor in chain */ + list_for_each_entry_from(iter, &desc->group_list, chain_node) { + hw_desc = iter->hw_desc; + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->src_cnt = 0; + iter->dst_cnt = 0; + + /* This is a ZERO_SUM operation: + * - descriptors starting from 2nd or 3rd + * descriptor are for GF-XOR operations; + * - remaining descriptors are for checking the result + */ + if (i++ < src_cnt) + /* MV_SG1_SG2 if only Q is being verified + * MULTICAST if both P and Q are being verified + */ + hw_desc->opc = dopc; + else + /* DMA_CDB_OPC_DCHECK128 operation */ + hw_desc->opc = DMA_CDB_OPC_DCHECK128; + + if (likely(!list_is_last(&iter->chain_node, + &desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } else { + /* this is the last descriptor. + * this slot will be pasted from ADMA level + * each time it wants to configure parameters + * of the transaction (src, dst, ...) + */ + iter->hw_next = NULL; + /* always enable interrupt generation since we get + * the status of pqzero from the handler + */ + set_bit(PPC440SPE_DESC_INT, &iter->flags); + } + } + desc->src_cnt = src_cnt; + desc->dst_cnt = dst_cnt; +} + +/** + * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation + */ +static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc, + unsigned long flags) +{ + struct dma_cdb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct dma_cdb)); + desc->hw_next = NULL; + desc->src_cnt = 1; + desc->dst_cnt = 1; + + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &desc->flags); + else + clear_bit(PPC440SPE_DESC_INT, &desc->flags); + + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; +} + +/** + * ppc440spe_desc_init_memset - initialize the descriptor for MEMSET operation + */ +static void ppc440spe_desc_init_memset(struct ppc440spe_adma_desc_slot *desc, + int value, unsigned long flags) +{ + struct dma_cdb *hw_desc = desc->hw_desc; + + memset(desc->hw_desc, 0, sizeof(struct dma_cdb)); + desc->hw_next = NULL; + desc->src_cnt = 1; + desc->dst_cnt = 1; + + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &desc->flags); + else + clear_bit(PPC440SPE_DESC_INT, &desc->flags); + + hw_desc->sg1u = hw_desc->sg1l = cpu_to_le32((u32)value); + hw_desc->sg3u = hw_desc->sg3l = cpu_to_le32((u32)value); + hw_desc->opc = DMA_CDB_OPC_DFILL128; +} + +/** + * ppc440spe_desc_set_src_addr - set source address into the descriptor + */ +static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + int src_idx, dma_addr_t addrh, + dma_addr_t addrl) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + phys_addr_t addr64, tmplow, tmphi; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + if (!addrh) { + addr64 = addrl; + tmphi = (addr64 >> 32); + tmplow = (addr64 & 0xFFFFFFFF); + } else { + tmphi = addrh; + tmplow = addrl; + } + dma_hw_desc = desc->hw_desc; + dma_hw_desc->sg1l = cpu_to_le32((u32)tmplow); + dma_hw_desc->sg1u |= cpu_to_le32((u32)tmphi); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = 
desc->hw_desc; + xor_hw_desc->ops[src_idx].l = addrl; + xor_hw_desc->ops[src_idx].h |= addrh; + break; + } +} + +/** + * ppc440spe_desc_set_src_mult - set source address mult into the descriptor + */ +static void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, u32 mult_index, + int sg_index, unsigned char mult_value) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + u32 *psgu; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + + switch (sg_index) { + /* for RXOR operations set multiplier + * into source cued address + */ + case DMA_CDB_SG_SRC: + psgu = &dma_hw_desc->sg1u; + break; + /* for WXOR operations set multiplier + * into destination cued address(es) + */ + case DMA_CDB_SG_DST1: + psgu = &dma_hw_desc->sg2u; + break; + case DMA_CDB_SG_DST2: + psgu = &dma_hw_desc->sg3u; + break; + default: + BUG(); + } + + *psgu |= cpu_to_le32(mult_value << mult_index); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + break; + default: + BUG(); + } +} + +/** + * ppc440spe_desc_set_dest_addr - set destination address into the descriptor + */ +static void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + dma_addr_t addrh, dma_addr_t addrl, + u32 dst_idx) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + phys_addr_t addr64, tmphi, tmplow; + u32 *psgu, *psgl; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + if (!addrh) { + addr64 = addrl; + tmphi = (addr64 >> 32); + tmplow = (addr64 & 0xFFFFFFFF); + } else { + tmphi = addrh; + tmplow = addrl; + } + dma_hw_desc = desc->hw_desc; + + psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u; + psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l; + + *psgl = cpu_to_le32((u32)tmplow); + *psgu |= cpu_to_le32((u32)tmphi); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + xor_hw_desc->cbtal = addrl; + xor_hw_desc->cbtah |= addrh; + break; + } +} + +/** + * ppc440spe_desc_set_byte_count - set number of data bytes involved + * into the operation + */ +static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + u32 byte_count) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + dma_hw_desc->cnt = cpu_to_le32(byte_count); + break; + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + xor_hw_desc->cbbc = byte_count; + break; + } +} + +/** + * ppc440spe_desc_set_rxor_block_size - set RXOR block size + */ +static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count) +{ + /* assume that byte_count is aligned on the 512-boundary; + * thus write it directly to the register (bits 23:31 are + * reserved there). 
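 + * Note that MQ0_CF2H sits on the DCR bus, which is why it is written + * with dcr_write() below rather than with an MMIO accessor.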
+ */ + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count); +} + +/** + * ppc440spe_desc_set_dcheck - set CHECK pattern + */ +static void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, u8 *qword) +{ + struct dma_cdb *dma_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + iowrite32(qword[0], &dma_hw_desc->sg3l); + iowrite32(qword[4], &dma_hw_desc->sg3u); + iowrite32(qword[8], &dma_hw_desc->sg2l); + iowrite32(qword[12], &dma_hw_desc->sg2u); + break; + default: + BUG(); + } +} + +/** + * ppc440spe_xor_set_link - set link address in xor CB + */ +static void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc, + struct ppc440spe_adma_desc_slot *next_desc) +{ + struct xor_cb *xor_hw_desc = prev_desc->hw_desc; + + if (unlikely(!next_desc || !(next_desc->phys))) { + printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n", + __func__, next_desc, + next_desc ? next_desc->phys : 0); + BUG(); + } + + xor_hw_desc->cbs = 0; + xor_hw_desc->cblal = next_desc->phys; + xor_hw_desc->cblah = 0; + xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT; +} + +/** + * ppc440spe_desc_set_link - set the address of descriptor following this + * descriptor in chain + */ +static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *prev_desc, + struct ppc440spe_adma_desc_slot *next_desc) +{ + unsigned long flags; + struct ppc440spe_adma_desc_slot *tail = next_desc; + + if (unlikely(!prev_desc || !next_desc || + (prev_desc->hw_next && prev_desc->hw_next != next_desc))) { + /* If the previous 'next' is overwritten, something is wrong, + * though we may refetch from append to initiate list + * processing; in that case it's OK. + */ + printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; " "prev->hw_next=0x%p\n", __func__, prev_desc, + next_desc, prev_desc ? 
prev_desc->hw_next : 0); + BUG(); + } + + local_irq_save(flags); + + /* do s/w chaining both for DMA and XOR descriptors */ + prev_desc->hw_next = next_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + break; + case PPC440SPE_XOR_ID: + /* bind descriptor to the chain */ + while (tail->hw_next) + tail = tail->hw_next; + xor_last_linked = tail; + + if (prev_desc == xor_last_submit) + /* do not link to the last submitted CB */ + break; + ppc440spe_xor_set_link(prev_desc, next_desc); + break; + } + + local_irq_restore(flags); +} + +/** + * ppc440spe_desc_get_src_addr - extract the source address from the descriptor + */ +static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, int src_idx) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + /* May have 0, 1, 2, or 3 sources */ + switch (dma_hw_desc->opc) { + case DMA_CDB_OPC_NO_OP: + case DMA_CDB_OPC_DFILL128: + return 0; + case DMA_CDB_OPC_DCHECK128: + if (unlikely(src_idx)) { + printk(KERN_ERR "%s: try to get %d source for" + " DCHECK128\n", __func__, src_idx); + BUG(); + } + return le32_to_cpu(dma_hw_desc->sg1l); + case DMA_CDB_OPC_MULTICAST: + case DMA_CDB_OPC_MV_SG1_SG2: + if (unlikely(src_idx > 2)) { + printk(KERN_ERR "%s: try to get %d source from" + " DMA descr\n", __func__, src_idx); + BUG(); + } + if (src_idx) { + if (le32_to_cpu(dma_hw_desc->sg1u) & + DMA_CUED_XOR_WIN_MSK) { + u8 region; + + if (src_idx == 1) + return le32_to_cpu( + dma_hw_desc->sg1l) + + desc->unmap_len; + + region = (le32_to_cpu( + dma_hw_desc->sg1u)) >> + DMA_CUED_REGION_OFF; + + region &= DMA_CUED_REGION_MSK; + switch (region) { + case DMA_RXOR123: + return le32_to_cpu( + dma_hw_desc->sg1l) + + (desc->unmap_len << 1); + case DMA_RXOR124: + return le32_to_cpu( + dma_hw_desc->sg1l) + + (desc->unmap_len * 3); + case DMA_RXOR125: + return le32_to_cpu( + dma_hw_desc->sg1l) + + (desc->unmap_len << 2); + default: + printk(KERN_ERR + "%s: try to" + " get src3 for region %02x" + "PPC440SPE_DESC_RXOR12?\n", + __func__, region); + BUG(); + } + } else { + printk(KERN_ERR + "%s: try to get %d" + " source for non-cued descr\n", + __func__, src_idx); + BUG(); + } + } + return le32_to_cpu(dma_hw_desc->sg1l); + default: + printk(KERN_ERR "%s: unknown OPC 0x%02x\n", + __func__, dma_hw_desc->opc); + BUG(); + } + return le32_to_cpu(dma_hw_desc->sg1l); + case PPC440SPE_XOR_ID: + /* May have up to 16 sources */ + xor_hw_desc = desc->hw_desc; + return xor_hw_desc->ops[src_idx].l; + } + return 0; +} + +/** + * ppc440spe_desc_get_dest_addr - extract the destination address from the + * descriptor + */ +static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, int idx) +{ + struct dma_cdb *dma_hw_desc; + struct xor_cb *xor_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + + if (likely(!idx)) + return le32_to_cpu(dma_hw_desc->sg2l); + return le32_to_cpu(dma_hw_desc->sg3l); + case PPC440SPE_XOR_ID: + xor_hw_desc = desc->hw_desc; + return xor_hw_desc->cbtal; + } + return 0; +} + +/** + * ppc440spe_desc_get_src_num - extract the number of source addresses from + * the descriptor + */ +static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + struct dma_cdb *dma_hw_desc; + struct 
xor_cb *xor_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_hw_desc = desc->hw_desc; + + switch (dma_hw_desc->opc) { + case DMA_CDB_OPC_NO_OP: + case DMA_CDB_OPC_DFILL128: + return 0; + case DMA_CDB_OPC_DCHECK128: + return 1; + case DMA_CDB_OPC_MV_SG1_SG2: + case DMA_CDB_OPC_MULTICAST: + /* + * Only for RXOR operations we have more than + * one source + */ + if (le32_to_cpu(dma_hw_desc->sg1u) & + DMA_CUED_XOR_WIN_MSK) { + /* RXOR op, there are 2 or 3 sources */ + if (((le32_to_cpu(dma_hw_desc->sg1u) >> + DMA_CUED_REGION_OFF) & + DMA_CUED_REGION_MSK) == DMA_RXOR12) { + /* RXOR 1-2 */ + return 2; + } else { + /* RXOR 1-2-3/1-2-4/1-2-5 */ + return 3; + } + } + return 1; + default: + printk(KERN_ERR "%s: unknown OPC 0x%02x\n", + __func__, dma_hw_desc->opc); + BUG(); + } + case PPC440SPE_XOR_ID: + /* up to 16 sources */ + xor_hw_desc = desc->hw_desc; + return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK; + default: + BUG(); + } + return 0; +} + +/** + * ppc440spe_desc_get_dst_num - get the number of destination addresses in + * this descriptor + */ +static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + struct dma_cdb *dma_hw_desc; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* May be 1 or 2 destinations */ + dma_hw_desc = desc->hw_desc; + switch (dma_hw_desc->opc) { + case DMA_CDB_OPC_NO_OP: + case DMA_CDB_OPC_DCHECK128: + return 0; + case DMA_CDB_OPC_MV_SG1_SG2: + case DMA_CDB_OPC_DFILL128: + return 1; + case DMA_CDB_OPC_MULTICAST: + if (desc->dst_cnt == 2) + return 2; + else + return 1; + default: + printk(KERN_ERR "%s: unknown OPC 0x%02x\n", + __func__, dma_hw_desc->opc); + BUG(); + } + case PPC440SPE_XOR_ID: + /* Always only 1 destination */ + return 1; + default: + BUG(); + } + return 0; +} + +/** + * ppc440spe_desc_get_link - get the address of the descriptor that + * follows this one + */ +static inline u32 ppc440spe_desc_get_link(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + if (!desc->hw_next) + return 0; + + return desc->hw_next->phys; +} + +/** + * ppc440spe_desc_is_aligned - check alignment + */ +static inline int ppc440spe_desc_is_aligned( + struct ppc440spe_adma_desc_slot *desc, int num_slots) +{ + return (desc->idx & (num_slots - 1)) ? 
0 : 1; +} + +/** + * ppc440spe_chan_xor_slot_count - get the number of slots necessary for + * XOR operation + */ +static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt, + int *slots_per_op) +{ + int slot_cnt; + + /* each XOR descriptor provides up to 16 source operands */ + slot_cnt = *slots_per_op = (src_cnt + XOR_MAX_OPS - 1)/XOR_MAX_OPS; + + if (likely(len <= PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)) + return slot_cnt; + + printk(KERN_ERR "%s: len %d > max %d !!\n", + __func__, len, PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); + BUG(); + return slot_cnt; +} + +/** + * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for + * DMA2 PQ operation + */ +static int ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs, + int src_cnt, size_t len) +{ + signed long long order = 0; + int state = 0; + int addr_count = 0; + int i; + for (i = 1; i < src_cnt; i++) { + dma_addr_t cur_addr = srcs[i]; + dma_addr_t old_addr = srcs[i-1]; + switch (state) { + case 0: + if (cur_addr == old_addr + len) { + /* direct RXOR */ + order = 1; + state = 1; + if (i == src_cnt-1) + addr_count++; + } else if (old_addr == cur_addr + len) { + /* reverse RXOR */ + order = -1; + state = 1; + if (i == src_cnt-1) + addr_count++; + } else { + state = 3; + } + break; + case 1: + if (i == src_cnt-2 || (order == -1 + && cur_addr != old_addr - len)) { + order = 0; + state = 0; + addr_count++; + } else if (cur_addr == old_addr + len*order) { + state = 2; + if (i == src_cnt-1) + addr_count++; + } else if (cur_addr == old_addr + 2*len) { + state = 2; + if (i == src_cnt-1) + addr_count++; + } else if (cur_addr == old_addr + 3*len) { + state = 2; + if (i == src_cnt-1) + addr_count++; + } else { + order = 0; + state = 0; + addr_count++; + } + break; + case 2: + order = 0; + state = 0; + addr_count++; + break; + } + if (state == 3) + break; + } + if (src_cnt <= 1 || (state != 1 && state != 2)) { + pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n", + __func__, src_cnt, state, addr_count, order); + for (i = 0; i < src_cnt; i++) + pr_err("\t[%d] 0x%llx \n", i, srcs[i]); + BUG(); + } + + return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS; +} + + +/****************************************************************************** + * ADMA channel low-level routines + ******************************************************************************/ + +static u32 +ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan); +static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan); + +/** + * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine + */ +static void ppc440spe_adma_device_clear_eot_status( + struct ppc440spe_adma_chan *chan) +{ + struct dma_regs *dma_reg; + struct xor_regs *xor_reg; + u8 *p = chan->device->dma_desc_pool_virt; + struct dma_cdb *cdb; + u32 rv, i; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* read FIFO to ack */ + dma_reg = chan->device->dma_reg; + while ((rv = ioread32(&dma_reg->csfpl))) { + i = rv & DMA_CDB_ADDR_MSK; + cdb = (struct dma_cdb *)&p[i - + (u32)chan->device->dma_desc_pool]; + + /* Clear opcode to ack. 
This is necessary for + * ZeroSum operations only + */ + cdb->opc = 0; + + if (test_bit(PPC440SPE_RXOR_RUN, + &ppc440spe_rxor_state)) { + /* probably this is a completed RXOR op, + * get pointer to CDB using the fact that + * physical and virtual addresses of CDB + * in pools have the same offsets + */ + if (le32_to_cpu(cdb->sg1u) & + DMA_CUED_XOR_BASE) { + /* this is a RXOR */ + clear_bit(PPC440SPE_RXOR_RUN, + &ppc440spe_rxor_state); + } + } + + if (rv & DMA_CDB_STATUS_MSK) { + /* ZeroSum check failed + */ + struct ppc440spe_adma_desc_slot *iter; + dma_addr_t phys = rv & ~DMA_CDB_MSK; + + /* + * Update the status of corresponding + * descriptor. + */ + list_for_each_entry(iter, &chan->chain, + chain_node) { + if (iter->phys == phys) + break; + } + /* + * if cannot find the corresponding + * slot it's a bug + */ + BUG_ON(&iter->chain_node == &chan->chain); + + if (iter->xor_check_result) { + if (test_bit(PPC440SPE_DESC_PCHECK, + &iter->flags)) { + *iter->xor_check_result |= + SUM_CHECK_P_RESULT; + } else + if (test_bit(PPC440SPE_DESC_QCHECK, + &iter->flags)) { + *iter->xor_check_result |= + SUM_CHECK_Q_RESULT; + } else + BUG(); + } + } + } + + rv = ioread32(&dma_reg->dsts); + if (rv) { + pr_err("DMA%d err status: 0x%x\n", + chan->device->id, rv); + /* write back to clear */ + iowrite32(rv, &dma_reg->dsts); + } + break; + case PPC440SPE_XOR_ID: + /* reset status bits to ack */ + xor_reg = chan->device->xor_reg; + rv = ioread32be(&xor_reg->sr); + iowrite32be(rv, &xor_reg->sr); + + if (rv & (XOR_IE_ICBIE_BIT|XOR_IE_ICIE_BIT|XOR_IE_RPTIE_BIT)) { + if (rv & XOR_IE_RPTIE_BIT) { + /* Read PLB Timeout Error. + * Try to resubmit the CB + */ + u32 val = ioread32be(&xor_reg->ccbalr); + + iowrite32be(val, &xor_reg->cblalr); + + val = ioread32be(&xor_reg->crsr); + iowrite32be(val | XOR_CRSR_XAE_BIT, + &xor_reg->crsr); + } else + pr_err("XOR ERR 0x%x status\n", rv); + break; + } + + /* if the XORcore is idle, but there are unprocessed CBs + * then refetch the s/w chain here + */ + if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) && + do_xor_refetch) + ppc440spe_chan_append(chan); + break; + } +} + +/** + * ppc440spe_chan_is_busy - get the channel status + */ +static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan) +{ + struct dma_regs *dma_reg; + struct xor_regs *xor_reg; + int busy = 0; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_reg = chan->device->dma_reg; + /* if command FIFO's head and tail pointers are equal and + * status tail is the same as command, then channel is free + */ + if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) || + ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp)) + busy = 1; + break; + case PPC440SPE_XOR_ID: + /* use the special status bit for the XORcore + */ + xor_reg = chan->device->xor_reg; + busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 
1 : 0; + break; + } + + return busy; +} + +/** + * ppc440spe_chan_set_first_xor_descriptor - init XORcore chain + */ +static void ppc440spe_chan_set_first_xor_descriptor( + struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *next_desc) +{ + struct xor_regs *xor_reg = chan->device->xor_reg; + + if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) + printk(KERN_INFO "%s: Warn: XORcore is running " "when trying to set the first CDB!\n", + __func__); + + xor_last_submit = xor_last_linked = next_desc; + + iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr); + + iowrite32be(next_desc->phys, &xor_reg->cblalr); + iowrite32be(0, &xor_reg->cblahr); + iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT, + &xor_reg->cbcr); + + chan->hw_chain_inited = 1; +} + +/** + * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO. + * called with irqs disabled + */ +static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *desc) +{ + u32 pcdb; + struct dma_regs *dma_reg = chan->device->dma_reg; + + pcdb = desc->phys; + if (!test_bit(PPC440SPE_DESC_INT, &desc->flags)) + pcdb |= DMA_CDB_NO_INT; + + chan_last_sub[chan->device->id] = desc; + + ADMA_LL_DBG(print_cb(chan, desc->hw_desc)); + + iowrite32(pcdb, &dma_reg->cpfpl); +} + +/** + * ppc440spe_chan_append - update the h/w chain in the channel + */ +static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan) +{ + struct xor_regs *xor_reg; + struct ppc440spe_adma_desc_slot *iter; + struct xor_cb *xcb; + u32 cur_desc; + unsigned long flags; + + local_irq_save(flags); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + cur_desc = ppc440spe_chan_get_current_descriptor(chan); + + if (likely(cur_desc)) { + iter = chan_last_sub[chan->device->id]; + BUG_ON(!iter); + } else { + /* first peer */ + iter = chan_first_cdb[chan->device->id]; + BUG_ON(!iter); + ppc440spe_dma_put_desc(chan, iter); + chan->hw_chain_inited = 1; + } + + /* is there something new to append */ + if (!iter->hw_next) + break; + + /* flush descriptors from the s/w queue to fifo */ + list_for_each_entry_continue(iter, &chan->chain, chain_node) { + ppc440spe_dma_put_desc(chan, iter); + if (!iter->hw_next) + break; + } + break; + case PPC440SPE_XOR_ID: + /* update h/w links and refetch */ + if (!xor_last_submit->hw_next) + break; + + xor_reg = chan->device->xor_reg; + /* the last linked CDB has to generate an interrupt + * so that we can append the next lists to h/w + * regardless of the XOR engine state at the moment + * these next lists are appended + */ + xcb = xor_last_linked->hw_desc; + xcb->cbc |= XOR_CBCR_CBCE_BIT; + + if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) { + /* XORcore is idle. Refetch now */ + do_xor_refetch = 0; + ppc440spe_xor_set_link(xor_last_submit, + xor_last_submit->hw_next); + + ADMA_LL_DBG(print_cb_list(chan, + xor_last_submit->hw_next)); + + xor_last_submit = xor_last_linked; + iowrite32be(ioread32be(&xor_reg->crsr) | + XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT, + &xor_reg->crsr); + } else { + /* XORcore is running. 
Refetch later in the handler */ + do_xor_refetch = 1; + } + + break; + } + + local_irq_restore(flags); +} + +/** + * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor + */ +static u32 +ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan) +{ + struct dma_regs *dma_reg; + struct xor_regs *xor_reg; + + if (unlikely(!chan->hw_chain_inited)) + /* h/w descriptor chain is not initialized yet */ + return 0; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_reg = chan->device->dma_reg; + return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK); + case PPC440SPE_XOR_ID: + xor_reg = chan->device->xor_reg; + return ioread32be(&xor_reg->ccbalr); + } + return 0; +} + +/** + * ppc440spe_chan_run - enable the channel + */ +static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan) +{ + struct xor_regs *xor_reg; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* DMAs are always enabled, do nothing */ + break; + case PPC440SPE_XOR_ID: + /* drain write buffer */ + xor_reg = chan->device->xor_reg; + + /* fetch descriptor pointed to in */ + iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT, + &xor_reg->crsr); + break; + } +} + +/****************************************************************************** + * ADMA device level + ******************************************************************************/ + +static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan); +static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan); + +static dma_cookie_t +ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx); + +static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t addr, int index); +static void +ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t addr, int index); + +static void +ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t *paddr, unsigned long flags); +static void +ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t addr, int index); +static void +ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot *tx, + unsigned char mult, int index, int dst_pos); +static void +ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot *tx, + dma_addr_t paddr, dma_addr_t qaddr); + +static struct page *ppc440spe_rxor_srcs[32]; + +/** + * ppc440spe_can_rxor - check if the operands may be processed with RXOR + */ +static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len) +{ + int i, order = 0, state = 0; + int idx = 0; + + if (unlikely(!(src_cnt > 1))) + return 0; + + BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs)); + + /* Skip holes in the source list before checking */ + for (i = 0; i < src_cnt; i++) { + if (!srcs[i]) + continue; + ppc440spe_rxor_srcs[idx++] = srcs[i]; + } + src_cnt = idx; + + for (i = 1; i < src_cnt; i++) { + char *cur_addr = page_address(ppc440spe_rxor_srcs[i]); + char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]); + + switch (state) { + case 0: + if (cur_addr == old_addr + len) { + /* direct RXOR */ + order = 1; + state = 1; + } else if (old_addr == cur_addr + len) { + /* reverse RXOR */ + order = -1; + state = 1; + } else + goto out; + break; + case 1: + if ((i == src_cnt - 2) || + (order == -1 && cur_addr != old_addr - len)) { + order = 0; + state = 0; + } else if ((cur_addr == old_addr + len * order) || + (cur_addr == old_addr + 2 * len) || + (cur_addr == old_addr + 3 * len)) { + state 
= 2; + } else { + order = 0; + state = 0; + } + break; + case 2: + order = 0; + state = 0; + break; + } + } + +out: + if (state == 1 || state == 2) + return 1; + + return 0; +} + +/** + * ppc440spe_adma_estimate - estimate the efficiency of processing + * the operation given on this channel. It's assumed that 'chan' is + * capable of processing 'cap' type of operation. + * @chan: channel to use + * @cap: type of transaction + * @dst_lst: array of destination pointers + * @dst_cnt: number of destination operands + * @src_lst: array of source pointers + * @src_cnt: number of source operands + * @src_sz: size of each source operand + */ +static int ppc440spe_adma_estimate(struct dma_chan *chan, + enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt, + struct page **src_lst, int src_cnt, size_t src_sz) +{ + int ef = 1; + + if (cap == DMA_PQ || cap == DMA_PQ_VAL) { + /* If RAID-6 capabilities were not activated don't try + * to use them + */ + if (unlikely(!ppc440spe_r6_enabled)) + return -1; + } + /* In the current implementation of the ppc440spe ADMA driver it + * makes sense to pick out only the pq case, because it may be + * processed: + * (1) either using the Biskup method on DMA2; + * (2) or on DMA0/1. + * Thus we favour (1) if the sources are suitable; + * else let it be processed on one of the DMA0/1 engines. + * In the sum_product case, where the destination is also a + * source, process it on DMA0/1 only. + */ + if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) { + + if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1]) + ef = 0; /* sum_product case, process on DMA0/1 */ + else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz)) + ef = 3; /* override (DMA0/1 + idle) */ + else + ef = 0; /* can't process on DMA2 if !rxor */ + } + + /* channel idleness increases the priority */ + if (likely(ef) && + !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan))) + ef++; + + return ef; +} + +struct dma_chan * +ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap, + struct page **dst_lst, int dst_cnt, struct page **src_lst, + int src_cnt, size_t src_sz) +{ + struct dma_chan *best_chan = NULL; + struct ppc_dma_chan_ref *ref; + int best_rank = -1; + + if (unlikely(!src_sz)) + return NULL; + if (src_sz > PAGE_SIZE) { + /* + * should a user of the API ever pass > PAGE_SIZE requests, + * we sort out cases where temporary page-sized buffers + * are used. + */ + switch (cap) { + case DMA_PQ: + if (src_cnt == 1 && dst_lst[1] == src_lst[0]) + return NULL; + if (src_cnt == 2 && dst_lst[1] == src_lst[1]) + return NULL; + break; + case DMA_PQ_VAL: + case DMA_XOR_VAL: + return NULL; + default: + break; + } + } + + list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) { + if (dma_has_cap(cap, ref->chan->device->cap_mask)) { + int rank; + + rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst, + dst_cnt, src_lst, src_cnt, src_sz); + if (rank > best_rank) { + best_rank = rank; + best_chan = ref->chan; + } + } + } + + return best_chan; +} +EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel); + +/** + * ppc440spe_get_group_entry - get group entry with index entry_idx + * @tdesc: the last allocated slot in the group.
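 * @entry_idx: index of the group entry to get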
+ */ +static struct ppc440spe_adma_desc_slot * +ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx) +{ + struct ppc440spe_adma_desc_slot *iter = tdesc->group_head; + int i = 0; + + if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) { + printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n", + __func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt); + BUG(); + } + + list_for_each_entry(iter, &tdesc->group_list, chain_node) { + if (i++ == entry_idx) + break; + } + return iter; +} + +/** + * ppc440spe_adma_free_slots - flags descriptor slots for reuse + * @slot: Slot to free + * Caller must hold &ppc440spe_chan->lock while calling this function + */ +static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot, + struct ppc440spe_adma_chan *chan) +{ + int stride = slot->slots_per_op; + + while (stride--) { + slot->slots_per_op = 0; + slot = list_entry(slot->slot_node.next, + struct ppc440spe_adma_desc_slot, + slot_node); + } +} + +static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *desc) +{ + u32 src_cnt, dst_cnt; + dma_addr_t addr; + + /* + * get the number of sources & destination + * included in this descriptor and unmap + * them all + */ + src_cnt = ppc440spe_desc_get_src_num(desc, chan); + dst_cnt = ppc440spe_desc_get_dst_num(desc, chan); + + /* unmap destinations */ + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + while (dst_cnt--) { + addr = ppc440spe_desc_get_dest_addr( + desc, chan, dst_cnt); + dma_unmap_page(chan->device->dev, + addr, desc->unmap_len, + DMA_FROM_DEVICE); + } + } + + /* unmap sources */ + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + while (src_cnt--) { + addr = ppc440spe_desc_get_src_addr( + desc, chan, src_cnt); + dma_unmap_page(chan->device->dev, + addr, desc->unmap_len, + DMA_TO_DEVICE); + } + } +} + +/** + * ppc440spe_adma_run_tx_complete_actions - call functions to be called + * upon completion + */ +static dma_cookie_t ppc440spe_adma_run_tx_complete_actions( + struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan, + dma_cookie_t cookie) +{ + int i; + + BUG_ON(desc->async_tx.cookie < 0); + if (desc->async_tx.cookie > 0) { + cookie = desc->async_tx.cookie; + desc->async_tx.cookie = 0; + + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + if (desc->async_tx.callback) + desc->async_tx.callback( + desc->async_tx.callback_param); + + /* unmap dma addresses + * (unmap_single vs unmap_page?) 
+ * + * actually, ppc's dma_unmap_page() functions are empty, so + * the following code is just for the sake of completeness + */ + if (chan && chan->needs_unmap && desc->group_head && + desc->unmap_len) { + struct ppc440spe_adma_desc_slot *unmap = + desc->group_head; + /* assume 1 slot per op always */ + u32 slot_count = unmap->slot_cnt; + + /* Run through the group list and unmap addresses */ + for (i = 0; i < slot_count; i++) { + BUG_ON(!unmap); + ppc440spe_adma_unmap(chan, unmap); + unmap = unmap->hw_next; + } + } + } + + /* run dependent operations */ + dma_run_dependencies(&desc->async_tx); + + return cookie; +} + +/** + * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set) + */ +static int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_adma_chan *chan) +{ + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) + return 0; + + /* leave the last descriptor in the chain + * so we can append to it + */ + if (list_is_last(&desc->chain_node, &chan->chain) || + desc->phys == ppc440spe_chan_get_current_descriptor(chan)) + return 1; + + if (chan->device->id != PPC440SPE_XOR_ID) { + /* our DMA interrupt handler clears the opc field of + * each processed descriptor. For all types of + * operations except for ZeroSum we do not actually + * need an ack from the interrupt handler. ZeroSum is a + * special case since the result of this operation + * is available from the handler only, so if we see + * such a descriptor (which has not been processed yet), + * leave it in the chain. + */ + struct dma_cdb *cdb = desc->hw_desc; + if (cdb->opc == DMA_CDB_OPC_DCHECK128) + return 1; + } + + dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n", + desc->phys, desc->idx, desc->slots_per_op); + + list_del(&desc->chain_node); + ppc440spe_adma_free_slots(desc, chan); + return 0; +} + +/** + * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine + * which runs through the channel CDB list until it reaches the descriptor + * currently being processed. When the routine determines that all CDBs of a + * group have completed, the corresponding callbacks (if any) are called and + * the slots are freed.
+ */ +static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan) +{ + struct ppc440spe_adma_desc_slot *iter, *_iter, *group_start = NULL; + dma_cookie_t cookie = 0; + u32 current_desc = ppc440spe_chan_get_current_descriptor(chan); + int busy = ppc440spe_chan_is_busy(chan); + int seen_current = 0, slot_cnt = 0, slots_per_op = 0; + + dev_dbg(chan->device->common.dev, "ppc440spe adma%d: %s\n", + chan->device->id, __func__); + + if (!current_desc) { + /* There were no transactions yet, so + * nothing to clean + */ + return; + } + + /* free completed slots from the chain starting with + * the oldest descriptor + */ + list_for_each_entry_safe(iter, _iter, &chan->chain, + chain_node) { + dev_dbg(chan->device->common.dev, "\tcookie: %d slot: %d " + "busy: %d this_desc: %#llx next_desc: %#x " + "cur: %#x ack: %d\n", + iter->async_tx.cookie, iter->idx, busy, iter->phys, + ppc440spe_desc_get_link(iter, chan), current_desc, + async_tx_test_ack(&iter->async_tx)); + prefetch(_iter); + prefetch(&_iter->async_tx); + + /* do not advance past the current descriptor loaded into the + * hardware channel; subsequent descriptors are either in process + * or have not been submitted + */ + if (seen_current) + break; + + /* stop the search if we reach the current descriptor and the + * channel is busy, or if it appears that the current descriptor + * needs to be re-read (i.e. has been appended to) + */ + if (iter->phys == current_desc) { + BUG_ON(seen_current++); + if (busy || ppc440spe_desc_get_link(iter, chan)) { + /* not all descriptors of the group have + * been completed; exit. + */ + break; + } + } + + /* detect the start of a group transaction */ + if (!slot_cnt && !slots_per_op) { + slot_cnt = iter->slot_cnt; + slots_per_op = iter->slots_per_op; + if (slot_cnt <= slots_per_op) { + slot_cnt = 0; + slots_per_op = 0; + } + } + + if (slot_cnt) { + if (!group_start) + group_start = iter; + slot_cnt -= slots_per_op; + } + + /* all the members of a group are complete */ + if (slots_per_op != 0 && slot_cnt == 0) { + struct ppc440spe_adma_desc_slot *grp_iter, *_grp_iter; + int end_of_chain = 0; + + /* clean up the group */ + slot_cnt = group_start->slot_cnt; + grp_iter = group_start; + list_for_each_entry_safe_from(grp_iter, _grp_iter, + &chan->chain, chain_node) { + + cookie = ppc440spe_adma_run_tx_complete_actions( + grp_iter, chan, cookie); + + slot_cnt -= slots_per_op; + end_of_chain = ppc440spe_adma_clean_slot( + grp_iter, chan); + if (end_of_chain && slot_cnt) { + /* Should wait for ZeroSum completion */ + if (cookie > 0) + chan->completed_cookie = cookie; + return; + } + + if (slot_cnt == 0 || end_of_chain) + break; + } + + /* the group should be complete at this point */ + BUG_ON(slot_cnt); + + slots_per_op = 0; + group_start = NULL; + if (end_of_chain) + break; + else + continue; + } else if (slots_per_op) /* wait for group completion */ + continue; + + cookie = ppc440spe_adma_run_tx_complete_actions(iter, chan, + cookie); + + if (ppc440spe_adma_clean_slot(iter, chan)) + break; + } + + BUG_ON(!seen_current); + + if (cookie > 0) { + chan->completed_cookie = cookie; + pr_debug("\tcompleted cookie %d\n", cookie); + } + +} + +/** + * ppc440spe_adma_tasklet - clean up watch-dog initiator + */ +static void ppc440spe_adma_tasklet(unsigned long data) +{ + struct ppc440spe_adma_chan *chan = (struct ppc440spe_adma_chan *) data; + + spin_lock_nested(&chan->lock, SINGLE_DEPTH_NESTING); + __ppc440spe_adma_slot_cleanup(chan); + spin_unlock(&chan->lock); +} + +/** + * ppc440spe_adma_slot_cleanup - clean 
up scheduled initiator + */ +static void ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan) +{ + spin_lock_bh(&chan->lock); + __ppc440spe_adma_slot_cleanup(chan); + spin_unlock_bh(&chan->lock); +} + +/** + * ppc440spe_adma_alloc_slots - allocate free slots (if any) + */ +static struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots( + struct ppc440spe_adma_chan *chan, int num_slots, + int slots_per_op) +{ + struct ppc440spe_adma_desc_slot *iter = NULL, *_iter; + struct ppc440spe_adma_desc_slot *alloc_start = NULL; + struct list_head chain = LIST_HEAD_INIT(chain); + int slots_found, retry = 0; + + + BUG_ON(!num_slots || !slots_per_op); + /* start the search from the last allocated descriptor; + * if a contiguous allocation cannot be found, start searching + * from the beginning of the list + */ +retry: + slots_found = 0; + if (retry == 0) + iter = chan->last_used; + else + iter = list_entry(&chan->all_slots, + struct ppc440spe_adma_desc_slot, + slot_node); + list_for_each_entry_safe_continue(iter, _iter, &chan->all_slots, + slot_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + if (iter->slots_per_op) { + slots_found = 0; + continue; + } + + /* start the allocation if the slot is correctly aligned */ + if (!slots_found++) + alloc_start = iter; + + if (slots_found == num_slots) { + struct ppc440spe_adma_desc_slot *alloc_tail = NULL; + struct ppc440spe_adma_desc_slot *last_used = NULL; + + iter = alloc_start; + while (num_slots) { + int i; + /* pre-ack all but the last descriptor */ + if (num_slots != slots_per_op) + async_tx_ack(&iter->async_tx); + + list_add_tail(&iter->chain_node, &chain); + alloc_tail = iter; + iter->async_tx.cookie = 0; + iter->hw_next = NULL; + iter->flags = 0; + iter->slot_cnt = num_slots; + iter->xor_check_result = NULL; + for (i = 0; i < slots_per_op; i++) { + iter->slots_per_op = slots_per_op - i; + last_used = iter; + iter = list_entry(iter->slot_node.next, + struct ppc440spe_adma_desc_slot, + slot_node); + } + num_slots -= slots_per_op; + } + alloc_tail->group_head = alloc_start; + alloc_tail->async_tx.cookie = -EBUSY; + list_splice(&chain, &alloc_tail->group_list); + chan->last_used = last_used; + return alloc_tail; + } + } + if (!retry++) + goto retry; + + /* try to free some slots if the allocation fails */ + tasklet_schedule(&chan->irq_tasklet); + return NULL; +} + +/** + * ppc440spe_adma_alloc_chan_resources - allocate pools for CDB slots + */ +static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *slot = NULL; + char *hw_desc; + int i, db_sz; + int init; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + init = ppc440spe_chan->slots_allocated ? 
0 : 1; + chan->chan_id = ppc440spe_chan->device->id; + + /* Allocate descriptor slots */ + i = ppc440spe_chan->slots_allocated; + if (ppc440spe_chan->device->id != PPC440SPE_XOR_ID) + db_sz = sizeof(struct dma_cdb); + else + db_sz = sizeof(struct xor_cb); + + for (; i < (ppc440spe_chan->device->pool_size / db_sz); i++) { + slot = kzalloc(sizeof(struct ppc440spe_adma_desc_slot), + GFP_KERNEL); + if (!slot) { + printk(KERN_INFO "SPE ADMA Channel only initialized" + " %d descriptor slots", i--); + break; + } + + hw_desc = (char *) ppc440spe_chan->device->dma_desc_pool_virt; + slot->hw_desc = (void *) &hw_desc[i * db_sz]; + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = ppc440spe_adma_tx_submit; + INIT_LIST_HEAD(&slot->chain_node); + INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->group_list); + slot->phys = ppc440spe_chan->device->dma_desc_pool + i * db_sz; + slot->idx = i; + + spin_lock_bh(&ppc440spe_chan->lock); + ppc440spe_chan->slots_allocated++; + list_add_tail(&slot->slot_node, &ppc440spe_chan->all_slots); + spin_unlock_bh(&ppc440spe_chan->lock); + } + + if (i && !ppc440spe_chan->last_used) { + ppc440spe_chan->last_used = + list_entry(ppc440spe_chan->all_slots.next, + struct ppc440spe_adma_desc_slot, + slot_node); + } + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: allocated %d descriptor slots\n", + ppc440spe_chan->device->id, i); + + /* initialize the channel and the chain with a null operation */ + if (init) { + switch (ppc440spe_chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + ppc440spe_chan->hw_chain_inited = 0; + /* Use WXOR for self-testing */ + if (!ppc440spe_r6_tchan) + ppc440spe_r6_tchan = ppc440spe_chan; + break; + case PPC440SPE_XOR_ID: + ppc440spe_chan_start_null_xor(ppc440spe_chan); + break; + default: + BUG(); + } + ppc440spe_chan->needs_unmap = 1; + } + + return (i > 0) ? 
i : -ENOMEM; +} + +/** + * ppc440spe_desc_assign_cookie - assign a cookie + */ +static dma_cookie_t ppc440spe_desc_assign_cookie( + struct ppc440spe_adma_chan *chan, + struct ppc440spe_adma_desc_slot *desc) +{ + dma_cookie_t cookie = chan->common.cookie; + + cookie++; + if (cookie < 0) + cookie = 1; + chan->common.cookie = desc->async_tx.cookie = cookie; + return cookie; +} + +/** + * ppc440spe_rxor_set_region_data - + */ +static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc, + u8 xor_arg_no, u32 mask) +{ + struct xor_cb *xcb = desc->hw_desc; + + xcb->ops[xor_arg_no].h |= mask; +} + +/** + * ppc440spe_rxor_set_src - + */ +static void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc, + u8 xor_arg_no, dma_addr_t addr) +{ + struct xor_cb *xcb = desc->hw_desc; + + xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE; + xcb->ops[xor_arg_no].l = addr; +} + +/** + * ppc440spe_rxor_set_mult - + */ +static void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc, + u8 xor_arg_no, u8 idx, u8 mult) +{ + struct xor_cb *xcb = desc->hw_desc; + + xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8); +} + +/** + * ppc440spe_adma_check_threshold - append CDBs to h/w chain if threshold + * has been achieved + */ +static void ppc440spe_adma_check_threshold(struct ppc440spe_adma_chan *chan) +{ + dev_dbg(chan->device->common.dev, "ppc440spe adma%d: pending: %d\n", + chan->device->id, chan->pending); + + if (chan->pending >= PPC440SPE_ADMA_THRESHOLD) { + chan->pending = 0; + ppc440spe_chan_append(chan); + } +} + +/** + * ppc440spe_adma_tx_submit - submit new descriptor group to the channel + * (it's not necessary that descriptors will be submitted to the h/w + * chains too right now) + */ +static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ppc440spe_adma_desc_slot *sw_desc; + struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan); + struct ppc440spe_adma_desc_slot *group_start, *old_chain_tail; + int slot_cnt; + int slots_per_op; + dma_cookie_t cookie; + + sw_desc = tx_to_ppc440spe_adma_slot(tx); + + group_start = sw_desc->group_head; + slot_cnt = group_start->slot_cnt; + slots_per_op = group_start->slots_per_op; + + spin_lock_bh(&chan->lock); + + cookie = ppc440spe_desc_assign_cookie(chan, sw_desc); + + if (unlikely(list_empty(&chan->chain))) { + /* first peer */ + list_splice_init(&sw_desc->group_list, &chan->chain); + chan_first_cdb[chan->device->id] = group_start; + } else { + /* isn't first peer, bind CDBs to chain */ + old_chain_tail = list_entry(chan->chain.prev, + struct ppc440spe_adma_desc_slot, + chain_node); + list_splice_init(&sw_desc->group_list, + &old_chain_tail->chain_node); + /* fix up the hardware chain */ + ppc440spe_desc_set_link(chan, old_chain_tail, group_start); + } + + /* increment the pending count by the number of operations */ + chan->pending += slot_cnt / slots_per_op; + ppc440spe_adma_check_threshold(chan); + spin_unlock_bh(&chan->lock); + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s cookie: %d slot: %d tx %p\n", + chan->device->id, __func__, + sw_desc->async_tx.cookie, sw_desc->idx, sw_desc); + + return cookie; +} + +/** + * ppc440spe_adma_prep_dma_interrupt - prepare CDB for a pseudo DMA operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_interrupt( + struct dma_chan *chan, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + 
ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s\n", ppc440spe_chan->device->id, + __func__); + + spin_lock_bh(&ppc440spe_chan->lock); + slot_cnt = slots_per_op = 1; + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_interrupt(group_start, ppc440spe_chan); + group_start->unmap_len = 0; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_memcpy - prepare CDB for a MEMCPY operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + if (unlikely(!len)) + return NULL; + + BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + + spin_lock_bh(&ppc440spe_chan->lock); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s len: %u int_en %d\n", + ppc440spe_chan->device->id, __func__, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + slot_cnt = slots_per_op = 1; + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_memcpy(group_start, flags); + ppc440spe_adma_set_dest(group_start, dma_dest, 0); + ppc440spe_adma_memcpy_xor_set_src(group_start, dma_src, 0); + ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len); + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_memset - prepare CDB for a MEMSET operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset( + struct dma_chan *chan, dma_addr_t dma_dest, int value, + size_t len, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + if (unlikely(!len)) + return NULL; + + BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + + spin_lock_bh(&ppc440spe_chan->lock); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s cal: %u len: %u int_en %d\n", + ppc440spe_chan->device->id, __func__, value, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + + slot_cnt = slots_per_op = 1; + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_memset(group_start, value, flags); + ppc440spe_adma_set_dest(group_start, dma_dest, 0); + ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len); + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? 
&sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_xor - prepare CDB for a XOR operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor( + struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t *dma_src, u32 src_cnt, size_t len, + unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + int slot_cnt, slots_per_op; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + ADMA_LL_DBG(prep_dma_xor_dbg(ppc440spe_chan->device->id, + dma_dest, dma_src, src_cnt)); + if (unlikely(!len)) + return NULL; + BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", + ppc440spe_chan->device->id, __func__, src_cnt, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + + spin_lock_bh(&ppc440spe_chan->lock); + slot_cnt = ppc440spe_chan_xor_slot_count(len, src_cnt, &slots_per_op); + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + ppc440spe_desc_init_xor(group_start, src_cnt, flags); + ppc440spe_adma_set_dest(group_start, dma_dest, 0); + while (src_cnt--) + ppc440spe_adma_memcpy_xor_set_src(group_start, + dma_src[src_cnt], src_cnt); + ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len); + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static inline void +ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot *desc, + int src_cnt); +static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor); + +/** + * ppc440spe_adma_init_dma2rxor_slot - + */ +static void ppc440spe_adma_init_dma2rxor_slot( + struct ppc440spe_adma_desc_slot *desc, + dma_addr_t *src, int src_cnt) +{ + int i; + + /* initialize CDB */ + for (i = 0; i < src_cnt; i++) { + ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i, + desc->src_cnt, (u32)src[i]); + } +} + +/** + * ppc440spe_dma01_prep_mult - + * for Q operation where destination is also the source + */ +static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_mult( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + struct ppc440spe_adma_desc_slot *sw_desc = NULL; + unsigned long op = 0; + int slot_cnt; + + set_bit(PPC440SPE_DESC_WXOR, &op); + slot_cnt = 2; + + spin_lock_bh(&ppc440spe_chan->lock); + + /* use WXOR, each descriptor occupies one slot */ + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + struct ppc440spe_adma_chan *chan; + struct ppc440spe_adma_desc_slot *iter; + struct dma_cdb *hw_desc; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + set_bits(op, &sw_desc->flags); + sw_desc->src_cnt = src_cnt; + sw_desc->dst_cnt = dst_cnt; + /* First descriptor, zero data in the destination and copy it + * to q page using MULTICAST transfer. 
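+ * A MULTICAST CDB writes a single source to two destinations
+ * in one pass, so dst[0] and the q page receive identical
+ * data here.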
+ */ + iter = list_first_entry(&sw_desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MULTICAST; + + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, dst[0], 0); + ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1); + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[0]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + + /* + * Second descriptor, multiply data from the q page + * and store the result in real destination. + */ + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc440spe_desc_set_src_addr(iter, chan, 0, + DMA_CUED_XOR_HB, dst[1]); + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, dst[0], 0); + + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[0]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc; +} + +/** + * ppc440spe_dma01_prep_sum_product - + * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also + * the source. + */ +static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_sum_product( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + struct ppc440spe_adma_desc_slot *sw_desc = NULL; + unsigned long op = 0; + int slot_cnt; + + set_bit(PPC440SPE_DESC_WXOR, &op); + slot_cnt = 3; + + spin_lock_bh(&ppc440spe_chan->lock); + + /* WXOR, each descriptor occupies one slot */ + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + struct ppc440spe_adma_chan *chan; + struct ppc440spe_adma_desc_slot *iter; + struct dma_cdb *hw_desc; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + set_bits(op, &sw_desc->flags); + sw_desc->src_cnt = src_cnt; + sw_desc->dst_cnt = 1; + /* 1st descriptor, src[1] data to q page and zero destination */ + iter = list_first_entry(&sw_desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MULTICAST; + + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc440spe_desc_set_dest_addr(iter, chan, 0, + ppc440spe_chan->qdest, 1); + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[1]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + + /* 2nd descriptor, multiply src[1] data and store the + * result in destination */ + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + /* set 'next' pointer */ + 
iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + ppc440spe_chan->qdest); + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[1]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + + /* + * 3rd descriptor, multiply src[0] data and xor it + * with destination + */ + iter = list_first_entry(&iter->chain_node, + struct ppc440spe_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC440SPE_DESC_INT, &iter->flags); + else + clear_bit(PPC440SPE_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[0]); + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[0]); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len); + iter->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc; +} + +static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + int slot_cnt; + struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter; + unsigned long op = 0; + unsigned char mult = 1; + + pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n", + __func__, dst_cnt, src_cnt, len); + /* select operations WXOR/RXOR depending on the + * source addresses of operators and the number + * of destinations (RXOR support only Q-parity calculations) + */ + set_bit(PPC440SPE_DESC_WXOR, &op); + if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) { + /* no active RXOR; + * do RXOR if: + * - there are more than 1 source, + * - len is aligned on 512-byte boundary, + * - source addresses fit to one of 4 possible regions. + */ + if (src_cnt > 1 && + !(len & MQ0_CF2H_RXOR_BS_MASK) && + (src[0] + len) == src[1]) { + /* may do RXOR R1 R2 */ + set_bit(PPC440SPE_DESC_RXOR, &op); + if (src_cnt != 2) { + /* may try to enhance region of RXOR */ + if ((src[1] + len) == src[2]) { + /* do RXOR R1 R2 R3 */ + set_bit(PPC440SPE_DESC_RXOR123, + &op); + } else if ((src[1] + len * 2) == src[2]) { + /* do RXOR R1 R2 R4 */ + set_bit(PPC440SPE_DESC_RXOR124, &op); + } else if ((src[1] + len * 3) == src[2]) { + /* do RXOR R1 R2 R5 */ + set_bit(PPC440SPE_DESC_RXOR125, + &op); + } else { + /* do RXOR R1 R2 */ + set_bit(PPC440SPE_DESC_RXOR12, + &op); + } + } else { + /* do RXOR R1 R2 */ + set_bit(PPC440SPE_DESC_RXOR12, &op); + } + } + + if (!test_bit(PPC440SPE_DESC_RXOR, &op)) { + /* can not do this operation with RXOR */ + clear_bit(PPC440SPE_RXOR_RUN, + &ppc440spe_rxor_state); + } else { + /* can do; set block size right now */ + ppc440spe_desc_set_rxor_block_size(len); + } + } + + /* Number of necessary slots depends on operation type selected */ + if (!test_bit(PPC440SPE_DESC_RXOR, &op)) { + /* This is a WXOR only chain. 
Need descriptors for each + * source to GF-XOR them with WXOR, and need descriptors + * for each destination to zero them with WXOR + */ + slot_cnt = src_cnt; + + if (flags & DMA_PREP_ZERO_P) { + slot_cnt++; + set_bit(PPC440SPE_ZERO_P, &op); + } + if (flags & DMA_PREP_ZERO_Q) { + slot_cnt++; + set_bit(PPC440SPE_ZERO_Q, &op); + } + } else { + /* Need 1/2 descriptor for RXOR operation, and + * need (src_cnt - (2 or 3)) for WXOR of sources + * remained (if any) + */ + slot_cnt = dst_cnt; + + if (flags & DMA_PREP_ZERO_P) + set_bit(PPC440SPE_ZERO_P, &op); + if (flags & DMA_PREP_ZERO_Q) + set_bit(PPC440SPE_ZERO_Q, &op); + + if (test_bit(PPC440SPE_DESC_RXOR12, &op)) + slot_cnt += src_cnt - 2; + else + slot_cnt += src_cnt - 3; + + /* Thus we have either RXOR only chain or + * mixed RXOR/WXOR + */ + if (slot_cnt == dst_cnt) + /* RXOR only chain */ + clear_bit(PPC440SPE_DESC_WXOR, &op); + } + + spin_lock_bh(&ppc440spe_chan->lock); + /* for both RXOR/WXOR each descriptor occupies one slot */ + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt, + flags, op); + + /* setup dst/src/mult */ + pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n", + __func__, dst[0], dst[1]); + ppc440spe_adma_pq_set_dest(sw_desc, dst, flags); + while (src_cnt--) { + ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt], + src_cnt); + + /* NOTE: "Multi = 0 is equivalent to = 1" as it + * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf + * doesn't work for RXOR with DMA0/1! Instead, multi=0 + * leads to zeroing source data after RXOR. + * So, for P case set-up mult=1 explicitly. + */ + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + mult = scf[src_cnt]; + ppc440spe_adma_pq_set_src_mult(sw_desc, + mult, src_cnt, dst_cnt - 1); + } + + /* Setup byte count foreach slot just allocated */ + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, + chain_node) { + ppc440spe_desc_set_byte_count(iter, + ppc440spe_chan, len); + iter->unmap_len = len; + } + } + spin_unlock_bh(&ppc440spe_chan->lock); + + return sw_desc; +} + +static struct ppc440spe_adma_desc_slot *ppc440spe_dma2_prep_pq( + struct ppc440spe_adma_chan *ppc440spe_chan, + dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, + const unsigned char *scf, size_t len, unsigned long flags) +{ + int slot_cnt, descs_per_op; + struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter; + unsigned long op = 0; + unsigned char mult = 1; + + BUG_ON(!dst_cnt); + /*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n", + __func__, dst_cnt, src_cnt, len);*/ + + spin_lock_bh(&ppc440spe_chan->lock); + descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len); + if (descs_per_op < 0) { + spin_unlock_bh(&ppc440spe_chan->lock); + return NULL; + } + + /* depending on number of sources we have 1 or 2 RXOR chains */ + slot_cnt = descs_per_op * dst_cnt; + + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); + if (sw_desc) { + op = slot_cnt; + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt, + --op ? 
0 : flags); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = len; + + ppc440spe_init_rxor_cursor(&(iter->rxor_cursor)); + iter->rxor_cursor.len = len; + iter->descs_per_op = descs_per_op; + } + op = 0; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + op++; + if (op % descs_per_op == 0) + ppc440spe_adma_init_dma2rxor_slot(iter, src, + src_cnt); + if (likely(!list_is_last(&iter->chain_node, + &sw_desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = + list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + ppc440spe_xor_set_link(iter, iter->hw_next); + } else { + /* this is the last descriptor. */ + iter->hw_next = NULL; + } + } + + /* fixup head descriptor */ + sw_desc->dst_cnt = dst_cnt; + if (flags & DMA_PREP_ZERO_P) + set_bit(PPC440SPE_ZERO_P, &sw_desc->flags); + if (flags & DMA_PREP_ZERO_Q) + set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags); + + /* setup dst/src/mult */ + ppc440spe_adma_pq_set_dest(sw_desc, dst, flags); + + while (src_cnt--) { + /* handle descriptors (if dst_cnt == 2) inside + * the ppc440spe_adma_pq_set_srcxxx() functions + */ + ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt], + src_cnt); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + mult = scf[src_cnt]; + ppc440spe_adma_pq_set_src_mult(sw_desc, + mult, src_cnt, dst_cnt - 1); + } + } + spin_unlock_bh(&ppc440spe_chan->lock); + ppc440spe_desc_set_rxor_block_size(len); + return sw_desc; +} + +/** + * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc = NULL; + int dst_cnt = 0; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id, + dst, src, src_cnt)); + BUG_ON(!len); + BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(!src_cnt); + + if (src_cnt == 1 && dst[1] == src[0]) { + dma_addr_t dest[2]; + + /* dst[1] is real destination (Q) */ + dest[0] = dst[1]; + /* this is the page to multicast source data to */ + dest[1] = ppc440spe_chan->qdest; + sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan, + dest, 2, src, src_cnt, scf, len, flags); + return sw_desc ? &sw_desc->async_tx : NULL; + } + + if (src_cnt == 2 && dst[1] == src[1]) { + sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan, + &dst[1], src, 2, scf, len, flags); + return sw_desc ? &sw_desc->async_tx : NULL; + } + + if (!(flags & DMA_PREP_PQ_DISABLE_P)) { + BUG_ON(!dst[0]); + dst_cnt++; + flags |= DMA_PREP_ZERO_P; + } + + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) { + BUG_ON(!dst[1]); + dst_cnt++; + flags |= DMA_PREP_ZERO_Q; + } + + BUG_ON(!dst_cnt); + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", + ppc440spe_chan->device->id, __func__, src_cnt, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + + switch (ppc440spe_chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan, + dst, dst_cnt, src, src_cnt, scf, + len, flags); + break; + + case PPC440SPE_XOR_ID: + sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan, + dst, dst_cnt, src, src_cnt, scf, + len, flags); + break; + } + + return sw_desc ? 
&sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for + * a PQ_ZERO_SUM operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pqzero_sum( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *sw_desc, *iter; + dma_addr_t pdest, qdest; + int slot_cnt, slots_per_op, idst, dst_cnt; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + + if (flags & DMA_PREP_PQ_DISABLE_P) + pdest = 0; + else + pdest = pq[0]; + + if (flags & DMA_PREP_PQ_DISABLE_Q) + qdest = 0; + else + qdest = pq[1]; + + ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id, + src, src_cnt, scf)); + + /* Always use WXOR for P/Q calculations (two destinations). + * Need 1 or 2 extra slots to verify results are zero. + */ + idst = dst_cnt = (pdest && qdest) ? 2 : 1; + + /* One additional slot per destination to clone P/Q + * before calculation (we have to preserve destinations). + */ + slot_cnt = src_cnt + dst_cnt * 2; + slots_per_op = 1; + + spin_lock_bh(&ppc440spe_chan->lock); + sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, + slots_per_op); + if (sw_desc) { + ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt); + + /* Setup byte count for each slot just allocated */ + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = len; + } + + if (pdest) { + struct dma_cdb *hw_desc; + struct ppc440spe_adma_chan *chan; + + iter = sw_desc->group_head; + chan = to_ppc440spe_adma_chan(iter->async_tx.chan); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter->src_cnt = 0; + iter->dst_cnt = 0; + ppc440spe_desc_set_dest_addr(iter, chan, 0, + ppc440spe_chan->pdest, 0); + ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = 0; + /* override pdest to preserve original P */ + pdest = ppc440spe_chan->pdest; + } + if (qdest) { + struct dma_cdb *hw_desc; + struct ppc440spe_adma_chan *chan; + + iter = list_first_entry(&sw_desc->group_list, + struct ppc440spe_adma_desc_slot, + chain_node); + chan = to_ppc440spe_adma_chan(iter->async_tx.chan); + + if (pdest) { + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter->src_cnt = 0; + iter->dst_cnt = 0; + ppc440spe_desc_set_dest_addr(iter, chan, 0, + ppc440spe_chan->qdest, 0); + ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest); + ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, + len); + iter->unmap_len = 0; + /* override qdest to preserve original Q */ + qdest = ppc440spe_chan->qdest; + } + + /* Setup destinations for P/Q ops */ + ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest); + + /* Setup zero QWORDs into DCHECK CDBs */ + idst = dst_cnt; + list_for_each_entry_reverse(iter, &sw_desc->group_list, + chain_node) { + /* + * The last CDB 
corresponds to Q-parity check, + * the one before last CDB corresponds + * P-parity check + */ + if (idst == DMA_DEST_MAX_NUM) { + if (idst == dst_cnt) { + set_bit(PPC440SPE_DESC_QCHECK, + &iter->flags); + } else { + set_bit(PPC440SPE_DESC_PCHECK, + &iter->flags); + } + } else { + if (qdest) { + set_bit(PPC440SPE_DESC_QCHECK, + &iter->flags); + } else { + set_bit(PPC440SPE_DESC_PCHECK, + &iter->flags); + } + } + iter->xor_check_result = pqres; + + /* + * set it to zero, if check fail then result will + * be updated + */ + *iter->xor_check_result = 0; + ppc440spe_desc_set_dcheck(iter, ppc440spe_chan, + ppc440spe_qword); + + if (!(--dst_cnt)) + break; + } + + /* Setup sources and mults for P/Q ops */ + list_for_each_entry_continue_reverse(iter, &sw_desc->group_list, + chain_node) { + struct ppc440spe_adma_chan *chan; + u32 mult_dst; + + chan = to_ppc440spe_adma_chan(iter->async_tx.chan); + ppc440spe_desc_set_src_addr(iter, chan, 0, + DMA_CUED_XOR_HB, + src[src_cnt - 1]); + if (qdest) { + mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 : + DMA_CDB_SG_DST1; + ppc440spe_desc_set_src_mult(iter, chan, + DMA_CUED_MULT1_OFF, + mult_dst, + scf[src_cnt - 1]); + } + if (!(--src_cnt)) + break; + } + } + spin_unlock_bh(&ppc440spe_chan->lock); + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for + * XOR ZERO_SUM operation + */ +static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor_zero_sum( + struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, + size_t len, enum sum_check_flags *result, unsigned long flags) +{ + struct dma_async_tx_descriptor *tx; + dma_addr_t pq[2]; + + /* validate P, disable Q */ + pq[0] = src[0]; + pq[1] = 0; + flags |= DMA_PREP_PQ_DISABLE_Q; + + tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1], + src_cnt - 1, 0, len, + result, flags); + return tx; +} + +/** + * ppc440spe_adma_set_dest - set destination address into descriptor + */ +static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t addr, int index) +{ + struct ppc440spe_adma_chan *chan; + + BUG_ON(index >= sw_desc->dst_cnt); + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* to do: support transfers lengths > + * PPC440SPE_ADMA_DMA/XOR_MAX_BYTE_COUNT + */ + ppc440spe_desc_set_dest_addr(sw_desc->group_head, + chan, 0, addr, index); + break; + case PPC440SPE_XOR_ID: + sw_desc = ppc440spe_get_group_entry(sw_desc, index); + ppc440spe_desc_set_dest_addr(sw_desc, + chan, 0, addr, index); + break; + } +} + +static void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter, + struct ppc440spe_adma_chan *chan, dma_addr_t addr) +{ + /* To clear destinations update the descriptor + * (P or Q depending on index) as follows: + * addr is destination (0 corresponds to SG2): + */ + ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0); + + /* ... 
and the addr is source: */ + ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr); + + /* addr is always SG2 then the mult is always DST1 */ + ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, 1); +} + +/** + * ppc440spe_adma_pq_set_dest - set destination address into descriptor + * for the PQXOR operation + */ +static void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t *addrs, unsigned long flags) +{ + struct ppc440spe_adma_desc_slot *iter; + struct ppc440spe_adma_chan *chan; + dma_addr_t paddr, qaddr; + dma_addr_t addr = 0, ppath, qpath; + int index = 0, i; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + if (flags & DMA_PREP_PQ_DISABLE_P) + paddr = 0; + else + paddr = addrs[0]; + + if (flags & DMA_PREP_PQ_DISABLE_Q) + qaddr = 0; + else + qaddr = addrs[1]; + + if (!paddr || !qaddr) + addr = paddr ? paddr : qaddr; + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* walk through the WXOR source list and set P/Q-destinations + * for each slot: + */ + if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { + /* This is WXOR-only chain; may have 1/2 zero descs */ + if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) + index++; + if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) + index++; + + iter = ppc440spe_get_group_entry(sw_desc, index); + if (addr) { + /* one destination */ + list_for_each_entry_from(iter, + &sw_desc->group_list, chain_node) + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, addr, 0); + } else { + /* two destinations */ + list_for_each_entry_from(iter, + &sw_desc->group_list, chain_node) { + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, paddr, 0); + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, qaddr, 1); + } + } + + if (index) { + /* To clear destinations update the descriptor + * (1st,2nd, or both depending on flags) + */ + index = 0; + if (test_bit(PPC440SPE_ZERO_P, + &sw_desc->flags)) { + iter = ppc440spe_get_group_entry( + sw_desc, index++); + ppc440spe_adma_pq_zero_op(iter, chan, + paddr); + } + + if (test_bit(PPC440SPE_ZERO_Q, + &sw_desc->flags)) { + iter = ppc440spe_get_group_entry( + sw_desc, index++); + ppc440spe_adma_pq_zero_op(iter, chan, + qaddr); + } + + return; + } + } else { + /* This is RXOR-only or RXOR/WXOR mixed chain */ + + /* If we want to include destination into calculations, + * then make dest addresses cued with mult=1 (XOR). + */ + ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + + /* Setup destination(s) in RXOR slot(s) */ + iter = ppc440spe_get_group_entry(sw_desc, index++); + ppc440spe_desc_set_dest_addr(iter, chan, + paddr ? ppath : qpath, + paddr ? 
paddr : qaddr, 0); + if (!addr) { + /* two destinations */ + iter = ppc440spe_get_group_entry(sw_desc, + index++); + ppc440spe_desc_set_dest_addr(iter, chan, + qpath, qaddr, 0); + } + + if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) { + /* Setup destination(s) in remaining WXOR + * slots + */ + iter = ppc440spe_get_group_entry(sw_desc, + index); + if (addr) { + /* one destination */ + list_for_each_entry_from(iter, + &sw_desc->group_list, + chain_node) + ppc440spe_desc_set_dest_addr( + iter, chan, + DMA_CUED_XOR_BASE, + addr, 0); + + } else { + /* two destinations */ + list_for_each_entry_from(iter, + &sw_desc->group_list, + chain_node) { + ppc440spe_desc_set_dest_addr( + iter, chan, + DMA_CUED_XOR_BASE, + paddr, 0); + ppc440spe_desc_set_dest_addr( + iter, chan, + DMA_CUED_XOR_BASE, + qaddr, 1); + } + } + } + + } + break; + + case PPC440SPE_XOR_ID: + /* DMA2 descriptors have only 1 destination, so there are + * two chains - one for each dest. + * If we want to include destination into calculations, + * then make dest addresses cued with mult=1 (XOR). + */ + ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + + qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ? + DMA_CUED_XOR_HB : + DMA_CUED_XOR_BASE | + (1 << DMA_CUED_MULT1_OFF); + + iter = ppc440spe_get_group_entry(sw_desc, 0); + for (i = 0; i < sw_desc->descs_per_op; i++) { + ppc440spe_desc_set_dest_addr(iter, chan, + paddr ? ppath : qpath, + paddr ? paddr : qaddr, 0); + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + if (!addr) { + /* Two destinations; setup Q here */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->descs_per_op); + for (i = 0; i < sw_desc->descs_per_op; i++) { + ppc440spe_desc_set_dest_addr(iter, + chan, qpath, qaddr, 0); + iter = list_entry(iter->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + } + + break; + } +} + +/** + * ppc440spe_adma_pq_zero_sum_set_dest - set destination address into descriptor + * for the PQ_ZERO_SUM operation + */ +static void ppc440spe_adma_pqzero_sum_set_dest( + struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t paddr, dma_addr_t qaddr) +{ + struct ppc440spe_adma_desc_slot *iter, *end; + struct ppc440spe_adma_chan *chan; + dma_addr_t addr = 0; + int idx; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + /* walk through the WXOR source list and set P/Q-destinations + * for each slot + */ + idx = (paddr && qaddr) ? 2 : 1; + /* set end */ + list_for_each_entry_reverse(end, &sw_desc->group_list, + chain_node) { + if (!(--idx)) + break; + } + /* set start */ + idx = (paddr && qaddr) ? 2 : 1; + iter = ppc440spe_get_group_entry(sw_desc, idx); + + if (paddr && qaddr) { + /* two destinations */ + list_for_each_entry_from(iter, &sw_desc->group_list, + chain_node) { + if (unlikely(iter == end)) + break; + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, paddr, 0); + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, qaddr, 1); + } + } else { + /* one destination */ + addr = paddr ? paddr : qaddr; + list_for_each_entry_from(iter, &sw_desc->group_list, + chain_node) { + if (unlikely(iter == end)) + break; + ppc440spe_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, addr, 0); + } + } + + /* The remaining descriptors are DATACHECK. These have no need in + * destination. Actually, these destinations are used there + * as sources for check operation. So, set addr as source. 
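+ * When both P and Q are verified, addr is still zero at this
+ * point, so the cloned P feeds the first DCHECK CDB and the
+ * cloned Q the one after it.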
+ */ + ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr); + + if (!addr) { + end = list_entry(end->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr); + } +} + +/** + * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor + */ +static inline void ppc440spe_desc_set_xor_src_cnt( + struct ppc440spe_adma_desc_slot *desc, + int src_cnt) +{ + struct xor_cb *hw_desc = desc->hw_desc; + + hw_desc->cbc &= ~XOR_CDCR_OAC_MSK; + hw_desc->cbc |= src_cnt; +} + +/** + * ppc440spe_adma_pq_set_src - set source address into descriptor + */ +static void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t addr, int index) +{ + struct ppc440spe_adma_chan *chan; + dma_addr_t haddr = 0; + struct ppc440spe_adma_desc_slot *iter = NULL; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + /* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain + */ + if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { + /* RXOR-only or RXOR/WXOR operation */ + int iskip = test_bit(PPC440SPE_DESC_RXOR12, + &sw_desc->flags) ? 2 : 3; + + if (index == 0) { + /* 1st slot (RXOR) */ + /* setup sources region (R1-2-3, R1-2-4, + * or R1-2-5) + */ + if (test_bit(PPC440SPE_DESC_RXOR12, + &sw_desc->flags)) + haddr = DMA_RXOR12 << + DMA_CUED_REGION_OFF; + else if (test_bit(PPC440SPE_DESC_RXOR123, + &sw_desc->flags)) + haddr = DMA_RXOR123 << + DMA_CUED_REGION_OFF; + else if (test_bit(PPC440SPE_DESC_RXOR124, + &sw_desc->flags)) + haddr = DMA_RXOR124 << + DMA_CUED_REGION_OFF; + else if (test_bit(PPC440SPE_DESC_RXOR125, + &sw_desc->flags)) + haddr = DMA_RXOR125 << + DMA_CUED_REGION_OFF; + else + BUG(); + haddr |= DMA_CUED_XOR_BASE; + iter = ppc440spe_get_group_entry(sw_desc, 0); + } else if (index < iskip) { + /* 1st slot (RXOR) + * shall actually set source address only once + * instead of first + */ + iter = NULL; + } else { + /* 2nd/3d and next slots (WXOR); + * skip first slot with RXOR + */ + haddr = DMA_CUED_XOR_HB; + iter = ppc440spe_get_group_entry(sw_desc, + index - iskip + sw_desc->dst_cnt); + } + } else { + int znum = 0; + + /* WXOR-only operation; skip first slots with + * zeroing destinations + */ + if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) + znum++; + if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) + znum++; + + haddr = DMA_CUED_XOR_HB; + iter = ppc440spe_get_group_entry(sw_desc, + index + znum); + } + + if (likely(iter)) { + ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr); + + if (!index && + test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) && + sw_desc->dst_cnt == 2) { + /* if we have two destinations for RXOR, then + * setup source in the second descr too + */ + iter = ppc440spe_get_group_entry(sw_desc, 1); + ppc440spe_desc_set_src_addr(iter, chan, 0, + haddr, addr); + } + } + break; + + case PPC440SPE_XOR_ID: + /* DMA2 may do Biskup */ + iter = sw_desc->group_head; + if (iter->dst_cnt == 2) { + /* both P & Q calculations required; set P src here */ + ppc440spe_adma_dma2rxor_set_src(iter, index, addr); + + /* this is for Q */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->descs_per_op); + } + ppc440spe_adma_dma2rxor_set_src(iter, index, addr); + break; + } +} + +/** + * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor + */ +static void ppc440spe_adma_memcpy_xor_set_src( + struct ppc440spe_adma_desc_slot *sw_desc, + dma_addr_t addr, int index) +{ + struct ppc440spe_adma_chan *chan; 
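+ /* All sources of a MEMCPY/XOR group live in the head slot,
+ * so the address is written through group_head below.
+ */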
+ + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + sw_desc = sw_desc->group_head; + + if (likely(sw_desc)) + ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr); +} + +/** + * ppc440spe_adma_dma2rxor_inc_addr - + */ +static void ppc440spe_adma_dma2rxor_inc_addr( + struct ppc440spe_adma_desc_slot *desc, + struct ppc440spe_rxor *cursor, int index, int src_cnt) +{ + cursor->addr_count++; + if (index == src_cnt - 1) { + ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count); + } else if (cursor->addr_count == XOR_MAX_OPS) { + ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count); + cursor->addr_count = 0; + cursor->desc_count++; + } +} + +/** + * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB + */ +static int ppc440spe_adma_dma2rxor_prep_src( + struct ppc440spe_adma_desc_slot *hdesc, + struct ppc440spe_rxor *cursor, int index, + int src_cnt, u32 addr) +{ + int rval = 0; + u32 sign; + struct ppc440spe_adma_desc_slot *desc = hdesc; + int i; + + for (i = 0; i < cursor->desc_count; i++) { + desc = list_entry(hdesc->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + } + + switch (cursor->state) { + case 0: + if (addr == cursor->addrl + cursor->len) { + /* direct RXOR */ + cursor->state = 1; + cursor->xor_count++; + if (index == src_cnt-1) { + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else if (cursor->addrl == addr + cursor->len) { + /* reverse RXOR */ + cursor->state = 1; + cursor->xor_count++; + set_bit(cursor->addr_count, &desc->reverse_flags[0]); + if (index == src_cnt-1) { + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else { + printk(KERN_ERR "Cannot build " + "DMA2 RXOR command block.\n"); + BUG(); + } + break; + case 1: + sign = test_bit(cursor->addr_count, + desc->reverse_flags) + ? 
-1 : 1; + if (index == src_cnt-2 || (sign == -1 + && addr != cursor->addrl - 2*cursor->len)) { + cursor->state = 0; + cursor->xor_count = 1; + cursor->addrl = addr; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } else if (addr == cursor->addrl + 2*sign*cursor->len) { + cursor->state = 2; + cursor->xor_count = 0; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR123 << DMA_CUED_REGION_OFF); + if (index == src_cnt-1) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else if (addr == cursor->addrl + 3*cursor->len) { + cursor->state = 2; + cursor->xor_count = 0; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR124 << DMA_CUED_REGION_OFF); + if (index == src_cnt-1) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else if (addr == cursor->addrl + 4*cursor->len) { + cursor->state = 2; + cursor->xor_count = 0; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR125 << DMA_CUED_REGION_OFF); + if (index == src_cnt-1) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + } else { + cursor->state = 0; + cursor->xor_count = 1; + cursor->addrl = addr; + ppc440spe_rxor_set_region(desc, + cursor->addr_count, + DMA_RXOR12 << DMA_CUED_REGION_OFF); + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + break; + case 2: + cursor->state = 0; + cursor->addrl = addr; + cursor->xor_count++; + if (index) { + ppc440spe_adma_dma2rxor_inc_addr( + desc, cursor, index, src_cnt); + } + break; + } + + return rval; +} + +/** + * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that + * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call + */ +static void ppc440spe_adma_dma2rxor_set_src( + struct ppc440spe_adma_desc_slot *desc, + int index, dma_addr_t addr) +{ + struct xor_cb *xcb = desc->hw_desc; + int k = 0, op = 0, lop = 0; + + /* get the RXOR operand which corresponds to index addr */ + while (op <= index) { + lop = op; + if (k == XOR_MAX_OPS) { + k = 0; + desc = list_entry(desc->chain_node.next, + struct ppc440spe_adma_desc_slot, chain_node); + xcb = desc->hw_desc; + + } + if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) == + (DMA_RXOR12 << DMA_CUED_REGION_OFF)) + op += 2; + else + op += 3; + } + + BUG_ON(k < 1); + + if (test_bit(k-1, desc->reverse_flags)) { + /* reverse operand order; put last op in RXOR group */ + if (index == op - 1) + ppc440spe_rxor_set_src(desc, k - 1, addr); + } else { + /* direct operand order; put first op in RXOR group */ + if (index == lop) + ppc440spe_rxor_set_src(desc, k - 1, addr); + } +} + +/** + * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that + * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call + */ +static void ppc440spe_adma_dma2rxor_set_mult( + struct ppc440spe_adma_desc_slot *desc, + int index, u8 mult) +{ + struct xor_cb *xcb = desc->hw_desc; + int k = 0, op = 0, lop = 0; + + /* get the RXOR operand which corresponds to index mult */ + while (op <= index) { + lop = op; + if (k == XOR_MAX_OPS) { + k = 0; + desc = list_entry(desc->chain_node.next, + struct ppc440spe_adma_desc_slot, + chain_node); + xcb = desc->hw_desc; + + } + if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) == + (DMA_RXOR12 << DMA_CUED_REGION_OFF)) + op += 2; + else + op += 3; + } + + BUG_ON(k < 1); + if (test_bit(k-1, desc->reverse_flags)) { + 
/* reverse order */ + ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult); + } else { + /* direct order */ + ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult); + } +} + +/** + * ppc440spe_init_rxor_cursor - + */ +static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor) +{ + memset(cursor, 0, sizeof(struct ppc440spe_rxor)); + cursor->state = 2; +} + +/** + * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into + * descriptor for the PQXOR operation + */ +static void ppc440spe_adma_pq_set_src_mult( + struct ppc440spe_adma_desc_slot *sw_desc, + unsigned char mult, int index, int dst_pos) +{ + struct ppc440spe_adma_chan *chan; + u32 mult_idx, mult_dst; + struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL; + + chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan); + + switch (chan->device->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { + int region = test_bit(PPC440SPE_DESC_RXOR12, + &sw_desc->flags) ? 2 : 3; + + if (index < region) { + /* RXOR multipliers */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->dst_cnt - 1); + if (sw_desc->dst_cnt == 2) + iter1 = ppc440spe_get_group_entry( + sw_desc, 0); + + mult_idx = DMA_CUED_MULT1_OFF + (index << 3); + mult_dst = DMA_CDB_SG_SRC; + } else { + /* WXOR multiplier */ + iter = ppc440spe_get_group_entry(sw_desc, + index - region + + sw_desc->dst_cnt); + mult_idx = DMA_CUED_MULT1_OFF; + mult_dst = dst_pos ? DMA_CDB_SG_DST2 : + DMA_CDB_SG_DST1; + } + } else { + int znum = 0; + + /* WXOR-only; + * skip first slots with destinations (if ZERO_DST has + * place) + */ + if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags)) + znum++; + if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags)) + znum++; + + iter = ppc440spe_get_group_entry(sw_desc, index + znum); + mult_idx = DMA_CUED_MULT1_OFF; + mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1; + } + + if (likely(iter)) { + ppc440spe_desc_set_src_mult(iter, chan, + mult_idx, mult_dst, mult); + + if (unlikely(iter1)) { + /* if we have two destinations for RXOR, then + * we've just set Q mult. Set-up P now. 
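+ * P is the plain parity, so its multiplier is fixed at 1.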
+ */ + ppc440spe_desc_set_src_mult(iter1, chan, + mult_idx, mult_dst, 1); + } + + } + break; + + case PPC440SPE_XOR_ID: + iter = sw_desc->group_head; + if (sw_desc->dst_cnt == 2) { + /* both P & Q calculations required; set P mult here */ + ppc440spe_adma_dma2rxor_set_mult(iter, index, 1); + + /* and then set Q mult */ + iter = ppc440spe_get_group_entry(sw_desc, + sw_desc->descs_per_op); + } + ppc440spe_adma_dma2rxor_set_mult(iter, index, mult); + break; + } +} + +/** + * ppc440spe_adma_free_chan_resources - free the resources allocated + */ +static void ppc440spe_adma_free_chan_resources(struct dma_chan *chan) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + struct ppc440spe_adma_desc_slot *iter, *_iter; + int in_use_descs = 0; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + ppc440spe_adma_slot_cleanup(ppc440spe_chan); + + spin_lock_bh(&ppc440spe_chan->lock); + list_for_each_entry_safe(iter, _iter, &ppc440spe_chan->chain, + chain_node) { + in_use_descs++; + list_del(&iter->chain_node); + } + list_for_each_entry_safe_reverse(iter, _iter, + &ppc440spe_chan->all_slots, slot_node) { + list_del(&iter->slot_node); + kfree(iter); + ppc440spe_chan->slots_allocated--; + } + ppc440spe_chan->last_used = NULL; + + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d %s slots_allocated %d\n", + ppc440spe_chan->device->id, + __func__, ppc440spe_chan->slots_allocated); + spin_unlock_bh(&ppc440spe_chan->lock); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n", + in_use_descs - 1); +} + +/** + * ppc440spe_adma_is_complete - poll the status of an ADMA transaction + * @chan: ADMA channel handle + * @cookie: ADMA transaction identifier + */ +static enum dma_status ppc440spe_adma_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + dma_cookie_t last_used; + dma_cookie_t last_complete; + enum dma_status ret; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + last_used = chan->cookie; + last_complete = ppc440spe_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret == DMA_SUCCESS) + return ret; + + ppc440spe_adma_slot_cleanup(ppc440spe_chan); + + last_used = chan->cookie; + last_complete = ppc440spe_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +/** + * ppc440spe_adma_eot_handler - end of transfer interrupt handler + */ +static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data) +{ + struct ppc440spe_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s\n", chan->device->id, __func__); + + tasklet_schedule(&chan->irq_tasklet); + ppc440spe_adma_device_clear_eot_status(chan); + + return IRQ_HANDLED; +} + +/** + * ppc440spe_adma_err_handler - DMA error interrupt handler; + * do the same things as a eot handler + */ +static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data) +{ + struct ppc440spe_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s\n", chan->device->id, __func__); + + tasklet_schedule(&chan->irq_tasklet); + ppc440spe_adma_device_clear_eot_status(chan); + + return IRQ_HANDLED; +} + +/** + * ppc440spe_test_callback - called when test operation has been done + */ +static void 
ppc440spe_test_callback(void *unused) +{ + complete(&ppc440spe_r6_test_comp); +} + +/** + * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w + */ +static void ppc440spe_adma_issue_pending(struct dma_chan *chan) +{ + struct ppc440spe_adma_chan *ppc440spe_chan; + + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + dev_dbg(ppc440spe_chan->device->common.dev, + "ppc440spe adma%d: %s %d \n", ppc440spe_chan->device->id, + __func__, ppc440spe_chan->pending); + + if (ppc440spe_chan->pending) { + ppc440spe_chan->pending = 0; + ppc440spe_chan_append(ppc440spe_chan); + } +} + +/** + * ppc440spe_chan_start_null_xor - initiate the first XOR operation (DMA engines + * use FIFOs (as opposite to chains used in XOR) so this is a XOR + * specific operation) + */ +static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan) +{ + struct ppc440spe_adma_desc_slot *sw_desc, *group_start; + dma_cookie_t cookie; + int slot_cnt, slots_per_op; + + dev_dbg(chan->device->common.dev, + "ppc440spe adma%d: %s\n", chan->device->id, __func__); + + spin_lock_bh(&chan->lock); + slot_cnt = ppc440spe_chan_xor_slot_count(0, 2, &slots_per_op); + sw_desc = ppc440spe_adma_alloc_slots(chan, slot_cnt, slots_per_op); + if (sw_desc) { + group_start = sw_desc->group_head; + list_splice_init(&sw_desc->group_list, &chan->chain); + async_tx_ack(&sw_desc->async_tx); + ppc440spe_desc_init_null_xor(group_start); + + cookie = chan->common.cookie; + cookie++; + if (cookie <= 1) + cookie = 2; + + /* initialize the completed cookie to be less than + * the most recently used cookie + */ + chan->completed_cookie = cookie - 1; + chan->common.cookie = sw_desc->async_tx.cookie = cookie; + + /* channel should not be busy */ + BUG_ON(ppc440spe_chan_is_busy(chan)); + + /* set the descriptor address */ + ppc440spe_chan_set_first_xor_descriptor(chan, sw_desc); + + /* run the descriptor */ + ppc440spe_chan_run(chan); + } else + printk(KERN_ERR "ppc440spe adma%d" + " failed to allocate null descriptor\n", + chan->device->id); + spin_unlock_bh(&chan->lock); +} + +/** + * ppc440spe_test_raid6 - test are RAID-6 capabilities enabled successfully. + * For this we just perform one WXOR operation with the same source + * and destination addresses, the GF-multiplier is 1; so if RAID-6 + * capabilities are enabled then we'll get src/dst filled with zero. 
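+ * (dst_new = dst_old XOR (1 * src) with src == dst, and any
+ * value XORed with itself is zero.)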
+ */ +static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan) +{ + struct ppc440spe_adma_desc_slot *sw_desc, *iter; + struct page *pg; + char *a; + dma_addr_t dma_addr, addrs[2]; + unsigned long op = 0; + int rval = 0; + + set_bit(PPC440SPE_DESC_WXOR, &op); + + pg = alloc_page(GFP_KERNEL); + if (!pg) + return -ENOMEM; + + spin_lock_bh(&chan->lock); + sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1); + if (sw_desc) { + /* 1 src, 1 dsr, int_ena, WXOR */ + ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op); + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE); + iter->unmap_len = PAGE_SIZE; + } + } else { + rval = -EFAULT; + spin_unlock_bh(&chan->lock); + goto exit; + } + spin_unlock_bh(&chan->lock); + + /* Fill the test page with ones */ + memset(page_address(pg), 0xFF, PAGE_SIZE); + dma_addr = dma_map_page(chan->device->dev, pg, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Setup addresses */ + ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0); + ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0); + addrs[0] = dma_addr; + addrs[1] = 0; + ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q); + + async_tx_ack(&sw_desc->async_tx); + sw_desc->async_tx.callback = ppc440spe_test_callback; + sw_desc->async_tx.callback_param = NULL; + + init_completion(&ppc440spe_r6_test_comp); + + ppc440spe_adma_tx_submit(&sw_desc->async_tx); + ppc440spe_adma_issue_pending(&chan->common); + + wait_for_completion(&ppc440spe_r6_test_comp); + + /* Now check if the test page is zeroed */ + a = page_address(pg); + if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) { + /* page is zero - RAID-6 enabled */ + rval = 0; + } else { + /* RAID-6 was not enabled */ + rval = -EINVAL; + } +exit: + __free_page(pg); + return rval; +} + +static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev) +{ + switch (adev->id) { + case PPC440SPE_DMA0_ID: + case PPC440SPE_DMA1_ID: + dma_cap_set(DMA_MEMCPY, adev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask); + dma_cap_set(DMA_MEMSET, adev->common.cap_mask); + dma_cap_set(DMA_PQ, adev->common.cap_mask); + dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask); + dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask); + break; + case PPC440SPE_XOR_ID: + dma_cap_set(DMA_XOR, adev->common.cap_mask); + dma_cap_set(DMA_PQ, adev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask); + adev->common.cap_mask = adev->common.cap_mask; + break; + } + + /* Set base routines */ + adev->common.device_alloc_chan_resources = + ppc440spe_adma_alloc_chan_resources; + adev->common.device_free_chan_resources = + ppc440spe_adma_free_chan_resources; + adev->common.device_is_tx_complete = ppc440spe_adma_is_complete; + adev->common.device_issue_pending = ppc440spe_adma_issue_pending; + + /* Set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, adev->common.cap_mask)) { + adev->common.device_prep_dma_memcpy = + ppc440spe_adma_prep_dma_memcpy; + } + if (dma_has_cap(DMA_MEMSET, adev->common.cap_mask)) { + adev->common.device_prep_dma_memset = + ppc440spe_adma_prep_dma_memset; + } + if (dma_has_cap(DMA_XOR, adev->common.cap_mask)) { + adev->common.max_xor = XOR_MAX_OPS; + adev->common.device_prep_dma_xor = + ppc440spe_adma_prep_dma_xor; + } + if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) { + switch (adev->id) { + case PPC440SPE_DMA0_ID: + dma_set_maxpq(&adev->common, + DMA0_FIFO_SIZE / sizeof(struct dma_cdb), 0); + break; + case PPC440SPE_DMA1_ID: + 
dma_set_maxpq(&adev->common, + DMA1_FIFO_SIZE / sizeof(struct dma_cdb), 0); + break; + case PPC440SPE_XOR_ID: + adev->common.max_pq = XOR_MAX_OPS * 3; + break; + } + adev->common.device_prep_dma_pq = + ppc440spe_adma_prep_dma_pq; + } + if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) { + switch (adev->id) { + case PPC440SPE_DMA0_ID: + adev->common.max_pq = DMA0_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + case PPC440SPE_DMA1_ID: + adev->common.max_pq = DMA1_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + } + adev->common.device_prep_dma_pq_val = + ppc440spe_adma_prep_dma_pqzero_sum; + } + if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) { + switch (adev->id) { + case PPC440SPE_DMA0_ID: + adev->common.max_xor = DMA0_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + case PPC440SPE_DMA1_ID: + adev->common.max_xor = DMA1_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + } + adev->common.device_prep_dma_xor_val = + ppc440spe_adma_prep_dma_xor_zero_sum; + } + if (dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask)) { + adev->common.device_prep_dma_interrupt = + ppc440spe_adma_prep_dma_interrupt; + } + pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: " + "( %s%s%s%s%s%s%s)\n", + dev_name(adev->dev), + dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "", + dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "", + dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "", + dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "", + dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "", + dma_has_cap(DMA_MEMSET, adev->common.cap_mask) ? "memset " : "", + dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : ""); +} + +static int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev, + struct ppc440spe_adma_chan *chan, + int *initcode) +{ + struct device_node *np; + int ret; + + np = container_of(adev->dev, struct of_device, dev)->node; + if (adev->id != PPC440SPE_XOR_ID) { + adev->err_irq = irq_of_parse_and_map(np, 1); + if (adev->err_irq == NO_IRQ) { + dev_warn(adev->dev, "no err irq resource?\n"); + *initcode = PPC_ADMA_INIT_IRQ2; + adev->err_irq = -ENXIO; + } else + atomic_inc(&ppc440spe_adma_err_irq_ref); + } else { + adev->err_irq = -ENXIO; + } + + adev->irq = irq_of_parse_and_map(np, 0); + if (adev->irq == NO_IRQ) { + dev_err(adev->dev, "no irq resource\n"); + *initcode = PPC_ADMA_INIT_IRQ1; + ret = -ENXIO; + goto err_irq_map; + } + dev_dbg(adev->dev, "irq %d, err irq %d\n", + adev->irq, adev->err_irq); + + ret = request_irq(adev->irq, ppc440spe_adma_eot_handler, + 0, dev_driver_string(adev->dev), chan); + if (ret) { + dev_err(adev->dev, "can't request irq %d\n", + adev->irq); + *initcode = PPC_ADMA_INIT_IRQ1; + ret = -EIO; + goto err_req1; + } + + /* only DMA engines have a separate error IRQ + * so it's Ok if err_irq < 0 in XOR engine case. 
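+ * (That shared error line is also why IRQF_SHARED is passed to
+ * request_irq() below.)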
+ */ + if (adev->err_irq > 0) { + /* both DMA engines share common error IRQ */ + ret = request_irq(adev->err_irq, + ppc440spe_adma_err_handler, + IRQF_SHARED, + dev_driver_string(adev->dev), + chan); + if (ret) { + dev_err(adev->dev, "can't request irq %d\n", + adev->err_irq); + *initcode = PPC_ADMA_INIT_IRQ2; + ret = -EIO; + goto err_req2; + } + } + + if (adev->id == PPC440SPE_XOR_ID) { + /* enable XOR engine interrupts */ + iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT | + XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT, + &adev->xor_reg->ier); + } else { + u32 mask, enable; + + np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe"); + if (!np) { + pr_err("%s: can't find I2O device tree node\n", + __func__); + ret = -ENODEV; + goto err_req2; + } + adev->i2o_reg = of_iomap(np, 0); + if (!adev->i2o_reg) { + pr_err("%s: failed to map I2O registers\n", __func__); + of_node_put(np); + ret = -EINVAL; + goto err_req2; + } + of_node_put(np); + /* Unmask 'CS FIFO Attention' interrupts and + * enable generating interrupts on errors + */ + enable = (adev->id == PPC440SPE_DMA0_ID) ? + ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) : + ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM); + mask = ioread32(&adev->i2o_reg->iopim) & enable; + iowrite32(mask, &adev->i2o_reg->iopim); + } + return 0; + +err_req2: + free_irq(adev->irq, chan); +err_req1: + irq_dispose_mapping(adev->irq); +err_irq_map: + if (adev->err_irq > 0) { + if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) + irq_dispose_mapping(adev->err_irq); + } + return ret; +} + +static void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev, + struct ppc440spe_adma_chan *chan) +{ + u32 mask, disable; + + if (adev->id == PPC440SPE_XOR_ID) { + /* disable XOR engine interrupts */ + mask = ioread32be(&adev->xor_reg->ier); + mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT | + XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT); + iowrite32be(mask, &adev->xor_reg->ier); + } else { + /* disable DMAx engine interrupts */ + disable = (adev->id == PPC440SPE_DMA0_ID) ? + (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) : + (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM); + mask = ioread32(&adev->i2o_reg->iopim) | disable; + iowrite32(mask, &adev->i2o_reg->iopim); + } + free_irq(adev->irq, chan); + irq_dispose_mapping(adev->irq); + if (adev->err_irq > 0) { + free_irq(adev->err_irq, chan); + if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) { + irq_dispose_mapping(adev->err_irq); + iounmap(adev->i2o_reg); + } + } +} + +/** + * ppc440spe_adma_probe - probe the asynch device + */ +static int __devinit ppc440spe_adma_probe(struct of_device *ofdev, + const struct of_device_id *match) +{ + struct device_node *np = ofdev->node; + struct resource res; + struct ppc440spe_adma_device *adev; + struct ppc440spe_adma_chan *chan; + struct ppc_dma_chan_ref *ref, *_ref; + int ret = 0, initcode = PPC_ADMA_INIT_OK; + const u32 *idx; + int len; + void *regs; + u32 id, pool_size; + + if (of_device_is_compatible(np, "amcc,xor-accelerator")) { + id = PPC440SPE_XOR_ID; + /* As far as the XOR engine is concerned, it does not + * use FIFOs but uses linked list. So there is no dependency + * between pool size to allocate and the engine configuration. 
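+ * The two pages allocated below are simply a convenient default.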
+ */ + pool_size = PAGE_SIZE << 1; + } else { + /* it is DMA0 or DMA1 */ + idx = of_get_property(np, "cell-index", &len); + if (!idx || (len != sizeof(u32))) { + dev_err(&ofdev->dev, "Device node %s has missing " + "or invalid cell-index property\n", + np->full_name); + return -EINVAL; + } + id = *idx; + /* DMA0,1 engines use FIFO to maintain CDBs, so we + * should allocate the pool accordingly to size of this + * FIFO. Thus, the pool size depends on the FIFO depth: + * how much CDBs pointers the FIFO may contain then so + * much CDBs we should provide in the pool. + * That is + * CDB size = 32B; + * CDBs number = (DMA0_FIFO_SIZE >> 3); + * Pool size = CDBs number * CDB size = + * = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2. + */ + pool_size = (id == PPC440SPE_DMA0_ID) ? + DMA0_FIFO_SIZE : DMA1_FIFO_SIZE; + pool_size <<= 2; + } + + if (of_address_to_resource(np, 0, &res)) { + dev_err(&ofdev->dev, "failed to get memory resource\n"); + initcode = PPC_ADMA_INIT_MEMRES; + ret = -ENODEV; + goto out; + } + + if (!request_mem_region(res.start, resource_size(&res), + dev_driver_string(&ofdev->dev))) { + dev_err(&ofdev->dev, "failed to request memory region " + "(0x%016llx-0x%016llx)\n", + (u64)res.start, (u64)res.end); + initcode = PPC_ADMA_INIT_MEMREG; + ret = -EBUSY; + goto out; + } + + /* create a device */ + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) { + dev_err(&ofdev->dev, "failed to allocate device\n"); + initcode = PPC_ADMA_INIT_ALLOC; + ret = -ENOMEM; + goto err_adev_alloc; + } + + adev->id = id; + adev->pool_size = pool_size; + /* allocate coherent memory for hardware descriptors */ + adev->dma_desc_pool_virt = dma_alloc_coherent(&ofdev->dev, + adev->pool_size, &adev->dma_desc_pool, + GFP_KERNEL); + if (adev->dma_desc_pool_virt == NULL) { + dev_err(&ofdev->dev, "failed to allocate %d bytes of coherent " + "memory for hardware descriptors\n", + adev->pool_size); + initcode = PPC_ADMA_INIT_COHERENT; + ret = -ENOMEM; + goto err_dma_alloc; + } + dev_dbg(&ofdev->dev, "allocted descriptor pool virt 0x%p phys 0x%llx\n", + adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool); + + regs = ioremap(res.start, resource_size(&res)); + if (!regs) { + dev_err(&ofdev->dev, "failed to ioremap regs!\n"); + goto err_regs_alloc; + } + + if (adev->id == PPC440SPE_XOR_ID) { + adev->xor_reg = regs; + /* Reset XOR */ + iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr); + iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr); + } else { + size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ? + DMA0_FIFO_SIZE : DMA1_FIFO_SIZE; + adev->dma_reg = regs; + /* DMAx_FIFO_SIZE is defined in bytes, + * - is defined in number of CDB pointers (8byte). + * DMA FIFO Length = CSlength + CPlength, where + * CSlength = CPlength = (fsiz + 1) * 8. 
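+ * E.g. with a 0x1000 byte FIFO the value written below is
+ * (0x1000 >> 3) - 2 = 510, so CSlength = CPlength = 511 * 8 = 4088
+ * bytes, which fits in the doubled buffer reserved per engine in
+ * ppc440spe_configure_raid_devices().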
+ */ + iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2), + &adev->dma_reg->fsiz); + /* Configure DMA engine */ + iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN, + &adev->dma_reg->cfg); + /* Clear Status */ + iowrite32(~0, &adev->dma_reg->dsts); + } + + adev->dev = &ofdev->dev; + adev->common.dev = &ofdev->dev; + INIT_LIST_HEAD(&adev->common.channels); + dev_set_drvdata(&ofdev->dev, adev); + + /* create a channel */ + chan = kzalloc(sizeof(*chan), GFP_KERNEL); + if (!chan) { + dev_err(&ofdev->dev, "can't allocate channel structure\n"); + initcode = PPC_ADMA_INIT_CHANNEL; + ret = -ENOMEM; + goto err_chan_alloc; + } + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->chain); + INIT_LIST_HEAD(&chan->all_slots); + chan->device = adev; + chan->common.device = &adev->common; + list_add_tail(&chan->common.device_node, &adev->common.channels); + tasklet_init(&chan->irq_tasklet, ppc440spe_adma_tasklet, + (unsigned long)chan); + + /* allocate and map helper pages for async validation or + * async_mult/async_sum_product operations on DMA0/1. + */ + if (adev->id != PPC440SPE_XOR_ID) { + chan->pdest_page = alloc_page(GFP_KERNEL); + chan->qdest_page = alloc_page(GFP_KERNEL); + if (!chan->pdest_page || + !chan->qdest_page) { + if (chan->pdest_page) + __free_page(chan->pdest_page); + if (chan->qdest_page) + __free_page(chan->qdest_page); + ret = -ENOMEM; + goto err_page_alloc; + } + chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + } + + ref = kmalloc(sizeof(*ref), GFP_KERNEL); + if (ref) { + ref->chan = &chan->common; + INIT_LIST_HEAD(&ref->node); + list_add_tail(&ref->node, &ppc440spe_adma_chan_list); + } else { + dev_err(&ofdev->dev, "failed to allocate channel reference!\n"); + ret = -ENOMEM; + goto err_ref_alloc; + } + + ret = ppc440spe_adma_setup_irqs(adev, chan, &initcode); + if (ret) + goto err_irq; + + ppc440spe_adma_init_capabilities(adev); + + ret = dma_async_device_register(&adev->common); + if (ret) { + initcode = PPC_ADMA_INIT_REGISTER; + dev_err(&ofdev->dev, "failed to register dma device\n"); + goto err_dev_reg; + } + + goto out; + +err_dev_reg: + ppc440spe_adma_release_irqs(adev, chan); +err_irq: + list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, node) { + if (chan == to_ppc440spe_adma_chan(ref->chan)) { + list_del(&ref->node); + kfree(ref); + } + } +err_ref_alloc: + if (adev->id != PPC440SPE_XOR_ID) { + dma_unmap_page(&ofdev->dev, chan->pdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(&ofdev->dev, chan->qdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(chan->pdest_page); + __free_page(chan->qdest_page); + } +err_page_alloc: + kfree(chan); +err_chan_alloc: + if (adev->id == PPC440SPE_XOR_ID) + iounmap(adev->xor_reg); + else + iounmap(adev->dma_reg); +err_regs_alloc: + dma_free_coherent(adev->dev, adev->pool_size, + adev->dma_desc_pool_virt, + adev->dma_desc_pool); +err_dma_alloc: + kfree(adev); +err_adev_alloc: + release_mem_region(res.start, resource_size(&res)); +out: + if (id < PPC440SPE_ADMA_ENGINES_NUM) + ppc440spe_adma_devices[id] = initcode; + + return ret; +} + +/** + * ppc440spe_adma_remove - remove the asynch device + */ +static int __devexit ppc440spe_adma_remove(struct of_device *ofdev) +{ + struct ppc440spe_adma_device *adev = dev_get_drvdata(&ofdev->dev); + struct device_node *np = ofdev->node; + struct resource res; + struct dma_chan *chan, *_chan; + struct ppc_dma_chan_ref *ref, *_ref; + 
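+ /* teardown here mirrors ppc440spe_adma_probe() in reverse order */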
struct ppc440spe_adma_chan *ppc440spe_chan; + + dev_set_drvdata(&ofdev->dev, NULL); + if (adev->id < PPC440SPE_ADMA_ENGINES_NUM) + ppc440spe_adma_devices[adev->id] = -1; + + dma_async_device_unregister(&adev->common); + + list_for_each_entry_safe(chan, _chan, &adev->common.channels, + device_node) { + ppc440spe_chan = to_ppc440spe_adma_chan(chan); + ppc440spe_adma_release_irqs(adev, ppc440spe_chan); + tasklet_kill(&ppc440spe_chan->irq_tasklet); + if (adev->id != PPC440SPE_XOR_ID) { + dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(ppc440spe_chan->pdest_page); + __free_page(ppc440spe_chan->qdest_page); + } + list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, + node) { + if (ppc440spe_chan == + to_ppc440spe_adma_chan(ref->chan)) { + list_del(&ref->node); + kfree(ref); + } + } + list_del(&chan->device_node); + kfree(ppc440spe_chan); + } + + dma_free_coherent(adev->dev, adev->pool_size, + adev->dma_desc_pool_virt, adev->dma_desc_pool); + if (adev->id == PPC440SPE_XOR_ID) + iounmap(adev->xor_reg); + else + iounmap(adev->dma_reg); + of_address_to_resource(np, 0, &res); + release_mem_region(res.start, resource_size(&res)); + kfree(adev); + return 0; +} + +/* + * /sys driver interface to enable h/w RAID-6 capabilities + * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/ + * directory are "devices", "enable" and "poly". + * "devices" shows available engines. + * "enable" is used to enable RAID-6 capabilities or to check + * whether these has been activated. + * "poly" allows setting/checking used polynomial (for PPC440SPe only). + */ + +static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf) +{ + ssize_t size = 0; + int i; + + for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) { + if (ppc440spe_adma_devices[i] == -1) + continue; + size += snprintf(buf + size, PAGE_SIZE - size, + "PPC440SP(E)-ADMA.%d: %s\n", i, + ppc_adma_errors[ppc440spe_adma_devices[i]]); + } + return size; +} + +static ssize_t show_ppc440spe_r6enable(struct device_driver *dev, char *buf) +{ + return snprintf(buf, PAGE_SIZE, + "PPC440SP(e) RAID-6 capabilities are %sABLED.\n", + ppc440spe_r6_enabled ? "EN" : "DIS"); +} + +static ssize_t store_ppc440spe_r6enable(struct device_driver *dev, + const char *buf, size_t count) +{ + unsigned long val; + + if (!count || count > 11) + return -EINVAL; + + if (!ppc440spe_r6_tchan) + return -EFAULT; + + /* Write a key */ + sscanf(buf, "%lx", &val); + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val); + isync(); + + /* Verify whether it really works now */ + if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) { + pr_info("PPC440SP(e) RAID-6 has been activated " + "successfully\n"); + ppc440spe_r6_enabled = 1; + } else { + pr_info("PPC440SP(e) RAID-6 hasn't been activated!" 
+ " Error key ?\n"); + ppc440spe_r6_enabled = 0; + } + return count; +} + +static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf) +{ + ssize_t size = 0; + u32 reg; + +#ifdef CONFIG_440SP + /* 440SP has fixed polynomial */ + reg = 0x4d; +#else + reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL); + reg >>= MQ0_CFBHL_POLY; + reg &= 0xFF; +#endif + + size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver " + "uses 0x1%02x polynomial.\n", reg); + return size; +} + +static ssize_t store_ppc440spe_r6poly(struct device_driver *dev, + const char *buf, size_t count) +{ + unsigned long reg, val; + +#ifdef CONFIG_440SP + /* 440SP uses default 0x14D polynomial only */ + return -EINVAL; +#endif + + if (!count || count > 6) + return -EINVAL; + + /* e.g., 0x14D or 0x11D */ + sscanf(buf, "%lx", &val); + + if (val & ~0x1FF) + return -EINVAL; + + val &= 0xFF; + reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL); + reg &= ~(0xFF << MQ0_CFBHL_POLY); + reg |= val << MQ0_CFBHL_POLY; + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg); + + return count; +} + +static DRIVER_ATTR(devices, S_IRUGO, show_ppc440spe_devices, NULL); +static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc440spe_r6enable, + store_ppc440spe_r6enable); +static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc440spe_r6poly, + store_ppc440spe_r6poly); + +/* + * Common initialisation for RAID engines; allocate memory for + * DMAx FIFOs, perform configuration common for all DMA engines. + * Further DMA engine specific configuration is done at probe time. + */ +static int ppc440spe_configure_raid_devices(void) +{ + struct device_node *np; + struct resource i2o_res; + struct i2o_regs __iomem *i2o_reg; + dcr_host_t i2o_dcr_host; + unsigned int dcr_base, dcr_len; + int i, ret; + + np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe"); + if (!np) { + pr_err("%s: can't find I2O device tree node\n", + __func__); + return -ENODEV; + } + + if (of_address_to_resource(np, 0, &i2o_res)) { + of_node_put(np); + return -EINVAL; + } + + i2o_reg = of_iomap(np, 0); + if (!i2o_reg) { + pr_err("%s: failed to map I2O registers\n", __func__); + of_node_put(np); + return -EINVAL; + } + + /* Get I2O DCRs base */ + dcr_base = dcr_resource_start(np, 0); + dcr_len = dcr_resource_len(np, 0); + if (!dcr_base && !dcr_len) { + pr_err("%s: can't get DCR registers base/len!\n", + np->full_name); + of_node_put(np); + iounmap(i2o_reg); + return -ENODEV; + } + + i2o_dcr_host = dcr_map(np, dcr_base, dcr_len); + if (!DCR_MAP_OK(i2o_dcr_host)) { + pr_err("%s: failed to map DCRs!\n", np->full_name); + of_node_put(np); + iounmap(i2o_reg); + return -ENODEV; + } + of_node_put(np); + + /* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share + * the base address of FIFO memory space. 
+ * Actually we need twice more physical memory than programmed in the + * register (because there are two FIFOs for each DMA: CP and CS) + */ + ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1, + GFP_KERNEL); + if (!ppc440spe_dma_fifo_buf) { + pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__); + iounmap(i2o_reg); + dcr_unmap(i2o_dcr_host, dcr_len); + return -ENOMEM; + } + + /* + * Configure h/w + */ + /* Reset I2O/DMA */ + mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA); + mtdcri(SDR0, DCRN_SDR0_SRST, 0); + + /* Setup the base address of mmaped registers */ + dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32)); + dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) | + I2O_REG_ENABLE); + dcr_unmap(i2o_dcr_host, dcr_len); + + /* Setup FIFO memory space base address */ + iowrite32(0, &i2o_reg->ifbah); + iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal); + + /* set zero FIFO size for I2O, so the whole + * ppc440spe_dma_fifo_buf is used by DMAs. + * DMAx_FIFOs will be configured while probe. + */ + iowrite32(0, &i2o_reg->ifsiz); + iounmap(i2o_reg); + + /* To prepare WXOR/RXOR functionality we need access to + * Memory Queue Module DCRs (finally it will be enabled + * via /sys interface of the ppc440spe ADMA driver). + */ + np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe"); + if (!np) { + pr_err("%s: can't find MQ device tree node\n", + __func__); + ret = -ENODEV; + goto out_free; + } + + /* Get MQ DCRs base */ + dcr_base = dcr_resource_start(np, 0); + dcr_len = dcr_resource_len(np, 0); + if (!dcr_base && !dcr_len) { + pr_err("%s: can't get DCR registers base/len!\n", + np->full_name); + ret = -ENODEV; + goto out_mq; + } + + ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len); + if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) { + pr_err("%s: failed to map DCRs!\n", np->full_name); + ret = -ENODEV; + goto out_mq; + } + of_node_put(np); + ppc440spe_mq_dcr_len = dcr_len; + + /* Set HB alias */ + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB); + + /* Set: + * - LL transaction passing limit to 1; + * - Memory controller cycle limit to 1; + * - Galois Polynomial to 0x14d (default) + */ + dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, + (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) | + (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY)); + + atomic_set(&ppc440spe_adma_err_irq_ref, 0); + for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) + ppc440spe_adma_devices[i] = -1; + + return 0; + +out_mq: + of_node_put(np); +out_free: + kfree(ppc440spe_dma_fifo_buf); + return ret; +} + +static struct of_device_id __devinitdata ppc440spe_adma_of_match[] = { + { .compatible = "ibm,dma-440spe", }, + { .compatible = "amcc,xor-accelerator", }, + {}, +}; +MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match); + +static struct of_platform_driver ppc440spe_adma_driver = { + .match_table = ppc440spe_adma_of_match, + .probe = ppc440spe_adma_probe, + .remove = __devexit_p(ppc440spe_adma_remove), + .driver = { + .name = "PPC440SP(E)-ADMA", + .owner = THIS_MODULE, + }, +}; + +static __init int ppc440spe_adma_init(void) +{ + int ret; + + ret = ppc440spe_configure_raid_devices(); + if (ret) + return ret; + + ret = of_register_platform_driver(&ppc440spe_adma_driver); + if (ret) { + pr_err("%s: failed to register platform driver\n", + __func__); + goto out_reg; + } + + /* Initialization status */ + ret = driver_create_file(&ppc440spe_adma_driver.driver, + &driver_attr_devices); + if (ret) + goto out_dev; + + /* RAID-6 h/w enable entry */ + ret = 
driver_create_file(&ppc440spe_adma_driver.driver, + &driver_attr_enable); + if (ret) + goto out_en; + + /* GF polynomial to use */ + ret = driver_create_file(&ppc440spe_adma_driver.driver, + &driver_attr_poly); + if (!ret) + return ret; + + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_enable); +out_en: + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_devices); +out_dev: + /* User will not be able to enable h/w RAID-6 */ + pr_err("%s: failed to create RAID-6 driver interface\n", + __func__); + of_unregister_platform_driver(&ppc440spe_adma_driver); +out_reg: + dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len); + kfree(ppc440spe_dma_fifo_buf); + return ret; +} + +static void __exit ppc440spe_adma_exit(void) +{ + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_poly); + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_enable); + driver_remove_file(&ppc440spe_adma_driver.driver, + &driver_attr_devices); + of_unregister_platform_driver(&ppc440spe_adma_driver); + dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len); + kfree(ppc440spe_dma_fifo_buf); +} + +arch_initcall(ppc440spe_adma_init); +module_exit(ppc440spe_adma_exit); + +MODULE_AUTHOR("Yuri Tikhonov "); +MODULE_DESCRIPTION("PPC440SPE ADMA Engine Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/ppc4xx/adma.h b/drivers/dma/ppc4xx/adma.h new file mode 100644 index 000000000000..8ada5a812e3b --- /dev/null +++ b/drivers/dma/ppc4xx/adma.h @@ -0,0 +1,195 @@ +/* + * 2006-2009 (C) DENX Software Engineering. + * + * Author: Yuri Tikhonov + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied. 
+ */ + +#ifndef _PPC440SPE_ADMA_H +#define _PPC440SPE_ADMA_H + +#include +#include "dma.h" +#include "xor.h" + +#define to_ppc440spe_adma_chan(chan) \ + container_of(chan, struct ppc440spe_adma_chan, common) +#define to_ppc440spe_adma_device(dev) \ + container_of(dev, struct ppc440spe_adma_device, common) +#define tx_to_ppc440spe_adma_slot(tx) \ + container_of(tx, struct ppc440spe_adma_desc_slot, async_tx) + +/* Default polynomial (for 440SP is only available) */ +#define PPC440SPE_DEFAULT_POLY 0x4d + +#define PPC440SPE_ADMA_ENGINES_NUM (XOR_ENGINES_NUM + DMA_ENGINES_NUM) + +#define PPC440SPE_ADMA_WATCHDOG_MSEC 3 +#define PPC440SPE_ADMA_THRESHOLD 1 + +#define PPC440SPE_DMA0_ID 0 +#define PPC440SPE_DMA1_ID 1 +#define PPC440SPE_XOR_ID 2 + +#define PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT 0xFFFFFFUL +/* this is the XOR_CBBCR width */ +#define PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT (1 << 31) +#define PPC440SPE_ADMA_ZERO_SUM_MAX_BYTE_COUNT PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT + +#define PPC440SPE_RXOR_RUN 0 + +#define MQ0_CF2H_RXOR_BS_MASK 0x1FF + +#undef ADMA_LL_DEBUG + +/** + * struct ppc440spe_adma_device - internal representation of an ADMA device + * @dev: device + * @dma_reg: base for DMAx register access + * @xor_reg: base for XOR register access + * @i2o_reg: base for I2O register access + * @id: HW ADMA Device selector + * @dma_desc_pool_virt: base of DMA descriptor region (CPU address) + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @pool_size: size of the pool + * @irq: DMAx or XOR irq number + * @err_irq: DMAx error irq number + * @common: embedded struct dma_device + */ +struct ppc440spe_adma_device { + struct device *dev; + struct dma_regs __iomem *dma_reg; + struct xor_regs __iomem *xor_reg; + struct i2o_regs __iomem *i2o_reg; + int id; + void *dma_desc_pool_virt; + dma_addr_t dma_desc_pool; + size_t pool_size; + int irq; + int err_irq; + struct dma_device common; +}; + +/** + * struct ppc440spe_adma_chan - internal representation of an ADMA channel + * @lock: serializes enqueue/dequeue operations to the slot pool + * @device: parent device + * @chain: device chain view of the descriptors + * @common: common dmaengine channel object members + * @all_slots: complete domain of slots usable by the channel + * @pending: allows batching of hardware operations + * @completed_cookie: identifier for the most recently completed operation + * @slots_allocated: records the actual size of the descriptor slot pool + * @hw_chain_inited: h/w descriptor chain initialization flag + * @irq_tasklet: bottom half where ppc440spe_adma_slot_cleanup runs + * @needs_unmap: if buffers should not be unmapped upon final processing + * @pdest_page: P destination page for async validate operation + * @qdest_page: Q destination page for async validate operation + * @pdest: P dma addr for async validate operation + * @qdest: Q dma addr for async validate operation + */ +struct ppc440spe_adma_chan { + spinlock_t lock; + struct ppc440spe_adma_device *device; + struct list_head chain; + struct dma_chan common; + struct list_head all_slots; + struct ppc440spe_adma_desc_slot *last_used; + int pending; + dma_cookie_t completed_cookie; + int slots_allocated; + int hw_chain_inited; + struct tasklet_struct irq_tasklet; + u8 needs_unmap; + struct page *pdest_page; + struct page *qdest_page; + dma_addr_t pdest; + dma_addr_t qdest; +}; + +struct ppc440spe_rxor { + u32 addrl; + u32 addrh; + int len; + int xor_count; + int addr_count; + int desc_count; + int state; +}; + +/** + * struct ppc440spe_adma_desc_slot - 
PPC440SPE-ADMA software descriptor + * @phys: hardware address of the hardware descriptor chain + * @group_head: first operation in a transaction + * @hw_next: pointer to the next descriptor in chain + * @async_tx: support for the async_tx api + * @slot_node: node on the iop_adma_chan.all_slots list + * @chain_node: node on the op_adma_chan.chain list + * @group_list: list of slots that make up a multi-descriptor transaction + * for example transfer lengths larger than the supported hw max + * @unmap_len: transaction bytecount + * @hw_desc: virtual address of the hardware descriptor chain + * @stride: currently chained or not + * @idx: pool index + * @slot_cnt: total slots used in an transaction (group of operations) + * @src_cnt: number of sources set in this descriptor + * @dst_cnt: number of destinations set in the descriptor + * @slots_per_op: number of slots per operation + * @descs_per_op: number of slot per P/Q operation see comment + * for ppc440spe_prep_dma_pqxor function + * @flags: desc state/type + * @reverse_flags: 1 if a corresponding rxor address uses reversed address order + * @xor_check_result: result of zero sum + * @crc32_result: result crc calculation + */ +struct ppc440spe_adma_desc_slot { + dma_addr_t phys; + struct ppc440spe_adma_desc_slot *group_head; + struct ppc440spe_adma_desc_slot *hw_next; + struct dma_async_tx_descriptor async_tx; + struct list_head slot_node; + struct list_head chain_node; /* node in channel ops list */ + struct list_head group_list; /* list */ + unsigned int unmap_len; + void *hw_desc; + u16 stride; + u16 idx; + u16 slot_cnt; + u8 src_cnt; + u8 dst_cnt; + u8 slots_per_op; + u8 descs_per_op; + unsigned long flags; + unsigned long reverse_flags[8]; + +#define PPC440SPE_DESC_INT 0 /* generate interrupt on complete */ +#define PPC440SPE_ZERO_P 1 /* clear P destionaion */ +#define PPC440SPE_ZERO_Q 2 /* clear Q destination */ +#define PPC440SPE_COHERENT 3 /* src/dst are coherent */ + +#define PPC440SPE_DESC_WXOR 4 /* WXORs are in chain */ +#define PPC440SPE_DESC_RXOR 5 /* RXOR is in chain */ + +#define PPC440SPE_DESC_RXOR123 8 /* CDB for RXOR123 operation */ +#define PPC440SPE_DESC_RXOR124 9 /* CDB for RXOR124 operation */ +#define PPC440SPE_DESC_RXOR125 10 /* CDB for RXOR125 operation */ +#define PPC440SPE_DESC_RXOR12 11 /* CDB for RXOR12 operation */ +#define PPC440SPE_DESC_RXOR_REV 12 /* CDB has srcs in reversed order */ + +#define PPC440SPE_DESC_PCHECK 13 +#define PPC440SPE_DESC_QCHECK 14 + +#define PPC440SPE_DESC_RXOR_MSK 0x3 + + struct ppc440spe_rxor rxor_cursor; + + union { + u32 *xor_check_result; + u32 *crc32_result; + }; +}; + +#endif /* _PPC440SPE_ADMA_H */ diff --git a/drivers/dma/ppc4xx/dma.h b/drivers/dma/ppc4xx/dma.h new file mode 100644 index 000000000000..bcde2df2f373 --- /dev/null +++ b/drivers/dma/ppc4xx/dma.h @@ -0,0 +1,223 @@ +/* + * 440SPe's DMA engines support header file + * + * 2006-2009 (C) DENX Software Engineering. + * + * Author: Yuri Tikhonov + * + * This file is licensed under the term of the GNU General Public License + * version 2. The program licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#ifndef _PPC440SPE_DMA_H +#define _PPC440SPE_DMA_H + +#include + +/* Number of elements in the array with statical CDBs */ +#define MAX_STAT_DMA_CDBS 16 +/* Number of DMA engines available on the contoller */ +#define DMA_ENGINES_NUM 2 + +/* Maximum h/w supported number of destinations */ +#define DMA_DEST_MAX_NUM 2 + +/* FIFO's params */ +#define DMA0_FIFO_SIZE 0x1000 +#define DMA1_FIFO_SIZE 0x1000 +#define DMA_FIFO_ENABLE (1<<12) + +/* DMA Configuration Register. Data Transfer Engine PLB Priority: */ +#define DMA_CFG_DXEPR_LP (0<<26) +#define DMA_CFG_DXEPR_HP (3<<26) +#define DMA_CFG_DXEPR_HHP (2<<26) +#define DMA_CFG_DXEPR_HHHP (1<<26) + +/* DMA Configuration Register. DMA FIFO Manager PLB Priority: */ +#define DMA_CFG_DFMPP_LP (0<<23) +#define DMA_CFG_DFMPP_HP (3<<23) +#define DMA_CFG_DFMPP_HHP (2<<23) +#define DMA_CFG_DFMPP_HHHP (1<<23) + +/* DMA Configuration Register. Force 64-byte Alignment */ +#define DMA_CFG_FALGN (1 << 19) + +/*UIC0:*/ +#define D0CPF_INT (1<<12) +#define D0CSF_INT (1<<11) +#define D1CPF_INT (1<<10) +#define D1CSF_INT (1<<9) +/*UIC1:*/ +#define DMAE_INT (1<<9) + +/* I2O IOP Interrupt Mask Register */ +#define I2O_IOPIM_P0SNE (1<<3) +#define I2O_IOPIM_P0EM (1<<5) +#define I2O_IOPIM_P1SNE (1<<6) +#define I2O_IOPIM_P1EM (1<<8) + +/* DMA CDB fields */ +#define DMA_CDB_MSK (0xF) +#define DMA_CDB_64B_ADDR (1<<2) +#define DMA_CDB_NO_INT (1<<3) +#define DMA_CDB_STATUS_MSK (0x3) +#define DMA_CDB_ADDR_MSK (0xFFFFFFF0) + +/* DMA CDB OpCodes */ +#define DMA_CDB_OPC_NO_OP (0x00) +#define DMA_CDB_OPC_MV_SG1_SG2 (0x01) +#define DMA_CDB_OPC_MULTICAST (0x05) +#define DMA_CDB_OPC_DFILL128 (0x24) +#define DMA_CDB_OPC_DCHECK128 (0x23) + +#define DMA_CUED_XOR_BASE (0x10000000) +#define DMA_CUED_XOR_HB (0x00000008) + +#ifdef CONFIG_440SP +#define DMA_CUED_MULT1_OFF 0 +#define DMA_CUED_MULT2_OFF 8 +#define DMA_CUED_MULT3_OFF 16 +#define DMA_CUED_REGION_OFF 24 +#define DMA_CUED_XOR_WIN_MSK (0xFC000000) +#else +#define DMA_CUED_MULT1_OFF 2 +#define DMA_CUED_MULT2_OFF 10 +#define DMA_CUED_MULT3_OFF 18 +#define DMA_CUED_REGION_OFF 26 +#define DMA_CUED_XOR_WIN_MSK (0xF0000000) +#endif + +#define DMA_CUED_REGION_MSK 0x3 +#define DMA_RXOR123 0x0 +#define DMA_RXOR124 0x1 +#define DMA_RXOR125 0x2 +#define DMA_RXOR12 0x3 + +/* S/G addresses */ +#define DMA_CDB_SG_SRC 1 +#define DMA_CDB_SG_DST1 2 +#define DMA_CDB_SG_DST2 3 + +/* + * DMAx engines Command Descriptor Block Type + */ +struct dma_cdb { + /* + * Basic CDB structure (Table 20-17, p.499, 440spe_um_1_22.pdf) + */ + u8 pad0[2]; /* reserved */ + u8 attr; /* attributes */ + u8 opc; /* opcode */ + u32 sg1u; /* upper SG1 address */ + u32 sg1l; /* lower SG1 address */ + u32 cnt; /* SG count, 3B used */ + u32 sg2u; /* upper SG2 address */ + u32 sg2l; /* lower SG2 address */ + u32 sg3u; /* upper SG3 address */ + u32 sg3l; /* lower SG3 address */ +}; + +/* + * DMAx hardware registers (p.515 in 440SPe UM 1.22) + */ +struct dma_regs { + u32 cpfpl; + u32 cpfph; + u32 csfpl; + u32 csfph; + u32 dsts; + u32 cfg; + u8 pad0[0x8]; + u16 cpfhp; + u16 cpftp; + u16 csfhp; + u16 csftp; + u8 pad1[0x8]; + u32 acpl; + u32 acph; + u32 s1bpl; + u32 s1bph; + u32 s2bpl; + u32 s2bph; + u32 s3bpl; + u32 s3bph; + u8 pad2[0x10]; + u32 earl; + u32 earh; + u8 pad3[0x8]; + u32 seat; + u32 sead; + u32 op; + u32 fsiz; +}; + +/* + * I2O hardware registers (p.528 in 440SPe UM 1.22) + */ +struct i2o_regs { + u32 ists; + u32 iseat; + u32 isead; + u8 pad0[0x14]; + u32 idbel; + u8 pad1[0xc]; + u32 ihis; + u32 ihim; + u8 pad2[0x8]; + u32 ihiq; + u32 ihoq; + u8 pad3[0x8]; + 
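+ /* iopim below is the I2O IOP interrupt mask register that the ADMA
+ * driver uses to (un)mask the 'CS FIFO Attention' interrupts */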
u32 iopis; + u32 iopim; + u32 iopiq; + u8 iopoq; + u8 pad4[3]; + u16 iiflh; + u16 iiflt; + u16 iiplh; + u16 iiplt; + u16 ioflh; + u16 ioflt; + u16 ioplh; + u16 ioplt; + u32 iidc; + u32 ictl; + u32 ifcpp; + u8 pad5[0x4]; + u16 mfac0; + u16 mfac1; + u16 mfac2; + u16 mfac3; + u16 mfac4; + u16 mfac5; + u16 mfac6; + u16 mfac7; + u16 ifcfh; + u16 ifcht; + u8 pad6[0x4]; + u32 iifmc; + u32 iodb; + u32 iodbc; + u32 ifbal; + u32 ifbah; + u32 ifsiz; + u32 ispd0; + u32 ispd1; + u32 ispd2; + u32 ispd3; + u32 ihipl; + u32 ihiph; + u32 ihopl; + u32 ihoph; + u32 iiipl; + u32 iiiph; + u32 iiopl; + u32 iioph; + u32 ifcpl; + u32 ifcph; + u8 pad7[0x8]; + u32 iopt; +}; + +#endif /* _PPC440SPE_DMA_H */ diff --git a/drivers/dma/ppc4xx/xor.h b/drivers/dma/ppc4xx/xor.h new file mode 100644 index 000000000000..daed7384daac --- /dev/null +++ b/drivers/dma/ppc4xx/xor.h @@ -0,0 +1,110 @@ +/* + * 440SPe's XOR engines support header file + * + * 2006-2009 (C) DENX Software Engineering. + * + * Author: Yuri Tikhonov + * + * This file is licensed under the term of the GNU General Public License + * version 2. The program licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef _PPC440SPE_XOR_H +#define _PPC440SPE_XOR_H + +#include + +/* Number of XOR engines available on the contoller */ +#define XOR_ENGINES_NUM 1 + +/* Number of operands supported in the h/w */ +#define XOR_MAX_OPS 16 + +/* + * XOR Command Block Control Register bits + */ +#define XOR_CBCR_LNK_BIT (1<<31) /* link present */ +#define XOR_CBCR_TGT_BIT (1<<30) /* target present */ +#define XOR_CBCR_CBCE_BIT (1<<29) /* command block compete enable */ +#define XOR_CBCR_RNZE_BIT (1<<28) /* result not zero enable */ +#define XOR_CBCR_XNOR_BIT (1<<15) /* XOR/XNOR */ +#define XOR_CDCR_OAC_MSK (0x7F) /* operand address count */ + +/* + * XORCore Status Register bits + */ +#define XOR_SR_XCP_BIT (1<<31) /* core processing */ +#define XOR_SR_ICB_BIT (1<<17) /* invalid CB */ +#define XOR_SR_IC_BIT (1<<16) /* invalid command */ +#define XOR_SR_IPE_BIT (1<<15) /* internal parity error */ +#define XOR_SR_RNZ_BIT (1<<2) /* result not Zero */ +#define XOR_SR_CBC_BIT (1<<1) /* CB complete */ +#define XOR_SR_CBLC_BIT (1<<0) /* CB list complete */ + +/* + * XORCore Control Set and Reset Register bits + */ +#define XOR_CRSR_XASR_BIT (1<<31) /* soft reset */ +#define XOR_CRSR_XAE_BIT (1<<30) /* enable */ +#define XOR_CRSR_RCBE_BIT (1<<29) /* refetch CB enable */ +#define XOR_CRSR_PAUS_BIT (1<<28) /* pause */ +#define XOR_CRSR_64BA_BIT (1<<27) /* 64/32 CB format */ +#define XOR_CRSR_CLP_BIT (1<<25) /* continue list processing */ + +/* + * XORCore Interrupt Enable Register + */ +#define XOR_IE_ICBIE_BIT (1<<17) /* Invalid Command Block IRQ Enable */ +#define XOR_IE_ICIE_BIT (1<<16) /* Invalid Command IRQ Enable */ +#define XOR_IE_RPTIE_BIT (1<<14) /* Read PLB Timeout Error IRQ Enable */ +#define XOR_IE_CBCIE_BIT (1<<1) /* CB complete interrupt enable */ +#define XOR_IE_CBLCI_BIT (1<<0) /* CB list complete interrupt enable */ + +/* + * XOR Accelerator engine Command Block Type + */ +struct xor_cb { + /* + * Basic 64-bit format XOR CB (Table 19-1, p.463, 440spe_um_1_22.pdf) + */ + u32 cbc; /* control */ + u32 cbbc; /* byte count */ + u32 cbs; /* status */ + u8 pad0[4]; /* reserved */ + u32 cbtah; /* target address high */ + u32 cbtal; /* target address low */ + u32 cblah; /* link address high */ + u32 cblal; /* link address low */ + struct { + u32 h; + u32 l; + } __attribute__ ((packed)) ops[16]; +} __attribute__ ((packed)); + +/* + * XOR 
hardware registers Table 19-3, UM 1.22 + */ +struct xor_regs { + u32 op_ar[16][2]; /* operand address[0]-high,[1]-low registers */ + u8 pad0[352]; /* reserved */ + u32 cbcr; /* CB control register */ + u32 cbbcr; /* CB byte count register */ + u32 cbsr; /* CB status register */ + u8 pad1[4]; /* reserved */ + u32 cbtahr; /* operand target address high register */ + u32 cbtalr; /* operand target address low register */ + u32 cblahr; /* CB link address high register */ + u32 cblalr; /* CB link address low register */ + u32 crsr; /* control set register */ + u32 crrr; /* control reset register */ + u32 ccbahr; /* current CB address high register */ + u32 ccbalr; /* current CB address low register */ + u32 plbr; /* PLB configuration register */ + u32 ier; /* interrupt enable register */ + u32 pecr; /* parity error count register */ + u32 sr; /* status register */ + u32 revidr; /* revision ID register */ +}; + +#endif /* _PPC440SPE_XOR_H */ -- cgit v1.2.3 From 4297a462f455e38f08976df7b16c849614a287da Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 16 Dec 2009 16:28:03 +0100 Subject: dma: at_hdmac: correct incompatible type for argument 1 of 'spin_lock_bh' Correct a typo error in locking calls. Cc: Signed-off-by: Nicolas Ferre Signed-off-by: Dan Williams --- drivers/dma/at_hdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 7585c4164bd5..c558fa13b4b7 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -815,7 +815,7 @@ atc_is_tx_complete(struct dma_chan *chan, dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n", cookie, done ? *done : 0, used ? *used : 0); - spin_lock_bh(atchan->lock); + spin_lock_bh(&atchan->lock); last_complete = atchan->completed_cookie; last_used = chan->cookie; @@ -830,7 +830,7 @@ atc_is_tx_complete(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); } - spin_unlock_bh(atchan->lock); + spin_unlock_bh(&atchan->lock); if (done) *done = last_complete; -- cgit v1.2.3 From 3542a113ab2f5880f1b62e5909d754250fb57d6b Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 17 Dec 2009 09:41:39 -0700 Subject: sh: fix DMA driver's descriptor chaining and cookie assignment The SH DMA driver wrongly assigns negative cookies to transfer descriptors, also, its chaining of partial descriptors is broken. The latter problem is usually invisible, because maximum transfer size per chunk is 16M, but if you artificially set this limit lower, the driver fails. Since cookies are also used in chunk management, both these problems are fixed in one patch. As side effects a possible memory leak, when descriptors are prepared, but not submitted, and multiple races have also been fixed. 
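The fix is built around a small per-descriptor state machine; a sketch of
the transitions, as encoded by the new enum sh_dmae_desc_status and its
call sites in the diff below:

    DESC_IDLE      -> DESC_PREPARED   sh_dmae_prep_memcpy()
    DESC_PREPARED  -> DESC_SUBMITTED  sh_dmae_tx_submit()
    DESC_SUBMITTED -> DESC_COMPLETED  dmae_do_tasklet()
    DESC_COMPLETED -> DESC_WAITING    cleanup, when the callback is picked up
    DESC_WAITING   -> DESC_IDLE       cleanup, once the client has acked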
Signed-off-by: Guennadi Liakhovetski Acked-by: Paul Mundt Acked-by: Nobuhiro Iwamatsu Signed-off-by: Dan Williams --- drivers/dma/shdma.c | 324 +++++++++++++++++++++++++++++++++------------------- drivers/dma/shdma.h | 9 +- 2 files changed, 213 insertions(+), 120 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 8d9acd751ed6..9546f5f356eb 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -23,16 +23,19 @@ #include #include #include -#include #include #include #include #include "shdma.h" /* DMA descriptor control */ -#define DESC_LAST (-1) -#define DESC_COMP (1) -#define DESC_NCOMP (0) +enum sh_dmae_desc_status { + DESC_IDLE, + DESC_PREPARED, + DESC_SUBMITTED, + DESC_COMPLETED, /* completed, have to call callback */ + DESC_WAITING, /* callback called, waiting for ack / re-submit */ +}; #define NR_DESCS_PER_CHANNEL 32 /* @@ -45,6 +48,8 @@ */ #define RS_DEFAULT (RS_DUAL) +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all); + #define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id]) static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) { @@ -106,11 +111,11 @@ static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan) return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT]; } -static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw) +static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw) { - sh_dmae_writel(sh_chan, hw.sar, SAR); - sh_dmae_writel(sh_chan, hw.dar, DAR); - sh_dmae_writel(sh_chan, hw.tcr >> calc_xmit_shift(sh_chan), TCR); + sh_dmae_writel(sh_chan, hw->sar, SAR); + sh_dmae_writel(sh_chan, hw->dar, DAR); + sh_dmae_writel(sh_chan, hw->tcr >> calc_xmit_shift(sh_chan), TCR); } static void dmae_start(struct sh_dmae_chan *sh_chan) @@ -184,8 +189,9 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) { - struct sh_desc *desc = tx_to_sh_desc(tx); + struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c; struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); + dma_async_tx_callback callback = tx->callback; dma_cookie_t cookie; spin_lock_bh(&sh_chan->desc_lock); @@ -195,45 +201,53 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) if (cookie < 0) cookie = 1; - /* If desc only in the case of 1 */ - if (desc->async_tx.cookie != -EBUSY) - desc->async_tx.cookie = cookie; - sh_chan->common.cookie = desc->async_tx.cookie; + sh_chan->common.cookie = cookie; + tx->cookie = cookie; + + /* Mark all chunks of this descriptor as submitted, move to the queue */ + list_for_each_entry_safe(chunk, c, desc->node.prev, node) { + /* + * All chunks are on the global ld_free, so, we have to find + * the end of the chain ourselves + */ + if (chunk != desc && (chunk->mark == DESC_IDLE || + chunk->async_tx.cookie > 0 || + chunk->async_tx.cookie == -EBUSY || + &chunk->node == &sh_chan->ld_free)) + break; + chunk->mark = DESC_SUBMITTED; + /* Callback goes to the last chunk */ + chunk->async_tx.callback = NULL; + chunk->cookie = cookie; + list_move_tail(&chunk->node, &sh_chan->ld_queue); + last = chunk; + } + + last->async_tx.callback = callback; + last->async_tx.callback_param = tx->callback_param; - list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev); + dev_dbg(sh_chan->dev, "submit #%d@%p on %d: %x[%d] -> %x\n", + tx->cookie, &last->async_tx, sh_chan->id, + desc->hw.sar, desc->hw.tcr, desc->hw.dar); 
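+ /* at this point every chunk of the chain carries the same cookie and
+ * only the last chunk keeps the user's callback */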
spin_unlock_bh(&sh_chan->desc_lock); return cookie; } +/* Called with desc_lock held */ static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan) { - struct sh_desc *desc, *_desc, *ret = NULL; + struct sh_desc *desc; - spin_lock_bh(&sh_chan->desc_lock); - list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) { - if (async_tx_test_ack(&desc->async_tx)) { + list_for_each_entry(desc, &sh_chan->ld_free, node) + if (desc->mark != DESC_PREPARED) { + BUG_ON(desc->mark != DESC_IDLE); list_del(&desc->node); - ret = desc; - break; + return desc; } - } - spin_unlock_bh(&sh_chan->desc_lock); - - return ret; -} - -static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc) -{ - if (desc) { - spin_lock_bh(&sh_chan->desc_lock); - - list_splice_init(&desc->tx_list, &sh_chan->ld_free); - list_add(&desc->node, &sh_chan->ld_free); - spin_unlock_bh(&sh_chan->desc_lock); - } + return NULL; } static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) @@ -252,11 +266,10 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&desc->async_tx, &sh_chan->common); desc->async_tx.tx_submit = sh_dmae_tx_submit; - desc->async_tx.flags = DMA_CTRL_ACK; - INIT_LIST_HEAD(&desc->tx_list); - sh_dmae_put_desc(sh_chan, desc); + desc->mark = DESC_IDLE; spin_lock_bh(&sh_chan->desc_lock); + list_add(&desc->node, &sh_chan->ld_free); sh_chan->descs_allocated++; } spin_unlock_bh(&sh_chan->desc_lock); @@ -273,7 +286,10 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) struct sh_desc *desc, *_desc; LIST_HEAD(list); - BUG_ON(!list_empty(&sh_chan->ld_queue)); + /* Prepared and not submitted descriptors can still be on the queue */ + if (!list_empty(&sh_chan->ld_queue)) + sh_dmae_chan_ld_cleanup(sh_chan, true); + spin_lock_bh(&sh_chan->desc_lock); list_splice_init(&sh_chan->ld_free, &list); @@ -292,6 +308,8 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( struct sh_dmae_chan *sh_chan; struct sh_desc *first = NULL, *prev = NULL, *new; size_t copy_size; + LIST_HEAD(tx_list); + int chunks = (len + SH_DMA_TCR_MAX) / (SH_DMA_TCR_MAX + 1); if (!chan) return NULL; @@ -301,108 +319,189 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( sh_chan = to_sh_chan(chan); + /* Have to lock the whole loop to protect against concurrent release */ + spin_lock_bh(&sh_chan->desc_lock); + + /* + * Chaining: + * first descriptor is what user is dealing with in all API calls, its + * cookie is at first set to -EBUSY, at tx-submit to a positive + * number + * if more than one chunk is needed further chunks have cookie = -EINVAL + * the last chunk, if not equal to the first, has cookie = -ENOSPC + * all chunks are linked onto the tx_list head with their .node heads + * only during this function, then they are immediately spliced + * back onto the free list in form of a chain + */ do { - /* Allocate the link descriptor from DMA pool */ + /* Allocate the link descriptor from the free list */ new = sh_dmae_get_desc(sh_chan); if (!new) { dev_err(sh_chan->dev, "No free memory for link descriptor\n"); - goto err_get_desc; + list_for_each_entry(new, &tx_list, node) + new->mark = DESC_IDLE; + list_splice(&tx_list, &sh_chan->ld_free); + spin_unlock_bh(&sh_chan->desc_lock); + return NULL; } - copy_size = min(len, (size_t)SH_DMA_TCR_MAX); + copy_size = min(len, (size_t)SH_DMA_TCR_MAX + 1); new->hw.sar = dma_src; new->hw.dar = dma_dest; new->hw.tcr = copy_size; - if (!first) + if (!first) { + /* First desc */ + new->async_tx.cookie = -EBUSY; first = 
new; + } else { + /* Other desc - invisible to the user */ + new->async_tx.cookie = -EINVAL; + } - new->mark = DESC_NCOMP; - async_tx_ack(&new->async_tx); + dev_dbg(sh_chan->dev, + "chaining %u of %u with %p, dst %x, cookie %d\n", + copy_size, len, &new->async_tx, dma_dest, + new->async_tx.cookie); + + new->mark = DESC_PREPARED; + new->async_tx.flags = flags; + new->chunks = chunks--; prev = new; len -= copy_size; dma_src += copy_size; dma_dest += copy_size; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->tx_list); + list_add_tail(&new->node, &tx_list); } while (len); - new->async_tx.flags = flags; /* client is in control of this ack */ - new->async_tx.cookie = -EBUSY; /* Last desc */ + if (new != first) + new->async_tx.cookie = -ENOSPC; - return &first->async_tx; + /* Put them back on the free list, so, they don't get lost */ + list_splice_tail(&tx_list, &sh_chan->ld_free); -err_get_desc: - sh_dmae_put_desc(sh_chan, first); - return NULL; + spin_unlock_bh(&sh_chan->desc_lock); + return &first->async_tx; } -/* - * sh_chan_ld_cleanup - Clean up link descriptors - * - * This function clean up the ld_queue of DMA channel. - */ -static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan) +static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) { struct sh_desc *desc, *_desc; + /* Is the "exposed" head of a chain acked? */ + bool head_acked = false; + dma_cookie_t cookie = 0; + dma_async_tx_callback callback = NULL; + void *param = NULL; spin_lock_bh(&sh_chan->desc_lock); list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) { - dma_async_tx_callback callback; - void *callback_param; - - /* non send data */ - if (desc->mark == DESC_NCOMP) + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie); + BUG_ON(desc->mark != DESC_SUBMITTED && + desc->mark != DESC_COMPLETED && + desc->mark != DESC_WAITING); + + /* + * queue is ordered, and we use this loop to (1) clean up all + * completed descriptors, and to (2) update descriptor flags of + * any chunks in a (partially) completed chain + */ + if (!all && desc->mark == DESC_SUBMITTED && + desc->cookie != cookie) break; - /* send data sesc */ - callback = desc->async_tx.callback; - callback_param = desc->async_tx.callback_param; + if (tx->cookie > 0) + cookie = tx->cookie; - /* Remove from ld_queue list */ - list_splice_init(&desc->tx_list, &sh_chan->ld_free); + if (desc->mark == DESC_COMPLETED && desc->chunks == 1) { + BUG_ON(sh_chan->completed_cookie != desc->cookie - 1); + sh_chan->completed_cookie = desc->cookie; + } - dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n", - desc); + /* Call callback on the last chunk */ + if (desc->mark == DESC_COMPLETED && tx->callback) { + desc->mark = DESC_WAITING; + callback = tx->callback; + param = tx->callback_param; + dev_dbg(sh_chan->dev, "descriptor #%d@%p on %d callback\n", + tx->cookie, tx, sh_chan->id); + BUG_ON(desc->chunks != 1); + break; + } - list_move(&desc->node, &sh_chan->ld_free); - /* Run the link descriptor callback function */ - if (callback) { - spin_unlock_bh(&sh_chan->desc_lock); - dev_dbg(sh_chan->dev, "link descriptor %p callback\n", - desc); - callback(callback_param); - spin_lock_bh(&sh_chan->desc_lock); + if (tx->cookie > 0 || tx->cookie == -EBUSY) { + if (desc->mark == DESC_COMPLETED) { + BUG_ON(tx->cookie < 0); + desc->mark = DESC_WAITING; + } + head_acked = async_tx_test_ack(tx); + } else { + switch (desc->mark) { + case DESC_COMPLETED: + 
desc->mark = DESC_WAITING; + /* Fall through */ + case DESC_WAITING: + if (head_acked) + async_tx_ack(&desc->async_tx); + } + } + + dev_dbg(sh_chan->dev, "descriptor %p #%d completed.\n", + tx, tx->cookie); + + if (((desc->mark == DESC_COMPLETED || + desc->mark == DESC_WAITING) && + async_tx_test_ack(&desc->async_tx)) || all) { + /* Remove from ld_queue list */ + desc->mark = DESC_IDLE; + list_move(&desc->node, &sh_chan->ld_free); } } spin_unlock_bh(&sh_chan->desc_lock); + + if (callback) + callback(param); + + return callback; +} + +/* + * sh_chan_ld_cleanup - Clean up link descriptors + * + * This function cleans up the ld_queue of DMA channel. + */ +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) +{ + while (__ld_cleanup(sh_chan, all)) + ; } static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) { - struct list_head *ld_node; - struct sh_dmae_regs hw; + struct sh_desc *sd; + spin_lock_bh(&sh_chan->desc_lock); /* DMA work check */ - if (dmae_is_busy(sh_chan)) + if (dmae_is_busy(sh_chan)) { + spin_unlock_bh(&sh_chan->desc_lock); return; + } /* Find the first un-transfer desciptor */ - for (ld_node = sh_chan->ld_queue.next; - (ld_node != &sh_chan->ld_queue) - && (to_sh_desc(ld_node)->mark == DESC_COMP); - ld_node = ld_node->next) - cpu_relax(); - - if (ld_node != &sh_chan->ld_queue) { - /* Get the ld start address from ld_queue */ - hw = to_sh_desc(ld_node)->hw; - dmae_set_reg(sh_chan, hw); - dmae_start(sh_chan); - } + list_for_each_entry(sd, &sh_chan->ld_queue, node) + if (sd->mark == DESC_SUBMITTED) { + /* Get the ld start address from ld_queue */ + dmae_set_reg(sh_chan, &sd->hw); + dmae_start(sh_chan); + break; + } + + spin_unlock_bh(&sh_chan->desc_lock); } static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) @@ -420,12 +519,11 @@ static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, dma_cookie_t last_used; dma_cookie_t last_complete; - sh_dmae_chan_ld_cleanup(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan, false); last_used = chan->cookie; last_complete = sh_chan->completed_cookie; - if (last_complete == -EBUSY) - last_complete = last_used; + BUG_ON(last_complete < 0); if (done) *done = last_complete; @@ -480,11 +578,13 @@ static irqreturn_t sh_dmae_err(int irq, void *data) err = sh_dmae_rst(0); if (err) return err; +#ifdef SH_DMAC_BASE1 if (shdev->pdata.mode & SHDMA_DMAOR1) { err = sh_dmae_rst(1); if (err) return err; } +#endif disable_irq(irq); return IRQ_HANDLED; } @@ -494,35 +594,25 @@ static irqreturn_t sh_dmae_err(int irq, void *data) static void dmae_do_tasklet(unsigned long data) { struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; - struct sh_desc *desc, *_desc, *cur_desc = NULL; + struct sh_desc *desc; u32 sar_buf = sh_dmae_readl(sh_chan, SAR); - list_for_each_entry_safe(desc, _desc, - &sh_chan->ld_queue, node) { - if ((desc->hw.sar + desc->hw.tcr) == sar_buf) { - cur_desc = desc; + spin_lock(&sh_chan->desc_lock); + list_for_each_entry(desc, &sh_chan->ld_queue, node) { + if ((desc->hw.sar + desc->hw.tcr) == sar_buf && + desc->mark == DESC_SUBMITTED) { + dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n", + desc->async_tx.cookie, &desc->async_tx, + desc->hw.dar); + desc->mark = DESC_COMPLETED; break; } } + spin_unlock(&sh_chan->desc_lock); - if (cur_desc) { - switch (cur_desc->async_tx.cookie) { - case 0: /* other desc data */ - break; - case -EBUSY: /* last desc */ - sh_chan->completed_cookie = - cur_desc->async_tx.cookie; - break; - default: /* first desc ( 0 < )*/ - sh_chan->completed_cookie = - 
cur_desc->async_tx.cookie - 1; - break; - } - cur_desc->mark = DESC_COMP; - } /* Next desc */ sh_chan_xfer_ld_queue(sh_chan); - sh_dmae_chan_ld_cleanup(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan, false); } static unsigned int get_dmae_irq(unsigned int id) diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h index 60b81e529b42..108f1cffb6f5 100644 --- a/drivers/dma/shdma.h +++ b/drivers/dma/shdma.h @@ -13,9 +13,9 @@ #ifndef __DMA_SHDMA_H #define __DMA_SHDMA_H -#include -#include #include +#include +#include #define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */ @@ -26,13 +26,16 @@ struct sh_dmae_regs { }; struct sh_desc { - struct list_head tx_list; struct sh_dmae_regs hw; struct list_head node; struct dma_async_tx_descriptor async_tx; + dma_cookie_t cookie; + int chunks; int mark; }; +struct device; + struct sh_dmae_chan { dma_cookie_t completed_cookie; /* The maximum cookie completed */ spinlock_t desc_lock; /* Descriptor operation lock */ -- cgit v1.2.3 From cd78809f6191485a90ea6c92c2b58900ab5c156f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Dec 2009 13:52:39 -0700 Subject: ioat3: fix p-disabled q-continuation When continuing a pq calculation the driver needs 3 extra sources. The driver can perform a 3 source calculation with a single descriptor, but needs an extended descriptor to process up to 8 sources in one operation. However, in the p-disabled case only one extra source is needed. When continuing a p-disabled operation there are occasions (i.e. 0 < src_cnt % 8 < 3) where the tail operation does not need an extended descriptor. Properly account for this fact otherwise invalid 'dmacount' values will be written to hardware usually causing the channel to halt with 'invalid descriptor' errors. Cc: Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v3.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 42f6f10fb0cc..e58ecb29513f 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -650,9 +650,11 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, num_descs = ioat2_xferlen_to_descs(ioat, len); /* we need 2x the number of descriptors to cover greater than 3 - * sources + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. */ - if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) { + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { with_ext = 1; num_descs *= 2; } else -- cgit v1.2.3 From a6d52d70677e99bdb89b6921c265d0a58c22e597 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 19 Dec 2009 15:36:02 -0700 Subject: ioat2,3: put channel hardware in known state at init Put the ioat2 and ioat3 state machines in the halted state with all errors cleared. The ioat1 init path is not disturbed for stability, there are no reported ioat1 initiaization issues. 
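Both new helpers poll hardware state with the same bounded-wait idiom;
roughly (a simplified sketch, with done() standing in for the
helper-specific condition -- channel no longer active for quiesce, reset
command bit cleared for reset_sync):

    unsigned long end = jiffies + tmo;

    while (!done(chan)) {
            if (end && time_after(jiffies, end))
                    return -ETIMEDOUT;
            cpu_relax();
    }
    return 0;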
Cc: Reported-by: Roland Dreier Tested-by: Roland Dreier Acked-by: Simon Horman Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 2 +- drivers/dma/ioat/dma.h | 18 ++++++++++++ drivers/dma/ioat/dma_v2.c | 69 +++++++++++++++++++++++++++++++++----------- drivers/dma/ioat/dma_v2.h | 2 ++ drivers/dma/ioat/dma_v3.c | 54 +++++++++++++++++++++++++--------- drivers/dma/ioat/registers.h | 1 + 6 files changed, 114 insertions(+), 32 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index c524d36d3c2e..dcc4ab78b32b 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1032,7 +1032,7 @@ int __devinit ioat_probe(struct ioatdma_device *device) dma->dev = &pdev->dev; if (!dma->chancnt) { - dev_err(dev, "zero channels detected\n"); + dev_err(dev, "channel enumeration error\n"); goto err_setup_interrupts; } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 45edde996480..bbc3e78ef333 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -60,6 +60,7 @@ * @dca: direct cache access context * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) * @enumerate_channels: hw version specific channel enumeration + * @reset_hw: hw version specific channel (re)initialization * @cleanup_tasklet: select between the v2 and v3 cleanup routines * @timer_fn: select between the v2 and v3 timer watchdog routines * @self_test: hardware version specific self test for each supported op type @@ -78,6 +79,7 @@ struct ioatdma_device { struct dca_provider *dca; void (*intr_quirk)(struct ioatdma_device *device); int (*enumerate_channels)(struct ioatdma_device *device); + int (*reset_hw)(struct ioat_chan_common *chan); void (*cleanup_tasklet)(unsigned long data); void (*timer_fn)(unsigned long data); int (*self_test)(struct ioatdma_device *device); @@ -264,6 +266,22 @@ static inline void ioat_suspend(struct ioat_chan_common *chan) writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } +static inline void ioat_reset(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline bool ioat_reset_pending(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u8 cmd; + + cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; +} + static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) { struct ioat_chan_common *chan = &ioat->base; diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 8f1f7f05deaa..5f7a500e18d0 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -239,20 +239,50 @@ void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) __ioat2_start_null_desc(ioat); } -static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) { - struct ioat_chan_common *chan = &ioat->base; - unsigned long phys_complete; + unsigned long end = jiffies + tmo; + int err = 0; u32 status; status = ioat_chansts(chan); if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(chan); while (is_ioat_active(status) || is_ioat_idle(status)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } status = ioat_chansts(chan); cpu_relax(); } + return err; +} + +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(chan); + while 
(ioat_reset_pending(chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; +} + +static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + ioat2_quiesce(chan, 0); if (ioat_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); @@ -318,6 +348,19 @@ void ioat2_timer_event(unsigned long data) spin_unlock_bh(&chan->cleanup_lock); } +static int ioat2_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it initialized */ + u32 chanerr; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + /** * ioat2_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated @@ -360,6 +403,10 @@ int ioat2_enumerate_channels(struct ioatdma_device *device) (unsigned long) ioat); ioat->xfercap_log = xfercap_log; spin_lock_init(&ioat->ring_lock); + if (device->reset_hw(&ioat->base)) { + i = 0; + break; + } } dma->chancnt = i; return i; @@ -467,7 +514,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; struct ioat_ring_ent **ring; - u32 chanerr; int order; /* have we already been set up? */ @@ -477,12 +523,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) /* Setup register to interrupt and write completion status on error */ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - } - /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ chan->completion = pci_pool_alloc(chan->device->completion_pool, @@ -746,13 +786,7 @@ void ioat2_free_chan_resources(struct dma_chan *c) tasklet_disable(&chan->cleanup_task); del_timer_sync(&chan->timer); device->cleanup_tasklet((unsigned long) ioat); - - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. 
- */ - writeb(IOAT_CHANCMD_RESET, - chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - mdelay(100); + device->reset_hw(chan); spin_lock_bh(&ioat->ring_lock); descs = ioat2_ring_space(ioat); @@ -839,6 +873,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) int err; device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat2_reset_hw; device->cleanup_tasklet = ioat2_cleanup_tasklet; device->timer_fn = ioat2_timer_event; device->self_test = ioat_dma_self_test; diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 1d849ef74d5f..3afad8da43cc 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -185,6 +185,8 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order); void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); void ioat2_cleanup_tasklet(unsigned long data); void ioat2_timer_event(unsigned long data); +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo); +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); extern struct kobj_type ioat2_ktype; extern struct kmem_cache *ioat2_cache; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index e58ecb29513f..9908c9e94b2d 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1130,6 +1130,45 @@ static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) return 0; } +static int ioat3_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *device = chan->device; + struct pci_dev *pdev = device->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3, and clear any + * pending errors + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); + if (err) { + dev_err(&pdev->dev, "channel error register unreachable\n"); + return err; + } + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -1139,10 +1178,10 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) struct ioat_chan_common *chan; bool is_raid_device = false; int err; - u16 dev_id; u32 cap; device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat3_reset_hw; device->self_test = ioat3_dma_self_test; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; @@ -1218,19 +1257,6 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_xor_val = NULL; #endif - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability 
issues for IOAT ver.3 - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - err = ioat_probe(device); if (err) return err; diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index f015ec196700..e8ae63baf588 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -27,6 +27,7 @@ #define IOAT_PCI_DEVICE_ID_OFFSET 0x02 #define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERR_INT_OFFSET 0x180 #define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 /* MMIO Device Registers */ -- cgit v1.2.3 From 1e9d1b13efae7e0a2705611d47ae5f07e27015f0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 19 Dec 2009 08:30:30 +0100 Subject: drivers/dma: drop unnecessary memset A memset to 0 is not needed after kzalloc. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; statement S; @@ x = kzalloc(...); if (x == NULL) S ... when != x -memset(x,0,...);// Signed-off-by: Julia Lawall Signed-off-by: Dan Williams --- drivers/dma/dw_dmac.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 2eea823516a7..53bb3cac248f 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1270,8 +1270,6 @@ static int __init dw_probe(struct platform_device *pdev) goto err_kfree; } - memset(dw, 0, sizeof *dw); - dw->regs = ioremap(io->start, DW_REGLEN); if (!dw->regs) { err = -ENOMEM; -- cgit v1.2.3 From 0794ec8ce327ec74416b569b8fb1951274693700 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 22 Dec 2009 21:30:59 +0100 Subject: drivers/dma: Correct use after free Move the kfree after the iounmap that refers to the same structure. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression x,e; identifier f; iterator I; statement S; @@ *kfree(x); ... when != &x when != x = e when != I(x,...) S *x->f // Signed-off-by: Julia Lawall Signed-off-by: Dan Williams --- drivers/dma/coh901318.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 4a99cd94536b..b5f2ee0f8e2c 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1294,8 +1294,8 @@ static int __exit coh901318_remove(struct platform_device *pdev) dma_async_device_unregister(&base->dma_slave); coh901318_pool_destroy(&base->pool); free_irq(platform_get_irq(pdev, 0), base); - kfree(base); iounmap(base->virtbase); + kfree(base); release_mem_region(pdev->resource->start, resource_size(pdev->resource)); return 0; -- cgit v1.2.3 From adef477268ff5ddd0195611dc7e26d7a879fefe1 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Tue, 26 Jan 2010 10:26:06 +0100 Subject: dmaengine: fix memleak in dma_async_device_unregister While debugging a DMA driver I noticed a memory leak after unloading the driver module; it was caught by kmemleak.
Signed-off-by: Anatolij Gustschin Cc: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/dma') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6f51a0a7a8bb..e7a3230fb7d5 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -826,6 +826,7 @@ void dma_async_device_unregister(struct dma_device *device) chan->dev->chan = NULL; mutex_unlock(&dma_list_mutex); device_unregister(&chan->dev->device); + free_percpu(chan->local); } } EXPORT_SYMBOL(dma_async_device_unregister); -- cgit v1.2.3 From 7e55a70c5b9a57c12f49c44b0847c9343d4f54e4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 13 Jan 2010 13:33:12 -0700 Subject: ioat: fix infinite timeout checking in ioat2_quiesce Fix a typo in ioat2_quiesce: the loop must check whether 'tmo' is zero, not 'end', since a zero 'tmo' requests an unbounded wait. Also applies to 2.6.32.3 Cc: Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 5f7a500e18d0..5cc37afe2bc1 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -249,7 +249,7 @@ int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(chan); while (is_ioat_active(status) || is_ioat_idle(status)) { - if (end && time_after(jiffies, end)) { + if (tmo && time_after(jiffies, end)) { err = -ETIMEDOUT; break; } -- cgit v1.2.3 From b953df7c70740cd7593072ebec77a8f658505630 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Fri, 5 Feb 2010 21:52:37 +0800 Subject: dmaengine: correct onstack wait_queue_head declaration Use DECLARE_WAIT_QUEUE_HEAD_ONSTACK to make lockdep happy. Signed-off-by: Yong Zhang Cc: Maciej Sosnowski Cc: Andrew Morton Cc: Nicolas Ferre Signed-off-by: Dan Williams --- drivers/dma/dmatest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/dma') diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 8b905161fbf4..948d563941c9 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -467,7 +467,7 @@ err_srcs: if (iterations > 0) while (!kthread_should_stop()) { - DECLARE_WAIT_QUEUE_HEAD(wait_dmatest_exit); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit); interruptible_sleep_on(&wait_dmatest_exit); } -- cgit v1.2.3 From 8f98781e0f15207b6ab33bee1fae05428be0475b Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 10 Feb 2010 17:32:38 +0100 Subject: async-tx: fix buffer submission error handling in ipu_idmac.c If submitting a new buffer fails, the wrong descriptor gets completed, and the code does not check whether a callback is defined at all, which can lead to an Oops. Fix these bugs and make ipu_update_channel_buffer() void, because it never fails. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Dan Williams --- drivers/dma/ipu/ipu_idmac.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index 9a5bc1a7389e..e80bae1673fa 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -761,12 +761,10 @@ static void ipu_select_buffer(enum ipu_channel channel, int buffer_n) * @buffer_n: buffer number to update. * 0 or 1 are the only valid values. * @phyaddr: buffer physical address. - * @return: Returns 0 on success or negative error code on failure. This - * function will fail if the buffer is set to ready.
*/ /* Called under spin_lock(_irqsave)(&ichan->lock) */ -static int ipu_update_channel_buffer(struct idmac_channel *ichan, - int buffer_n, dma_addr_t phyaddr) +static void ipu_update_channel_buffer(struct idmac_channel *ichan, + int buffer_n, dma_addr_t phyaddr) { enum ipu_channel channel = ichan->dma_chan.chan_id; uint32_t reg; @@ -806,8 +804,6 @@ static int ipu_update_channel_buffer(struct idmac_channel *ichan, } spin_unlock_irqrestore(&ipu_data.lock, flags); - - return 0; } /* Called under spin_lock_irqsave(&ichan->lock) */ @@ -816,7 +812,6 @@ static int ipu_submit_buffer(struct idmac_channel *ichan, { unsigned int chan_id = ichan->dma_chan.chan_id; struct device *dev = &ichan->dma_chan.dev->device; - int ret; if (async_tx_test_ack(&desc->txd)) return -EINTR; @@ -827,14 +822,7 @@ static int ipu_submit_buffer(struct idmac_channel *ichan, * could make it conditional on status >= IPU_CHANNEL_ENABLED, but * doing it again shouldn't hurt either. */ - ret = ipu_update_channel_buffer(ichan, buf_idx, - sg_dma_address(sg)); - - if (ret < 0) { - dev_err(dev, "Updating sg %p on channel 0x%x buffer %d failed!\n", - sg, chan_id, buf_idx); - return ret; - } + ipu_update_channel_buffer(ichan, buf_idx, sg_dma_address(sg)); ipu_select_buffer(chan_id, buf_idx); dev_dbg(dev, "Updated sg %p on channel 0x%x buffer %d\n", @@ -1379,10 +1367,11 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id) if (likely(sgnew) && ipu_submit_buffer(ichan, descnew, sgnew, ichan->active_buffer) < 0) { - callback = desc->txd.callback; - callback_param = desc->txd.callback_param; + callback = descnew->txd.callback; + callback_param = descnew->txd.callback_param; spin_unlock(&ichan->lock); - callback(callback_param); + if (callback) + callback(callback_param); spin_lock(&ichan->lock); } -- cgit v1.2.3 From 734c2992828c66cee3feb21ecd30a6ac44aecc51 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 6 Feb 2010 09:43:41 +0100 Subject: drivers/dma: Correct NULL test cohd_fin has already been verified not to be NULL, so the argument to BUG_ON cannot be true. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ expression *x; expression e; identifier l; @@ if (x == NULL || ...) { ... when forall return ...; } ... when != goto l; when != x = e when != &x *x == NULL // Signed-off-by: Julia Lawall Acked-by: Linus Walleij Signed-off-by: Dan Williams --- drivers/dma/coh901318.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/dma') diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index b5f2ee0f8e2c..64a937262a40 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -613,8 +613,6 @@ static void dma_tasklet(unsigned long data) cohd_fin->pending_irqs--; cohc->completed = cohd_fin->desc.cookie; - BUG_ON(cohc->nbr_active_done && cohd_fin == NULL); - if (cohc->nbr_active_done == 0) return; -- cgit v1.2.3
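The ioat quiesce/reset patches earlier in this series share one idiom: poll a hardware status until it clears, under an optional jiffies deadline where a timeout argument of zero means "wait forever". The following is a minimal standalone sketch of that idiom, not code from any patch above; struct demo_chan, its status-register layout, and demo_chan_busy() are hypothetical stand-ins for ioat_chan_common and the ioat_chansts()/ioat_reset_pending() polls.

/*
 * Minimal sketch of the poll-with-optional-timeout idiom used by
 * ioat2_quiesce() and ioat2_reset_sync(). Illustrative only:
 * demo_chan and demo_chan_busy() are hypothetical, not kernel APIs.
 * A tmo of 0 requests an unbounded wait, which is why the loop must
 * test 'tmo' rather than 'end' -- the exact bug addressed by the
 * "fix infinite timeout checking" patch above.
 */
#include <linux/jiffies.h>	/* jiffies, time_after(), msecs_to_jiffies() */
#include <linux/errno.h>	/* ETIMEDOUT */
#include <linux/io.h>		/* __iomem, readl() */
#include <linux/types.h>	/* bool */
#include <asm/processor.h>	/* cpu_relax() */

struct demo_chan {
	void __iomem *status;	/* hypothetical channel status register */
};

static bool demo_chan_busy(struct demo_chan *chan)
{
	return readl(chan->status) & 0x1;	/* hypothetical busy bit */
}

static int demo_poll_idle(struct demo_chan *chan, unsigned long tmo)
{
	unsigned long end = jiffies + tmo;

	while (demo_chan_busy(chan)) {
		/* tmo == 0 means no deadline: spin until idle */
		if (tmo && time_after(jiffies, end))
			return -ETIMEDOUT;
		cpu_relax();
	}
	return 0;
}

Callers that cannot afford to spin forever pass a bounded timeout, e.g. demo_poll_idle(chan, msecs_to_jiffies(100)), mirroring how ioat2_reset_hw() and ioat3_reset_hw() bound their quiesce and reset waits above.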