Hi,

On Mon, Jun 25, 2012 at 10:25:01PM +0800, cloudy.linux wrote:
> On 2012-6-25 21:40, Phil Sutter wrote:
> > Hi,
> >
> > On Wed, Jun 20, 2012 at 05:41:31PM +0200, Phil Sutter wrote:
> >> PS: I am currently working on the address decoding problem, will get
> >> back to you in a few days when I have something to test. So stay tuned!
> >
> > I have updated the cesa-dma branch at git://nwl.cc/~n0-1/linux.git with
> > code setting the decoding windows. I hope this fixes the issues on
> > orion. I decided not to publish the changes regarding the second DMA
> > channel for now, as this seems to be support for a second crypto session
> > (handled consecutively, so no real improvement) which is not supported
> > anyway.
> >
> > Greetings, Phil
> >
> >
> > Phil Sutter
> > Software Engineer
> >
> 
> Thanks Phil. I'm cloning your git now but the speed is really slow. Last 
> time I tried this I had to cancel after hours of downloading (at only 
> about 20% progress). So the previous tests were actually done with 
> 3.5-rc3 (I tried the up-to-date Linus' linux-git, but ran into a compile 
> problem), of course with your patch and Simon's. Could you provide a 
> diff based on your last round of patches (a diff against the unpatched 
> kernel should also be fine, I think)?
> 
> In the meantime, I'm still trying, at a cloning speed of 5KiB/s ...

Ugh, that's horrible. No idea what's going wrong there, and I have no access
to the management interface right now. In the meantime, please refer to the
attached patch. It is based on 94fa83c in Linus' git but should apply
cleanly to its current HEAD, too.
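
To give a rough idea of what the patch does: per request, mv_cesa.c now only
queues DMA descriptors and then lets the DMA engine and the accelerator do the
work. Condensed (an illustrative sketch only, the chunking loop and error
handling are omitted; the real code is in mv_start_new_crypt_req() below):

	/* sketch only -- condensed from mv_start_new_crypt_req() */
	setup_data_in();             /* queue copy of the input data into SRAM */
	mv_init_crypt_config(req);   /* queue copy of the accelerator config */
	mv_dma_separator();          /* null descriptor: let the CESA process this chunk */
	dma_copy_buf_to_dst(&cpg->p, /* queue copy of the result back to req->dst */
			cpg->sram_phys + SRAM_DATA_OUT_START, cpg->p.crypt_len);

	mv_setup_timer();            /* watchdog in case the interrupt never fires */
	mv_dma_trigger();            /* hand the descriptor chain to the DMA engine */
	writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);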

Greetings, Phil


Phil Sutter
Software Engineer

-- 


Viprinet GmbH
Mainzer Str. 43
55411 Bingen am Rhein
Germany

Phone/Zentrale:         +49-6721-49030-0
Direct line/Durchwahl:  +49-6721-49030-134
Fax:                    +49-6721-49030-209

phil.sut...@viprinet.com
http://www.viprinet.com

Registered office/Sitz der Gesellschaft: Bingen am Rhein
Commercial register/Handelsregister: Amtsgericht Mainz HRB40380
CEO/Geschäftsführer: Simon Kissel
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 25fb3fd..a011b93 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -232,6 +232,7 @@ void __init kirkwood_clk_init(void)
        orion_clkdev_add(NULL, "orion-ehci.0", usb0);
        orion_clkdev_add(NULL, "orion_nand", runit);
        orion_clkdev_add(NULL, "mvsdio", sdio);
+       orion_clkdev_add(NULL, "mv_tdma", crypto);
        orion_clkdev_add(NULL, "mv_crypto", crypto);
        orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".0", xor0);
        orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".1", xor1);
@@ -426,8 +427,41 @@ void __init kirkwood_uart1_init(void)
 /*****************************************************************************
  * Cryptographic Engines and Security Accelerator (CESA)
  ****************************************************************************/
+static struct resource kirkwood_tdma_res[] = {
+       {
+               .name   = "regs deco",
+               .start  = CRYPTO_PHYS_BASE + 0xA00,
+               .end    = CRYPTO_PHYS_BASE + 0xA24,
+               .flags  = IORESOURCE_MEM,
+       }, {
+               .name   = "regs control and error",
+               .start  = CRYPTO_PHYS_BASE + 0x800,
+               .end    = CRYPTO_PHYS_BASE + 0x8CF,
+               .flags  = IORESOURCE_MEM,
+       }, {
+               .name   = "crypto error",
+               .start  = IRQ_KIRKWOOD_TDMA_ERR,
+               .end    = IRQ_KIRKWOOD_TDMA_ERR,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static u64 mv_tdma_dma_mask = DMA_BIT_MASK(32);
+
+static struct platform_device kirkwood_tdma_device = {
+       .name           = "mv_tdma",
+       .id             = -1,
+       .dev            = {
+               .dma_mask               = &mv_tdma_dma_mask,
+               .coherent_dma_mask      = DMA_BIT_MASK(32),
+       },
+       .num_resources  = ARRAY_SIZE(kirkwood_tdma_res),
+       .resource       = kirkwood_tdma_res,
+};
+
 void __init kirkwood_crypto_init(void)
 {
+       platform_device_register(&kirkwood_tdma_device);
        orion_crypto_init(CRYPTO_PHYS_BASE, KIRKWOOD_SRAM_PHYS_BASE,
                          KIRKWOOD_SRAM_SIZE, IRQ_KIRKWOOD_CRYPTO);
 }
diff --git a/arch/arm/mach-kirkwood/include/mach/irqs.h b/arch/arm/mach-kirkwood/include/mach/irqs.h
index 2bf8161..a66aa3f 100644
--- a/arch/arm/mach-kirkwood/include/mach/irqs.h
+++ b/arch/arm/mach-kirkwood/include/mach/irqs.h
@@ -51,6 +51,7 @@
 #define IRQ_KIRKWOOD_GPIO_HIGH_16_23   41
 #define IRQ_KIRKWOOD_GE00_ERR  46
 #define IRQ_KIRKWOOD_GE01_ERR  47
+#define IRQ_KIRKWOOD_TDMA_ERR  49
 #define IRQ_KIRKWOOD_RTC        53
 
 /*
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 9148b22..4734231 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -181,9 +181,49 @@ void __init orion5x_xor_init(void)
 /*****************************************************************************
  * Cryptographic Engines and Security Accelerator (CESA)
  ****************************************************************************/
+static struct resource orion_idma_res[] = {
+       {
+               .name   = "regs deco",
+               .start  = ORION5X_IDMA_PHYS_BASE + 0xA00,
+               .end    = ORION5X_IDMA_PHYS_BASE + 0xA24,
+               .flags  = IORESOURCE_MEM,
+       }, {
+               .name   = "regs control and error",
+               .start  = ORION5X_IDMA_PHYS_BASE + 0x800,
+               .end    = ORION5X_IDMA_PHYS_BASE + 0x8CF,
+               .flags  = IORESOURCE_MEM,
+       }, {
+               .name   = "crypto error",
+               .start  = IRQ_ORION5X_IDMA_ERR,
+               .end    = IRQ_ORION5X_IDMA_ERR,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static u64 mv_idma_dma_mask = DMA_BIT_MASK(32);
+
+static struct mv_dma_pdata mv_idma_pdata = {
+       .sram_target_id = TARGET_SRAM,
+       .sram_attr      = 0,
+       .sram_base      = ORION5X_SRAM_PHYS_BASE,
+};
+
+static struct platform_device orion_idma_device = {
+       .name           = "mv_idma",
+       .id             = -1,
+       .dev            = {
+               .dma_mask               = &mv_idma_dma_mask,
+               .coherent_dma_mask      = DMA_BIT_MASK(32),
+               .platform_data          = &mv_idma_pdata,
+       },
+       .num_resources  = ARRAY_SIZE(orion_idma_res),
+       .resource       = orion_idma_res,
+};
+
 static void __init orion5x_crypto_init(void)
 {
        orion5x_setup_sram_win();
+       platform_device_register(&orion_idma_device);
        orion_crypto_init(ORION5X_CRYPTO_PHYS_BASE, ORION5X_SRAM_PHYS_BASE,
                          SZ_8K, IRQ_ORION5X_CESA);
 }
diff --git a/arch/arm/mach-orion5x/include/mach/orion5x.h b/arch/arm/mach-orion5x/include/mach/orion5x.h
index 2745f5d..a31ac88 100644
--- a/arch/arm/mach-orion5x/include/mach/orion5x.h
+++ b/arch/arm/mach-orion5x/include/mach/orion5x.h
@@ -90,6 +90,8 @@
 #define ORION5X_USB0_PHYS_BASE         (ORION5X_REGS_PHYS_BASE | 0x50000)
 #define ORION5X_USB0_VIRT_BASE         (ORION5X_REGS_VIRT_BASE | 0x50000)
 
+#define ORION5X_IDMA_PHYS_BASE         (ORION5X_REGS_PHYS_BASE | 0x60000)
+
 #define ORION5X_XOR_PHYS_BASE          (ORION5X_REGS_PHYS_BASE | 0x60900)
 #define ORION5X_XOR_VIRT_BASE          (ORION5X_REGS_VIRT_BASE | 0x60900)
 
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 61fd837..0c6c695 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -924,9 +924,15 @@ static struct resource orion_crypto_resources[] = {
        },
 };
 
+static u64 mv_crypto_dmamask = DMA_BIT_MASK(32);
+
 static struct platform_device orion_crypto = {
        .name           = "mv_crypto",
        .id             = -1,
+       .dev            = {
+               .dma_mask = &mv_crypto_dmamask,
+               .coherent_dma_mask = DMA_BIT_MASK(32),
+       },
 };
 
 void __init orion_crypto_init(unsigned long mapbase,
diff --git a/arch/arm/plat-orion/include/plat/mv_dma.h b/arch/arm/plat-orion/include/plat/mv_dma.h
new file mode 100644
index 0000000..e4e72bb
--- /dev/null
+++ b/arch/arm/plat-orion/include/plat/mv_dma.h
@@ -0,0 +1,15 @@
+/*
+ * arch/arm/plat-orion/include/plat/mv_dma.h
+ *
+ * Marvell IDMA/TDMA platform device data definition file.
+ */
+#ifndef __PLAT_MV_DMA_H
+#define __PLAT_MV_DMA_H
+
+struct mv_dma_pdata {
+       unsigned int sram_target_id;
+       unsigned int sram_attr;
+       unsigned int sram_base;
+};
+
+#endif /* __PLAT_MV_DMA_H */
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 1092a77..3709f38 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -159,6 +159,10 @@ config CRYPTO_GHASH_S390
 
          It is available as of z196.
 
+config CRYPTO_DEV_MV_DMA
+       tristate
+       default no
+
 config CRYPTO_DEV_MV_CESA
        tristate "Marvell's Cryptographic Engine"
        depends on PLAT_ORION
@@ -166,6 +170,7 @@ config CRYPTO_DEV_MV_CESA
        select CRYPTO_AES
        select CRYPTO_BLKCIPHER2
        select CRYPTO_HASH
+       select CRYPTO_DEV_MV_DMA
        help
          This driver allows you to utilize the Cryptographic Engines and
          Security Accelerator (CESA) which can be found on the Marvell Orion
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 0139032..cb655ad 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
 obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
 n2_crypto-y := n2_core.o n2_asm.o
 obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
+obj-$(CONFIG_CRYPTO_DEV_MV_DMA) += mv_dma.o
 obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
@@ -14,4 +15,4 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
 obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
 obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
 obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
-obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
\ No newline at end of file
+obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
diff --git a/drivers/crypto/dma_desclist.h b/drivers/crypto/dma_desclist.h
new file mode 100644
index 0000000..c471ad6
--- /dev/null
+++ b/drivers/crypto/dma_desclist.h
@@ -0,0 +1,79 @@
+#ifndef __DMA_DESCLIST__
+#define __DMA_DESCLIST__
+
+struct dma_desc {
+       void *virt;
+       dma_addr_t phys;
+};
+
+struct dma_desclist {
+       struct dma_pool *itempool;
+       struct dma_desc *desclist;
+       unsigned long length;
+       unsigned long usage;
+};
+
+#define DESCLIST_ITEM(dl, x)           ((dl).desclist[(x)].virt)
+#define DESCLIST_ITEM_DMA(dl, x)       ((dl).desclist[(x)].phys)
+#define DESCLIST_FULL(dl)              ((dl).length == (dl).usage)
+
+static inline int
+init_dma_desclist(struct dma_desclist *dl, struct device *dev,
+               size_t size, size_t align, size_t boundary)
+{
+#define STRX(x) #x
+#define STR(x) STRX(x)
+       dl->itempool = dma_pool_create(
+                       "DMA Desclist Pool at "__FILE__"("STR(__LINE__)")",
+                       dev, size, align, boundary);
+#undef STR
+#undef STRX
+       if (!dl->itempool)
+               return 1;
+       dl->desclist = NULL;
+       dl->length = dl->usage = 0;
+       return 0;
+}
+
+static inline int
+set_dma_desclist_size(struct dma_desclist *dl, unsigned long nelem)
+{
+       /* need to increase size first if requested */
+       if (nelem > dl->length) {
+               struct dma_desc *newmem;
+               int newsize = nelem * sizeof(struct dma_desc);
+
+               newmem = krealloc(dl->desclist, newsize, GFP_KERNEL);
+               if (!newmem)
+                       return -ENOMEM;
+               dl->desclist = newmem;
+       }
+
+       /* allocate/free dma descriptors, adjusting dl->length on the go */
+       for (; dl->length < nelem; dl->length++) {
+               DESCLIST_ITEM(*dl, dl->length) = dma_pool_alloc(dl->itempool,
+                               GFP_KERNEL, &DESCLIST_ITEM_DMA(*dl, dl->length));
+               if (!DESCLIST_ITEM(*dl, dl->length))
+                       return -ENOMEM;
+       }
+       for (; dl->length > nelem; dl->length--)
+               dma_pool_free(dl->itempool, DESCLIST_ITEM(*dl, dl->length - 1),
+                               DESCLIST_ITEM_DMA(*dl, dl->length - 1));
+
+       /* ignore size decreases but those to zero */
+       if (!nelem) {
+               kfree(dl->desclist);
+               dl->desclist = 0;
+       }
+       return 0;
+}
+
+static inline void
+fini_dma_desclist(struct dma_desclist *dl)
+{
+       set_dma_desclist_size(dl, 0);
+       dma_pool_destroy(dl->itempool);
+       dl->length = dl->usage = 0;
+}
+
+#endif /* __DMA_DESCLIST__ */
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c
index 1cc6b3f..b75fdf5 100644
--- a/drivers/crypto/mv_cesa.c
+++ b/drivers/crypto/mv_cesa.c
@@ -9,6 +9,9 @@
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kthread.h>
@@ -21,9 +24,17 @@
 #include <crypto/sha.h>
 
 #include "mv_cesa.h"
+#include "mv_dma.h"
+#include "dma_desclist.h"
 
 #define MV_CESA        "MV-CESA:"
 #define MAX_HW_HASH_SIZE       0xFFFF
+#define MV_CESA_EXPIRE         500 /* msec */
+
+#define MV_DMA_INIT_POOLSIZE 16
+#define MV_DMA_ALIGN 16
+
+static int count_sgs(struct scatterlist *, unsigned int);
 
 /*
  * STM:
@@ -43,13 +54,12 @@ enum engine_status {
 
 /**
  * struct req_progress - used for every crypt request
- * @src_sg_it:         sg iterator for src
- * @dst_sg_it:         sg iterator for dst
+ * @src_sg:            sg list for src
+ * @dst_sg:            sg list for dst
  * @sg_src_left:       bytes left in src to process (scatter list)
  * @src_start:         offset to add to src start position (scatter list)
  * @crypt_len:         length of current hw crypt/hash process
  * @hw_nbytes:         total bytes to process in hw for this request
- * @copy_back:         whether to copy data back (crypt) or not (hash)
  * @sg_dst_left:       bytes left dst to process in this scatter list
  * @dst_start:         offset to add to dst start position (scatter list)
  * @hw_processed_bytes:        number of bytes processed by hw (request).
@@ -59,10 +69,9 @@ enum engine_status {
  * track of progress within current scatterlist.
  */
 struct req_progress {
-       struct sg_mapping_iter src_sg_it;
-       struct sg_mapping_iter dst_sg_it;
+       struct scatterlist *src_sg;
+       struct scatterlist *dst_sg;
        void (*complete) (void);
-       void (*process) (int is_first);
 
        /* src mostly */
        int sg_src_left;
@@ -70,15 +79,34 @@ struct req_progress {
        int crypt_len;
        int hw_nbytes;
        /* dst mostly */
-       int copy_back;
        int sg_dst_left;
        int dst_start;
        int hw_processed_bytes;
 };
 
+struct sec_accel_sram {
+       struct sec_accel_config op;
+       union {
+               struct {
+                       u32 key[8];
+                       u32 iv[4];
+               } crypt;
+               struct {
+                       u32 ivi[5];
+                       u32 ivo[5];
+               } hash;
+       } type;
+#define sa_key type.crypt.key
+#define sa_iv  type.crypt.iv
+#define sa_ivi type.hash.ivi
+#define sa_ivo type.hash.ivo
+} __attribute__((packed));
+
 struct crypto_priv {
+       struct device *dev;
        void __iomem *reg;
        void __iomem *sram;
+       u32 sram_phys;
        int irq;
        struct clk *clk;
        struct task_struct *queue_th;
@@ -87,16 +115,25 @@ struct crypto_priv {
        spinlock_t lock;
        struct crypto_queue queue;
        enum engine_status eng_st;
+       struct timer_list completion_timer;
        struct crypto_async_request *cur_req;
        struct req_progress p;
        int max_req_size;
        int sram_size;
        int has_sha1;
        int has_hmac_sha1;
+
+       struct sec_accel_sram sa_sram;
+       dma_addr_t sa_sram_dma;
+
+       struct dma_desclist desclist;
 };
 
 static struct crypto_priv *cpg;
 
+#define ITEM(x)                ((u32 *)DESCLIST_ITEM(cpg->desclist, x))
+#define ITEM_DMA(x)    DESCLIST_ITEM_DMA(cpg->desclist, x)
+
 struct mv_ctx {
        u8 aes_enc_key[AES_KEY_LEN];
        u32 aes_dec_key[8];
@@ -131,13 +168,75 @@ struct mv_req_hash_ctx {
        u64 count;
        u32 state[SHA1_DIGEST_SIZE / 4];
        u8 buffer[SHA1_BLOCK_SIZE];
+       dma_addr_t buffer_dma;
        int first_hash;         /* marks that we don't have previous state */
        int last_chunk;         /* marks that this is the 'final' request */
        int extra_bytes;        /* unprocessed bytes in buffer */
+       int digestsize;         /* size of the digest */
        enum hash_op op;
        int count_add;
+       dma_addr_t result_dma;
 };
 
+static void mv_completion_timer_callback(unsigned long unused)
+{
+       int active = readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_EN_SEC_ACCL0;
+       int count = 10;
+
+       printk(KERN_ERR MV_CESA
+              "completion timer expired (CESA %sactive), cleaning up.\n",
+              active ? "" : "in");
+
+       del_timer(&cpg->completion_timer);
+       writel(SEC_CMD_DISABLE_SEC, cpg->reg + SEC_ACCEL_CMD);
+       while((readl(cpg->reg + SEC_ACCEL_CMD) & SEC_CMD_DISABLE_SEC) && count--) {
+               mdelay(100);
+       }
+       if (count < 0) {
+               printk(KERN_ERR MV_CESA
+                      "%s: engine reset timed out!\n", __func__);
+       }
+       cpg->eng_st = ENGINE_W_DEQUEUE;
+       wake_up_process(cpg->queue_th);
+}
+
+static void mv_setup_timer(void)
+{
+       setup_timer(&cpg->completion_timer, &mv_completion_timer_callback, 0);
+       mod_timer(&cpg->completion_timer,
+                       jiffies + msecs_to_jiffies(MV_CESA_EXPIRE));
+}
+
+static inline void mv_dma_u32_copy(dma_addr_t dst, u32 val)
+{
+       if (unlikely(DESCLIST_FULL(cpg->desclist)) &&
+           set_dma_desclist_size(&cpg->desclist, cpg->desclist.length << 1)) {
+               printk(KERN_ERR MV_CESA "resizing poolsize to %lu failed\n",
+                               cpg->desclist.length << 1);
+               return;
+       }
+       *ITEM(cpg->desclist.usage) = val;
+       mv_dma_memcpy(dst, ITEM_DMA(cpg->desclist.usage), sizeof(u32));
+       cpg->desclist.usage++;
+}
+
+static inline bool
+mv_dma_map_sg(struct scatterlist *sg, int nbytes, enum dma_data_direction dir)
+{
+       int nents = count_sgs(sg, nbytes);
+
+       if (nbytes && dma_map_sg(cpg->dev, sg, nents, dir) != nents)
+               return false;
+       return true;
+}
+
+static inline void
+mv_dma_unmap_sg(struct scatterlist *sg, int nbytes, enum dma_data_direction dir)
+{
+       if (nbytes)
+               dma_unmap_sg(cpg->dev, sg, count_sgs(sg, nbytes), dir);
+}
+
 static void compute_aes_dec_key(struct mv_ctx *ctx)
 {
        struct crypto_aes_ctx gen_aes_key;
@@ -187,19 +286,19 @@ static int mv_setkey_aes(struct crypto_ablkcipher *cipher, const u8 *key,
 
 static void copy_src_to_buf(struct req_progress *p, char *dbuf, int len)
 {
-       int ret;
        void *sbuf;
        int copy_len;
 
        while (len) {
                if (!p->sg_src_left) {
-                       ret = sg_miter_next(&p->src_sg_it);
-                       BUG_ON(!ret);
-                       p->sg_src_left = p->src_sg_it.length;
+                       /* next sg please */
+                       p->src_sg = sg_next(p->src_sg);
+                       BUG_ON(!p->src_sg);
+                       p->sg_src_left = p->src_sg->length;
                        p->src_start = 0;
                }
 
-               sbuf = p->src_sg_it.addr + p->src_start;
+               sbuf = sg_virt(p->src_sg) + p->src_start;
 
                copy_len = min(p->sg_src_left, len);
                memcpy(dbuf, sbuf, copy_len);
@@ -212,73 +311,123 @@ static void copy_src_to_buf(struct req_progress *p, char *dbuf, int len)
        }
 }
 
+static void dma_copy_src_to_buf(struct req_progress *p, dma_addr_t dbuf, int len)
+{
+       dma_addr_t sbuf;
+       int copy_len;
+
+       while (len) {
+               if (!p->sg_src_left) {
+                       /* next sg please */
+                       p->src_sg = sg_next(p->src_sg);
+                       BUG_ON(!p->src_sg);
+                       p->sg_src_left = sg_dma_len(p->src_sg);
+                       p->src_start = 0;
+               }
+
+               sbuf = sg_dma_address(p->src_sg) + p->src_start;
+
+               copy_len = min(p->sg_src_left, len);
+               mv_dma_memcpy(dbuf, sbuf, copy_len);
+
+               p->src_start += copy_len;
+               p->sg_src_left -= copy_len;
+
+               len -= copy_len;
+               dbuf += copy_len;
+       }
+}
+
+static void dma_copy_buf_to_dst(struct req_progress *p, dma_addr_t sbuf, int len)
+{
+       dma_addr_t dbuf;
+       int copy_len;
+
+       while (len) {
+               if (!p->sg_dst_left) {
+                       /* next sg please */
+                       p->dst_sg = sg_next(p->dst_sg);
+                       BUG_ON(!p->dst_sg);
+                       p->sg_dst_left = sg_dma_len(p->dst_sg);
+                       p->dst_start = 0;
+               }
+
+               dbuf = sg_dma_address(p->dst_sg) + p->dst_start;
+
+               copy_len = min(p->sg_dst_left, len);
+               mv_dma_memcpy(dbuf, sbuf, copy_len);
+
+               p->dst_start += copy_len;
+               p->sg_dst_left -= copy_len;
+
+               len -= copy_len;
+               sbuf += copy_len;
+       }
+}
+
 static void setup_data_in(void)
 {
        struct req_progress *p = &cpg->p;
        int data_in_sram =
            min(p->hw_nbytes - p->hw_processed_bytes, cpg->max_req_size);
-       copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START + p->crypt_len,
+       dma_copy_src_to_buf(p, cpg->sram_phys + SRAM_DATA_IN_START + p->crypt_len,
                        data_in_sram - p->crypt_len);
        p->crypt_len = data_in_sram;
 }
 
-static void mv_process_current_q(int first_block)
+static void mv_init_crypt_config(struct ablkcipher_request *req)
 {
-       struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req);
        struct mv_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
        struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req);
-       struct sec_accel_config op;
+       struct sec_accel_config *op = &cpg->sa_sram.op;
 
        switch (req_ctx->op) {
        case COP_AES_ECB:
-               op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_ECB;
+               op->config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_ECB;
                break;
        case COP_AES_CBC:
        default:
-               op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_CBC;
-               op.enc_iv = ENC_IV_POINT(SRAM_DATA_IV) |
+               op->config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_CBC;
+               op->enc_iv = ENC_IV_POINT(SRAM_DATA_IV) |
                        ENC_IV_BUF_POINT(SRAM_DATA_IV_BUF);
-               if (first_block)
-                       memcpy(cpg->sram + SRAM_DATA_IV, req->info, 16);
+               memcpy(cpg->sa_sram.sa_iv, req->info, 16);
                break;
        }
        if (req_ctx->decrypt) {
-               op.config |= CFG_DIR_DEC;
-               memcpy(cpg->sram + SRAM_DATA_KEY_P, ctx->aes_dec_key,
-                               AES_KEY_LEN);
+               op->config |= CFG_DIR_DEC;
+               memcpy(cpg->sa_sram.sa_key, ctx->aes_dec_key, AES_KEY_LEN);
        } else {
-               op.config |= CFG_DIR_ENC;
-               memcpy(cpg->sram + SRAM_DATA_KEY_P, ctx->aes_enc_key,
-                               AES_KEY_LEN);
+               op->config |= CFG_DIR_ENC;
+               memcpy(cpg->sa_sram.sa_key, ctx->aes_enc_key, AES_KEY_LEN);
        }
 
        switch (ctx->key_len) {
        case AES_KEYSIZE_128:
-               op.config |= CFG_AES_LEN_128;
+               op->config |= CFG_AES_LEN_128;
                break;
        case AES_KEYSIZE_192:
-               op.config |= CFG_AES_LEN_192;
+               op->config |= CFG_AES_LEN_192;
                break;
        case AES_KEYSIZE_256:
-               op.config |= CFG_AES_LEN_256;
+               op->config |= CFG_AES_LEN_256;
                break;
        }
-       op.enc_p = ENC_P_SRC(SRAM_DATA_IN_START) |
+       op->enc_p = ENC_P_SRC(SRAM_DATA_IN_START) |
                ENC_P_DST(SRAM_DATA_OUT_START);
-       op.enc_key_p = SRAM_DATA_KEY_P;
+       op->enc_key_p = SRAM_DATA_KEY_P;
+       op->enc_len = cpg->p.crypt_len;
 
-       setup_data_in();
-       op.enc_len = cpg->p.crypt_len;
-       memcpy(cpg->sram + SRAM_CONFIG, &op,
-                       sizeof(struct sec_accel_config));
-
-       /* GO */
-       writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
+       dma_sync_single_for_device(cpg->dev, cpg->sa_sram_dma,
+                       sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
+       mv_dma_memcpy(cpg->sram_phys + SRAM_CONFIG, cpg->sa_sram_dma,
+                       sizeof(struct sec_accel_sram));
+}
 
-       /*
-        * XXX: add timer if the interrupt does not occur for some mystery
-        * reason
-        */
+static void mv_update_crypt_config(void)
+{
+       /* update the enc_len field only */
+       mv_dma_u32_copy(cpg->sram_phys + SRAM_CONFIG + 2 * sizeof(u32),
+                       (u32)cpg->p.crypt_len);
 }
 
 static void mv_crypto_algo_completion(void)
@@ -286,8 +435,12 @@ static void mv_crypto_algo_completion(void)
        struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req);
        struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req);
 
-       sg_miter_stop(&cpg->p.src_sg_it);
-       sg_miter_stop(&cpg->p.dst_sg_it);
+       if (req->src == req->dst) {
+               mv_dma_unmap_sg(req->src, req->nbytes, DMA_BIDIRECTIONAL);
+       } else {
+               mv_dma_unmap_sg(req->src, req->nbytes, DMA_TO_DEVICE);
+               mv_dma_unmap_sg(req->dst, req->nbytes, DMA_FROM_DEVICE);
+       }
 
        if (req_ctx->op != COP_AES_CBC)
                return ;
@@ -295,37 +448,33 @@ static void mv_crypto_algo_completion(void)
        memcpy(req->info, cpg->sram + SRAM_DATA_IV_BUF, 16);
 }
 
-static void mv_process_hash_current(int first_block)
+static void mv_init_hash_config(struct ahash_request *req)
 {
-       struct ahash_request *req = ahash_request_cast(cpg->cur_req);
        const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
        struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req);
        struct req_progress *p = &cpg->p;
-       struct sec_accel_config op = { 0 };
+       struct sec_accel_config *op = &cpg->sa_sram.op;
        int is_last;
 
        switch (req_ctx->op) {
        case COP_SHA1:
        default:
-               op.config = CFG_OP_MAC_ONLY | CFG_MACM_SHA1;
+               op->config = CFG_OP_MAC_ONLY | CFG_MACM_SHA1;
                break;
        case COP_HMAC_SHA1:
-               op.config = CFG_OP_MAC_ONLY | CFG_MACM_HMAC_SHA1;
-               memcpy(cpg->sram + SRAM_HMAC_IV_IN,
+               op->config = CFG_OP_MAC_ONLY | CFG_MACM_HMAC_SHA1;
+               memcpy(cpg->sa_sram.sa_ivi,
                                tfm_ctx->ivs, sizeof(tfm_ctx->ivs));
                break;
        }
 
-       op.mac_src_p =
-               MAC_SRC_DATA_P(SRAM_DATA_IN_START) | MAC_SRC_TOTAL_LEN((u32)
-               req_ctx->
-               count);
-
-       setup_data_in();
+       op->mac_src_p =
+               MAC_SRC_DATA_P(SRAM_DATA_IN_START) |
+               MAC_SRC_TOTAL_LEN((u32)req_ctx->count);
 
-       op.mac_digest =
+       op->mac_digest =
                MAC_DIGEST_P(SRAM_DIGEST_BUF) | MAC_FRAG_LEN(p->crypt_len);
-       op.mac_iv =
+       op->mac_iv =
                MAC_INNER_IV_P(SRAM_HMAC_IV_IN) |
                MAC_OUTER_IV_P(SRAM_HMAC_IV_OUT);
 
@@ -334,35 +483,59 @@ static void mv_process_hash_current(int first_block)
                && (req_ctx->count <= MAX_HW_HASH_SIZE);
        if (req_ctx->first_hash) {
                if (is_last)
-                       op.config |= CFG_NOT_FRAG;
+                       op->config |= CFG_NOT_FRAG;
                else
-                       op.config |= CFG_FIRST_FRAG;
+                       op->config |= CFG_FIRST_FRAG;
 
                req_ctx->first_hash = 0;
        } else {
                if (is_last)
-                       op.config |= CFG_LAST_FRAG;
+                       op->config |= CFG_LAST_FRAG;
                else
-                       op.config |= CFG_MID_FRAG;
-
-               if (first_block) {
-                       writel(req_ctx->state[0], cpg->reg + DIGEST_INITIAL_VAL_A);
-                       writel(req_ctx->state[1], cpg->reg + DIGEST_INITIAL_VAL_B);
-                       writel(req_ctx->state[2], cpg->reg + DIGEST_INITIAL_VAL_C);
-                       writel(req_ctx->state[3], cpg->reg + DIGEST_INITIAL_VAL_D);
-                       writel(req_ctx->state[4], cpg->reg + DIGEST_INITIAL_VAL_E);
-               }
+                       op->config |= CFG_MID_FRAG;
+
+               writel(req_ctx->state[0], cpg->reg + DIGEST_INITIAL_VAL_A);
+               writel(req_ctx->state[1], cpg->reg + DIGEST_INITIAL_VAL_B);
+               writel(req_ctx->state[2], cpg->reg + DIGEST_INITIAL_VAL_C);
+               writel(req_ctx->state[3], cpg->reg + DIGEST_INITIAL_VAL_D);
+               writel(req_ctx->state[4], cpg->reg + DIGEST_INITIAL_VAL_E);
        }
 
-       memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config));
+       dma_sync_single_for_device(cpg->dev, cpg->sa_sram_dma,
+                       sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
+       mv_dma_memcpy(cpg->sram_phys + SRAM_CONFIG, cpg->sa_sram_dma,
+                       sizeof(struct sec_accel_sram));
+}
 
-       /* GO */
-       writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
+static void mv_update_hash_config(struct ahash_request *req)
+{
+       struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req);
+       struct req_progress *p = &cpg->p;
+       int is_last;
+       u32 val;
+
+       /* update only the config (for changed fragment state) and
+        * mac_digest (for changed frag len) fields */
 
-       /*
-       * XXX: add timer if the interrupt does not occur for some mystery
-       * reason
-       */
+       switch (req_ctx->op) {
+       case COP_SHA1:
+       default:
+               val = CFG_OP_MAC_ONLY | CFG_MACM_SHA1;
+               break;
+       case COP_HMAC_SHA1:
+               val = CFG_OP_MAC_ONLY | CFG_MACM_HMAC_SHA1;
+               break;
+       }
+
+       is_last = req_ctx->last_chunk
+               && (p->hw_processed_bytes + p->crypt_len >= p->hw_nbytes)
+               && (req_ctx->count <= MAX_HW_HASH_SIZE);
+
+       val |= is_last ? CFG_LAST_FRAG : CFG_MID_FRAG;
+       mv_dma_u32_copy(cpg->sram_phys + SRAM_CONFIG, val);
+
+       val = MAC_DIGEST_P(SRAM_DIGEST_BUF) | MAC_FRAG_LEN(p->crypt_len);
+       mv_dma_u32_copy(cpg->sram_phys + SRAM_CONFIG + 6 * sizeof(u32), val);
 }
 
 static inline int mv_hash_import_sha1_ctx(const struct mv_req_hash_ctx *ctx,
@@ -406,6 +579,15 @@ out:
        return rc;
 }
 
+static void mv_save_digest_state(struct mv_req_hash_ctx *ctx)
+{
+       ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A);
+       ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B);
+       ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C);
+       ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D);
+       ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E);
+}
+
 static void mv_hash_algo_completion(void)
 {
        struct ahash_request *req = ahash_request_cast(cpg->cur_req);
@@ -413,72 +595,39 @@ static void mv_hash_algo_completion(void)
 
        if (ctx->extra_bytes)
                copy_src_to_buf(&cpg->p, ctx->buffer, ctx->extra_bytes);
-       sg_miter_stop(&cpg->p.src_sg_it);
 
        if (likely(ctx->last_chunk)) {
-               if (likely(ctx->count <= MAX_HW_HASH_SIZE)) {
-                       memcpy(req->result, cpg->sram + SRAM_DIGEST_BUF,
-                              crypto_ahash_digestsize(crypto_ahash_reqtfm
-                                                      (req)));
-               } else
+               dma_unmap_single(cpg->dev, ctx->result_dma,
+                               ctx->digestsize, DMA_FROM_DEVICE);
+
+               dma_unmap_single(cpg->dev, ctx->buffer_dma,
+                               SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+
+               if (unlikely(ctx->count > MAX_HW_HASH_SIZE)) {
+                       mv_save_digest_state(ctx);
                        mv_hash_final_fallback(req);
+               }
        } else {
-               ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A);
-               ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B);
-               ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C);
-               ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D);
-               ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E);
+               mv_save_digest_state(ctx);
        }
+
+       mv_dma_unmap_sg(req->src, req->nbytes, DMA_TO_DEVICE);
 }
 
 static void dequeue_complete_req(void)
 {
        struct crypto_async_request *req = cpg->cur_req;
-       void *buf;
-       int ret;
-       cpg->p.hw_processed_bytes += cpg->p.crypt_len;
-       if (cpg->p.copy_back) {
-               int need_copy_len = cpg->p.crypt_len;
-               int sram_offset = 0;
-               do {
-                       int dst_copy;
-
-                       if (!cpg->p.sg_dst_left) {
-                               ret = sg_miter_next(&cpg->p.dst_sg_it);
-                               BUG_ON(!ret);
-                               cpg->p.sg_dst_left = cpg->p.dst_sg_it.length;
-                               cpg->p.dst_start = 0;
-                       }
 
-                       buf = cpg->p.dst_sg_it.addr;
-                       buf += cpg->p.dst_start;
-
-                       dst_copy = min(need_copy_len, cpg->p.sg_dst_left);
-
-                       memcpy(buf,
-                              cpg->sram + SRAM_DATA_OUT_START + sram_offset,
-                              dst_copy);
-                       sram_offset += dst_copy;
-                       cpg->p.sg_dst_left -= dst_copy;
-                       need_copy_len -= dst_copy;
-                       cpg->p.dst_start += dst_copy;
-               } while (need_copy_len > 0);
-       }
-
-       cpg->p.crypt_len = 0;
+       mv_dma_clear();
+       cpg->desclist.usage = 0;
 
        BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE);
-       if (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) {
-               /* process next scatter list entry */
-               cpg->eng_st = ENGINE_BUSY;
-               cpg->p.process(0);
-       } else {
-               cpg->p.complete();
-               cpg->eng_st = ENGINE_IDLE;
-               local_bh_disable();
-               req->complete(req, 0);
-               local_bh_enable();
-       }
+
+       cpg->p.complete();
+       cpg->eng_st = ENGINE_IDLE;
+       local_bh_disable();
+       req->complete(req, 0);
+       local_bh_enable();
 }
 
 static int count_sgs(struct scatterlist *sl, unsigned int total_bytes)
@@ -501,33 +650,68 @@ static int count_sgs(struct scatterlist *sl, unsigned int total_bytes)
 static void mv_start_new_crypt_req(struct ablkcipher_request *req)
 {
        struct req_progress *p = &cpg->p;
-       int num_sgs;
 
        cpg->cur_req = &req->base;
        memset(p, 0, sizeof(struct req_progress));
        p->hw_nbytes = req->nbytes;
        p->complete = mv_crypto_algo_completion;
-       p->process = mv_process_current_q;
-       p->copy_back = 1;
 
-       num_sgs = count_sgs(req->src, req->nbytes);
-       sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
+       /* assume inplace request */
+       if (req->src == req->dst) {
+               if (!mv_dma_map_sg(req->src, req->nbytes, DMA_BIDIRECTIONAL))
+                       return;
+       } else {
+               if (!mv_dma_map_sg(req->src, req->nbytes, DMA_TO_DEVICE))
+                       return;
 
-       num_sgs = count_sgs(req->dst, req->nbytes);
-       sg_miter_start(&p->dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG);
+               if (!mv_dma_map_sg(req->dst, req->nbytes, DMA_FROM_DEVICE)) {
+                       mv_dma_unmap_sg(req->src, req->nbytes, DMA_TO_DEVICE);
+                       return;
+               }
+       }
+
+       p->src_sg = req->src;
+       p->dst_sg = req->dst;
+       if (req->nbytes) {
+               BUG_ON(!req->src);
+               BUG_ON(!req->dst);
+               p->sg_src_left = sg_dma_len(req->src);
+               p->sg_dst_left = sg_dma_len(req->dst);
+       }
 
-       mv_process_current_q(1);
+       setup_data_in();
+       mv_init_crypt_config(req);
+       mv_dma_separator();
+       dma_copy_buf_to_dst(&cpg->p, cpg->sram_phys + SRAM_DATA_OUT_START, cpg->p.crypt_len);
+       cpg->p.hw_processed_bytes += cpg->p.crypt_len;
+       while (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) {
+               cpg->p.crypt_len = 0;
+
+               setup_data_in();
+               mv_update_crypt_config();
+               mv_dma_separator();
+               dma_copy_buf_to_dst(&cpg->p, cpg->sram_phys + SRAM_DATA_OUT_START, cpg->p.crypt_len);
+               cpg->p.hw_processed_bytes += cpg->p.crypt_len;
+       }
+
+
+       /* GO */
+       mv_setup_timer();
+       mv_dma_trigger();
+       writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
 }
 
 static void mv_start_new_hash_req(struct ahash_request *req)
 {
        struct req_progress *p = &cpg->p;
        struct mv_req_hash_ctx *ctx = ahash_request_ctx(req);
-       int num_sgs, hw_bytes, old_extra_bytes, rc;
+       int hw_bytes, old_extra_bytes, rc;
+
        cpg->cur_req = &req->base;
        memset(p, 0, sizeof(struct req_progress));
        hw_bytes = req->nbytes + ctx->extra_bytes;
        old_extra_bytes = ctx->extra_bytes;
+       ctx->digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
 
        ctx->extra_bytes = hw_bytes % SHA1_BLOCK_SIZE;
        if (ctx->extra_bytes != 0
@@ -536,25 +720,13 @@ static void mv_start_new_hash_req(struct ahash_request *req)
        else
                ctx->extra_bytes = 0;
 
-       num_sgs = count_sgs(req->src, req->nbytes);
-       sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
-
-       if (hw_bytes) {
-               p->hw_nbytes = hw_bytes;
-               p->complete = mv_hash_algo_completion;
-               p->process = mv_process_hash_current;
-
-               if (unlikely(old_extra_bytes)) {
-                       memcpy(cpg->sram + SRAM_DATA_IN_START, ctx->buffer,
-                              old_extra_bytes);
-                       p->crypt_len = old_extra_bytes;
+       if (unlikely(!hw_bytes)) { /* too little data for CESA */
+               if (req->nbytes) {
+                       p->src_sg = req->src;
+                       p->sg_src_left = req->src->length;
+                       copy_src_to_buf(p, ctx->buffer + old_extra_bytes,
+                                       req->nbytes);
                }
-
-               mv_process_hash_current(1);
-       } else {
-               copy_src_to_buf(p, ctx->buffer + old_extra_bytes,
-                               ctx->extra_bytes - old_extra_bytes);
-               sg_miter_stop(&p->src_sg_it);
                if (ctx->last_chunk)
                        rc = mv_hash_final_fallback(req);
                else
@@ -563,7 +735,60 @@ static void mv_start_new_hash_req(struct ahash_request *req)
                local_bh_disable();
                req->base.complete(&req->base, rc);
                local_bh_enable();
+               return;
+       }
+
+       if (likely(req->nbytes)) {
+               BUG_ON(!req->src);
+
+               if (!mv_dma_map_sg(req->src, req->nbytes, DMA_TO_DEVICE)) {
+                       printk(KERN_ERR "%s: out of memory\n", __func__);
+                       return;
+               }
+               p->sg_src_left = sg_dma_len(req->src);
+               p->src_sg = req->src;
        }
+
+       p->hw_nbytes = hw_bytes;
+       p->complete = mv_hash_algo_completion;
+
+       if (unlikely(old_extra_bytes)) {
+               dma_sync_single_for_device(cpg->dev, ctx->buffer_dma,
+                               SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+               mv_dma_memcpy(cpg->sram_phys + SRAM_DATA_IN_START,
+                               ctx->buffer_dma, old_extra_bytes);
+               p->crypt_len = old_extra_bytes;
+       }
+
+       setup_data_in();
+       mv_init_hash_config(req);
+       mv_dma_separator();
+       cpg->p.hw_processed_bytes += cpg->p.crypt_len;
+       while (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) {
+               cpg->p.crypt_len = 0;
+
+               setup_data_in();
+               mv_update_hash_config(req);
+               mv_dma_separator();
+               cpg->p.hw_processed_bytes += cpg->p.crypt_len;
+       }
+       if (req->result) {
+               ctx->result_dma = dma_map_single(cpg->dev, req->result,
+                               ctx->digestsize, DMA_FROM_DEVICE);
+               mv_dma_memcpy(ctx->result_dma,
+                               cpg->sram_phys + SRAM_DIGEST_BUF,
+                               ctx->digestsize);
+       } else {
+               /* XXX: this fixes some ugly register fuckup bug in the tdma engine
+                *      (no need to sync since the data is ignored anyway) */
+               mv_dma_memcpy(cpg->sa_sram_dma,
+                               cpg->sram_phys + SRAM_CONFIG, 1);
+       }
+
+       /* GO */
+       mv_setup_timer();
+       mv_dma_trigger();
+       writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
 }
 
 static int queue_manag(void *data)
@@ -686,6 +911,8 @@ static void mv_init_hash_req_ctx(struct mv_req_hash_ctx *ctx, int op,
        ctx->first_hash = 1;
        ctx->last_chunk = is_last;
        ctx->count_add = count_add;
+       ctx->buffer_dma = dma_map_single(cpg->dev, ctx->buffer,
+                       SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
 }
 
 static void mv_update_hash_req_ctx(struct mv_req_hash_ctx *ctx, int is_last,
@@ -885,10 +1112,14 @@ irqreturn_t crypto_int(int irq, void *priv)
        u32 val;
 
        val = readl(cpg->reg + SEC_ACCEL_INT_STATUS);
-       if (!(val & SEC_INT_ACCEL0_DONE))
+       if (!(val & SEC_INT_ACC0_IDMA_DONE))
                return IRQ_NONE;
 
-       val &= ~SEC_INT_ACCEL0_DONE;
+       if (!del_timer(&cpg->completion_timer)) {
+               printk(KERN_WARNING MV_CESA
+                      "got an interrupt but no pending timer?\n");
+       }
+       val &= ~SEC_INT_ACC0_IDMA_DONE;
        writel(val, cpg->reg + FPGA_INT_STATUS);
        writel(val, cpg->reg + SEC_ACCEL_INT_STATUS);
        BUG_ON(cpg->eng_st != ENGINE_BUSY);
@@ -1028,6 +1259,7 @@ static int mv_probe(struct platform_device *pdev)
        }
        cp->sram_size = resource_size(res);
        cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE;
+       cp->sram_phys = res->start;
        cp->sram = ioremap(res->start, cp->sram_size);
        if (!cp->sram) {
                ret = -ENOMEM;
@@ -1043,6 +1275,7 @@ static int mv_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, cp);
        cpg = cp;
+       cpg->dev = &pdev->dev;
 
        cp->queue_th = kthread_run(queue_manag, cp, "mv_crypto");
        if (IS_ERR(cp->queue_th)) {
@@ -1061,15 +1294,30 @@ static int mv_probe(struct platform_device *pdev)
        if (!IS_ERR(cp->clk))
                clk_prepare_enable(cp->clk);
 
-       writel(SEC_INT_ACCEL0_DONE, cpg->reg + SEC_ACCEL_INT_MASK);
-       writel(SEC_CFG_STOP_DIG_ERR, cpg->reg + SEC_ACCEL_CFG);
+       writel(0, cpg->reg + SEC_ACCEL_INT_STATUS);
+       writel(SEC_INT_ACC0_IDMA_DONE, cpg->reg + SEC_ACCEL_INT_MASK);
+       writel((SEC_CFG_STOP_DIG_ERR | SEC_CFG_CH0_W_IDMA | SEC_CFG_MP_CHAIN |
+               SEC_CFG_ACT_CH0_IDMA), cpg->reg + SEC_ACCEL_CFG);
        writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0);
 
+       cp->sa_sram_dma = dma_map_single(&pdev->dev, &cp->sa_sram,
+                       sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
+
+       if (init_dma_desclist(&cpg->desclist, &pdev->dev,
+                               sizeof(u32), MV_DMA_ALIGN, 0)) {
+               ret = -ENOMEM;
+               goto err_mapping;
+       }
+       if (set_dma_desclist_size(&cpg->desclist, MV_DMA_INIT_POOLSIZE)) {
+               printk(KERN_ERR MV_CESA "failed to initialise poolsize\n");
+               goto err_pool;
+       }
+
        ret = crypto_register_alg(&mv_aes_alg_ecb);
        if (ret) {
                printk(KERN_WARNING MV_CESA
                       "Could not register aes-ecb driver\n");
-               goto err_irq;
+               goto err_pool;
        }
 
        ret = crypto_register_alg(&mv_aes_alg_cbc);
@@ -1096,7 +1344,11 @@ static int mv_probe(struct platform_device *pdev)
        return 0;
 err_unreg_ecb:
        crypto_unregister_alg(&mv_aes_alg_ecb);
-err_irq:
+err_pool:
+       fini_dma_desclist(&cpg->desclist);
+err_mapping:
+       dma_unmap_single(&pdev->dev, cpg->sa_sram_dma,
+                       sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
        free_irq(irq, cp);
 err_thread:
        kthread_stop(cp->queue_th);
@@ -1123,6 +1375,9 @@ static int mv_remove(struct platform_device *pdev)
                crypto_unregister_ahash(&mv_hmac_sha1_alg);
        kthread_stop(cp->queue_th);
        free_irq(cp->irq, cp);
+       dma_unmap_single(&pdev->dev, cpg->sa_sram_dma,
+                       sizeof(struct sec_accel_sram), DMA_TO_DEVICE);
+       fini_dma_desclist(&cpg->desclist);
        memset(cp->sram, 0, cp->sram_size);
        iounmap(cp->sram);
        iounmap(cp->reg);
diff --git a/drivers/crypto/mv_cesa.h b/drivers/crypto/mv_cesa.h
index 08fcb11..866c437 100644
--- a/drivers/crypto/mv_cesa.h
+++ b/drivers/crypto/mv_cesa.h
@@ -24,6 +24,7 @@
 #define SEC_CFG_CH1_W_IDMA     (1 << 8)
 #define SEC_CFG_ACT_CH0_IDMA   (1 << 9)
 #define SEC_CFG_ACT_CH1_IDMA   (1 << 10)
+#define SEC_CFG_MP_CHAIN       (1 << 11)
 
 #define SEC_ACCEL_STATUS       0xde0c
 #define SEC_ST_ACT_0           (1 << 0)
diff --git a/drivers/crypto/mv_dma.c b/drivers/crypto/mv_dma.c
new file mode 100644
index 0000000..dd1ce02
--- /dev/null
+++ b/drivers/crypto/mv_dma.c
@@ -0,0 +1,520 @@
+/*
+ * Support for Marvell's IDMA/TDMA engines found on Orion/Kirkwood chips,
+ * used exclusively by the CESA crypto accelerator.
+ *
+ * Based on unpublished code for IDMA written by Sebastian Siewior.
+ *
+ * Copyright (C) 2012 Phil Sutter <phil.sut...@viprinet.com>
+ * License: GPLv2
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/mbus.h>
+#include <plat/mv_dma.h>
+
+#include "mv_dma.h"
+#include "dma_desclist.h"
+
+#define MV_DMA "MV-DMA: "
+
+#define MV_DMA_INIT_POOLSIZE 16
+#define MV_DMA_ALIGN 16
+
+struct mv_dma_desc {
+       u32 count;
+       u32 src;
+       u32 dst;
+       u32 next;
+} __attribute__((packed));
+
+struct mv_dma_priv {
+       bool idma_registered, tdma_registered;
+       struct device *dev;
+       void __iomem *reg;
+       int irq;
+       struct clk *clk;
+       /* protecting the dma descriptors and stuff */
+       spinlock_t lock;
+       struct dma_desclist desclist;
+       u32 (*print_and_clear_irq)(void);
+} tpg;
+
+#define ITEM(x)                ((struct mv_dma_desc *)DESCLIST_ITEM(tpg.desclist, x))
+#define ITEM_DMA(x)    DESCLIST_ITEM_DMA(tpg.desclist, x)
+
+typedef u32 (*print_and_clear_irq)(void);
+typedef void (*deco_win_setter)(void __iomem *, int, int, int, int, int);
+
+
+static inline void wait_for_dma_idle(void)
+{
+       while (readl(tpg.reg + DMA_CTRL) & DMA_CTRL_ACTIVE)
+               mdelay(100);
+}
+
+static inline void switch_dma_engine(bool state)
+{
+       u32 val = readl(tpg.reg + DMA_CTRL);
+
+       val |=  ( state * DMA_CTRL_ENABLE);
+       val &= ~(!state * DMA_CTRL_ENABLE);
+
+       writel(val, tpg.reg + DMA_CTRL);
+}
+
+static struct mv_dma_desc *get_new_last_desc(void)
+{
+       if (unlikely(DESCLIST_FULL(tpg.desclist)) &&
+           set_dma_desclist_size(&tpg.desclist, tpg.desclist.length << 1)) {
+               printk(KERN_ERR MV_DMA "failed to increase DMA pool to %lu\n",
+                               tpg.desclist.length << 1);
+               return NULL;
+       }
+
+       if (likely(tpg.desclist.usage))
+               ITEM(tpg.desclist.usage - 1)->next =
+                       ITEM_DMA(tpg.desclist.usage);
+
+       return ITEM(tpg.desclist.usage++);
+}
+
+static inline void mv_dma_desc_dump(void)
+{
+       struct mv_dma_desc *tmp;
+       int i;
+
+       if (!tpg.desclist.usage) {
+               printk(KERN_WARNING MV_DMA "DMA descriptor list is empty\n");
+               return;
+       }
+
+       printk(KERN_WARNING MV_DMA "DMA descriptor list:\n");
+       for (i = 0; i < tpg.desclist.usage; i++) {
+               tmp = ITEM(i);
+               printk(KERN_WARNING MV_DMA "entry %d at 0x%x: dma addr 0x%x, "
+                      "src 0x%x, dst 0x%x, count %u, own %d, next 0x%x", i,
+                      (u32)tmp, ITEM_DMA(i) , tmp->src, tmp->dst,
+                      tmp->count & DMA_BYTE_COUNT_MASK, !!(tmp->count & DMA_OWN_BIT),
+                      tmp->next);
+       }
+}
+
+static inline void mv_dma_reg_dump(void)
+{
+#define PRINTREG(offset) \
+       printk(KERN_WARNING MV_DMA "tpg.reg + " #offset " = 0x%x\n", \
+                       readl(tpg.reg + offset))
+
+       PRINTREG(DMA_CTRL);
+       PRINTREG(DMA_BYTE_COUNT);
+       PRINTREG(DMA_SRC_ADDR);
+       PRINTREG(DMA_DST_ADDR);
+       PRINTREG(DMA_NEXT_DESC);
+       PRINTREG(DMA_CURR_DESC);
+
+#undef PRINTREG
+}
+
+static inline void mv_dma_clear_desc_reg(void)
+{
+       writel(0, tpg.reg + DMA_BYTE_COUNT);
+       writel(0, tpg.reg + DMA_SRC_ADDR);
+       writel(0, tpg.reg + DMA_DST_ADDR);
+       writel(0, tpg.reg + DMA_CURR_DESC);
+       writel(0, tpg.reg + DMA_NEXT_DESC);
+}
+
+void mv_dma_clear(void)
+{
+       if (!tpg.dev)
+               return;
+
+       spin_lock(&tpg.lock);
+
+       /* make sure engine is idle */
+       wait_for_dma_idle();
+       switch_dma_engine(0);
+       wait_for_dma_idle();
+
+       /* clear descriptor registers */
+       mv_dma_clear_desc_reg();
+
+       tpg.desclist.usage = 0;
+
+       switch_dma_engine(1);
+
+       /* finally free system lock again */
+       spin_unlock(&tpg.lock);
+}
+EXPORT_SYMBOL_GPL(mv_dma_clear);
+
+void mv_dma_trigger(void)
+{
+       if (!tpg.dev)
+               return;
+
+       spin_lock(&tpg.lock);
+
+       writel(ITEM_DMA(0), tpg.reg + DMA_NEXT_DESC);
+
+       spin_unlock(&tpg.lock);
+}
+EXPORT_SYMBOL_GPL(mv_dma_trigger);
+
+void mv_dma_separator(void)
+{
+       struct mv_dma_desc *tmp;
+
+       if (!tpg.dev)
+               return;
+
+       spin_lock(&tpg.lock);
+
+       tmp = get_new_last_desc();
+       memset(tmp, 0, sizeof(*tmp));
+
+       spin_unlock(&tpg.lock);
+}
+EXPORT_SYMBOL_GPL(mv_dma_separator);
+
+void mv_dma_memcpy(dma_addr_t dst, dma_addr_t src, unsigned int size)
+{
+       struct mv_dma_desc *tmp;
+
+       if (!tpg.dev)
+               return;
+
+       spin_lock(&tpg.lock);
+
+       tmp = get_new_last_desc();
+       tmp->count = size | DMA_OWN_BIT;
+       tmp->src = src;
+       tmp->dst = dst;
+       tmp->next = 0;
+
+       spin_unlock(&tpg.lock);
+}
+EXPORT_SYMBOL_GPL(mv_dma_memcpy);
+
+static u32 idma_print_and_clear_irq(void)
+{
+       u32 val, val2, addr;
+
+       val = readl(tpg.reg + IDMA_INT_CAUSE);
+       val2 = readl(tpg.reg + IDMA_ERR_SELECT);
+       addr = readl(tpg.reg + IDMA_ERR_ADDR);
+
+       if (val & IDMA_INT_MISS(0))
+               printk(KERN_ERR MV_DMA "%s: address miss @%x!\n",
+                               __func__, val2 & IDMA_INT_MISS(0) ? addr : 0);
+       if (val & IDMA_INT_APROT(0))
+               printk(KERN_ERR MV_DMA "%s: access protection @%x!\n",
+                               __func__, val2 & IDMA_INT_APROT(0) ? addr : 0);
+       if (val & IDMA_INT_WPROT(0))
+               printk(KERN_ERR MV_DMA "%s: write protection @%x!\n",
+                               __func__, val2 & IDMA_INT_WPROT(0) ? addr : 0);
+
+       /* clear interrupt cause register */
+       writel(0, tpg.reg + IDMA_INT_CAUSE);
+
+       return val;
+}
+
+static u32 tdma_print_and_clear_irq(void)
+{
+       u32 val;
+
+       val = readl(tpg.reg + TDMA_ERR_CAUSE);
+
+       if (val & TDMA_INT_MISS)
+               printk(KERN_ERR MV_DMA "%s: miss!\n", __func__);
+       if (val & TDMA_INT_DOUBLE_HIT)
+               printk(KERN_ERR MV_DMA "%s: double hit!\n", __func__);
+       if (val & TDMA_INT_BOTH_HIT)
+               printk(KERN_ERR MV_DMA "%s: both hit!\n", __func__);
+       if (val & TDMA_INT_DATA_ERROR)
+               printk(KERN_ERR MV_DMA "%s: data error!\n", __func__);
+
+       /* clear error cause register */
+       writel(0, tpg.reg + TDMA_ERR_CAUSE);
+
+       return val;
+}
+
+irqreturn_t mv_dma_int(int irq, void *priv)
+{
+       int handled;
+
+       handled = (*tpg.print_and_clear_irq)();
+
+       if (handled) {
+               mv_dma_reg_dump();
+               mv_dma_desc_dump();
+       }
+
+       switch_dma_engine(0);
+       wait_for_dma_idle();
+
+       /* clear descriptor registers */
+       mv_dma_clear_desc_reg();
+
+       switch_dma_engine(1);
+       wait_for_dma_idle();
+
+       return (handled ? IRQ_HANDLED : IRQ_NONE);
+}
+
+static void tdma_set_deco_win(void __iomem *regs, int chan,
+               int target, int attr, int base, int size)
+{
+       u32 val;
+
+       writel(DMA_DECO_ADDR_MASK(base), regs + TDMA_DECO_BAR(chan));
+
+       val = TDMA_WCR_ENABLE;
+       val |= TDMA_WCR_TARGET(target);
+       val |= TDMA_WCR_ATTR(attr);
+       val |= DMA_DECO_SIZE_MASK(size);
+       writel(val, regs + TDMA_DECO_WCR(chan));
+}
+
+static void idma_set_deco_win(void __iomem *regs, int chan,
+               int target, int attr, int base, int size)
+{
+       u32 val;
+
+       /* setup window parameters */
+       val = IDMA_BAR_TARGET(target);
+       val |= IDMA_BAR_ATTR(attr);
+       val |= DMA_DECO_ADDR_MASK(base);
+       writel(val, regs + IDMA_DECO_BAR(chan));
+
+       /* window size goes to a separate register */
+       writel(DMA_DECO_SIZE_MASK(size), regs + IDMA_DECO_SIZE(chan));
+
+       /* set the channel to enabled */
+       val = readl(regs + IDMA_DECO_ENABLE);
+       val &= ~(1 << chan);
+       writel(val, regs + IDMA_DECO_ENABLE);
+
+       /* allow RW access from all other windows */
+       writel(0xffff, regs + IDMA_DECO_PROT(chan));
+}
+
+static void setup_mbus_windows(void __iomem *regs, struct mv_dma_pdata *pdata,
+               deco_win_setter win_setter)
+{
+       int chan;
+       const struct mbus_dram_target_info *dram;
+
+       dram = mv_mbus_dram_info();
+       for (chan = 0; chan < dram->num_cs; chan++) {
+               const struct mbus_dram_window *cs = &dram->cs[chan];
+
+               (*win_setter)(regs, chan, dram->mbus_dram_target_id,
+                               cs->mbus_attr, cs->base, cs->size);
+       }
+       if (pdata) {
+               /* Need to add a decoding window for SRAM access.
+                * This is needed only on IDMA, since every address
+                * is looked up. But not allowed on TDMA, since it
+                * errors if source and dest are in different windows.
+                *
+                * Size is in 64k granularity, max SRAM size is 8k -
+                * so a single "unit" easily suffices.
+                */
+               (*win_setter)(regs, chan, pdata->sram_target_id,
+                               pdata->sram_attr, pdata->sram_base, 1 << 16);
+       }
+}
+
+/* initialise the global tpg structure */
+static int mv_init_engine(struct platform_device *pdev, u32 ctrl_init_val,
+               print_and_clear_irq pc_irq, deco_win_setter win_setter)
+{
+       struct resource *res;
+       void __iomem *deco;
+       int rc;
+
+       if (tpg.dev) {
+               printk(KERN_ERR MV_DMA "second DMA device?!\n");
+               return -ENXIO;
+       }
+       tpg.dev = &pdev->dev;
+       tpg.print_and_clear_irq = pc_irq;
+
+       /* setup address decoding */
+       res = platform_get_resource_byname(pdev,
+                       IORESOURCE_MEM, "regs deco");
+       if (!res)
+               return -ENXIO;
+       if (!(deco = ioremap(res->start, resource_size(res))))
+               return -ENOMEM;
+       setup_mbus_windows(deco, pdev->dev.platform_data, win_setter);
+       iounmap(deco);
+
+       /* get register start address */
+       res = platform_get_resource_byname(pdev,
+                       IORESOURCE_MEM, "regs control and error");
+       if (!res)
+               return -ENXIO;
+       if (!(tpg.reg = ioremap(res->start, resource_size(res))))
+               return -ENOMEM;
+
+       /* get the IRQ */
+       tpg.irq = platform_get_irq(pdev, 0);
+       if (tpg.irq < 0 || tpg.irq == NO_IRQ) {
+               rc = -ENXIO;
+               goto out_unmap_reg;
+       }
+
+       /* Not all platforms can gate the clock, so it is not
+          an error if the clock does not exist. */
+       tpg.clk = clk_get(&pdev->dev, NULL);
+       if (!IS_ERR(tpg.clk))
+               clk_prepare_enable(tpg.clk);
+
+       /* initialise DMA descriptor list */
+       if (init_dma_desclist(&tpg.desclist, tpg.dev,
+                       sizeof(struct mv_dma_desc), MV_DMA_ALIGN, 0)) {
+               rc = -ENOMEM;
+               goto out_disable_clk;
+       }
+       if (set_dma_desclist_size(&tpg.desclist, MV_DMA_INIT_POOLSIZE)) {
+               rc = -ENOMEM;
+               goto out_free_desclist;
+       }
+
+       platform_set_drvdata(pdev, &tpg);
+
+       spin_lock_init(&tpg.lock);
+
+       switch_dma_engine(0);
+       wait_for_dma_idle();
+
+       /* clear descriptor registers */
+       mv_dma_clear_desc_reg();
+
+       /* initialize control register (also enables engine) */
+       writel(ctrl_init_val, tpg.reg + DMA_CTRL);
+       wait_for_dma_idle();
+
+       if (request_irq(tpg.irq, mv_dma_int, IRQF_DISABLED,
+                               dev_name(tpg.dev), &tpg)) {
+               rc = -ENXIO;
+               goto out_free_all;
+       }
+
+       return 0;
+
+out_free_all:
+       switch_dma_engine(0);
+       platform_set_drvdata(pdev, NULL);
+out_free_desclist:
+       fini_dma_desclist(&tpg.desclist);
+out_disable_clk:
+       if (!IS_ERR(tpg.clk)) {
+               clk_disable_unprepare(tpg.clk);
+               clk_put(tpg.clk);
+       }
+out_unmap_reg:
+       iounmap(tpg.reg);
+       tpg.dev = NULL;
+       return rc;
+}
+
+static int mv_remove(struct platform_device *pdev)
+{
+       switch_dma_engine(0);
+       platform_set_drvdata(pdev, NULL);
+       fini_dma_desclist(&tpg.desclist);
+       free_irq(tpg.irq, &tpg);
+       iounmap(tpg.reg);
+
+       if (!IS_ERR(tpg.clk)) {
+               clk_disable_unprepare(tpg.clk);
+               clk_put(tpg.clk);
+       }
+
+       tpg.dev = NULL;
+       return 0;
+}
+
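+/* per-variant probe functions: bring up the engine via mv_init_engine(),
+ * then unmask that variant's error interrupts */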
+static int mv_probe_tdma(struct platform_device *pdev)
+{
+       int rc;
+
+       rc = mv_init_engine(pdev, TDMA_CTRL_INIT_VALUE,
+                       &tdma_print_and_clear_irq, &tdma_set_deco_win);
+       if (rc)
+               return rc;
+
+       /* unmask error interrupts so failures get reported */
+       writel(TDMA_INT_ALL, tpg.reg + TDMA_ERR_MASK);
+       writel(0, tpg.reg + TDMA_ERR_CAUSE);
+
+       printk(KERN_INFO MV_DMA
+                       "TDMA engine up and running, IRQ %d\n", tpg.irq);
+       return 0;
+}
+
+static int mv_probe_idma(struct platform_device *pdev)
+{
+       int rc;
+
+       rc = mv_init_engine(pdev, IDMA_CTRL_INIT_VALUE,
+                       &idma_print_and_clear_irq, &idma_set_deco_win);
+       if (rc)
+               return rc;
+
+       /* unmask error interrupts so failures get reported */
+       writel(IDMA_INT_MISS(0) | IDMA_INT_APROT(0) | IDMA_INT_WPROT(0),
+                       tpg.reg + IDMA_INT_MASK);
+       writel(0, tpg.reg + IDMA_INT_CAUSE);
+
+       printk(KERN_INFO MV_DMA
+                       "IDMA engine up and running, IRQ %d\n", tpg.irq);
+       return 0;
+}
+
+static struct platform_driver marvell_tdma = {
+       .probe          = mv_probe_tdma,
+       .remove         = mv_remove,
+       .driver         = {
+               .owner  = THIS_MODULE,
+               .name   = "mv_tdma",
+       },
+}, marvell_idma = {
+       .probe          = mv_probe_idma,
+       .remove         = mv_remove,
+       .driver         = {
+               .owner  = THIS_MODULE,
+               .name   = "mv_idma",
+       },
+};
+MODULE_ALIAS("platform:mv_tdma");
+MODULE_ALIAS("platform:mv_idma");
+
+static int __init mv_dma_init(void)
+{
+       tpg.tdma_registered = !platform_driver_register(&marvell_tdma);
+       tpg.idma_registered = !platform_driver_register(&marvell_idma);
+       return (tpg.tdma_registered || tpg.idma_registered) ? 0 : -ENODEV;
+}
+module_init(mv_dma_init);
+
+static void __exit mv_dma_exit(void)
+{
+       if (tpg.tdma_registered)
+               platform_driver_unregister(&marvell_tdma);
+       if (tpg.idma_registered)
+               platform_driver_unregister(&marvell_idma);
+}
+module_exit(mv_dma_exit);
+
+MODULE_AUTHOR("Phil Sutter <phil.sut...@viprinet.com>");
+MODULE_DESCRIPTION("Support for Marvell's IDMA/TDMA engines");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/crypto/mv_dma.h b/drivers/crypto/mv_dma.h
new file mode 100644
index 0000000..1d8d5df
--- /dev/null
+++ b/drivers/crypto/mv_dma.h
@@ -0,0 +1,150 @@
+#ifndef _MV_DMA_H
+#define _MV_DMA_H
+
+/* common TDMA_CTRL/IDMA_CTRL_LOW bits */
+#define DMA_CTRL_DST_BURST(x)  (x)
+#define DMA_CTRL_SRC_BURST(x)  ((x) << 6)
+#define DMA_CTRL_NO_CHAIN_MODE (1 << 9)
+#define DMA_CTRL_ENABLE                (1 << 12)
+#define DMA_CTRL_FETCH_ND      (1 << 13)
+#define DMA_CTRL_ACTIVE                (1 << 14)
+
+/* TDMA_CTRL register bits */
+#define TDMA_CTRL_DST_BURST_32 DMA_CTRL_DST_BURST(3)
+#define TDMA_CTRL_DST_BURST_128        DMA_CTRL_DST_BURST(4)
+#define TDMA_CTRL_OUTST_RD_EN  (1 << 4)
+#define TDMA_CTRL_SRC_BURST_32 DMA_CTRL_SRC_BURST(3)
+#define TDMA_CTRL_SRC_BURST_128        DMA_CTRL_SRC_BURST(4)
+#define TDMA_CTRL_NO_BYTE_SWAP (1 << 11)
+
+#define TDMA_CTRL_INIT_VALUE ( \
+       TDMA_CTRL_DST_BURST_128 | TDMA_CTRL_SRC_BURST_128 | \
+       TDMA_CTRL_NO_BYTE_SWAP | DMA_CTRL_ENABLE \
+)
+
+/* IDMA_CTRL_LOW register bits */
+#define IDMA_CTRL_DST_BURST_8  DMA_CTRL_DST_BURST(0)
+#define IDMA_CTRL_DST_BURST_16 DMA_CTRL_DST_BURST(1)
+#define IDMA_CTRL_DST_BURST_32 DMA_CTRL_DST_BURST(3)
+#define IDMA_CTRL_DST_BURST_64 DMA_CTRL_DST_BURST(7)
+#define IDMA_CTRL_DST_BURST_128        DMA_CTRL_DST_BURST(4)
+#define IDMA_CTRL_SRC_HOLD     (1 << 3)
+#define IDMA_CTRL_DST_HOLD     (1 << 5)
+#define IDMA_CTRL_SRC_BURST_8  DMA_CTRL_SRC_BURST(0)
+#define IDMA_CTRL_SRC_BURST_16 DMA_CTRL_SRC_BURST(1)
+#define IDMA_CTRL_SRC_BURST_32 DMA_CTRL_SRC_BURST(3)
+#define IDMA_CTRL_SRC_BURST_64 DMA_CTRL_SRC_BURST(7)
+#define IDMA_CTRL_SRC_BURST_128        DMA_CTRL_SRC_BURST(4)
+#define IDMA_CTRL_INT_MODE     (1 << 10)
+#define IDMA_CTRL_BLOCK_MODE   (1 << 11)
+#define IDMA_CTRL_CLOSE_DESC   (1 << 17)
+#define IDMA_CTRL_ABORT                (1 << 20)
+#define IDMA_CTRL_SADDR_OVR(x) ((x) << 21)
+#define IDMA_CTRL_NO_SADDR_OVR IDMA_CTRL_SADDR_OVR(0)
+#define IDMA_CTRL_SADDR_OVR_1  IDMA_CTRL_SADDR_OVR(1)
+#define IDMA_CTRL_SADDR_OVR_2  IDMA_CTRL_SADDR_OVR(2)
+#define IDMA_CTRL_SADDR_OVR_3  IDMA_CTRL_SADDR_OVR(3)
+#define IDMA_CTRL_DADDR_OVR(x) ((x) << 23)
+#define IDMA_CTRL_NO_DADDR_OVR IDMA_CTRL_DADDR_OVR(0)
+#define IDMA_CTRL_DADDR_OVR_1  IDMA_CTRL_DADDR_OVR(1)
+#define IDMA_CTRL_DADDR_OVR_2  IDMA_CTRL_DADDR_OVR(2)
+#define IDMA_CTRL_DADDR_OVR_3  IDMA_CTRL_DADDR_OVR(3)
+#define IDMA_CTRL_NADDR_OVR(x) ((x) << 25)
+#define IDMA_CTRL_NO_NADDR_OVR IDMA_CTRL_NADDR_OVR(0)
+#define IDMA_CTRL_NADDR_OVR_1  IDMA_CTRL_NADDR_OVR(1)
+#define IDMA_CTRL_NADDR_OVR_2  IDMA_CTRL_NADDR_OVR(2)
+#define IDMA_CTRL_NADDR_OVR_3  IDMA_CTRL_NADDR_OVR(3)
+#define IDMA_CTRL_DESC_MODE_16M        (1 << 31)
+
+#define IDMA_CTRL_INIT_VALUE ( \
+       IDMA_CTRL_DST_BURST_128 | IDMA_CTRL_SRC_BURST_128 | \
+       IDMA_CTRL_INT_MODE | IDMA_CTRL_BLOCK_MODE | \
+       DMA_CTRL_ENABLE | IDMA_CTRL_DESC_MODE_16M \
+)
+
+/* TDMA_ERR_CAUSE bits */
+#define TDMA_INT_MISS          (1 << 0)
+#define TDMA_INT_DOUBLE_HIT    (1 << 1)
+#define TDMA_INT_BOTH_HIT      (1 << 2)
+#define TDMA_INT_DATA_ERROR    (1 << 3)
+#define TDMA_INT_ALL           0x0f
+
+/* address decoding registers, starting at "regs deco" */
+#define TDMA_DECO_BAR(chan)            (0x00 + (chan) * 8)
+#define TDMA_DECO_WCR(chan)            (0x04 + (chan) * 8)
+
+#define IDMA_DECO_BAR(chan)            TDMA_DECO_BAR(chan)
+#define IDMA_DECO_SIZE(chan)           (0x04 + (chan) * 8)
+#define IDMA_DECO_REMAP(chan)          (0x60 + (chan) * 4)
+#define IDMA_DECO_PROT(chan)           (0x70 + (chan) * 4)
+#define IDMA_DECO_ENABLE               0x80 /* bit field, zero enables */
+
+/* decoding address and size masks */
+#define DMA_DECO_ADDR_MASK(x)          ((x) & 0xffff0000)
+#define DMA_DECO_SIZE_MASK(x)          DMA_DECO_ADDR_MASK((x) - 1)
+
+/* TDMA_DECO_WCR fields */
+#define TDMA_WCR_ENABLE                        0x01
+#define TDMA_WCR_TARGET(x)             (((x) & 0x0f) << 4)
+#define TDMA_WCR_ATTR(x)               (((x) & 0xff) << 8)
+
+/* IDMA_DECO_BAR fields */
+#define IDMA_BAR_TARGET(x)             ((x) & 0x0f)
+#define IDMA_BAR_ATTR(x)               (((x) & 0xff) << 8)
+
+/* offsets of registers, starting at "regs control and error" */
+#define TDMA_BYTE_COUNT                0x00
+#define TDMA_SRC_ADDR          0x10
+#define TDMA_DST_ADDR          0x20
+#define TDMA_NEXT_DESC         0x30
+#define TDMA_CTRL              0x40
+#define TDMA_CURR_DESC         0x70
+#define TDMA_ERR_CAUSE         0xc8
+#define TDMA_ERR_MASK          0xcc
+
+#define IDMA_BYTE_COUNT(chan)  (0x00 + (chan) * 4)
+#define IDMA_SRC_ADDR(chan)    (0x10 + (chan) * 4)
+#define IDMA_DST_ADDR(chan)    (0x20 + (chan) * 4)
+#define IDMA_NEXT_DESC(chan)   (0x30 + (chan) * 4)
+#define IDMA_CTRL_LOW(chan)    (0x40 + (chan) * 4)
+#define IDMA_CURR_DESC(chan)   (0x70 + (chan) * 4)
+#define IDMA_CTRL_HIGH(chan)   (0x80 + (chan) * 4)
+#define IDMA_INT_CAUSE         (0xc0)
+#define IDMA_INT_MASK          (0xc4)
+#define IDMA_ERR_ADDR          (0xc8)
+#define IDMA_ERR_SELECT                (0xcc)
+
+/* register offsets common to TDMA and IDMA channel 0 */
+#define DMA_BYTE_COUNT         TDMA_BYTE_COUNT
+#define DMA_SRC_ADDR           TDMA_SRC_ADDR
+#define DMA_DST_ADDR           TDMA_DST_ADDR
+#define DMA_NEXT_DESC          TDMA_NEXT_DESC
+#define DMA_CTRL               TDMA_CTRL
+#define DMA_CURR_DESC          TDMA_CURR_DESC
+
+/* IDMA_INT_CAUSE and IDMA_INT_MASK bits */
+#define IDMA_INT_COMP(chan)    ((1 << 0) << ((chan) * 8))
+#define IDMA_INT_MISS(chan)    ((1 << 1) << ((chan) * 8))
+#define IDMA_INT_APROT(chan)   ((1 << 2) << ((chan) * 8))
+#define IDMA_INT_WPROT(chan)   ((1 << 3) << ((chan) * 8))
+#define IDMA_INT_OWN(chan)     ((1 << 4) << ((chan) * 8))
+#define IDMA_INT_ALL(chan)     (0x1f << ((chan) * 8))
+
+/* Owner bit in DMA_BYTE_COUNT and descriptors' count field, used
+ * to signal input data completion in descriptor chain */
+#define DMA_OWN_BIT            (1 << 31)
+
+/* IDMA also has a "Left Byte Count" bit,
+ * indicating that not everything was transferred */
+#define IDMA_LEFT_BYTE_COUNT   (1 << 30)
+
+/* filter the actual byte count value from the DMA_BYTE_COUNT field */
+#define DMA_BYTE_COUNT_MASK    (~(DMA_OWN_BIT | IDMA_LEFT_BYTE_COUNT))
+
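+/* entry points implemented by the DMA engine driver (mv_dma.c) */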
+extern void mv_dma_memcpy(dma_addr_t, dma_addr_t, unsigned int);
+extern void mv_dma_separator(void);
+extern void mv_dma_clear(void);
+extern void mv_dma_trigger(void);
+
+#endif /* _MV_DMA_H */
