I made a new patch sketching the system. It doesn't even compile, but it should give an idea of how this would work in practice.
On the downside, new memory needs to be allocated to generate new vectors from the previous ones, which may hurt performance. Also, DMA to MMIO registers can't be supported with pure translation alone. I'm not too happy with this model anymore; maybe the model I proposed earlier is better. But it can't achieve zero copy.
Index: qemu/vl.h =================================================================== --- qemu.orig/vl.h 2007-10-28 13:26:35.000000000 +0000 +++ qemu/vl.h 2007-10-28 20:25:27.000000000 +0000 @@ -746,6 +746,78 @@ #include "hw/irq.h" +/* Generic DMA API */ + +typedef void DMADriverCompletionFunc(void *opaque, int ret); + +struct qemu_iovec { + target_phys_addr_t iov_base; + size_t iov_len; +}; + +typedef struct DMADriverAIOCB DMADriverAIOCB; + +typedef DMADriverAIOCB * +DMATranslationHandler(void *opaque, DMADriverAIOCB *request); + +typedef struct DMACompletionEntry { + DMATranslationHandler *func; + void *opaque; + struct DMACompletionEntry *next; +} DMACompletionEntry; + +struct DMADriverAIOCB { + unsigned int nent; + struct qemu_iovec *vector; + DMACompletionEntry *cb; + struct DMADriverAIOCB *next; +}; + +typedef struct qemu_bus { + unsigned int bus_bits; + DMATranslationHandler *north_handler; + void *north_handler_opaque; + DMATranslationHandler *south_handler; + void *south_handler_opaque; +} qemu_bus; + +qemu_bus * +bus_init(unsigned int bus_bits, + DMATranslationHandler north_handler, + void *north_handler_opaque, + DMATranslationHandler south_handler, + void *south_handler_opaque); + +/* Direction CPU->bridge->device/memory */ +static inline DMADriverAIOCB * +bus_translate_south(qemu_bus *bus, DMADriverAIOCB *request) +{ + return bus->south_handler(bus->south_handler_opaque, request); +} + +/* From device towards CPU/memory (DMA) */ +static inline DMADriverAIOCB * +bus_translate_north(qemu_bus *bus, DMADriverAIOCB *request) +{ + return bus->north_handler(bus->north_handler_opaque, request); +} + +static inline DMADriverAIOCB * +bus_build_aiocb(const struct qemu_iovec *vector, unsigned int count) +{ + DMADriverAIOCB *d; + + d = qemu_mallocz(sizeof(DMADriverAIOCB)); + d->nent = count; + d->vector = qemu_mallocz(count * sizeof(struct qemu_iovec)); + memcpy(d->vector, vector, count * sizeof(struct qemu_iovec)); + return d; +} + +DMADriverAIOCB 
*physical_memory_bus_translate_north(void *opaque, + DMADriverAIOCB *request); +DMADriverAIOCB *physical_memory_bus_translate_south(void *opaque, + DMADriverAIOCB *request); /* ISA bus */ extern target_phys_addr_t isa_mem_base; Index: qemu/hw/sun4m.c =================================================================== --- qemu.orig/hw/sun4m.c 2007-10-28 19:12:59.000000000 +0000 +++ qemu/hw/sun4m.c 2007-10-28 20:24:59.000000000 +0000 @@ -306,6 +306,27 @@ env->halted = 1; } +DMADriverAIOCB *physical_memory_bus_translate_north(void *opaque, + DMADriverAIOCB *request) +{ + unsigned int i; + + for (i = 0; i < request->nent; i++) { + if (request->vector[i].iov_base < phys_ram_size) + request->vector[i].iov_base += (unsigned long)phys_ram_base; + else + request->vector[i].iov_len = 0; + } + return request; +} + +DMADriverAIOCB *physical_memory_bus_translate_south(void *opaque, + DMADriverAIOCB *request) +{ + // Does not exist? + return request; +} + static void *sun4m_hw_init(const struct hwdef *hwdef, int RAM_size, DisplayState *ds, const char *cpu_model) Index: qemu/hw/iommu.c =================================================================== --- qemu.orig/hw/iommu.c 2007-10-28 19:12:35.000000000 +0000 +++ qemu/hw/iommu.c 2007-10-28 20:29:23.000000000 +0000 @@ -244,6 +244,24 @@ s->regs[IOMMU_AFAR] = addr; } +DMADriverAIOCB *iommu_bus_translate_north(void *opaque, + DMADriverAIOCB *request) +{ + unsigned int i; + // alloc a new vector + for (i = 0; i < request->nent; i++) { + //translate_vector(request->vector[i]); + } + return request; +} + +DMADriverAIOCB *iommu_bus_translate_south(void *opaque, + DMADriverAIOCB *request) +{ + // 1:1 mapping + return request; +} + void sparc_iommu_memory_rw(void *opaque, target_phys_addr_t addr, uint8_t *buf, int len, int is_write) { Index: qemu/hw/sparc32_dma.c =================================================================== --- qemu.orig/hw/sparc32_dma.c 2007-10-28 19:12:54.000000000 +0000 +++ qemu/hw/sparc32_dma.c 2007-10-28 
20:30:45.000000000 +0000 @@ -128,6 +128,25 @@ } } +DMADriverAIOCB *espdma_bus_translate_north(void *opaque, + DMADriverAIOCB *request) +{ + DMAState *s = opaque; + unsigned int i; + + for (i = 0; i < request->nent; i++) { + request->vector[i].iov_base |= s->dmaregs[1]; + } + return request; +} + +DMADriverAIOCB *espdma_bus_translate_south(void *opaque, + DMADriverAIOCB *request) +{ + // 1:1 mapping + return request; +} + void espdma_memory_read(void *opaque, uint8_t *buf, int len) { DMAState *s = opaque; Index: qemu/hw/esp.c =================================================================== --- qemu.orig/hw/esp.c 2007-10-28 18:57:27.000000000 +0000 +++ qemu/hw/esp.c 2007-10-28 20:37:57.000000000 +0000 @@ -144,15 +144,25 @@ datalen = scsi_send_command(s->current_dev, 0, &buf[1], lun); s->ti_size = datalen; if (datalen != 0) { + DMADriverAIOCB *d; + struct qemu_iovec iov; + s->rregs[4] = STAT_IN | STAT_TC; s->dma_left = 0; s->dma_counter = 0; + iov.iov_base = 0; if (datalen > 0) { + iov.iov_len = datalen; + d = bus_build_aiocb(&iov, 1); + bus_translate_north(s->bus, d); s->rregs[4] |= STAT_DI; - scsi_read_data(s->current_dev, 0); + scsi_read_data(s->current_dev, 0, d); } else { + iov.iov_len = -datalen; + d = bus_build_aiocb(&iov, 1); + bus_translate_north(s->bus, d); s->rregs[4] |= STAT_DO; - scsi_write_data(s->current_dev, 0); + scsi_write_data(s->current_dev, 0, d); } } s->rregs[5] = INTR_BS | INTR_FC; Index: qemu/hw/scsi-disk.c =================================================================== --- qemu.orig/hw/scsi-disk.c 2007-10-28 20:30:35.000000000 +0000 +++ qemu/hw/scsi-disk.c 2007-10-28 20:38:49.000000000 +0000 @@ -162,7 +162,7 @@ } /* Read more data from scsi device into buffer. 
*/ -void scsi_read_data(SCSIDevice *s, uint32_t tag) +void scsi_read_data(SCSIDevice *s, uint32_t tag, DMADriverAIOCB *request) { SCSIRequest *r; uint32_t n; @@ -191,7 +191,7 @@ n = SCSI_DMA_BUF_SIZE / 512; r->buf_len = n * 512; - r->aiocb = bdrv_aio_read(s->bdrv, r->sector, r->dma_buf, n, + r->aiocb = bdrv_aio_read(s->bdrv, r->sector, request, n, scsi_read_complete, r); if (r->aiocb == NULL) scsi_command_complete(r, SENSE_HARDWARE_ERROR);