On 10/29/07, Blue Swirl <[EMAIL PROTECTED]> wrote:
> We could cache the resolved addresses to overcome the additional setup
> overhead. Each stage should install cache invalidation callbacks or a
> method to call for recalculation of the addresses. For example IOMMU
> or ESPDMA mappings change very often.
I meant to write "don't change very often."

This version actually resolves the host memory address, so that scsi-disk could (with some additional plumbing) write directly to the final destination. I think both pre- and post-processing hooks may be needed, but those are not implemented yet.

What about error handling? For example, what should happen if the first page is OK but the second is not? Should we truncate all further blocks and install a post-processing hook that raises a bus error?

Example output:

esp
DMADriverAIOCB 0x27433f0
IO ranges:
base 0000000000000000 len 0000000000000800
Prehooks:
Posthooks:
espdma
DMADriverAIOCB 0x27433f0
IO ranges:
base 00000000fe00000a len 0000000000000800
Prehooks:
Posthooks:
iommu
DMADriverAIOCB 0x27433f0
IO ranges:
base 0000000007fe100a len 0000000000000800
Prehooks:
Posthooks:
physical
DMADriverAIOCB 0x27433f0
IO ranges:
base 00002b8e6f82200a len 0000000000000800
Prehooks:
Posthooks:

Index: qemu/vl.h =================================================================== --- qemu.orig/vl.h 2007-10-29 16:59:37.000000000 +0000 +++ qemu/vl.h 2007-10-30 19:08:35.000000000 +0000 @@ -746,6 +746,109 @@ #include "hw/irq.h" +/* Generic DMA API */ + +typedef void DMADriverCompletionFunc(void *opaque, int ret); + +typedef struct qemu_iolist { + target_phys_addr_t iov_base; + target_phys_addr_t iov_len; + struct qemu_iolist *next; +} qemu_iolist; + +typedef struct DMADriverAIOCB DMADriverAIOCB; + +typedef DMADriverAIOCB * +DMATranslationHandler(void *opaque, DMADriverAIOCB *request, int is_write); + +typedef struct DMACompletionEntry { + DMATranslationHandler *func; + void *opaque; + struct DMACompletionEntry *next; +} DMACompletionEntry; + +struct DMADriverAIOCB { + qemu_iolist *iolist; + DMACompletionEntry *prehook; + DMACompletionEntry *posthook; + struct DMADriverAIOCB *next; +}; + +typedef struct qemu_bus { + unsigned int bus_bits; + DMATranslationHandler *north_handler; + void *north_handler_opaque; + DMATranslationHandler *south_handler; + void *south_handler_opaque; +} qemu_bus; + +static inline qemu_bus * +bus_init(unsigned int bus_bits, + DMATranslationHandler north_handler, + void *north_handler_opaque, + DMATranslationHandler south_handler, + void *south_handler_opaque) +{ + qemu_bus *bus; + + bus = qemu_mallocz(sizeof(qemu_bus)); + bus->bus_bits = bus_bits; + bus->north_handler = north_handler; + bus->north_handler_opaque = north_handler_opaque; + bus->south_handler = south_handler; + bus->south_handler_opaque = south_handler_opaque; + return bus; +} + +/* Direction CPU->bridge->device/memory */ +static inline DMADriverAIOCB * +bus_translate_south(qemu_bus *bus, DMADriverAIOCB *request, int is_write) +{ + return bus->south_handler(bus->south_handler_opaque, request, is_write); +} + +/* From device towards CPU/memory (DMA) */ +static inline DMADriverAIOCB * +bus_translate_north(qemu_bus *bus, DMADriverAIOCB *request, int is_write) +{ + return 
bus->north_handler(bus->north_handler_opaque, request, is_write); +} + +static inline DMADriverAIOCB * +bus_build_aiocb(target_phys_addr_t addr, target_phys_addr_t len) +{ + DMADriverAIOCB *d; + + d = qemu_mallocz(sizeof(DMADriverAIOCB)); + d->iolist = qemu_mallocz(sizeof(qemu_iolist)); + d->iolist->iov_base = addr; + d->iolist->iov_len = len; + return d; +} + +#if 1 || DEBUG_GDMA +static inline void +bus_dump_aiocb(DMADriverAIOCB *d) +{ + qemu_iolist *io; + DMACompletionEntry *e; + + fprintf(stderr, "DMADriverAIOCB %p\nIO ranges:\n", d); + for (io = d->iolist; io != NULL; io = io->next) { + fprintf(stderr, "base " TARGET_FMT_plx " len " TARGET_FMT_plx "\n", + io->iov_base, io->iov_len); + } + fprintf(stderr, "Prehooks:\n"); + for (e = d->prehook; e != NULL; e = e->next) { + fprintf(stderr, "func %p opaque %p\n", e->func, e->opaque); + } + fprintf(stderr, "Posthooks:\n"); + for (e = d->posthook; e != NULL; e = e->next) { + fprintf(stderr, "func %p opaque %p\n", e->func, e->opaque); + } +} +#endif + /* ISA bus */ extern target_phys_addr_t isa_mem_base; @@ -1253,7 +1356,8 @@ extern QEMUMachine ss5_machine, ss10_machine; /* iommu.c */ -void *iommu_init(target_phys_addr_t addr); +void *iommu_init(target_phys_addr_t addr, qemu_bus *parent_bus, + qemu_bus **bus); void sparc_iommu_memory_rw(void *opaque, target_phys_addr_t addr, uint8_t *buf, int len, int is_write); static inline void sparc_iommu_memory_read(void *opaque, @@ -1308,11 +1412,13 @@ /* esp.c */ void esp_scsi_attach(void *opaque, BlockDriverState *bd, int id); void *esp_init(BlockDriverState **bd, target_phys_addr_t espaddr, - void *dma_opaque, qemu_irq irq, qemu_irq *reset); + void *dma_opaque, qemu_irq irq, qemu_irq *reset, + qemu_bus *parent_bus, qemu_bus **bus); /* sparc32_dma.c */ void *sparc32_dma_init(target_phys_addr_t daddr, qemu_irq parent_irq, - void *iommu, qemu_irq **dev_irq, qemu_irq **reset); + void *iommu, qemu_irq **dev_irq, qemu_irq **reset, + qemu_bus *parent_bus, qemu_bus **bus); void 
ledma_memory_read(void *opaque, target_phys_addr_t addr, uint8_t *buf, int len, int do_bswap); void ledma_memory_write(void *opaque, target_phys_addr_t addr, @@ -1428,6 +1534,8 @@ scsi_{read,write}_data. */ void scsi_read_data(SCSIDevice *s, uint32_t tag); int scsi_write_data(SCSIDevice *s, uint32_t tag); +void scsi_read_data_aio(SCSIDevice *s, uint32_t tag, DMADriverAIOCB *d); +int scsi_write_data_aio(SCSIDevice *s, uint32_t tag, DMADriverAIOCB *d); void scsi_cancel_io(SCSIDevice *s, uint32_t tag); uint8_t *scsi_get_buf(SCSIDevice *s, uint32_t tag); Index: qemu/hw/sun4m.c =================================================================== --- qemu.orig/hw/sun4m.c 2007-10-29 16:59:37.000000000 +0000 +++ qemu/hw/sun4m.c 2007-10-30 19:09:12.000000000 +0000 @@ -306,6 +306,32 @@ env->halted = 1; } +static DMADriverAIOCB * +physical_memory_bus_translate_north(void *opaque, DMADriverAIOCB *request, + int is_write) +{ + qemu_iolist *io; + + for (io = request->iolist; io != NULL; io = io->next) { + if (io->iov_base < phys_ram_size) + io->iov_base += (unsigned long)phys_ram_base; + else + io->iov_len = 0; + } + fprintf(stderr, "physical\n"); + bus_dump_aiocb(request); + return request; +} + +static DMADriverAIOCB * +physical_memory_bus_translate_south(void *opaque, + DMADriverAIOCB *request, + int is_write) +{ + // Does not exist? 
+ return request; +} + static void *sun4m_hw_init(const struct hwdef *hwdef, int RAM_size, DisplayState *ds, const char *cpu_model) @@ -317,6 +343,8 @@ qemu_irq *cpu_irqs[MAX_CPUS], *slavio_irq, *slavio_cpu_irq, *espdma_irq, *ledma_irq; qemu_irq *esp_reset, *le_reset; + qemu_bus *memory_bus, *iommu_bus, *espdma_bus, *ledma_bus, *esp_bus, + *scsi_bus; /* init CPUs */ sparc_find_by_name(cpu_model, &def); @@ -345,7 +373,12 @@ /* allocate RAM */ cpu_register_physical_memory(0, RAM_size, 0); - iommu = iommu_init(hwdef->iommu_base); + memory_bus = bus_init(TARGET_PHYS_ADDR_BITS, + physical_memory_bus_translate_north, + NULL, + physical_memory_bus_translate_south, + NULL); + iommu = iommu_init(hwdef->iommu_base, memory_bus, &iommu_bus); slavio_intctl = slavio_intctl_init(hwdef->intctl_base, hwdef->intctl_base + 0x10000ULL, &hwdef->intbit_to_level[0], @@ -354,11 +387,12 @@ hwdef->clock_irq); espdma = sparc32_dma_init(hwdef->dma_base, slavio_irq[hwdef->esp_irq], - iommu, &espdma_irq, &esp_reset); + iommu, &espdma_irq, &esp_reset, iommu_bus, + &espdma_bus); ledma = sparc32_dma_init(hwdef->dma_base + 16ULL, slavio_irq[hwdef->le_irq], iommu, &ledma_irq, - &le_reset); + &le_reset, iommu_bus, &ledma_bus); if (graphic_depth != 8 && graphic_depth != 24) { fprintf(stderr, "qemu: Unsupported depth: %d\n", graphic_depth); @@ -392,7 +426,7 @@ fdctrl_init(slavio_irq[hwdef->fd_irq], 0, 1, hwdef->fd_base, fd_table); main_esp = esp_init(bs_table, hwdef->esp_base, espdma, *espdma_irq, - esp_reset); + esp_reset, espdma_bus, &esp_bus); for (i = 0; i < MAX_DISKS; i++) { if (bs_table[i]) { Index: qemu/hw/iommu.c =================================================================== --- qemu.orig/hw/iommu.c 2007-10-29 16:59:37.000000000 +0000 +++ qemu/hw/iommu.c 2007-10-30 19:40:15.000000000 +0000 @@ -104,6 +104,7 @@ target_phys_addr_t addr; uint32_t regs[IOMMU_NREGS]; target_phys_addr_t iostart; + qemu_bus *bus; } IOMMUState; static uint32_t iommu_mem_readw(void *opaque, target_phys_addr_t addr) 
@@ -244,6 +245,64 @@ s->regs[IOMMU_AFAR] = addr; } +static DMADriverAIOCB * +iommu_bus_translate_north(void *opaque, DMADriverAIOCB *request, int is_write) +{ + IOMMUState *s = opaque; + qemu_iolist *io, *new_io, *next_io; + uint32_t flags; + int l, first; + target_phys_addr_t addr, prev_addr, len, page, phys_addr; + + for (io = request->iolist; io != NULL; io = next_io) { + addr = io->iov_base; + len = io->iov_len; + next_io = io->next; + first = 1; + while (len > 0) { + page = addr & TARGET_PAGE_MASK; + l = (page + TARGET_PAGE_SIZE) - addr; + if (l > len) + l = len; + flags = iommu_page_get_flags(s, page); + if (!(flags & IOPTE_VALID)) { + return NULL; + } + if (is_write) { + if (!(flags & IOPTE_WRITE)) { + return NULL; + } + } + phys_addr = iommu_translate_pa(s, addr, flags); + if (!first && addr != (prev_addr + TARGET_PAGE_SIZE)) { + new_io = qemu_mallocz(sizeof(qemu_iolist)); + new_io->iov_base = phys_addr; + new_io->iov_len = len; + new_io->next = io->next; + io->next = new_io; + } else { + io->iov_base = phys_addr; + io->iov_len = len; + } + prev_addr = page; + len -= l; + addr += l; + first = 0; + } + } + fprintf(stderr, "iommu\n"); + bus_dump_aiocb(request); + bus_translate_north(s->bus, request, is_write); + return request; +} + +static DMADriverAIOCB * +iommu_bus_translate_south(void *opaque, DMADriverAIOCB *request, int is_write) +{ + // 1:1 mapping + return request; +} + void sparc_iommu_memory_rw(void *opaque, target_phys_addr_t addr, uint8_t *buf, int len, int is_write) { @@ -311,7 +370,7 @@ s->regs[IOMMU_CTRL] = IOMMU_VERSION; } -void *iommu_init(target_phys_addr_t addr) +void *iommu_init(target_phys_addr_t addr, qemu_bus *parent_bus, qemu_bus **bus) { IOMMUState *s; int iommu_io_memory; @@ -321,9 +380,12 @@ return NULL; s->addr = addr; + s->bus = parent_bus; iommu_io_memory = cpu_register_io_memory(0, iommu_mem_read, iommu_mem_write, s); cpu_register_physical_memory(addr, IOMMU_NREGS * 4, iommu_io_memory); + *bus = bus_init(32, 
iommu_bus_translate_north, s, + iommu_bus_translate_south, s); register_savevm("iommu", addr, 2, iommu_save, iommu_load, s); qemu_register_reset(iommu_reset, s); Index: qemu/hw/sparc32_dma.c =================================================================== --- qemu.orig/hw/sparc32_dma.c 2007-10-29 16:59:37.000000000 +0000 +++ qemu/hw/sparc32_dma.c 2007-10-30 19:29:06.000000000 +0000 @@ -60,6 +60,7 @@ qemu_irq irq; void *iommu; qemu_irq dev_reset; + qemu_bus *bus; }; /* Note: on sparc, the lance 16 bit bus is swapped */ @@ -128,6 +129,27 @@ } } +static DMADriverAIOCB * +espdma_bus_translate_north(void *opaque, DMADriverAIOCB *request, int is_write) +{ + DMAState *s = opaque; + qemu_iolist *io; + + for (io = request->iolist; io != NULL; io = io->next) + io->iov_base = (target_phys_addr_t)s->dmaregs[1]; + fprintf(stderr, "espdma\n"); + bus_dump_aiocb(request); + bus_translate_north(s->bus, request, is_write); + return request; +} + +static DMADriverAIOCB * +espdma_bus_translate_south(void *opaque, DMADriverAIOCB *request, int is_write) +{ + // 1:1 mapping + return request; +} + void espdma_memory_read(void *opaque, uint8_t *buf, int len) { DMAState *s = opaque; @@ -238,7 +260,8 @@ } void *sparc32_dma_init(target_phys_addr_t daddr, qemu_irq parent_irq, - void *iommu, qemu_irq **dev_irq, qemu_irq **reset) + void *iommu, qemu_irq **dev_irq, qemu_irq **reset, + qemu_bus *parent_bus, qemu_bus **bus) { DMAState *s; int dma_io_memory; @@ -258,6 +281,9 @@ *dev_irq = qemu_allocate_irqs(dma_set_irq, s, 1); *reset = &s->dev_reset; + s->bus = parent_bus; + *bus = bus_init(32, espdma_bus_translate_north, s, + espdma_bus_translate_south, s); return s; } Index: qemu/hw/esp.c =================================================================== --- qemu.orig/hw/esp.c 2007-10-29 16:59:37.000000000 +0000 +++ qemu/hw/esp.c 2007-10-30 19:27:46.000000000 +0000 @@ -74,6 +74,7 @@ uint8_t *async_buf; uint32_t async_len; void *dma_opaque; + qemu_bus *bus; }; #define STAT_DO 0x00 @@ -144,15 
+145,25 @@ datalen = scsi_send_command(s->current_dev, 0, &buf[1], lun); s->ti_size = datalen; if (datalen != 0) { + DMADriverAIOCB *d; + s->rregs[4] = STAT_IN | STAT_TC; s->dma_left = 0; s->dma_counter = 0; if (datalen > 0) { + d = bus_build_aiocb(0, datalen); + fprintf(stderr, "esp\n"); + bus_dump_aiocb(d); + bus_translate_north(s->bus, d, 1); s->rregs[4] |= STAT_DI; - scsi_read_data(s->current_dev, 0); + scsi_read_data_aio(s->current_dev, 0, d); } else { + d = bus_build_aiocb(0, -datalen); + fprintf(stderr, "esp\n"); + bus_dump_aiocb(d); + bus_translate_north(s->bus, d, 0); s->rregs[4] |= STAT_DO; - scsi_write_data(s->current_dev, 0); + scsi_write_data_aio(s->current_dev, 0, d); } } s->rregs[5] = INTR_BS | INTR_FC; @@ -330,6 +341,22 @@ } } +static DMADriverAIOCB * +esp_bus_translate_north(void *opaque, DMADriverAIOCB *request, int is_write) +{ + // 1:1 mapping? + fprintf(stderr, "esp\n"); + bus_dump_aiocb(request); + return request; +} + +static DMADriverAIOCB * +esp_bus_translate_south(void *opaque, DMADriverAIOCB *request, int is_write) +{ + // 1:1 mapping + return request; +} + static void esp_reset(void *opaque) { ESPState *s = opaque; @@ -575,7 +602,8 @@ } void *esp_init(BlockDriverState **bd, target_phys_addr_t espaddr, - void *dma_opaque, qemu_irq irq, qemu_irq *reset) + void *dma_opaque, qemu_irq irq, qemu_irq *reset, + qemu_bus *parent_bus, qemu_bus **bus) { ESPState *s; int esp_io_memory; @@ -587,9 +615,11 @@ s->bd = bd; s->irq = irq; s->dma_opaque = dma_opaque; - + s->bus = parent_bus; esp_io_memory = cpu_register_io_memory(0, esp_mem_read, esp_mem_write, s); cpu_register_physical_memory(espaddr, ESP_SIZE, esp_io_memory); + *bus = bus_init(32, esp_bus_translate_north, s, + esp_bus_translate_south, s); esp_reset(s); Index: qemu/hw/scsi-disk.c =================================================================== --- qemu.orig/hw/scsi-disk.c 2007-10-29 16:59:37.000000000 +0000 +++ qemu/hw/scsi-disk.c 2007-10-30 18:55:19.000000000 +0000 @@ -199,6 +199,16 @@ 
r->sector_count -= n; } +void scsi_read_data_aio(SCSIDevice *s, uint32_t tag, DMADriverAIOCB *d) +{ + scsi_read_data(s, tag); +} + +int scsi_write_data_aio(SCSIDevice *s, uint32_t tag, DMADriverAIOCB *d) +{ + return scsi_write_data(s, tag); +} + static void scsi_write_complete(void * opaque, int ret) { SCSIRequest *r = (SCSIRequest *)opaque;