Stefan Hajnoczi wrote: > On Wed, Dec 8, 2010 at 12:13 PM, Alexander Graf <ag...@suse.de> wrote: > >> +struct AHCIDevice { >> + IDEBus port; >> + int port_no; >> + uint32_t port_state; >> + uint32_t finished; >> + AHCIPortRegs port_regs; >> + struct AHCIState *hba; >> + uint8_t *lst; >> + uint8_t *res_fis; >> + uint8_t *cmd_fis; >> > > Are these unmapped on reset? >
On which reset? Port reset sends a d2h fis to the guest, so we still need at least the res mapping. I couldn't find the exact spot where the state of registers after reset is defined. > >> + int cmd_fis_len; >> + int dma_status; >> + BlockDriverCompletionFunc *dma_cb; >> + AHCICmdHdr *cur_cmd; >> + NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; >> > > Are the ncq_tfs[] elements cleaned up on reset (i.e. cancellation and > free sglist)? > > >> +static void map_page(uint8_t **ptr, uint64_t addr, uint32_t wanted) >> +{ >> + target_phys_addr_t len = wanted; >> + >> + if (*ptr) { >> + cpu_physical_memory_unmap(*ptr, 1, len, len); >> + } >> + >> + *ptr = cpu_physical_memory_map(addr, &len, 1); >> + if (len < wanted) { >> + cpu_physical_memory_unmap(*ptr, 1, len, len); >> > > *ptr = NULL; > > >> +static void ncq_cb(void *opaque, int ret) >> +{ >> + NCQTransferState *ncq_tfs = (NCQTransferState *)opaque; >> + IDEState *ide_state; >> + >> + if (ret < 0) { >> + /* XXX error */ >> + } >> > > Missing error handling. > Yes, that's what the XXX stands for :). > >> +static void process_ncq_command(AHCIState *s, int port, uint8_t *cmd_fis, >> + int slot, QEMUSGList *sg) >> +{ >> + NCQFrame *ncq_fis = (NCQFrame*)cmd_fis; >> + uint8_t tag = ncq_fis->tag >> 3; >> + NCQTransferState *ncq_tfs = &s->dev[port].ncq_tfs[tag]; >> + >> + if (ncq_tfs->used) { >> + /* error - already in use */ >> + fprintf(stderr, "%s: tag %d already used\n", __FUNCTION__, tag); >> + return; >> + } >> + >> + ncq_tfs->used = 1; >> + ncq_tfs->drive = &s->dev[port]; >> + ncq_tfs->drive->cmd_fis = cmd_fis; >> + ncq_tfs->drive->cmd_fis_len = 0x20; >> + ncq_tfs->slot = slot; >> + ncq_tfs->lba = ((uint64_t)ncq_fis->lba5 << 40) | >> + ((uint64_t)ncq_fis->lba4 << 32) | >> + ((uint64_t)ncq_fis->lba3 << 24) | >> + ((uint64_t)ncq_fis->lba2 << 16) | >> + ((uint64_t)ncq_fis->lba1 << 8) | >> + (uint64_t)ncq_fis->lba0; >> + >> + /* Note: We calculate the sector count, but don't currently rely on it. >> + * The total size of the DMA buffer tells us the transfer size instead. >> */ >> + ncq_tfs->sector_count = ((uint16_t)ncq_fis->sector_count_high << 8) | >> + ncq_fis->sector_count_low; >> + >> + DPRINTF(port, "NCQ transfer LBA from %ld to %ld, drive max %ld\n", >> + ncq_tfs->lba, ncq_tfs->lba + ncq_tfs->sector_count - 2, >> + s->dev[port].port.ifs[0].nb_sectors - 1); >> + >> + ncq_tfs->sglist = *sg; >> + ncq_tfs->tag = tag; >> + >> + switch(ncq_fis->command) { >> + case READ_FPDMA_QUEUED: >> + DPRINTF(port, "NCQ reading %d sectors from LBA %ld, tag %d\n", >> + ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag); >> + ncq_tfs->is_read = 1; >> + >> + /* XXX: The specification is unclear about whether the DMA Setup >> + * FIS here should have the I bit set, but it suggest that it >> should >> + * not. Linux works without this interrupt, so I disabled it. >> + * If someone knows if it is needed, please tell me, or fix >> this. */ >> + >> + /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */ >> + DPRINTF(port, "tag %d aio read %ld\n", ncq_tfs->tag, >> ncq_tfs->lba); >> + dma_bdrv_read(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist, >> + ncq_tfs->lba, ncq_cb, ncq_tfs); >> + break; >> + case WRITE_FPDMA_QUEUED: >> + DPRINTF(port, "NCQ writing %d sectors to LBA %ld, tag %d\n", >> + ncq_tfs->sector_count-1, ncq_tfs->lba, ncq_tfs->tag); >> + ncq_tfs->is_read = 0; >> + /* ahci_trigger_irq(s,s->dev[port],PORT_IRQ_STAT_DSS); */ >> + DPRINTF(port, "tag %d aio write %ld\n", ncq_tfs->tag, >> ncq_tfs->lba); >> + dma_bdrv_write(ncq_tfs->drive->port.ifs[0].bs, &ncq_tfs->sglist, >> + ncq_tfs->lba, ncq_cb, ncq_tfs); >> + break; >> + default: >> + hw_error("ahci: tried to process non-NCQ command as NCQ\n"); >> > > Guest triggerable abort. > Those happen. The guest can shoot itself in the foot. We have more of these in other places. Just check virtio.c and search for abort() :). > >> + break; >> + } >> +} >> + >> +static int handle_cmd(AHCIState *s, int port, int slot) >> +{ >> + IDEState *ide_state; >> + >> + int sglist_alloc_hint; >> + QEMUSGList sglist; >> + int atapi_packet_len = 0; >> + AHCIPortRegs *pr; >> + uint32_t opts; >> + uint64_t tbl_addr; >> + AHCICmdHdr *cmd; >> + uint8_t *cmd_fis; >> + >> + target_phys_addr_t cmd_len; >> + int i; >> + >> + if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) { >> + /* Engine currently busy, try again later */ >> + DPRINTF(port, "engine busy\n"); >> + return -1; >> + } >> + >> + pr = &s->dev[port].port_regs; >> + cmd = &((AHCICmdHdr *)s->dev[port].lst)[slot]; >> + >> + if (!s->dev[port].lst) { >> + hw_error("%s: lst not given but cmd handled", __FUNCTION__); >> > > Guest triggerable abort. > > >> + } >> + >> + opts = le32_to_cpu(cmd->opts); >> + tbl_addr = le64_to_cpu(cmd->tbl_addr); >> + >> + sglist_alloc_hint = opts >> AHCI_CMD_HDR_PRDT_LEN; >> + cmd_len = 0x80 + (sglist_alloc_hint * sizeof(AHCI_SG)); >> + cmd_fis = cpu_physical_memory_map(tbl_addr, &cmd_len, 1); >> > > NULL dereference later if cpu_physical_memory_map() fails due to > invalid address (tbl_addr). > > >> + >> + /* The device we are working for */ >> + ide_state = &s->dev[port].port.ifs[0]; >> + >> + if (!ide_state->bs) { >> + hw_error("%s: guest accessed unused port", __FUNCTION__); >> > > Guest triggerable abort. > > >> + } >> + >> + /* Get number of entries in the PRDT, init a qemu sglist accordingly */ >> + memset(&sglist, 0, sizeof(sglist)); >> + >> + if (sglist_alloc_hint > 0) { >> + AHCI_SG *tbl = (AHCI_SG *)(&cmd_fis[0x80]); >> + >> + qemu_sglist_init(&sglist, sglist_alloc_hint); >> + /* Parse the PRDs and create qemu sglist entries accordingly */ >> + for (i = 0; i < sglist_alloc_hint; i++) { >> + /* flags_size is zero-based */ >> + qemu_sglist_add(&sglist, le64_to_cpu(tbl[i].addr), >> + le32_to_cpu(tbl[i].flags_size) + 1); >> + } >> + } >> > > Only the SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER codepath seems to > clean up sglist. The guest can leak host memory by setting > sglist_alloc_hint > 0 and not using > SATA_FIS_REG_H2D_UPDATE_COMMAND_REGISTER. > True, the sglist should only be created in dma_prepare (then the core is responsible for cleanup) or ncq command issue. Alex