This patch adds a new parameter to "-drive". Using "cache=off" with "-drive" will open the disk image file using "O_DIRECT".
By default, "cache" is set to "on" to keep original behavior of qemu. v3 modify hw/sd.c to allocate buffer on init and not on each blk_read()/blk_write() and add "cache=" in qemu-doc.texi. example: "-drive file=my_disk.qcow2,cache=off" --- block-raw-posix.c | 8 +++++++ block-raw-win32.c | 4 +++ block.c | 2 - block.h | 1 hw/fdc.c | 7 +++++- hw/ide.c | 18 +++++++++++++---- hw/scsi-disk.c | 3 +- hw/sd.c | 56 ++++++++++++++++++++++++++---------------------------- osdep.c | 20 +++++++++++++++++++ osdep.h | 1 qemu-doc.texi | 2 + vl.c | 28 +++++++++++++++++++++++---- 12 files changed, 110 insertions(+), 40 deletions(-) Index: qemu/block.c =================================================================== --- qemu.orig/block.c 2007-12-18 10:58:14.000000000 +0100 +++ qemu/block.c 2007-12-18 11:01:01.000000000 +0100 @@ -380,7 +380,7 @@ int bdrv_open2(BlockDriverState *bs, con /* Note: for compatibility, we open disk image files as RDWR, and RDONLY as fallback */ if (!(flags & BDRV_O_FILE)) - open_flags = BDRV_O_RDWR; + open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT); else open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); ret = drv->bdrv_open(bs, filename, open_flags); Index: qemu/block.h =================================================================== --- qemu.orig/block.h 2007-12-18 10:58:14.000000000 +0100 +++ qemu/block.h 2007-12-18 11:01:01.000000000 +0100 @@ -44,6 +44,7 @@ typedef struct QEMUSnapshotInfo { use a disk image format on top of it (default for bdrv_file_open()) */ +#define BDRV_O_DIRECT 0x0020 #ifndef QEMU_IMG void bdrv_info(void); Index: qemu/hw/fdc.c =================================================================== --- qemu.orig/hw/fdc.c 2007-12-18 10:58:14.000000000 +0100 +++ qemu/hw/fdc.c 2007-12-18 11:01:01.000000000 +0100 @@ -382,7 +382,7 @@ struct fdctrl_t { uint8_t cur_drv; uint8_t bootsel; /* Command FIFO */ - uint8_t fifo[FD_SECTOR_LEN]; + uint8_t *fifo; uint32_t data_pos; uint32_t data_len; uint8_t data_state; @@ -598,6 +598,11 @@ 
fdctrl_t *fdctrl_init (qemu_irq irq, int fdctrl = qemu_mallocz(sizeof(fdctrl_t)); if (!fdctrl) return NULL; + fdctrl->fifo = qemu_memalign(512, FD_SECTOR_LEN); + if (fdctrl->fifo == NULL) { + qemu_free(fdctrl); + return NULL; + } fdctrl->result_timer = qemu_new_timer(vm_clock, fdctrl_result_timer, fdctrl); Index: qemu/hw/ide.c =================================================================== --- qemu.orig/hw/ide.c 2007-12-18 10:58:14.000000000 +0100 +++ qemu/hw/ide.c 2007-12-18 11:01:01.000000000 +0100 @@ -365,7 +365,7 @@ typedef struct IDEState { EndTransferFunc *end_transfer_func; uint8_t *data_ptr; uint8_t *data_end; - uint8_t io_buffer[MAX_MULT_SECTORS*512 + 4]; + uint8_t *io_buffer; QEMUTimer *sector_write_timer; /* only used for win2k install hack */ uint32_t irq_count; /* counts IRQs when using win2k install hack */ /* CF-ATA extended error */ @@ -2377,17 +2377,24 @@ struct partition { static int guess_disk_lchs(IDEState *s, int *pcylinders, int *pheads, int *psectors) { - uint8_t buf[512]; + uint8_t *buf; int ret, i, heads, sectors, cylinders; struct partition *p; uint32_t nr_sects; + buf = qemu_memalign(512, 512); + if (buf == NULL) + return -1; ret = bdrv_read(s->bs, 0, buf, 1); - if (ret < 0) + if (ret < 0) { + qemu_free(buf); return -1; + } /* test msdos magic */ - if (buf[510] != 0x55 || buf[511] != 0xaa) + if (buf[510] != 0x55 || buf[511] != 0xaa) { + qemu_free(buf); return -1; + } for(i = 0; i < 4; i++) { p = ((struct partition *)(buf + 0x1be)) + i; nr_sects = le32_to_cpu(p->nr_sects); @@ -2408,9 +2415,11 @@ static int guess_disk_lchs(IDEState *s, printf("guessed geometry: LCHS=%d %d %d\n", cylinders, heads, sectors); #endif + qemu_free(buf); return 0; } } + qemu_free(buf); return -1; } @@ -2425,6 +2434,7 @@ static void ide_init2(IDEState *ide_stat for(i = 0; i < 2; i++) { s = ide_state + i; + s->io_buffer = qemu_memalign(512, MAX_MULT_SECTORS*512 + 4); if (i == 0) s->bs = hd0; else Index: qemu/hw/scsi-disk.c 
=================================================================== --- qemu.orig/hw/scsi-disk.c 2007-12-18 10:58:14.000000000 +0100 +++ qemu/hw/scsi-disk.c 2007-12-18 11:01:01.000000000 +0100 @@ -46,7 +46,7 @@ typedef struct SCSIRequest { int sector_count; /* The amounnt of data in the buffer. */ int buf_len; - uint8_t dma_buf[SCSI_DMA_BUF_SIZE]; + uint8_t *dma_buf; BlockDriverAIOCB *aiocb; struct SCSIRequest *next; } SCSIRequest; @@ -78,6 +78,7 @@ static SCSIRequest *scsi_new_request(SCS free_requests = r->next; } else { r = qemu_malloc(sizeof(SCSIRequest)); + r->dma_buf = qemu_memalign(512, SCSI_DMA_BUF_SIZE); } r->dev = s; r->tag = tag; Index: qemu/hw/sd.c =================================================================== --- qemu.orig/hw/sd.c 2007-12-18 10:58:14.000000000 +0100 +++ qemu/hw/sd.c 2007-12-18 11:19:19.000000000 +0100 @@ -96,6 +96,7 @@ struct SDState { qemu_irq readonly_cb; qemu_irq inserted_cb; BlockDriverState *bdrv; + uint8_t *buf; }; static void sd_set_status(SDState *sd) @@ -405,6 +406,7 @@ SDState *sd_init(BlockDriverState *bs, i SDState *sd; sd = (SDState *) qemu_mallocz(sizeof(SDState)); + sd->buf = qemu_memalign(512, 512); sd->spi = is_spi; sd_reset(sd, bs); bdrv_set_change_cb(sd->bdrv, sd_cardchange, sd); @@ -1281,64 +1283,60 @@ int sd_do_command(SDState *sd, struct sd } /* No real need for 64 bit addresses here */ -static void sd_blk_read(BlockDriverState *bdrv, - void *data, uint32_t addr, uint32_t len) +static void sd_blk_read(SDState *sd) { - uint8_t buf[512]; - uint32_t end = addr + len; + uint32_t addr = sd->data_start; + uint32_t end = addr + sd->blk_len; - if (!bdrv || bdrv_read(bdrv, addr >> 9, buf, 1) == -1) { + if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) == -1) { printf("sd_blk_read: read error on host side\n"); return; } if (end > (addr & ~511) + 512) { - memcpy(data, buf + (addr & 511), 512 - (addr & 511)); + memcpy(sd->data, sd->buf + (addr & 511), 512 - (addr & 511)); - if (bdrv_read(bdrv, end >> 9, buf, 1) 
== -1) { + if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) == -1) { printf("sd_blk_read: read error on host side\n"); return; } - memcpy(data + 512 - (addr & 511), buf, end & 511); + memcpy(sd->data + 512 - (addr & 511), sd->buf, end & 511); } else - memcpy(data, buf + (addr & 511), len); + memcpy(sd->data, sd->buf + (addr & 511), sd->blk_len); } -static void sd_blk_write(BlockDriverState *bdrv, - void *data, uint32_t addr, uint32_t len) +static void sd_blk_write(SDState *sd) { - uint8_t buf[512]; - uint32_t end = addr + len; + uint32_t addr = sd->data_start; + uint32_t end = addr + sd->blk_len; - if ((addr & 511) || len < 512) - if (!bdrv || bdrv_read(bdrv, addr >> 9, buf, 1) == -1) { + if ((addr & 511) || sd->blk_len < 512) + if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) == -1) { printf("sd_blk_write: read error on host side\n"); return; } if (end > (addr & ~511) + 512) { - memcpy(buf + (addr & 511), data, 512 - (addr & 511)); - if (bdrv_write(bdrv, addr >> 9, buf, 1) == -1) { + memcpy(sd->buf + (addr & 511), sd->data, 512 - (addr & 511)); + if (bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) == -1) { printf("sd_blk_write: write error on host side\n"); return; } - if (bdrv_read(bdrv, end >> 9, buf, 1) == -1) { + if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) == -1) { printf("sd_blk_write: read error on host side\n"); return; } - memcpy(buf, data + 512 - (addr & 511), end & 511); - if (bdrv_write(bdrv, end >> 9, buf, 1) == -1) + memcpy(sd->buf, sd->data + 512 - (addr & 511), end & 511); + if (bdrv_write(sd->bdrv, end >> 9, sd->buf, 1) == -1) printf("sd_blk_write: write error on host side\n"); } else { - memcpy(buf + (addr & 511), data, len); - if (!bdrv || bdrv_write(bdrv, addr >> 9, buf, 1) == -1) + memcpy(sd->buf + (addr & 511), sd->data, sd->blk_len); + if (!sd->bdrv || bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) == -1) printf("sd_blk_write: write error on host side\n"); } } -#define BLK_READ_BLOCK(a, len) sd_blk_read(sd->bdrv, sd->data, a, len) -#define 
BLK_WRITE_BLOCK(a, len) sd_blk_write(sd->bdrv, sd->data, a, len) #define APP_READ_BLOCK(a, len) memset(sd->data, 0xec, len) #define APP_WRITE_BLOCK(a, len) @@ -1363,7 +1361,7 @@ void sd_write_data(SDState *sd, uint8_t if (sd->data_offset >= sd->blk_len) { /* TODO: Check CRC before committing */ sd->state = sd_programming_state; - BLK_WRITE_BLOCK(sd->data_start, sd->data_offset); + sd_blk_write(sd); sd->blk_written ++; sd->csd[14] |= 0x40; /* Bzzzzzzztt .... Operation complete. */ @@ -1376,7 +1374,7 @@ void sd_write_data(SDState *sd, uint8_t if (sd->data_offset >= sd->blk_len) { /* TODO: Check CRC before committing */ sd->state = sd_programming_state; - BLK_WRITE_BLOCK(sd->data_start, sd->data_offset); + sd_blk_write(sd); sd->blk_written ++; sd->data_start += sd->blk_len; sd->data_offset = 0; @@ -1497,7 +1495,7 @@ uint8_t sd_read_data(SDState *sd) case 11: /* CMD11: READ_DAT_UNTIL_STOP */ if (sd->data_offset == 0) - BLK_READ_BLOCK(sd->data_start, sd->blk_len); + sd_blk_read(sd); ret = sd->data[sd->data_offset ++]; if (sd->data_offset >= sd->blk_len) { @@ -1519,7 +1517,7 @@ uint8_t sd_read_data(SDState *sd) case 17: /* CMD17: READ_SINGLE_BLOCK */ if (sd->data_offset == 0) - BLK_READ_BLOCK(sd->data_start, sd->blk_len); + sd_blk_read(sd); ret = sd->data[sd->data_offset ++]; if (sd->data_offset >= sd->blk_len) @@ -1528,7 +1526,7 @@ uint8_t sd_read_data(SDState *sd) case 18: /* CMD18: READ_MULTIPLE_BLOCK */ if (sd->data_offset == 0) - BLK_READ_BLOCK(sd->data_start, sd->blk_len); + sd_blk_read(sd); ret = sd->data[sd->data_offset ++]; if (sd->data_offset >= sd->blk_len) { Index: qemu/osdep.c =================================================================== --- qemu.orig/osdep.c 2007-12-18 10:58:14.000000000 +0100 +++ qemu/osdep.c 2007-12-18 11:01:01.000000000 +0100 @@ -61,6 +61,10 @@ void *qemu_malloc(size_t size) } #if defined(_WIN32) +void *qemu_memalign(size_t alignment, size_t size) +{ + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); +} void 
*qemu_vmalloc(size_t size) { @@ -172,6 +176,22 @@ static void kqemu_vfree(void *ptr) #endif +void *qemu_memalign(size_t alignment, size_t size) +{ +#if defined(_POSIX_C_SOURCE) + int ret; + void *ptr; + ret = posix_memalign(&ptr, alignment, size); + if (ret != 0) + return NULL; + return ptr; +#elif defined(_BSD) + return valloc(size); +#else + return memalign(alignment, size); +#endif +} + /* alloc shared memory pages */ void *qemu_vmalloc(size_t size) { Index: qemu/osdep.h =================================================================== --- qemu.orig/osdep.h 2007-12-18 10:58:14.000000000 +0100 +++ qemu/osdep.h 2007-12-18 11:01:01.000000000 +0100 @@ -48,6 +48,7 @@ void *qemu_mallocz(size_t size); void qemu_free(void *ptr); char *qemu_strdup(const char *str); +void *qemu_memalign(size_t alignment, size_t size); void *qemu_vmalloc(size_t size); void qemu_vfree(void *ptr); Index: qemu/vl.c =================================================================== --- qemu.orig/vl.c 2007-12-18 10:58:14.000000000 +0100 +++ qemu/vl.c 2007-12-18 11:01:01.000000000 +0100 @@ -4880,8 +4880,11 @@ static int drive_init(const char *str, i BlockDriverState *bdrv; int max_devs; int index; + int cache; + int bdrv_flags; char *params[] = { "bus", "unit", "if", "index", "cyls", "heads", - "secs", "trans", "media", "snapshot", "file", NULL }; + "secs", "trans", "media", "snapshot", "file", + "cache", NULL }; if (check_params(buf, sizeof(buf), params, str) < 0) { fprintf(stderr, "qemu: unknowm parameter '%s' in '%s'\n", @@ -4895,6 +4898,7 @@ static int drive_init(const char *str, i unit_id = -1; translation = BIOS_ATA_TRANSLATION_AUTO; index = -1; + cache = 1; if (!strcmp(machine->name, "realview") || !strcmp(machine->name, "SS-5") || @@ -5037,6 +5041,17 @@ static int drive_init(const char *str, i } } + if (get_param_value(buf, sizeof(buf), "cache", str)) { + if (!strcmp(buf, "off")) + cache = 0; + else if (!strcmp(buf, "on")) + cache = 1; + else { + fprintf(stderr, "qemu: invalid cache 
option\n"); + return -1; + } + } + get_param_value(file, sizeof(file), "file", str); /* compute bus and unit according index */ @@ -5127,8 +5142,12 @@ static int drive_init(const char *str, i } if (!file[0]) return 0; - if (bdrv_open(bdrv, file, snapshot ? BDRV_O_SNAPSHOT : 0) < 0 || - qemu_key_check(bdrv, file)) { + bdrv_flags = 0; + if (snapshot) + bdrv_flags |= BDRV_O_SNAPSHOT; + if (!cache) + bdrv_flags |= BDRV_O_DIRECT; + if (bdrv_open(bdrv, file, bdrv_flags) < 0 || qemu_key_check(bdrv, file)) { fprintf(stderr, "qemu: could not open disk image %s\n", file); return -1; @@ -7476,7 +7495,8 @@ static void help(int exitcode) "-hdc/-hdd file use 'file' as IDE hard disk 2/3 image\n" "-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n" "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]\n" - " [,cyls=c,heads=h,secs=s[,trans=t]][snapshot=on|off]\n" + " [,cyls=c,heads=h,secs=s[,trans=t]][snapshot=on|off]" + " [,cache=on|off]\n" " use 'file' as a drive image\n" "-mtdblock file use 'file' as on-board Flash memory image\n" "-sd file use 'file' as SecureDigital card image\n" Index: qemu/block-raw-posix.c =================================================================== --- qemu.orig/block-raw-posix.c 2007-12-18 10:58:14.000000000 +0100 +++ qemu/block-raw-posix.c 2007-12-18 15:06:21.000000000 +0100 @@ -106,6 +106,10 @@ static int raw_open(BlockDriverState *bs } if (flags & BDRV_O_CREAT) open_flags |= O_CREAT | O_TRUNC; +#ifdef O_DIRECT + if (flags & BDRV_O_DIRECT) + open_flags |= O_DIRECT; +#endif s->type = FTYPE_FILE; @@ -659,6 +663,10 @@ static int hdev_open(BlockDriverState *b open_flags |= O_RDONLY; bs->read_only = 1; } +#ifdef O_DIRECT + if (flags & BDRV_O_DIRECT) + open_flags |= O_DIRECT; +#endif s->type = FTYPE_FILE; #if defined(__linux__) Index: qemu/block-raw-win32.c =================================================================== --- qemu.orig/block-raw-win32.c 2007-12-18 10:58:14.000000000 +0100 +++ qemu/block-raw-win32.c 
2007-12-18 15:05:22.000000000 +0100 @@ -105,6 +105,8 @@ static int raw_open(BlockDriverState *bs #else overlapped = FILE_FLAG_OVERLAPPED; #endif + if (flags & BDRV_O_DIRECT) + overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, create_flags, overlapped, NULL); @@ -473,6 +475,8 @@ static int hdev_open(BlockDriverState *b #else overlapped = FILE_FLAG_OVERLAPPED; #endif + if (flags & BDRV_O_DIRECT) + overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, create_flags, overlapped, NULL); Index: qemu/qemu-doc.texi =================================================================== --- qemu.orig/qemu-doc.texi 2007-12-18 14:51:12.000000000 +0100 +++ qemu/qemu-doc.texi 2007-12-18 15:00:27.000000000 +0100 @@ -250,6 +250,8 @@ This option defines the type of the medi These options have the same definition as they have in @option{-hdachs}. @item snapshot=@var{snapshot} @var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}). +@item cache=@var{cache} +@var{cache} is "on" or "off" and allows to disable host cache to access data. @end table Instead of @option{-cdrom} you can use: