Hello.

Attached is a C file (and small patch) to add support for multi-file raw
images to QEMU.  The rationale (for me at least) is as follows:

I use rsync to back up my home directory.  Merely starting QEMU modifies
a 20GB image file on my drive, which causes 20GB of extra copying the
next time I do backups.  If I could split the drive image into smaller
parts (maybe 2048 10MB files) then the amount of extra copying would be
drastically reduced (since only a few of these files are modified).

There are definitely other reasons that this may be useful.

It works as follows:

1) Create a bunch of files of equal size with names of the form

      harddriveXX

   where XX is a hex number starting from 00 going to whatever.

NB: the suffix can have any number of hex digits from 0 (i.e. basically
a single-file image) to 6 (i.e. up to about 16 million parts).

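2) Run qemu multi:harddrive00

   (Give the name of the first part.  The number of trailing zeros tells
   QEMU how many hex digits the suffix has, so this must match the names
   created in step 1.)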

QEMU will detect the multi-part image and do a sanity check to ensure
that all the parts are there, of the same size, and accessible (this
consists of calling 'stat' and 'euidaccess' on each file).  It then
starts using the device as the harddrive.


Some notes:
 o I've tested only on Linux.  I'm positive the code is not portable
   to other systems.  Feedback about this, please.

 o Included is optional support for limiting the number of open fds.
   Cache eviction is done using a least-recently-opened policy
   (efficiently implemented using a ring buffer).

 o The code makes use of the euidaccess() library call, which is a GNU
   extension.  BSD has eaccess() to do the same thing.  Both of these
   calls are approximately equal to POSIX access() except that the euid
   of the process is considered instead of the real uid.  The call is
   used to determine if the device should be marked 'read_only' by
   checking for write access to the files comprising the device.  If
   access() is used and QEMU is installed setuid/gid to give the user
   access to a drive image then the result of using access() will be
   that the drive is incorrectly flagged read-only.

 o If the files comprising the device are deleted (for example) while
   QEMU is running then this is quite bad.  Currently this will result
   in read/write requests returning -1.  Maybe it makes sense to panic
   and cause QEMU to exit.

 o All comments welcome.

Cheers.
/*
 * Block driver for multiple-file raw images.
 * Copyright © 2006 Ryan Lortie <[EMAIL PROTECTED]>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "vl.h"
#include "block_int.h"

/* The maximum number of fds to use. */
#define LIMIT_FDS 15

#ifdef LIMIT_FDS

#define RING_BUFFER_SIZE LIMIT_FDS
/*
 * Fixed-capacity FIFO of ints kept in a circular array.
 *
 * One slot is always left unused so that "empty" and "full" can be told
 * apart: the queue is empty when head == tail and full when advancing
 * tail would make it equal to head.  It therefore holds at most
 * RING_BUFFER_SIZE values.
 *
 * (The struct tag avoids a leading underscore followed by an uppercase
 * letter, since such identifiers are reserved for the implementation.)
 */
typedef struct RingBuffer
{
  int buffer[RING_BUFFER_SIZE+1]; /* storage; one slot is always unused */
  int head;                       /* index of the next value to take out */
  int tail;                       /* index where the next value is stored */
} RingBuffer;

/*
 * Allocate a ring buffer with qemu_mallocz() and hand it back to the
 * caller.  Whatever qemu_mallocz() returns is passed through unchanged
 * (presumably zero-filled memory, i.e. an empty queue with
 * head == tail == 0 -- confirm against qemu_mallocz).
 */
static RingBuffer *
ring_buffer_new (void)
{
  return qemu_mallocz (sizeof (RingBuffer));
}

/*
 * Remove and return the oldest value stored in 'ring'.
 * Returns -1 when the ring is empty (head has caught up with tail).
 */
static int
ring_buffer_get (RingBuffer *ring)
{
  int slot = ring->head;

  /* nothing queued */
  if (slot == ring->tail)
    return -1;

  /* step head past the slot we are about to hand out, wrapping around */
  ring->head = (slot + 1) % (RING_BUFFER_SIZE + 1);

  return ring->buffer[slot];
}

/*
 * Append 'value' to 'ring'.
 * Returns 0 on success, or -1 (leaving the ring untouched) when the ring
 * is full, i.e. when advancing tail would make it collide with head.
 */
static int
ring_buffer_put (RingBuffer *ring, int value)
{
  int slot = ring->tail;
  int next = (slot + 1) % (RING_BUFFER_SIZE + 1);

  /* full: one slot is always kept free */
  if (next == ring->head)
    return -1;

  ring->buffer[slot] = value;
  ring->tail = next;

  return 0;
}
#endif

/*
 * Per-device state of a multi-file raw image.
 *
 * (The struct tag avoids a leading underscore followed by an uppercase
 * letter, since such identifiers are reserved for the implementation.)
 */
typedef struct MultiStatus
{
  char *level;      /* per part: '\0' = not open, 'r' = open read-only,
                     * 'w' = open read/write */
  int *fds;         /* per part: descriptor, meaningful only while the
                     * matching level entry is non-zero */

  char *filename;   /* base name followed by room for the hex suffix;
                     * the suffix is rewritten before each stat/open */
  int fnlength;     /* length of the base name, i.e. the offset in
                     * 'filename' at which the suffix is written */
  char pattern[5];  /* printf format for the suffix (e.g. "%02x"), or an
                     * empty string when there is no suffix */

  int64_t filesize; /* size of each part, in 512-byte sectors */
  int n_files;      /* number of parts making up the device */
  int read_only;    /* non-zero if any part is not writable */
} MultiStatus;

/* Pointer to a transfer routine shaped like read(2)/write(2): it takes an
 * int, a byte buffer and an ssize_t and returns an ssize_t. */
typedef ssize_t (*disk_operation) (int, const uint8_t *, ssize_t);

/*
 * Parse a "multi:<name>" specification and probe the parts on disk.
 *
 * The run of '0' characters at the end of <name> (at most 6 are used)
 * selects the width of the hexadecimal suffix; the parts are then named
 * <base>0..0, <base>0..1, and so on.  Parts are examined in order until
 * one is missing.  Every part must have the same size, and that size
 * must be a non-zero multiple of 512 bytes.
 *
 * On success 'status' is filled in (file name buffer, suffix pattern,
 * part size in sectors, part count, read-only flag, and freshly
 * allocated 'fds' / 'level' arrays) and 0 is returned.
 *
 * On failure -1 is returned and nothing stays allocated.  Failure cases:
 * missing "multi:" prefix, no parts found, a stat() error other than
 * ENOENT, a size mismatch, an empty part, or an allocation failure.
 */
static int
multi_status_fill (MultiStatus *status, const char *filename)
{
  struct stat statbuf;
  int zeros = 0;
  int i;

  /* on a match, strstart leaves 'filename' pointing past the prefix */
  if (!strstart (filename, "multi:", &filename))
    return -1;

  /* count the run of '0' characters that ends the name */
  for (i = 0; filename[i]; i++)
  {
    if (filename[i] == '0')
      zeros++;
    else
      zeros = 0;
  }

  if (zeros > 6)
    zeros = 6;

  status->fnlength = strlen (filename);
  status->filename = qemu_malloc (status->fnlength + 1);
  if (status->filename == NULL)
    return -1;
  strcpy (status->filename, filename);

  /* from here on fnlength is the offset at which the suffix starts */
  status->fnlength -= zeros;

  /* "%0Nx" is four characters plus the terminator, which is exactly
   * what pattern[] holds; with no suffix the pattern is empty */
  if (zeros == 0)
    status->pattern[0] = '\0';
  else
    snprintf (status->pattern, sizeof status->pattern,
              "%%0%ux", (unsigned) zeros);

  /* upper bound on the number of parts: 16 ^ zeros */
  status->n_files = 1 << (4 * zeros);

  status->filesize = 0;
  status->read_only = 0;
  for (i = 0; i < status->n_files; i++)
  {
    /* the suffix always fits: i < 16 ^ zeros needs at most 'zeros' digits */
    snprintf (status->filename + status->fnlength, zeros + 1,
              status->pattern, (unsigned) i);

    if (stat (status->filename, &statbuf))
    {
      /* a missing part marks the end of the image; any other error
       * leaves i != n_files and is reported as a failure below */
      if (errno == ENOENT)
        status->n_files = i;
      break;
    }

    if (euidaccess (status->filename, W_OK))
      /* file is read-only */
      status->read_only = 1;

    /* the first part defines the size every other part must match */
    if (i == 0)
      status->filesize = statbuf.st_size / 512;

    /* reject empty parts (they would cause a division by zero when
     * mapping sectors to parts), partial sectors and size mismatches */
    if (status->filesize == 0 || status->filesize * 512 != statbuf.st_size)
      break;
  }

  if (i != status->n_files || i == 0)
  {
    qemu_free (status->filename);
    return -1;
  }

  status->fds = qemu_malloc (sizeof (int) * i);
  status->level = qemu_mallocz (i);
  if (status->fds == NULL || status->level == NULL)
  {
    qemu_free (status->fds);
    qemu_free (status->level);
    qemu_free (status->filename);
    return -1;
  }

  return 0;
}

/*
 * Release the three buffers owned by 'status' (file name, per-part level
 * array and per-part descriptor array).  No descriptors are closed here.
 */
static void
multi_status_clear (MultiStatus *status)
{
  qemu_free (status->filename);
  qemu_free (status->level);
  qemu_free (status->fds);
}

/*
 * Return a descriptor for part number 'fileno' that is open with at
 * least the access given by 'level' ('r' = read-only, 'w' = read/write),
 * opening or reopening the part as needed.
 *
 * If the part is already open with sufficient access the cached
 * descriptor is returned.  If it is open read-only and write access is
 * requested, it is closed and reopened read/write.
 *
 * With LIMIT_FDS defined, each newly opened part is recorded in a ring
 * buffer; when the ring is full, the part that was opened earliest is
 * closed first.
 *
 * Returns the descriptor, or -1 if the ring buffer cannot be allocated
 * or open() fails.  A failed open() is still recorded in level[], so
 * later calls for the same part return the cached -1 until the entry is
 * evicted.
 *
 * NOTE(review): the ring is a function-level static, so it is shared by
 * every MultiStatus and is not emptied by multi_close().  It can
 * therefore contain indices that do not describe an open part of
 * 'status'; the eviction code below skips such entries rather than
 * closing an unrelated descriptor.  Moving the ring into MultiStatus
 * would be the complete fix.
 */
static int
multi_get_fd (MultiStatus *status, int fileno, char level)
{
  int mode;
#ifdef LIMIT_FDS
  static RingBuffer *fd_buffer;

  if (fd_buffer == NULL)
  {
    fd_buffer = ring_buffer_new ();
    if (fd_buffer == NULL)
      return -1;
  }
#endif

  /* relies on '\0' < 'r' < 'w' in ASCII: not open < read < write */
  if (level <= status->level[fileno])
    return status->fds[fileno];

  if (status->level[fileno])
  {
    /* already open, but with insufficient access: reopen below.  The
     * part keeps its existing entry in the ring buffer. */
    close (status->fds[fileno]);
  }
#ifdef LIMIT_FDS
  else if (ring_buffer_put (fd_buffer, fileno))
  {
    /* no room in the ring buffer -- close a file */
    int closefile;

    closefile = ring_buffer_get (fd_buffer);

    /* only close entries that really are open parts of this image */
    if (closefile >= 0 && closefile < status->n_files
        && status->level[closefile])
    {
      close (status->fds[closefile]);
      status->level[closefile] = '\0';
    }

    /* there will be room for us now. */
    ring_buffer_put (fd_buffer, fileno);
  }
#endif

  if (level == 'w')
    mode = O_RDWR;
  else
    mode = O_RDONLY;

  /* rebuild the part's file name in the shared buffer, then open it */
  sprintf (status->filename + status->fnlength, status->pattern, fileno);
  status->fds[fileno] = open (status->filename, mode);

  status->level[fileno] = level;

  /* this might return -1.  this should really be fatal since we're
   * in quite bad shape at this point... */
  return status->fds[fileno];
}

/*
 * Read or write 'sectors' 512-byte sectors starting at device sector
 * 'sector', splitting the request wherever it crosses from one part
 * file into the next.
 *
 * 'type' is 'r' to read into 'buffer' or 'w' to write from it.  The
 * 'const' on 'buffer' is a lie when type == 'r': the buffer is written.
 *
 * Returns 0 when every sector was transferred and -1 otherwise: unknown
 * type, negative arguments, a request reaching past the last part, a
 * failure to open or seek, or a transfer that is not a positive whole
 * number of sectors.
 */
static int
multi_disk_operation (BlockDriverState *bs, int64_t sector,
                      const uint8_t *buffer, int sectors,
                      char type)
{
  MultiStatus *status = bs->opaque;
  uint64_t file, offset;
  uint64_t this_access;
  ssize_t size;
  int fd;

  if (type != 'r' && type != 'w')
    return -1;

  /* negative values would turn into huge unsigned offsets and counts,
   * and a zero part size would divide by zero below */
  if (sector < 0 || sectors < 0 || status->filesize <= 0)
    return -1;

  while (sectors > 0)
  {
    /* which part holds this sector, and where inside that part */
    offset = sector % status->filesize;
    file = sector / status->filesize;

    /* valid part numbers are 0 .. n_files - 1 */
    if (file >= (uint64_t) status->n_files)
      return -1;

    /* maximum possible sectors left in current file */
    this_access = status->filesize - offset;

    /* access no more than the remaining sectors */
    if (this_access > (uint64_t) sectors)
      this_access = sectors;

    /* get the fd of the file in question (possibly opening it) */
    fd = multi_get_fd (status, file, type);
    if (fd < 0)
      return -1;

    if (lseek (fd, offset * 512, SEEK_SET) < 0)
      return -1;

    /* call read/write directly instead of through a pointer cast to a
     * different function type, which would be undefined behaviour */
    if (type == 'r')
      size = read (fd, (void *) buffer, this_access * 512);
    else
      size = write (fd, buffer, this_access * 512);

    /* make sure we transferred a whole positive number of sectors */
    if (size <= 0 || (size & 511))
      return -1;

    /* a short transfer is fine: continue with whatever is left */
    sectors -= (size / 512);
    sector += (size / 512);
    buffer += size;
  }

  return 0;
}

/* --- begin BlockDriver methods --- */
/*
 * Probe callback.  Only 'filename' is looked at; 'buffer' and
 * 'buffer_size' are ignored.  Returns 1000 when multi_status_fill()
 * accepts the name as a multi-file image and 0 otherwise.  The
 * temporary state built during the check is released before returning.
 */
static int
multi_probe (const uint8_t *buffer, int buffer_size, const char *filename)
{
  MultiStatus probe;
  int score = 0;

  if (multi_status_fill (&probe, filename) == 0)
  {
    multi_status_clear (&probe);
    score = 1000;
  }

  return score;
}

/*
 * Open callback: fill the MultiStatus stored in bs->opaque from
 * 'filename' and publish the device geometry on 'bs'.
 * Returns 0 on success, or -1 if multi_status_fill() rejects the name.
 */
static int
multi_open (BlockDriverState *bs, const char *filename)
{
  MultiStatus *status = bs->opaque;

  if (multi_status_fill (status, filename) != 0)
    return -1;

  /* the device is as large as all parts together, and read-only as
   * soon as any part is */
  bs->read_only = status->read_only;
  bs->total_sectors = status->n_files * status->filesize;

  return 0;
}

/*
 * Read callback: read 'sectors' sectors starting at 'sector' into 'buf'
 * by delegating to multi_disk_operation() with type 'r'.  Its return
 * value is passed through.
 */
static int
multi_read (BlockDriverState *bs, int64_t sector,
            uint8_t *buf, int sectors)
{
  const char op = 'r';

  return multi_disk_operation (bs, sector, buf, sectors, op);
}

/*
 * Write callback: write 'sectors' sectors from 'buf' starting at
 * 'sector' by delegating to multi_disk_operation() with type 'w'.  Its
 * return value is passed through.
 */
static int
multi_write (BlockDriverState *bs, int64_t sector,
             const uint8_t *buf, int sectors)
{
  const char op = 'w';

  return multi_disk_operation (bs, sector, buf, sectors, op);
}


/*
 * Close callback: close the descriptor of every part whose level entry
 * is non-zero, then free the buffers held by the MultiStatus in
 * bs->opaque.
 */
static void
multi_close (BlockDriverState *bs)
{
  MultiStatus *status = bs->opaque;
  int idx = 0;

  while (idx < status->n_files)
  {
    /* a zero level means the part is not currently open */
    if (status->level[idx] != '\0')
      close (status->fds[idx]);
    idx++;
  }

  multi_status_clear (status);
}

/*
 * Driver descriptor for the multi-file raw format.  The fields are
 * initialised positionally, so their order must match the BlockDriver
 * declaration (not visible in this file -- confirm against
 * block_int.h before reordering).
 */
BlockDriver bdrv_multi = {
  "multi",               /* presumably the format name */
  sizeof (MultiStatus),  /* presumably the size of the state in bs->opaque */
  multi_probe,
  multi_open,
  multi_read,
  multi_write,
  multi_close
};
? block-multi.c
Index: Makefile
===================================================================
RCS file: /sources/qemu/qemu/Makefile,v
retrieving revision 1.99
diff -u -p -r1.99 Makefile
--- Makefile	30 Apr 2006 23:54:18 -0000	1.99
+++ Makefile	11 May 2006 06:09:13 -0000
@@ -22,7 +22,7 @@ all: dyngen$(EXESUF) $(TOOLS) $(DOCS)
 	$(MAKE) -C $$d $@ || exit 1 ; \
         done
 
-qemu-img$(EXESUF): qemu-img.c block.c block-cow.c block-qcow.c aes.c block-vmdk.c block-cloop.c block-dmg.c block-bochs.c block-vpc.c block-vvfat.c
+qemu-img$(EXESUF): qemu-img.c block.c block-cow.c block-qcow.c aes.c block-vmdk.c block-cloop.c block-dmg.c block-bochs.c block-vpc.c block-vvfat.c block-multi.c
 	$(CC) -DQEMU_TOOL $(CFLAGS) $(LDFLAGS) $(DEFINES) -o $@ $^ -lz $(LIBS)
 
 dyngen$(EXESUF): dyngen.c
Index: Makefile.target
===================================================================
RCS file: /sources/qemu/qemu/Makefile.target,v
retrieving revision 1.108
diff -u -p -r1.108 Makefile.target
--- Makefile.target	3 May 2006 22:02:44 -0000	1.108
+++ Makefile.target	11 May 2006 06:09:13 -0000
@@ -273,7 +273,7 @@ endif
 
 # must use static linking to avoid leaving stuff in virtual address space
 VL_OBJS=vl.o osdep.o block.o readline.o monitor.o pci.o console.o loader.o
-VL_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o block-dmg.o block-bochs.o block-vpc.o block-vvfat.o
+VL_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o block-dmg.o block-bochs.o block-vpc.o block-vvfat.o block-multi.o
 ifdef CONFIG_WIN32
 VL_OBJS+=tap-win32.o
 endif
Index: block.c
===================================================================
RCS file: /sources/qemu/qemu/block.c,v
retrieving revision 1.26
diff -u -p -r1.26 block.c
--- block.c	25 Apr 2006 22:36:06 -0000	1.26
+++ block.c	11 May 2006 06:09:14 -0000
@@ -794,4 +794,5 @@ void bdrv_init(void)
     bdrv_register(&bdrv_bochs);
     bdrv_register(&bdrv_vpc);
     bdrv_register(&bdrv_vvfat);
+    bdrv_register(&bdrv_multi);
 }
Index: vl.h
===================================================================
RCS file: /sources/qemu/qemu/vl.h,v
retrieving revision 1.118
diff -u -p -r1.118 vl.h
--- vl.h	10 May 2006 22:17:36 -0000	1.118
+++ vl.h	11 May 2006 06:09:15 -0000
@@ -477,6 +477,7 @@ extern BlockDriver bdrv_dmg;
 extern BlockDriver bdrv_bochs;
 extern BlockDriver bdrv_vpc;
 extern BlockDriver bdrv_vvfat;
+extern BlockDriver bdrv_multi;
 
 void bdrv_init(void);
 BlockDriver *bdrv_find_format(const char *format_name);

Attachment: signature.asc
Description: This is a digitally signed message part

_______________________________________________
Qemu-devel mailing list
Qemu-devel@nongnu.org
http://lists.nongnu.org/mailman/listinfo/qemu-devel

Reply via email to