Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Muli Ben-Yehuda
On Wed, Oct 29, 2008 at 12:27:19PM +, Mark McLoughlin wrote:
> On Wed, 2008-10-29 at 14:20 +0200, [EMAIL PROTECTED] wrote:
> > diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c
> > index b9067b8..27d5f02 100644
> > --- a/qemu/hw/piix_pci.c
> > +++ b/qemu/hw/piix_pci.c
> > @@ -246,9 +246,9 @@ static void piix3_set_irq(qemu_irq *pic, int
> > irq_num, int level)
> >  int piix_get_irq(int pin)
> >  {
> >  if (piix3_dev)
> > -return piix3_dev->config[PIIX_CONFIG_IRQ_ROUTE + pin];
> > +return piix3_dev->config[0x60+pin];
> >  if (piix4_dev)
> > -return piix4_dev->config[PIIX_CONFIG_IRQ_ROUTE + pin];
> > +return piix4_dev->config[0x60+pin];
> >  
> >  return 0;
> >  }
> 
> Another rebase mixup?

Argh. Indeed. Here's 5/6 again without the offending hunk. Hopefully
that's the last one for today or I might have to return my "git
competency" boy scout badge.

>From 1dd6f84986a4224635ad5a6f9edfa57b1c5a1e7b Mon Sep 17 00:00:00 2001
From: Muli Ben-Yehuda <[EMAIL PROTECTED]>
Date: Wed, 29 Oct 2008 14:12:08 +0200
Subject: [PATCH 5/6] device assignment: support for assigning PCI devices to 
guests

This patch has been contributed to by the following people:

Or Sagi <[EMAIL PROTECTED]>
Nir Peleg <[EMAIL PROTECTED]>
Amit Shah <[EMAIL PROTECTED]>
Ben-Ami Yassour <[EMAIL PROTECTED]>
Weidong Han <[EMAIL PROTECTED]>
Glauber de Oliveira Costa <[EMAIL PROTECTED]>
Muli Ben-Yehuda <[EMAIL PROTECTED]>

With this patch, we can assign a device on the host machine to a
guest.

A new command-line option, -pcidevice is added.
To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use this:

-pcidevice host=04:08.0

* The host driver for the device, if any, is to be removed before
assigning the device (else device assignment will fail).

* A device that shares IRQ with another host device cannot currently
be assigned.

* The RAW_IO capability is needed for this to work

This works only with the in-kernel irqchip method; to use the
userspace irqchip, a kernel module (irqhook) and some extra changes
are needed.

Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]>
---
 qemu/Makefile.target|3 +
 qemu/configure  |   21 ++
 qemu/hw/device-assignment.c |  616 +++
 qemu/hw/device-assignment.h |  106 
 qemu/hw/pc.c|   18 ++
 qemu/hw/pci.c   |8 +
 qemu/hw/piix_pci.c  |4 +-
 qemu/qemu-kvm.c |   13 +
 qemu/qemu-kvm.h |8 +
 qemu/vl.c   |   26 ++
 10 files changed, 821 insertions(+), 2 deletions(-)
 create mode 100644 qemu/hw/device-assignment.c
 create mode 100644 qemu/hw/device-assignment.h

diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index d9bdeca..64d4e44 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -621,6 +621,9 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
+ifeq ($(USE_KVM_DEVICE_ASSIGNMENT), 1)
+OBJS+= device-assignment.o
+endif
 ifeq ($(USE_KVM_PIT), 1)
 OBJS+= i8254-kvm.o
 endif
diff --git a/qemu/configure b/qemu/configure
index 922a156..618dbce 100755
--- a/qemu/configure
+++ b/qemu/configure
@@ -101,6 +101,7 @@ linux="no"
 kqemu="no"
 kvm="no"
 kvm_cap_pit="no"
+kvm_cap_device_assignment="no"
 profiler="no"
 kernel_path=""
 cocoa="no"
@@ -749,6 +750,9 @@ fi
 # KVM probe
 
 if test "$kvm" = "yes" ; then
+
+# test for KVM_CAP_PIT
+
 cat > $TMPC <
 #ifndef KVM_CAP_PIT
@@ -759,6 +763,19 @@ EOF
 if $cc $ARCH_CFLAGS $CFLAGS -I"$kernel_path"/include -o $TMPE ${OS_CFLAGS} 
$TMPC 2> /dev/null ; then
kvm_cap_pit="yes"
 fi
+
+# test for KVM_CAP_DEVICE_ASSIGNMENT
+
+cat > $TMPC <
+#ifndef KVM_CAP_DEVICE_ASSIGNMENT
+#error "kvm no device assignment capability"
+#endif
+int main(void) { return 0; }
+EOF
+if $cc $ARCH_CFLAGS $CFLAGS -I"$kernel_path"/include -o $TMPE ${OS_CFLAGS} 
$TMPC 2> /dev/null ; then
+   kvm_cap_device_assignment="yes"
+fi
 fi
 
 ##
@@ -1515,6 +1532,10 @@ configure_kvm() {
echo "USE_KVM_PIT=1" >> $config_mak
echo "#define USE_KVM_PIT 1" >> $config_h
 fi
+if test $kvm_cap_device_assignment = "yes" ; then
+   echo "USE_KVM_DEVICE_ASSIGNMENT=1" >> $config_mak
+   echo "#define USE_KVM_DEVICE_ASSIGNMENT 1" >> $config_h
+fi
 disable_cpu_emulation
   fi
 }
diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
new file mode 100644
index 000..78b7e14
--- /dev/null
+++ b/qemu/hw/device-assignment.c
@@ -0,0 +1,616 @@
+/*
+ * Copyright (c) 2007, Neocleus Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as pub

Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Mark McLoughlin
On Wed, 2008-10-29 at 14:20 +0200, [EMAIL PROTECTED] wrote:
> diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c
> index b9067b8..27d5f02 100644
> --- a/qemu/hw/piix_pci.c
> +++ b/qemu/hw/piix_pci.c
> @@ -246,9 +246,9 @@ static void piix3_set_irq(qemu_irq *pic, int
> irq_num, int level)
>  int piix_get_irq(int pin)
>  {
>  if (piix3_dev)
> -return piix3_dev->config[PIIX_CONFIG_IRQ_ROUTE + pin];
> +return piix3_dev->config[0x60+pin];
>  if (piix4_dev)
> -return piix4_dev->config[PIIX_CONFIG_IRQ_ROUTE + pin];
> +return piix4_dev->config[0x60+pin];
>  
>  return 0;
>  }

Another rebase mixup?

Cheers,
Mark.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Muli Ben-Yehuda
On Wed, Oct 29, 2008 at 11:15:10AM +, Mark McLoughlin wrote:
> On Wed, 2008-10-29 at 12:31 +0200, Muli Ben-Yehuda wrote:
> > On Tue, Oct 28, 2008 at 04:55:22PM +, Mark McLoughlin wrote:
> 
> > > nr_assigned_devices isn't actually used anywhere.
> > 
> > Nuked.
> 
> Still there.
> 
> > > > +#define MAX_IO_REGIONS (6)
> > > 
> > > Perhaps a comment to say this is the number of BARs in the config space
> > > header?
> > 
> > Sure, comments are cheap.
> 
> You didn't add one though :-)
> 
> > > > +
> > > > +if (kvm_enabled() && device_assignment_enabled) {
> > > 
> > > The device_assignment_enabled flag looks like it shouldn't be needed.
> > > 
> > > If assigned_devices_index remains zero, nothing should happen
> > > anyway.
> > 
> > Nuked.
> 
> Still there.

Sorry, I messed up the rebase! v10 coming up in a few minutes.

Cheers,
Muli
-- 
The First Workshop on I/O Virtualization (WIOV '08)
Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/
   <->
SYSTOR 2009---The Israeli Experimental Systems Conference
http://www.haifa.il.ibm.com/conferences/systor2009/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Mark McLoughlin
On Wed, 2008-10-29 at 12:31 +0200, Muli Ben-Yehuda wrote:
> On Tue, Oct 28, 2008 at 04:55:22PM +, Mark McLoughlin wrote:

> > nr_assigned_devices isn't actually used anywhere.
> 
> Nuked.

Still there.

> > > +#define MAX_IO_REGIONS (6)
> > 
> > Perhaps a comment to say this is the number of BARs in the config space
> > header?
> 
> Sure, comments are cheap.

You didn't add one though :-)

> > > +
> > > +if (kvm_enabled() && device_assignment_enabled) {
> > 
> > The device_assignment_enabled flag looks like it shouldn't be needed.
> > 
> > If assigned_devices_index remains zero, nothing should happen
> > anyway.
> 
> Nuked.

Still there.

Cheers,
Mark.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Mark McLoughlin
On Wed, 2008-10-29 at 12:31 +0200, Muli Ben-Yehuda wrote:
> On Tue, Oct 28, 2008 at 04:55:22PM +, Mark McLoughlin wrote:
> > On Tue, 2008-10-28 at 12:06 +0200, [EMAIL PROTECTED] wrote:

> > > +void assigned_dev_set_vector(int irq, int vector);
> > > +void assigned_dev_ack_mirq(int vector);
...
> > > +extern const char *assigned_devices[MAX_DEV_ASSIGN_CMDLINE];
> > > +extern int assigned_devices_index;
> > 
> > Neither of these two are implemented anywhere.
> 
> Actually they are, we use them to pass the arguments from main to
> pc.c.

Heh, sorry ... I added the comment in the wrong place :-)

assigned_dev_{set_vector,ack_mirq} aren't implemented anywhere

Cheers,
Mark.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Muli Ben-Yehuda
On Wed, Oct 29, 2008 at 12:25:50PM +0200, Muli Ben-Yehuda wrote:

> > > You still succeed if KVM_CAP_DEVICE_ASSIGNMENT isn't defined?
> > > That means a newer userspace compiled on an older kernel will
> > > silently fail if they try to do device assignment.  There's
> > > probably no reason to build this file if
> > > KVM_CAP_DEVICE_ASSIGNMENT isn't defined (see how the in-kernel
> > > PIT gets conditionally build depending on whether that cap is
> > > available).
> > 
> > Ok, I'll take a look at this.
> 
> I reworked it per your suggestion so that device assignment is a kvm
> only feature for now. I am pretty sure Amit intended for the patches
> to support device assignment without kvm too, but getting rid of it
> did make things simpler.

By the way, one thing I ran into here is that we check the PIT and
DEVICE_ASSIGNMENT capabilities based on the kernel headers under
kernel/ first, and sync the kernel headers from the patched kernel
tree into kernel/ second.  In other words, if you configure userspace
with a patched kernel tree and just edit include/linux/kvm.h to have
or not have the capability, the build will use the old copy of the
header first, and only pick up the new copy later after it resynch's
the header. Seems rather counter-intuitive.

Cheers,
Muli
-- 
The First Workshop on I/O Virtualization (WIOV '08)
Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/
   <->
SYSTOR 2009---The Israeli Experimental Systems Conference
http://www.haifa.il.ibm.com/conferences/systor2009/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Muli Ben-Yehuda
On Tue, Oct 28, 2008 at 04:55:22PM +, Mark McLoughlin wrote:
> On Tue, 2008-10-28 at 12:06 +0200, [EMAIL PROTECTED] wrote:
> ...
> > +static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
> > +   uint8_t r_dev, uint8_t r_func)
> > +{
> > +char dir[128], name[128];
> > +int fd, r = 0;
> > +FILE *f;
> > +unsigned long long start, end, size, flags;
> > +PCIRegion *rp;
> > +PCIDevRegions *dev = &pci_dev->real_device;
> > +
> > +dev->region_number = 0;
> > +
> > +snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/:%02x:%02x.%x/",
> > +r_bus, r_dev, r_func);
> > +
> > +snprintf(name, sizeof(name), "%sconfig", dir);
> > +
> > +fd = open(name, O_RDWR);
> > +if (fd == -1) {
> > +fprintf(stderr, "%s: %s: %m\n", __func__, name);
> > +return 1;
> > +}
> > +dev->config_fd = fd;
> > +again:
> > +r = read(fd, pci_dev->dev.config, sizeof(pci_dev->dev.config));
> > +if (r < 0) {
> > +if (errno == EINTR || errno == EAGAIN)
> > +goto again;
> > +fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
> > +}
> > +
> > +snprintf(name, sizeof(name), "%sresource", dir);
> > +
> > +f = fopen(name, "r");
> > +if (f == NULL) {
> > +fprintf(stderr, "%s: %s: %m\n", __func__, name);
> > +return 1;
> > +}
> > +r = -1;
> > +while (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) == 3) {
> > +r++;
> > +rp = dev->regions + r;
> 
> Could, in theory, overflow dev->regions here. Suggest:
> 
> +for (r = 0; r < MAX_IO_REGIONS; r++) {
> +if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3)
> +break;

Fixed, thanks Mark. I think it also uncovered a buglet where we would
skip the last region with the original code, which should be ok now.

> > +rp->valid = 0;
> > +size = end - start + 1;
> > +flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
> > +if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
> > +continue;
> > +if (flags & IORESOURCE_MEM) {
> > +flags &= ~IORESOURCE_IO;
> > +   snprintf(name, sizeof(name), "%sresource%d", dir, r);
> > +fd = open(name, O_RDWR);
> > +if (fd == -1)
> > +continue;   /* probably ROM */
> > +rp->resource_fd = fd;
> > +} else
> > +flags &= ~IORESOURCE_PREFETCH;
> > +
> > +rp->type = flags;
> > +rp->valid = 1;
> > +rp->base_addr = start;
> > +rp->size = size;
> > +DEBUG("region %d size %d start 0x%x type %d resource_fd %d\n",
> > +  r, rp->size, start, rp->type, rp->resource_fd);
> > +}
> > +fclose(f);
> > +
> > +dev->region_number = r;
> > +return 0;
> > +}
> > +
> > +static int disable_iommu;
> 
> Why is this global?
> 
> The flag is set per-device on the command-line and only affects whether
> we pass KVM_DEV_ASSIGN_ENABLE_IOMMU to kvm_assign_pci_device()

Made per-device by moving it to struct AssignedDevInfo.

> > +int nr_assigned_devices;
> > +static LIST_HEAD(, AssignedDevInfo) adev_head;
> > +
> > +static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
> > +{
> > +return (uint32_t)bus << 8 | (uint32_t)devfn;
> > +}
> > +
> > +static AssignedDevice *register_real_device(PCIBus *e_bus,
> > +const char *e_dev_name,
> > +int e_devfn, uint8_t r_bus,
> > +uint8_t r_dev, uint8_t r_func)
> > +{
> > +int r;
> > +AssignedDevice *pci_dev;
> > +uint8_t e_device, e_intx;
> > +
> > +DEBUG("Registering real physical device %s (devfn=0x%x)\n",
> > +  e_dev_name, e_devfn);
> > +
> > +pci_dev = (AssignedDevice *)
> > +pci_register_device(e_bus, e_dev_name, sizeof(AssignedDevice),
> > +e_devfn, assigned_dev_pci_read_config,
> > +assigned_dev_pci_write_config);
> > +if (NULL == pci_dev) {
> > +fprintf(stderr, "%s: Error: Couldn't register real device %s\n",
> > +__func__, e_dev_name);
> > +return NULL;
> > +}
> > +if (get_real_device(pci_dev, r_bus, r_dev, r_func)) {
> > +fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n",
> > +__func__, e_dev_name);
> > +goto out;
> > +}
> > +
> > +/* handle real device's MMIO/PIO BARs */
> > +if (assigned_dev_register_regions(pci_dev->real_device.regions,
> > +  pci_dev->real_device.region_number,
> > +  pci_dev))
> > +goto out;
> > +
> > +/* handle interrupt routing */
> > +e_device = (pci_dev->dev.devfn >> 3) & 0x1f;
> > +e_intx = pci_dev->dev.config[0x3d] - 1;
> > +pci_dev->intpin = e

Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Muli Ben-Yehuda
On Wed, Oct 29, 2008 at 03:56:54PM +0800, Zhang, Xiantao wrote:
> Muli Ben-Yehuda wrote:
> > On Tue, Oct 28, 2008 at 10:45:57AM -0500, Anthony Liguori wrote:
> > 
> >>> +ifeq ($(USE_KVM), 1)
> >>> +OBJS+= device-assignment.o
> >>> +endif
> >> 
> >> I don't think you want to build this on PPC so I think you need a
> >> stronger check.
> > 
> > Good point. How about checking TARGET_BASE_ARCH = i386?
> 
> It should work for ia64, please include ia64 when you do it in this
> way. 

Fixing it to work for ia64 will take a few more changes than just
this, and I'd rather not do it now. This patchset has been through a
tremendous amount of churn, and I hope it can finally find its way
into the tree now. Once it's in, adding ia64 support will be a lot
simpler.

Cheers,
Muli
-- 
The First Workshop on I/O Virtualization (WIOV '08)
Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/
   <->
SYSTOR 2009---The Israeli Experimental Systems Conference
http://www.haifa.il.ibm.com/conferences/systor2009/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Muli Ben-Yehuda
On Tue, Oct 28, 2008 at 05:53:05PM +0200, Muli Ben-Yehuda wrote:
> On Tue, Oct 28, 2008 at 10:45:57AM -0500, Anthony Liguori wrote:
> 
> >> +ifeq ($(USE_KVM), 1)
> >> +OBJS+= device-assignment.o
> >> +endif
> >
> > I don't think you want to build this on PPC so I think you need a
> > stronger check.
> 
> Good point. How about checking TARGET_BASE_ARCH = i386?

Turns out this stanza is already enclosed in exactly that.

> >> +#ifdef KVM_CAP_IOMMU
> >> +/* We always enable the IOMMU if present
> >> + * (or when not disabled on the command line)
> >> + */
> >> +r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
> >> +if (r && !disable_iommu)
> >> +assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
> >> +#endif
> >> +r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
> >> +if (r < 0) {
> >> +fprintf(stderr, "Could not notify kernel about "
> >> +"assigned device \"%s\"\n", e_dev_name);
> >> +perror("register_real_device");
> >> +goto out;
> >> +}
> >> +}
> >>   
> >
> > You still succeed if KVM_CAP_DEVICE_ASSIGNMENT isn't defined?
> > That means a newer userspace compiled on an older kernel will
> > silently fail if they try to do device assignment.  There's
> > probably no reason to build this file if KVM_CAP_DEVICE_ASSIGNMENT
> > isn't defined (see how the in-kernel PIT gets conditionally build
> > depending on whether that cap is available).
> 
> Ok, I'll take a look at this.

I reworked it per your suggestion so that device assignment is a kvm
only feature for now. I am pretty sure Amit intended for the patches
to support device assignment without kvm too, but getting rid of it
did make things simpler.

cheers,
Muli
-- 
The First Workshop on I/O Virtualization (WIOV '08)
Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/
   <->
SYSTOR 2009---The Israeli Experimental Systems Conference
http://www.haifa.il.ibm.com/conferences/systor2009/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Han, Weidong
Muli Ben-Yehuda wrote:
> On Tue, Oct 28, 2008 at 10:45:57AM -0500, Anthony Liguori wrote:
> 
>>> +ifeq ($(USE_KVM), 1)
>>> +OBJS+= device-assignment.o
>>> +endif
>> 
>> I don't think you want to build this on PPC so I think you need a
>> stronger check.
> 
> Good point. How about checking TARGET_BASE_ARCH = i386?
> 
>>> +static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
>>> +   uint32_t value)
>>> +{
>>> +AssignedDevRegion *r_access = opaque;
>>> +uint32_t r_pio = guest_to_host_ioport(r_access, addr); +
>>> +DEBUG("%s: r_pio=%08x e_physbase=%08x r_virtbase=%08lx
>>> value=%08x\n", +  r_pio, (int)r_access->e_physbase,
>>> +  (unsigned long)r_access->r_virtbase, value);
>>> 
>> 
>> The format doesn't match the parameter count.
> 
> Yep, already fixed.
> 
>>> +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int
>>> region_num, +uint32_t addr,
>>> uint32_t size, int type) +{
>>> +AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
>>> +AssignedDevRegion *region = &r_dev->v_addrs[region_num];
>>> +uint32_t old_port = region->u.r_baseport;
>>> +uint32_t old_num = region->e_size;
>>> +int first_map = (old_num == 0);
>>> +struct ioperm_data data;
>>> +int i;
>>> +
>>> +region->e_physbase = addr;
>>> +region->e_size = size;
>>> +
>>> +DEBUG("e_phys=0x%x r_baseport=%x type=0x%x len=%d
>>> region_num=%d \n", +  addr, region->u.r_baseport, type,
>>> size, region_num); + +memset(&data, 0, sizeof(data));
>>> +
>>> +if (!first_map) {
>>> +   data.start_port = old_port;
>>> +   data.num = old_num; +   data.turn_on = 0;
>>> +
>>> +   for (i = 0; i < smp_cpus; ++i)
>>> +   kvm_ioperm(qemu_kvm_cpu_env(i), &data);
>>> 
>> 
>> How does this interact with VCPU hot-plug?
> 
> I have no idea. Weidong?

maybe we need to keep an assigned io port range list, when VCPU hot-plug in, do 
ioperm for those io ports on new VCPU. Has VCPU hot-plug been already 
supported? 

Regards,
Weidong

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-29 Thread Zhang, Xiantao
Muli Ben-Yehuda wrote:
> On Tue, Oct 28, 2008 at 10:45:57AM -0500, Anthony Liguori wrote:
> 
>>> +ifeq ($(USE_KVM), 1)
>>> +OBJS+= device-assignment.o
>>> +endif
>> 
>> I don't think you want to build this on PPC so I think you need a
>> stronger check.
> 
> Good point. How about checking TARGET_BASE_ARCH = i386?

It should work for ia64, please include ia64 when you do it in this way. 
Xiantao--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-28 Thread Mark McLoughlin
On Tue, 2008-10-28 at 12:06 +0200, [EMAIL PROTECTED] wrote:
...
> +static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
> +   uint8_t r_dev, uint8_t r_func)
> +{
> +char dir[128], name[128];
> +int fd, r = 0;
> +FILE *f;
> +unsigned long long start, end, size, flags;
> +PCIRegion *rp;
> +PCIDevRegions *dev = &pci_dev->real_device;
> +
> +dev->region_number = 0;
> +
> +snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/:%02x:%02x.%x/",
> +  r_bus, r_dev, r_func);
> +
> +snprintf(name, sizeof(name), "%sconfig", dir);
> +
> +fd = open(name, O_RDWR);
> +if (fd == -1) {
> +fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +return 1;
> +}
> +dev->config_fd = fd;
> +again:
> +r = read(fd, pci_dev->dev.config, sizeof(pci_dev->dev.config));
> +if (r < 0) {
> +if (errno == EINTR || errno == EAGAIN)
> +goto again;
> +fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
> +}
> +
> +snprintf(name, sizeof(name), "%sresource", dir);
> +
> +f = fopen(name, "r");
> +if (f == NULL) {
> +fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +return 1;
> +}
> +r = -1;
> +while (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) == 3) {
> +r++;
> +rp = dev->regions + r;

Could, in theory, overflow dev->regions here. Suggest:

+for (r = 0; r < MAX_IO_REGIONS; r++) {
+if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3)
+break;

> +rp->valid = 0;
> +size = end - start + 1;
> +flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
> +if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
> +continue;
> +if (flags & IORESOURCE_MEM) {
> +flags &= ~IORESOURCE_IO;
> + snprintf(name, sizeof(name), "%sresource%d", dir, r);
> +fd = open(name, O_RDWR);
> +if (fd == -1)
> +continue;   /* probably ROM */
> +rp->resource_fd = fd;
> +} else
> +flags &= ~IORESOURCE_PREFETCH;
> +
> +rp->type = flags;
> +rp->valid = 1;
> +rp->base_addr = start;
> +rp->size = size;
> +DEBUG("region %d size %d start 0x%x type %d resource_fd %d\n",
> +  r, rp->size, start, rp->type, rp->resource_fd);
> +}
> +fclose(f);
> +
> +dev->region_number = r;
> +return 0;
> +}
> +
> +static int disable_iommu;

Why is this global?

The flag is set per-device on the command-line and only affects whether
we pass KVM_DEV_ASSIGN_ENABLE_IOMMU to kvm_assign_pci_device()

> +int nr_assigned_devices;
> +static LIST_HEAD(, AssignedDevInfo) adev_head;
> +
> +static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
> +{
> +return (uint32_t)bus << 8 | (uint32_t)devfn;
> +}
> +
> +static AssignedDevice *register_real_device(PCIBus *e_bus,
> +const char *e_dev_name,
> +int e_devfn, uint8_t r_bus,
> +uint8_t r_dev, uint8_t r_func)
> +{
> +int r;
> +AssignedDevice *pci_dev;
> +uint8_t e_device, e_intx;
> +
> +DEBUG("Registering real physical device %s (devfn=0x%x)\n",
> +  e_dev_name, e_devfn);
> +
> +pci_dev = (AssignedDevice *)
> +pci_register_device(e_bus, e_dev_name, sizeof(AssignedDevice),
> +e_devfn, assigned_dev_pci_read_config,
> +assigned_dev_pci_write_config);
> +if (NULL == pci_dev) {
> +fprintf(stderr, "%s: Error: Couldn't register real device %s\n",
> +__func__, e_dev_name);
> +return NULL;
> +}
> +if (get_real_device(pci_dev, r_bus, r_dev, r_func)) {
> +fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n",
> +__func__, e_dev_name);
> +goto out;
> +}
> +
> +/* handle real device's MMIO/PIO BARs */
> +if (assigned_dev_register_regions(pci_dev->real_device.regions,
> +  pci_dev->real_device.region_number,
> +  pci_dev))
> +goto out;
> +
> +/* handle interrupt routing */
> +e_device = (pci_dev->dev.devfn >> 3) & 0x1f;
> +e_intx = pci_dev->dev.config[0x3d] - 1;
> +pci_dev->intpin = e_intx;
> +pci_dev->run = 0;
> +pci_dev->girq = 0;
> +pci_dev->h_busnr = r_bus;
> +pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func);
> +
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> +if (kvm_enabled()) {
> +struct kvm_assigned_pci_dev assigned_dev_data;
> +
> +memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
> +assigned_dev_data.assigned_dev_id  =
> +calc_assigned_dev_id(pci_dev->h_busnr,
> + (uint32_t)pci_dev->h_devfn);
> +

Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-28 Thread Muli Ben-Yehuda
On Tue, Oct 28, 2008 at 10:45:57AM -0500, Anthony Liguori wrote:

>> +ifeq ($(USE_KVM), 1)
>> +OBJS+= device-assignment.o
>> +endif
>
> I don't think you want to build this on PPC so I think you need a
> stronger check.

Good point. How about checking TARGET_BASE_ARCH = i386?

>> +static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
>> +   uint32_t value)
>> +{
>> +AssignedDevRegion *r_access = opaque;
>> +uint32_t r_pio = guest_to_host_ioport(r_access, addr);
>> +
>> +DEBUG("%s: r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n",
>> +  r_pio, (int)r_access->e_physbase,
>> +  (unsigned long)r_access->r_virtbase, value);
>>   
>
> The format doesn't match the parameter count.

Yep, already fixed.

>> +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
>> +uint32_t addr, uint32_t size, int 
>> type)
>> +{
>> +AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
>> +AssignedDevRegion *region = &r_dev->v_addrs[region_num];
>> +uint32_t old_port = region->u.r_baseport;
>> +uint32_t old_num = region->e_size;
>> +int first_map = (old_num == 0);
>> +struct ioperm_data data;
>> +int i;
>> +
>> +region->e_physbase = addr;
>> +region->e_size = size;
>> +
>> +DEBUG("e_phys=0x%x r_baseport=%x type=0x%x len=%d region_num=%d \n",
>> +  addr, region->u.r_baseport, type, size, region_num);
>> +
>> +memset(&data, 0, sizeof(data));
>> +
>> +if (!first_map) {
>> +data.start_port = old_port;
>> +data.num = old_num; +   data.turn_on = 0;
>> +
>> +for (i = 0; i < smp_cpus; ++i)
>> +kvm_ioperm(qemu_kvm_cpu_env(i), &data);
>>   
>
> How does this interact with VCPU hot-plug?

I have no idea. Weidong?

>> +#ifdef KVM_CAP_IOMMU
>> +/* We always enable the IOMMU if present
>> + * (or when not disabled on the command line)
>> + */
>> +r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
>> +if (r && !disable_iommu)
>> +assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
>> +#endif
>> +r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
>> +if (r < 0) {
>> +fprintf(stderr, "Could not notify kernel about "
>> +"assigned device \"%s\"\n", e_dev_name);
>> +perror("register_real_device");
>> +goto out;
>> +}
>> +}
>>   
>
> You still succeed if KVM_CAP_DEVICE_ASSIGNMENT isn't defined?  That
> means a newer userspace compiled on an older kernel will silently
> fail if they try to do device assignment.  There's probably no
> reason to build this file if KVM_CAP_DEVICE_ASSIGNMENT isn't defined
> (see how the in-kernel PIT gets conditionally build depending on
> whether that cap is available).

Ok, I'll take a look at this.

>> +#endif
>> +term_printf("Registered host PCI device %02x:%02x.%1x "
>> +"(\"%s\") as guest device %02x:%02x.%1x\n",
>> +r_bus, r_dev, r_func, e_dev_name,
>> +pci_bus_num(e_bus), e_device, r_func);
>>
>>   
>
> If I read the code correctly, this term_printf() happens regardless
> of whether this is being done for PCI hotplug or for command-line
> assignment?  That's a problem as it'll print garbage on the monitor
> when you start QEMU which could break management applications.

Is there a more suitable alternative or shall I just nuke it?

>> diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
>> index d559f0c..5fdb726 100644
>> --- a/qemu/hw/pc.c
>> +++ b/qemu/hw/pc.c
>> @@ -33,6 +33,7 @@
>>  #include "boards.h"
>>  #include "console.h"
>>  #include "fw_cfg.h"
>> +#include "device-assignment.h"
>>   #include "qemu-kvm.h"
>>  @@ -1157,6 +1158,21 @@ static void pc_init1(ram_addr_t ram_size, int 
>> vga_ram_size,
>>   if (pci_enabled)
>>  virtio_balloon_init(pci_bus);
>> +
>> +if (kvm_enabled() && device_assignment_enabled) {
>> +int i;
>>   
>
> Stray tab.

Grrr. Silly emacs.

>
>> +for (i = 0; i < assigned_devices_index; i++) {
>> +if (add_assigned_device(assigned_devices[i]) < 0) {
>> +fprintf(stderr, "Warning: could not add assigned device 
>> %s\n",
>> +assigned_devices[i]);
>> +}
>> +}
>> +
>> +if (init_all_assigned_devices(pci_bus)) {
>> +fprintf(stderr, "Failed to initialize assigned devices\n");
>> +exit (1);
>> +}
>> +}
>>  }
>>  +#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
>> +case QEMU_OPTION_pcidevice:
>> +device_assignment_enabled = 1;
>> +if (assigned_devices_index >= MAX_DEV_ASSIGN_CMDLINE) {
>> +fprintf(stderr, "Too many assigned devices\n");
>> +exit(1);
>> +}
>> +assigned_devices[assigned_devices_index] = optarg;
>> +assigned_devices_index++;
>> +break;
>>   
>

Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-28 Thread Muli Ben-Yehuda
On Tue, Oct 28, 2008 at 11:36:10PM +0800, Han, Weidong wrote:
> > diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
> > index c5f3f29..5e66832 100644
> > --- a/qemu/qemu-kvm.c
> > +++ b/qemu/qemu-kvm.c
> > @@ -20,6 +20,7 @@ int kvm_pit = 1;
> >  #include "console.h"
> >  #include "block.h"
> >  #include "compatfd.h"
> > +#include "hw/device-assignment.h"
> 
> It's not necessary.

Indeed, left overs from my ioperm bits. Removed.

> >  #include "qemu-kvm.h"
> >  #include 
> > @@ -27,6 +28,7 @@ int kvm_pit = 1;
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> 
> It's not necessary.

This one is needed on my compile system for the ioperm() declaration.

Cheers,
Muli

-- 
The First Workshop on I/O Virtualization (WIOV '08)
Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/
   <->
SYSTOR 2009---The Israeli Experimental Systems Conference
http://www.haifa.il.ibm.com/conferences/systor2009/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-28 Thread Anthony Liguori

[EMAIL PROTECTED] wrote:

Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]>
---
 qemu/Makefile.target|3 +
 qemu/hw/device-assignment.c |  641 +++
 qemu/hw/device-assignment.h |  117 
 qemu/hw/pc.c|   16 +
 qemu/hw/pci.c   |7 +
 qemu/qemu-kvm.c |   14 +
 qemu/qemu-kvm.h |8 +
 qemu/vl.c   |   28 ++
 8 files changed, 834 insertions(+), 0 deletions(-)
 create mode 100644 qemu/hw/device-assignment.c
 create mode 100644 qemu/hw/device-assignment.h

diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index d9bdeca..5d44e08 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -621,6 +621,9 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
+ifeq ($(USE_KVM), 1)
+OBJS+= device-assignment.o
+endif
  


I don't think you want to build this on PPC so I think you need a 
stronger check.



+static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
+   uint32_t value)
+{
+AssignedDevRegion *r_access = opaque;
+uint32_t r_pio = guest_to_host_ioport(r_access, addr);
+
+DEBUG("%s: r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n",
+ r_pio, (int)r_access->e_physbase,
+  (unsigned long)r_access->r_virtbase, value);
  


The format doesn't match the parameter count.


+static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
+   uint32_t e_phys, uint32_t e_size, int type)
+{
+AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
+AssignedDevRegion *region = &r_dev->v_addrs[region_num];
+uint32_t old_ephys = region->e_physbase;
+uint32_t old_esize = region->e_size;
+int first_map = (region->e_size == 0);
+int ret = 0;
+
+DEBUG("e_phys=%08x r_virt=%x type=%d len=%08x region_num=%d \n",
+  e_phys, (uint32_t)region->r_virtbase, type, e_size, region_num);
+
+region->e_physbase = e_phys;
+region->e_size = e_size;
+
+if (!first_map)
+   kvm_destroy_phys_mem(kvm_context, old_ephys, old_esize);
+
+if (e_size > 0)
+   ret = kvm_register_phys_mem(kvm_context, e_phys,
+region->u.r_virtbase, e_size, 0);
+if (ret != 0) {
+   fprintf(stderr, "%s: Error: create new mapping failed\n", __func__);
+   exit(1);
+}
+}
+
+static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
+uint32_t addr, uint32_t size, int type)
+{
+AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
+AssignedDevRegion *region = &r_dev->v_addrs[region_num];
+uint32_t old_port = region->u.r_baseport;
+uint32_t old_num = region->e_size;
+int first_map = (old_num == 0);
+struct ioperm_data data;
+int i;
+
+region->e_physbase = addr;
+region->e_size = size;
+
+DEBUG("e_phys=0x%x r_baseport=%x type=0x%x len=%d region_num=%d \n",
+  addr, region->u.r_baseport, type, size, region_num);
+
+memset(&data, 0, sizeof(data));
+
+if (!first_map) {
+   data.start_port = old_port;
+	data.num = old_num; 
+	data.turn_on = 0;

+
+   for (i = 0; i < smp_cpus; ++i)
+   kvm_ioperm(qemu_kvm_cpu_env(i), &data);
  


How does this interact with VCPU hot-plug?


+}
+
+data.start_port = region->u.r_baseport;
+data.num = size;
+data.turn_on = 1;
+ 
+for (i = 0; i < smp_cpus; ++i)

+   kvm_ioperm(qemu_kvm_cpu_env(i), &data);
+ 
+register_ioport_read(addr, size, 1, assigned_dev_ioport_readb,

+ (r_dev->v_addrs + region_num));
+register_ioport_read(addr, size, 2, assigned_dev_ioport_readw,
+ (r_dev->v_addrs + region_num));
+register_ioport_read(addr, size, 4, assigned_dev_ioport_readl,
+ (r_dev->v_addrs + region_num));
+register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb,
+  (r_dev->v_addrs + region_num));
+register_ioport_write(addr, size, 2, assigned_dev_ioport_writew,
+  (r_dev->v_addrs + region_num));
+register_ioport_write(addr, size, 4, assigned_dev_ioport_writel,
+  (r_dev->v_addrs + region_num));
+}
+
+static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
+  uint32_t val, int len)
+{
+int fd;
+ssize_t ret;
+
+DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+  ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+  (uint16_t) address, val, len);
+
+if (address == 0x4) {
+pci_default_write_config(d, address, val, len);
+/* Continue to program the card */
+}
+
+if ((addre

Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-28 Thread Muli Ben-Yehuda
On Tue, Oct 28, 2008 at 10:10:07PM +0800, Han, Weidong wrote:

> > +DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n",
> > + r_pio, (int)r_access->e_physbase,
> > + (unsigned long)r_access->r_virtbase, value);
> 
> should be (unsigned long)r_access->u.r_virtbase

Thanks, actually it should be u.r_baseport for IO ports and there were
a number of other bogosities there too. Here's a quick incremental
patch compiled with DEBUG() enabled.

>From 9b917528647b55a1046a5a19d9e2427bb2d86db7 Mon Sep 17 00:00:00 2001
From: Muli Ben-Yehuda <[EMAIL PROTECTED]>
Date: Tue, 28 Oct 2008 17:30:30 +0200
Subject: [PATCH 1/1] fix DEBUG statements

(thanks to Weidong Han for spotting)

Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]>
---
 qemu/hw/device-assignment.c |   32 
 1 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index 89b05f9..8b56599 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -63,9 +63,9 @@ static void assigned_dev_ioport_writeb(void *opaque, uint32_t 
addr,
 AssignedDevRegion *r_access = opaque;
 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
 
-DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n",
+DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
  r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->r_virtbase, value);
+ (unsigned long)r_access->u.r_baseport, value);
 
 outb(value, r_pio);
 }
@@ -76,9 +76,9 @@ static void assigned_dev_ioport_writew(void *opaque, uint32_t 
addr,
 AssignedDevRegion *r_access = opaque;
 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
 
-DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n",
-  __func__, r_pio, (int)r_access->e_physbase,
-  (unsigned long)r_access->r_virtbase, value);
+DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+  r_pio, (int)r_access->e_physbase,
+ (unsigned long)r_access->u.r_baseport, value);
 
 outw(value, r_pio);
 }
@@ -89,9 +89,9 @@ static void assigned_dev_ioport_writel(void *opaque, uint32_t 
addr,
 AssignedDevRegion *r_access = opaque;
 uint32_t r_pio = guest_to_host_ioport(r_access, addr);
 
-DEBUG("%s: r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n",
+DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
  r_pio, (int)r_access->e_physbase,
-  (unsigned long)r_access->r_virtbase, value);
+  (unsigned long)r_access->u.r_baseport, value);
 
 outl(value, r_pio);
 }
@@ -104,9 +104,9 @@ static uint32_t assigned_dev_ioport_readb(void *opaque, 
uint32_t addr)
 
 value = inb(r_pio);
 
-DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n",
+DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
   r_pio, (int)r_access->e_physbase,
-  (unsigned long)r_access->r_virtbase, value);
+  (unsigned long)r_access->u.r_baseport, value);
 
 return value;
 }
@@ -119,9 +119,9 @@ static uint32_t assigned_dev_ioport_readw(void *opaque, 
uint32_t addr)
 
 value = inw(r_pio);
 
-DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n",
+DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
   r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->r_virtbase, value);
+ (unsigned long)r_access->u.r_baseport, value);
 
 return value;
 }
@@ -134,9 +134,9 @@ static uint32_t assigned_dev_ioport_readl(void *opaque, 
uint32_t addr)
 
 value = inl(r_pio);
 
-DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n",
+DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
   r_pio, (int)r_access->e_physbase,
-  (unsigned long)r_access->r_virtbase, value);
+  (unsigned long)r_access->u.r_baseport, value);
 
 return value;
 }
@@ -151,8 +151,8 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int 
region_num,
 int first_map = (region->e_size == 0);
 int ret = 0;
 
-DEBUG("e_phys=%08x r_virt=%x type=%d len=%08x region_num=%d \n",
-  e_phys, (uint32_t)region->r_virtbase, type, e_size, region_num);
+DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
+  e_phys, region->u.r_virtbase, type, e_size, region_num);
 
 region->e_physbase = e_phys;
 region->e_size = e_size;
@@ -425,7 +425,7 @@ again:
 rp->valid = 1;
 rp->base_addr = start;
 rp->size = size;
-DEBUG("region %d size %d start 0x%x type %d resource_fd %d\n",
+DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
   r, rp->size, start, rp->type, rp->resource_fd);
 }
 fclose(f);
-- 
1.5.6.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http:/

RE: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-28 Thread Han, Weidong
[EMAIL PROTECTED] wrote:
> From: Muli Ben-Yehuda <[EMAIL PROTECTED]>
> 
> This patch has been contributed to by the following people:
> 
> Or Sagi <[EMAIL PROTECTED]>
> Nir Peleg <[EMAIL PROTECTED]>
> Amit Shah <[EMAIL PROTECTED]>
> Ben-Ami Yassour <[EMAIL PROTECTED]>
> Weidong Han <[EMAIL PROTECTED]>
> Glauber de Oliveira Costa <[EMAIL PROTECTED]>
> Muli Ben-Yehuda <[EMAIL PROTECTED]>
> 
> With this patch, we can assign a device on the host machine to a
> guest.
> 
> A new command-line option, -pcidevice is added.
> To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use this:
> 
> -pcidevice host=04:08.0
> 
> * The host driver for the device, if any, is to be removed before
> assigning the device (else device assignment will fail).
> 
> * A device that shares IRQ with another host device cannot currently
> be assigned.
> 
> * The RAW_IO capability is needed for this to work
> 
> This works only with the in-kernel irqchip method; to use the
> userspace irqchip, a kernel module (irqhook) and some extra changes
> are needed.
> 
> [muli: lots of small fixes from Muli and Weidong Han addressing all v7
> review comments]
> 
> Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]>
> ---
>  qemu/Makefile.target|3 +
>  qemu/hw/device-assignment.c |  641
>  +++
>  qemu/hw/device-assignment.h |  117  qemu/hw/pc.c
>  |   16 + qemu/hw/pci.c   |7 +
>  qemu/qemu-kvm.c |   14 +
>  qemu/qemu-kvm.h |8 +
>  qemu/vl.c   |   28 ++
>  8 files changed, 834 insertions(+), 0 deletions(-)
>  create mode 100644 qemu/hw/device-assignment.c
>  create mode 100644 qemu/hw/device-assignment.h
> 
> diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
> index c5f3f29..5e66832 100644
> --- a/qemu/qemu-kvm.c
> +++ b/qemu/qemu-kvm.c
> @@ -20,6 +20,7 @@ int kvm_pit = 1;
>  #include "console.h"
>  #include "block.h"
>  #include "compatfd.h"
> +#include "hw/device-assignment.h"

It's not necessary.

> 
>  #include "qemu-kvm.h"
>  #include 
> @@ -27,6 +28,7 @@ int kvm_pit = 1;
>  #include 
>  #include 
>  #include 
> +#include 

It's not necessary.

Regards,
Weidong

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-28 Thread Han, Weidong
Han, Weidong wrote:
> [EMAIL PROTECTED] wrote:
>> From: Muli Ben-Yehuda <[EMAIL PROTECTED]>
>> 
>> This patch has been contributed to by the following people:
>> 
>> Or Sagi <[EMAIL PROTECTED]>
>> Nir Peleg <[EMAIL PROTECTED]>
>> Amit Shah <[EMAIL PROTECTED]>
>> Ben-Ami Yassour <[EMAIL PROTECTED]>
>> Weidong Han <[EMAIL PROTECTED]>
>> Glauber de Oliveira Costa <[EMAIL PROTECTED]>
>> Muli Ben-Yehuda <[EMAIL PROTECTED]>
>> 
>> With this patch, we can assign a device on the host machine to a
>> guest. 
>> 
>> A new command-line option, -pcidevice is added.
>> To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use
>> this: 
>> 
>> -pcidevice host=04:08.0
>> 
>> * The host driver for the device, if any, is to be removed before
>> assigning the device (else device assignment will fail).
>> 
>> * A device that shares IRQ with another host device cannot currently
>> be assigned. 
>> 
>> * The RAW_IO capability is needed for this to work
>> 
>> This works only with the in-kernel irqchip method; to use the
>> userspace irqchip, a kernel module (irqhook) and some extra changes
>> are needed. 
>> 
>> [muli: lots of small fixes from Muli and Weidong Han addressing all
>> v7 review comments] 
>> 
>> Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
>> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]>
>> ---
>>  qemu/Makefile.target|3 +
>>  qemu/hw/device-assignment.c |  641
>>  +++
>>  qemu/hw/device-assignment.h |  117  qemu/hw/pc.c
>>  |   16 + qemu/hw/pci.c   |7 +
>>  qemu/qemu-kvm.c |   14 +
>>  qemu/qemu-kvm.h |8 +
>>  qemu/vl.c   |   28 ++
>>  8 files changed, 834 insertions(+), 0 deletions(-)
>>  create mode 100644 qemu/hw/device-assignment.c
>>  create mode 100644 qemu/hw/device-assignment.h
>> 
>> diff --git a/qemu/Makefile.target b/qemu/Makefile.target
>> index d9bdeca..5d44e08 100644
>> --- a/qemu/Makefile.target
>> +++ b/qemu/Makefile.target
>> @@ -621,6 +621,9 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW)
>>  dma.o OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o
>>  pc.o OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
>>  OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o +ifeq
>> ($(USE_KVM), 1) +OBJS+= device-assignment.o
>> +endif
>>  ifeq ($(USE_KVM_PIT), 1)
>>  OBJS+= i8254-kvm.o
>>  endif
>> diff --git a/qemu/hw/device-assignment.c
>> b/qemu/hw/device-assignment.c new file mode 100644 index
>> 000..89b05f9 --- /dev/null
>> +++ b/qemu/hw/device-assignment.c
>> @@ -0,0 +1,641 @@
>> +/*
>> + * Copyright (c) 2007, Neocleus Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> modify it + * under the terms and conditions of the GNU General
>> Public License, + * version 2, as published by the Free Software
>> Foundation. + * + * This program is distributed in the hope it will
>> be useful, but WITHOUT + * ANY WARRANTY; without even the implied
>> warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.
>> See the GNU General Public License for + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> along with + * this program; if not, write to the Free Software
>> Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA
>> 02111-1307 USA. + * + *
>> + *  Assign a PCI device from the host to a guest VM. + *
>> + *  Adapted for KVM by Qumranet.
>> + *
>> + *  Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
>> + *  Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
>> + *  Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
>> + *  Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
>> + *  Copyright (C) 2008, IBM, Muli Ben-Yehuda ([EMAIL PROTECTED]) + */
>> +#include 
>> +#include 
>> +#include "qemu-kvm.h"
>> +#include "hw.h"
>> +#include "pc.h"
>> +#include "sysemu.h"
>> +#include "console.h"
>> +#include "device-assignment.h"
>> +
>> +/* From linux/ioport.h */
>> +#define IORESOURCE_IO   0x0100  /* Resource type */
>> +#define IORESOURCE_MEM  0x0200
>> +#define IORESOURCE_IRQ  0x0400
>> +#define IORESOURCE_DMA  0x0800
>> +#define IORESOURCE_PREFETCH 0x1000  /* No side effects */ +
>> +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */
>> +
>> +#ifdef DEVICE_ASSIGNMENT_DEBUG
>> +#define DEBUG(fmt, ...)   \
>> +do {  \
>> +  fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);\ +  
>> } while (0) +#else
>> +#define DEBUG(fmt, ...) do { } while(0)
>> +#endif
>> +
>> +static uint32_t guest_to_host_ioport(AssignedDevRegion *region,
>> uint32_t addr) +{ +return region->u.r_baseport + (addr -
>> region->e_physbase); +} +
>> +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
>> +   uint32_t value) +{
>> +AssignedDevRegion *r_acces

RE: [PATCH 5/6] device assignment: support for assigning PCI devices to guests

2008-10-28 Thread Han, Weidong
[EMAIL PROTECTED] wrote:
> From: Muli Ben-Yehuda <[EMAIL PROTECTED]>
> 
> This patch has been contributed to by the following people:
> 
> Or Sagi <[EMAIL PROTECTED]>
> Nir Peleg <[EMAIL PROTECTED]>
> Amit Shah <[EMAIL PROTECTED]>
> Ben-Ami Yassour <[EMAIL PROTECTED]>
> Weidong Han <[EMAIL PROTECTED]>
> Glauber de Oliveira Costa <[EMAIL PROTECTED]>
> Muli Ben-Yehuda <[EMAIL PROTECTED]>
> 
> With this patch, we can assign a device on the host machine to a
> guest.
> 
> A new command-line option, -pcidevice is added.
> To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use this:
> 
> -pcidevice host=04:08.0
> 
> * The host driver for the device, if any, is to be removed before
> assigning the device (else device assignment will fail).
> 
> * A device that shares IRQ with another host device cannot currently
> be assigned.
> 
> * The RAW_IO capability is needed for this to work
> 
> This works only with the in-kernel irqchip method; to use the
> userspace irqchip, a kernel module (irqhook) and some extra changes
> are needed.
> 
> [muli: lots of small fixes from Muli and Weidong Han addressing all v7
> review comments]
> 
> Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]>
> ---
>  qemu/Makefile.target|3 +
>  qemu/hw/device-assignment.c |  641
>  +++
>  qemu/hw/device-assignment.h |  117  qemu/hw/pc.c
>  |   16 + qemu/hw/pci.c   |7 +
>  qemu/qemu-kvm.c |   14 +
>  qemu/qemu-kvm.h |8 +
>  qemu/vl.c   |   28 ++
>  8 files changed, 834 insertions(+), 0 deletions(-)
>  create mode 100644 qemu/hw/device-assignment.c
>  create mode 100644 qemu/hw/device-assignment.h
> 
> diff --git a/qemu/Makefile.target b/qemu/Makefile.target
> index d9bdeca..5d44e08 100644
> --- a/qemu/Makefile.target
> +++ b/qemu/Makefile.target
> @@ -621,6 +621,9 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
>  OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
>  OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
>  OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
> +ifeq ($(USE_KVM), 1)
> +OBJS+= device-assignment.o
> +endif
>  ifeq ($(USE_KVM_PIT), 1)
>  OBJS+= i8254-kvm.o
>  endif
> diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
> new file mode 100644
> index 000..89b05f9
> --- /dev/null
> +++ b/qemu/hw/device-assignment.c
> @@ -0,0 +1,641 @@
> +/*
> + * Copyright (c) 2007, Neocleus Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> modify it + * under the terms and conditions of the GNU General
> Public License, + * version 2, as published by the Free Software
> Foundation. + *
> + * This program is distributed in the hope it will be useful, but
> WITHOUT + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> General Public License for + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with + * this program; if not, write to the Free Software
> Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA
> 02111-1307 USA. + *
> + *
> + *  Assign a PCI device from the host to a guest VM.
> + *
> + *  Adapted for KVM by Qumranet.
> + *
> + *  Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
> + *  Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
> + *  Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
> + *  Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
> + *  Copyright (C) 2008, IBM, Muli Ben-Yehuda ([EMAIL PROTECTED])
> + */
> +#include 
> +#include 
> +#include "qemu-kvm.h"
> +#include "hw.h"
> +#include "pc.h"
> +#include "sysemu.h"
> +#include "console.h"
> +#include "device-assignment.h"
> +
> +/* From linux/ioport.h */
> +#define IORESOURCE_IO   0x0100  /* Resource type */
> +#define IORESOURCE_MEM  0x0200
> +#define IORESOURCE_IRQ  0x0400
> +#define IORESOURCE_DMA  0x0800
> +#define IORESOURCE_PREFETCH 0x1000  /* No side effects */
> +
> +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */
> +
> +#ifdef DEVICE_ASSIGNMENT_DEBUG
> +#define DEBUG(fmt, ...)   \
> +do {  \
> +  fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);\
> +} while (0)
> +#else
> +#define DEBUG(fmt, ...) do { } while(0)
> +#endif
> +
> +static uint32_t guest_to_host_ioport(AssignedDevRegion *region,
> uint32_t addr) +{
> +return region->u.r_baseport + (addr - region->e_physbase);
> +}
> +
> +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
> +   uint32_t value)
> +{
> +AssignedDevRegion *r_access = opaque;
> +uint32_t r_pio = guest_to_host_ioport(r_access, addr);
> +
> +DEBUG("r_pio=%08x e_physbase=%0