RE: [PATCH 1/2] KVM/userspace: Support for assigning PCI devices to guest

2008-07-22 Thread Ben-Ami Yassour
On Thu, 2008-07-17 at 15:52 +0800, Han, Weidong wrote:
 Ben-Ami Yassour wrote:
  From: Or Sagi [EMAIL PROTECTED]
  
  From: Nir Peleg [EMAIL PROTECTED]
  From: Amit Shah [EMAIL PROTECTED]
  From: Glauber de Oliveira Costa [EMAIL PROTECTED]
  
  We can assign a device from the host machine to a guest. The
  original code comes from Neocleus.
  
  A new command-line option, -pcidevice is added.
  For example, to invoke it for an Ethernet device sitting at
  PCI bus:dev.fn 04:08.0 with host IRQ 18, use this:
  
  -pcidevice Ethernet/04:08.0
  
  The host ethernet driver is to be removed before doing the
  passthrough. If not, the device assignment fails but the
  guest continues without the assignment.
  
  If kvm uses the in-kernel irqchip, interrupts are routed to the
  guest via the kvm module (accompanied kernel changes are necessary).
  
  If -no-kvm-irqchip is used, the 'irqhook' module, available
  separately, is to be used for interrupt injection into the guest. In
  this case, an extra parameter, -intr-number is to be appended to
  the above-mentioned pcidevice parameter.
  
  Signed-off-by: Amit Shah [EMAIL PROTECTED]
  ---
  +static pt_dev_t *register_real_device(PCIBus *e_bus, const char
  *e_dev_name, +int e_devfn,
 uint8_t r_bus, uint8_t r_dev,
  + uint8_t r_func)
  +{
  +   int rc;
  +   pt_dev_t *pci_dev;
  +   uint8_t e_device, e_intx;
  +
  +   DEBUG(register_real_device: Registering real physical 
  + device %s (devfn=0x%x)\n, e_dev_name, e_devfn);
  +
  +   pci_dev = (pt_dev_t *) pci_register_device(e_bus, e_dev_name,
  +  sizeof(pt_dev_t),
 e_devfn,
  +  pt_pci_read_config,
  +  pt_pci_write_config);
  +
  +   if (NULL == pci_dev) {
  +   fprintf(stderr, register_real_device: Error: Couldn't 
  +   register real device %s\n, e_dev_name);
  +   return NULL;
  +   }
  +   if (pt_get_real_device(pci_dev, r_bus, r_dev, r_func)) {
  +   fprintf(stderr, register_real_device: Error: Couldn't
 get 
  +   real device (%s)!\n, e_dev_name);
  +   return NULL;
  +   }
  +
  +   /* handle real device's MMIO/PIO BARs */
  +   if (pt_register_regions(pci_dev-real_device.regions,
  +   pci_dev-real_device.region_number,
 pci_dev))
  +   return NULL;
  +
  +   /* handle interrupt routing */
  +   e_device = (pci_dev-dev.devfn  3)  0x1f;
  +   e_intx = pci_dev-dev.config[0x3d] - 1;
  +   pci_dev-intpin = e_intx;
  +   pci_dev-run = 0;
  +   pci_dev-girq = 0;
  +   pci_dev-h_busnr = r_bus;
  +   pci_dev-h_devfn = PCI_DEVFN(r_dev, r_func);
  +
  +#ifdef KVM_CAP_PCI_PASSTHROUGH
  +   if (kvm_enabled()) {
  +   struct kvm_pci_passthrough_dev pci_pt_dev;
  +
  +   memset(pci_pt_dev, 0, sizeof(pci_pt_dev));
  +   pci_pt_dev.guest.busnr = pci_bus_num(e_bus);
  +   pci_pt_dev.guest.devfn = PCI_DEVFN(e_device, r_func);
 
 Why combine e_device and r_func? The guest devfn is e_devfn.
The current assumption is that e_func and r_func are identical.

 
  +   pci_pt_dev.host.busnr  = pci_dev-h_busnr;
  +   pci_pt_dev.host.devfn  = pci_dev-h_devfn;
  +
  +   /* We'll set the value of the guest irq as and when
  +* the piix config gets updated. See pci_pt_update_irq.
  +* The host irq field never gets used anyway
  +*/
  +
  +   rc = kvm_update_pci_pt_device(kvm_context, pci_pt_dev);
  +   if (rc  0) {
  +   fprintf(stderr, Could not notify kernel about 
  +   passthrough device\n);
  +   perror(pt-ioctl);
  +   return NULL;
  +   }
  +   }
  +#endif
  +
  +   fprintf(logfile, Registered host PCI device %02x:%02x.%1x 
  +   as guest device %02x:%02x.%1x\n,
  +   r_bus, r_dev, r_func,
  +   pci_bus_num(e_bus), e_device, r_func);
 
 The second r_func should be replaced by e_devfn & 0x7.
Same as above.

 
 Randy (Weidong)
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] KVM/userspace: Support for assigning PCI devices to guest

2008-07-17 Thread Han, Weidong
Ben-Ami Yassour wrote:
 From: Or Sagi [EMAIL PROTECTED]
 
 From: Nir Peleg [EMAIL PROTECTED]
 From: Amit Shah [EMAIL PROTECTED]
 From: Glauber de Oliveira Costa [EMAIL PROTECTED]
 
 We can assign a device from the host machine to a guest. The
 original code comes from Neocleus.
 
 A new command-line option, -pcidevice is added.
 For example, to invoke it for an Ethernet device sitting at
 PCI bus:dev.fn 04:08.0 with host IRQ 18, use this:
 
 -pcidevice Ethernet/04:08.0
 
 The host ethernet driver is to be removed before doing the
 passthrough. If not, the device assignment fails but the
 guest continues without the assignment.
 
 If kvm uses the in-kernel irqchip, interrupts are routed to the
 guest via the kvm module (accompanied kernel changes are necessary).
 
 If -no-kvm-irqchip is used, the 'irqhook' module, available
 separately, is to be used for interrupt injection into the guest. In
 this case, an extra parameter, -intr-number is to be appended to
 the above-mentioned pcidevice parameter.
 
 Signed-off-by: Amit Shah [EMAIL PROTECTED]
 ---
 +static pt_dev_t *register_real_device(PCIBus *e_bus, const char
 *e_dev_name, +  int e_devfn,
uint8_t r_bus, uint8_t r_dev,
 +   uint8_t r_func)
 +{
 + int rc;
 + pt_dev_t *pci_dev;
 + uint8_t e_device, e_intx;
 +
 + DEBUG(register_real_device: Registering real physical 
 +   device %s (devfn=0x%x)\n, e_dev_name, e_devfn);
 +
 + pci_dev = (pt_dev_t *) pci_register_device(e_bus, e_dev_name,
 +sizeof(pt_dev_t),
e_devfn,
 +pt_pci_read_config,
 +pt_pci_write_config);
 +
 + if (NULL == pci_dev) {
 + fprintf(stderr, register_real_device: Error: Couldn't 
 + register real device %s\n, e_dev_name);
 + return NULL;
 + }
 + if (pt_get_real_device(pci_dev, r_bus, r_dev, r_func)) {
 + fprintf(stderr, register_real_device: Error: Couldn't
get 
 + real device (%s)!\n, e_dev_name);
 + return NULL;
 + }
 +
 + /* handle real device's MMIO/PIO BARs */
 + if (pt_register_regions(pci_dev-real_device.regions,
 + pci_dev-real_device.region_number,
pci_dev))
 + return NULL;
 +
 + /* handle interrupt routing */
 + e_device = (pci_dev-dev.devfn  3)  0x1f;
 + e_intx = pci_dev-dev.config[0x3d] - 1;
 + pci_dev-intpin = e_intx;
 + pci_dev-run = 0;
 + pci_dev-girq = 0;
 + pci_dev-h_busnr = r_bus;
 + pci_dev-h_devfn = PCI_DEVFN(r_dev, r_func);
 +
 +#ifdef KVM_CAP_PCI_PASSTHROUGH
 + if (kvm_enabled()) {
 + struct kvm_pci_passthrough_dev pci_pt_dev;
 +
 + memset(pci_pt_dev, 0, sizeof(pci_pt_dev));
 + pci_pt_dev.guest.busnr = pci_bus_num(e_bus);
 + pci_pt_dev.guest.devfn = PCI_DEVFN(e_device, r_func);

Why combine e_device and r_func? The guest devfn is e_devfn.

 + pci_pt_dev.host.busnr  = pci_dev-h_busnr;
 + pci_pt_dev.host.devfn  = pci_dev-h_devfn;
 +
 + /* We'll set the value of the guest irq as and when
 +  * the piix config gets updated. See pci_pt_update_irq.
 +  * The host irq field never gets used anyway
 +  */
 +
 + rc = kvm_update_pci_pt_device(kvm_context, pci_pt_dev);
 + if (rc  0) {
 + fprintf(stderr, Could not notify kernel about 
 + passthrough device\n);
 + perror(pt-ioctl);
 + return NULL;
 + }
 + }
 +#endif
 +
 + fprintf(logfile, Registered host PCI device %02x:%02x.%1x 
 + as guest device %02x:%02x.%1x\n,
 + r_bus, r_dev, r_func,
 + pci_bus_num(e_bus), e_device, r_func);

The second r_func should be replaced by e_devfn & 0x7.

Randy (Weidong)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] KVM/userspace: Support for assigning PCI devices to guest

2008-06-27 Thread Amit Shah
From: Or Sagi [EMAIL PROTECTED]
From: Nir Peleg [EMAIL PROTECTED]
From: Amit Shah [EMAIL PROTECTED]
From: Glauber de Oliveira Costa [EMAIL PROTECTED]

We can assign a device from the host machine to a guest. The
original code comes from Neocleus.

A new command-line option, -pcidevice is added.
For example, to invoke it for an Ethernet device sitting at
PCI bus:dev.fn 04:08.0 with host IRQ 18, use this:

-pcidevice Ethernet/04:08.0

The host ethernet driver is to be removed before doing the
passthrough. If not, the device assignment fails but the
guest continues without the assignment.

If kvm uses the in-kernel irqchip, interrupts are routed to the
guest via the kvm module (accompanying kernel changes are necessary).

If -no-kvm-irqchip is used, the 'irqhook' module, available
separately, is to be used for interrupt injection into the guest. In
this case, an extra parameter, -intr-number is to be appended to
the above-mentioned pcidevice parameter.

Signed-off-by: Amit Shah [EMAIL PROTECTED]
---
 libkvm/libkvm-x86.c   |9 +-
 libkvm/libkvm.h   |   16 ++
 qemu/Makefile.target  |1 +
 qemu/hw/isa.h |2 +
 qemu/hw/pc.c  |9 +
 qemu/hw/pci-passthrough.c |  594 +
 qemu/hw/pci-passthrough.h |   93 +++
 qemu/hw/pci.c |   12 +
 qemu/hw/pci.h |1 +
 qemu/hw/piix_pci.c|   19 ++
 qemu/vl.c |   17 ++
 11 files changed, 772 insertions(+), 1 deletions(-)
 create mode 100644 qemu/hw/pci-passthrough.c
 create mode 100644 qemu/hw/pci-passthrough.h

diff --git a/libkvm/libkvm-x86.c b/libkvm/libkvm-x86.c
index ea97bdd..0c4cdbe 100644
--- a/libkvm/libkvm-x86.c
+++ b/libkvm/libkvm-x86.c
@@ -126,6 +126,14 @@ static int kvm_init_tss(kvm_context_t kvm)
return 0;
 }
 
+#ifdef KVM_CAP_PCI_PASSTHROUGH
+int kvm_update_pci_pt_device(kvm_context_t kvm,
+struct kvm_pci_passthrough_dev *pci_pt_dev)
+{
+   return ioctl(kvm-vm_fd, KVM_UPDATE_PCI_PT_DEV, pci_pt_dev);
+}
+#endif
+
 int kvm_arch_create_default_phys_mem(kvm_context_t kvm,
   unsigned long phys_mem_bytes,
   void **vm_mem)
@@ -435,7 +443,6 @@ void kvm_show_code(kvm_context_t kvm, int vcpu)
fprintf(stderr, code:%s\n, code_str);
 }
 
-
 /*
  * Returns available msr list.  User must free.
  */
diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
index ad6e26a..ccb086f 100644
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -12,6 +12,7 @@
 #endif
 
 #include linux/kvm.h
+#include linux/kvm_para.h
 
 #include signal.h
 
@@ -639,4 +640,19 @@ int kvm_enable_vapic(kvm_context_t kvm, int vcpu, uint64_t 
vapic);
 
 #endif
 
+#ifdef KVM_CAP_PCI_PASSTHROUGH
+/*!
+ * \brief Notifies host kernel about changes to a PCI device assigned to guest
+ *
+ * Used for PCI device assignment, this function notifies the host
+ * kernel about the assigning of the physical PCI device and the guest
+ * PCI parameters or updates to the PCI config space from the guest
+ * (mainly the device irq)
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param pci_pt_dev Parameters like irq, PCI bus, devfn number, etc
+ */
+int kvm_update_pci_pt_device(kvm_context_t kvm,
+struct kvm_pci_passthrough_dev *pci_pt_dev);
+#endif
 #endif
diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index 77b2301..432011f 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -602,6 +602,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
+OBJS+= pci-passthrough.o
 ifeq ($(USE_KVM_PIT), 1)
 OBJS+= i8254-kvm.o
 endif
diff --git a/qemu/hw/isa.h b/qemu/hw/isa.h
index 89b3004..c720f5e 100644
--- a/qemu/hw/isa.h
+++ b/qemu/hw/isa.h
@@ -1,5 +1,7 @@
 /* ISA bus */
 
+#include hw.h
+
 extern target_phys_addr_t isa_mem_base;
 
 int register_ioport_read(int start, int length, int size,
diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
index 6334c76..0b0606a 100644
--- a/qemu/hw/pc.c
+++ b/qemu/hw/pc.c
@@ -32,6 +32,7 @@
 #include smbus.h
 #include boards.h
 #include console.h
+#include pci-passthrough.h
 
 #include qemu-kvm.h
 
@@ -995,6 +996,14 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
 }
 }
 
+/* Initialize pass-through */
+if (pci_enabled) {
+int r = -1;
+do {
+pt_init_device(pci_bus, &r);
+   } while (r >= 0);
+}
+
 rtc_state = rtc_init(0x70, i8259[8]);
 
 qemu_register_boot_set(pc_boot_set, rtc_state);
diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c
new file mode 100644
index 000..250d7ef
--- /dev/null
+++ b/qemu/hw/pci-passthrough.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2007, Neocleus Corporation.
+ *
+ * This program is free software; you