Re: kvm-userspace broken?

2009-05-04 Thread Avi Kivity

Hans de Bruin wrote:

Avi Kivity wrote:

Oliver Rath wrote:

Hi List,

maybe i missed some announcements, but

git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git


gives no response:

kvm-userspace # git clone 
git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git
Initialized empty Git repository in 
/home/oliver/kvm-userspace/kvm-userspace/.git/

fatal: The remote end hung up unexpectedly


What's up there?
  


kvm-userspace.git has been retired; it's now playing golf in 
git://git.kernel.org/pub/scm/virt/kvm/retired/kvm-userspace.git.  Use 
git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git instead.


The latest tarball on sourceforge is kvm-85, yet the clone I just made 
from qemu-kvm 'git describes' itself as kvm-84-756-gf7d114d. Is that 
right?




Try 'git fetch --tags origin'

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH -v3] KVM: MCE: Add MCE support to KVM

2009-05-04 Thread Huang Ying
The related MSRs are emulated. MCE capability is exported via
extension KVM_CAP_MCE and ioctl KVM_X86_GET_MCE_CAP_SUPPORTED.  A new
vcpu ioctl command KVM_X86_SETUP_MCE is used to setup MCE emulation
such as the mcg_cap. MCE is injected via vcpu ioctl command
KVM_X86_SET_MCE. Extended machine-check state (MCG_EXT_P) and CMCI are
not implemented.


ChangeLog:

v3:

- Fix KVM_CAP_MCE declaration

v2:

- Add MCE capability exportation support.
- Allocate MCE banks registers simulation backing memory during VCPU
  initialization.


Signed-off-by: Huang Ying ying.hu...@intel.com

---
 arch/x86/include/asm/kvm.h  |1 
 arch/x86/include/asm/kvm_host.h |5 
 arch/x86/kvm/x86.c  |  220 +++-
 include/linux/kvm.h |   21 +++
 4 files changed, 223 insertions(+), 24 deletions(-)

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
 #include <asm/msr.h>
 #include <asm/desc.h>
 #include <asm/mtrr.h>
+#include <asm/mce.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS  \
@@ -55,6 +56,10 @@
  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+
+#define KVM_MAX_MCE_BANKS 32
+#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
+
 /* EFER defaults:
  * - enable syscall per default because its emulated by KVM
  * - enable LME and LMA per default on 64 bit KVM
@@ -738,23 +743,43 @@ static int set_msr_mtrr(struct kvm_vcpu 
return 0;
 }
 
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
+   u64 mcg_cap = vcpu-arch.mcg_cap;
+   unsigned bank_num = mcg_cap  0xff;
+
switch (msr) {
-   case MSR_EFER:
-   set_efer(vcpu, data);
-   break;
-   case MSR_IA32_MC0_STATUS:
-   pr_unimpl(vcpu, %s: MSR_IA32_MC0_STATUS 0x%llx, nop\n,
-  __func__, data);
-   break;
case MSR_IA32_MCG_STATUS:
-   pr_unimpl(vcpu, %s: MSR_IA32_MCG_STATUS 0x%llx, nop\n,
-   __func__, data);
+   vcpu-arch.mcg_status = data;
break;
case MSR_IA32_MCG_CTL:
-   pr_unimpl(vcpu, %s: MSR_IA32_MCG_CTL 0x%llx, nop\n,
-   __func__, data);
+   if (!(mcg_cap  MCG_CTL_P))
+   return 1;
+   if (data != 0  data != ~(u64)0)
+   return -1;
+   vcpu-arch.mcg_ctl = data;
+   break;
+   default:
+   if (msr = MSR_IA32_MC0_CTL 
+   msr  MSR_IA32_MC0_CTL + 4 * bank_num) {
+   u32 offset = msr - MSR_IA32_MC0_CTL;
+   /* only 0 or all 1s can be written to IA32_MCi_CTL */
+   if ((offset  0x3) == 0 
+   data != 0  data != ~(u64)0)
+   return -1;
+   vcpu-arch.mce_banks[offset] = data;
+   break;
+   }
+   return 1;
+   }
+   return 0;
+}
+
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+   switch (msr) {
+   case MSR_EFER:
+   set_efer(vcpu, data);
break;
case MSR_IA32_DEBUGCTLMSR:
if (!data) {
@@ -810,6 +835,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
kvm_request_guest_time_update(vcpu);
break;
}
+   case MSR_IA32_MCG_CTL:
+   case MSR_IA32_MCG_STATUS:
+   case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+   return set_msr_mce(vcpu, msr, data);
default:
pr_unimpl(vcpu, unhandled wrmsr: 0x%x data %llx\n, msr, data);
return 1;
@@ -865,26 +894,49 @@ static int get_msr_mtrr(struct kvm_vcpu 
return 0;
 }
 
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
u64 data;
+   u64 mcg_cap = vcpu-arch.mcg_cap;
+   unsigned bank_num = mcg_cap  0xff;
 
switch (msr) {
-   case 0xc0010010: /* SYSCFG */
-   case 0xc0010015: /* HWCR */
-   case MSR_IA32_PLATFORM_ID:
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
-   case MSR_IA32_MC0_CTL:
-   case MSR_IA32_MCG_STATUS:
+   data = 0;
+   break;
case MSR_IA32_MCG_CAP:
+   data = vcpu-arch.mcg_cap;
+   break;
case MSR_IA32_MCG_CTL:
-   case MSR_IA32_MC0_MISC:
-   case MSR_IA32_MC0_MISC+4:
-   case MSR_IA32_MC0_MISC+8:
-   case MSR_IA32_MC0_MISC+12:
-   case MSR_IA32_MC0_MISC+16:
-   case MSR_IA32_MC0_MISC+20:
+   if (!(mcg_cap  MCG_CTL_P))
+   return 1;
+   data = vcpu-arch.mcg_ctl;
+  

[RFC -v3 1/2] QEMU-KVM: MCE: Add MCE simulation to qemu/tcg

2009-05-04 Thread Huang Ying
- MCE features are initialized when VCPU is initialized according to CPUID.
- A monitor command mce is added to inject a MCE.
- A new interrupt mask: CPU_INTERRUPT_MCE is added to inject the MCE.

Signed-off-by: Huang Ying ying.hu...@intel.com

---
 cpu-all.h   |4 ++
 cpu-exec.c  |4 ++
 monitor.c   |   49 +
 target-i386/cpu.h   |   22 +++
 target-i386/helper.c|   70 
 target-i386/op_helper.c |   34 +++
 6 files changed, 183 insertions(+)

--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -202,6 +202,7 @@
 #define CR4_DE_MASK   (1 << 3)
 #define CR4_PSE_MASK  (1 << 4)
 #define CR4_PAE_MASK  (1 << 5)
+#define CR4_MCE_MASK  (1 << 6)
 #define CR4_PGE_MASK  (1 << 7)
 #define CR4_PCE_MASK  (1 << 8)
 #define CR4_OSFXSR_SHIFT 9
@@ -248,6 +249,17 @@
 #define PG_ERROR_RSVD_MASK 0x08
 #define PG_ERROR_I_D_MASK  0x10
 
+#define MCE_CAP_DEF     0x100
+#define MCE_BANKS_DEF   4
+
+#define MCG_CTL_P       (1UL<<8)
+
+#define MCG_STATUS_MCIP (1UL<<2)
+
+#define MCI_STATUS_VAL  (1UL<<63)
+#define MCI_STATUS_OVER (1UL<<62)
+#define MCI_STATUS_UC   (1UL<<61)
+
 #define MSR_IA32_TSC                    0x10
 #define MSR_IA32_APICBASE               0x1b
 #define MSR_IA32_APICBASE_BSP           (1<<8)
@@ -288,6 +300,11 @@
 
 #define MSR_MTRRdefType                 0x2ff
 
+#define MSR_MC0_CTL                     0x400
+#define MSR_MC0_STATUS                  0x401
+#define MSR_MC0_ADDR                    0x402
+#define MSR_MC0_MISC                    0x403
+
 #define MSR_EFER                        0xc0000080
 
 #define MSR_EFER_SCE   (1 << 0)
@@ -674,6 +691,11 @@ typedef struct CPUX86State {
user */
 struct APICState *apic_state;
 uint32_t mp_state;
+
+uint64 mcg_cap;
+uint64 mcg_status;
+uint64 mcg_ctl;
+uint64 *mce_banks;
 } CPUX86State;
 
 CPUX86State *cpu_x86_init(const char *cpu_model);
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -3104,7 +3104,23 @@ void helper_wrmsr(void)
 case MSR_MTRRdefType:
 env-mtrr_deftype = val;
 break;
+case MSR_MCG_STATUS:
+env-mcg_status = val;
+break;
+case MSR_MCG_CTL:
+if ((env-mcg_cap  MCG_CTL_P)
+ (val == 0 || val == ~(uint64_t)0))
+env-mcg_ctl = val;
+break;
 default:
+if ((uint32_t)ECX = MSR_MC0_CTL
+ (uint32_t)ECX  MSR_MC0_CTL + (4 * env-mcg_cap  0xff)) {
+uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+if ((offset  0x3) != 0
+|| (val == 0 || val == ~(uint64_t)0))
+env-mce_banks[offset] = val;
+break;
+}
 /* XXX: exception ? */
 break;
 }
@@ -3223,7 +3239,25 @@ void helper_rdmsr(void)
 /* XXX: exception ? */
 val = 0;
 break;
+case MSR_MCG_CAP:
+val = env-mcg_cap;
+break;
+case MSR_MCG_CTL:
+if (env-mcg_cap  MCG_CTL_P)
+val = env-mcg_ctl;
+else
+val = 0;
+break;
+case MSR_MCG_STATUS:
+val = env-mcg_status;
+break;
 default:
+if ((uint32_t)ECX = MSR_MC0_CTL
+ (uint32_t)ECX  MSR_MC0_CTL + (4 * env-mcg_cap  0xff)) {
+uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+val = env-mce_banks[offset];
+break;
+}
 /* XXX: exception ? */
 val = 0;
 break;
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1432,6 +1432,75 @@ static void breakpoint_handler(CPUState 
 }
 #endif /* !CONFIG_USER_ONLY */
 
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+uint64_t mcg_status, uint64_t addr, uint64_t misc)
+{
+uint64_t mcg_cap = cenv-mcg_cap;
+unsigned bank_num = mcg_cap  0xff;
+uint64_t *banks = cenv-mce_banks;
+
+if (bank = bank_num || !(status  MCI_STATUS_VAL))
+return;
+
+/*
+ * if MSR_MCG_CTL is not all 1s, the uncorrected error
+ * reporting is disabled
+ */
+if ((status  MCI_STATUS_UC)  (mcg_cap  MCG_CTL_P) 
+cenv-mcg_ctl != ~(uint64_t)0)
+return;
+banks += 4 * bank;
+/*
+ * if MSR_MCi_CTL is not all 1s, the uncorrected error
+ * reporting is disabled for the bank
+ */
+if ((status  MCI_STATUS_UC)  banks[0] != ~(uint64_t)0)
+return;
+if (status  MCI_STATUS_UC) {
+if ((cenv-mcg_status  MCG_STATUS_MCIP) ||
+!(cenv-cr[4]  CR4_MCE_MASK)) {
+fprintf(stderr, injects mce exception while previous 
+one is in progress!\n);
+qemu_log_mask(CPU_LOG_RESET, Triple fault\n);
+qemu_system_reset_request();
+return;
+}
+if (banks[1]  MCI_STATUS_VAL)
+

[RFC -v3 2/2] QEMU-KVM: MCE: Add MCE simulation support to qemu/kvm

2009-05-04 Thread Huang Ying
KVM ioctls are used to initialize MCE simulation and inject MCE. The
real MCE simulation is implemented in Linux kernel.


ChangeLog:

v3:

- Re-based on qemu/tcg MCE support patch

v2:

- Use new kernel MCE capability exportation interface.


Signed-off-by: Huang Ying ying.hu...@intel.com

---
 kvm/kernel/arch/x86/include/asm/kvm.h |1 
 kvm/kernel/include/linux/kvm.h|   21 +
 kvm/libkvm/libkvm-x86.c   |   39 +
 kvm/libkvm/libkvm.h   |4 +++
 qemu-kvm-x86.c|   23 +++
 qemu-kvm.c|   40 ++
 qemu-kvm.h|3 ++
 target-i386/helper.c  |5 
 8 files changed, 136 insertions(+)

--- a/kvm/libkvm/libkvm-x86.c
+++ b/kvm/libkvm/libkvm-x86.c
@@ -378,6 +378,45 @@ int kvm_set_msrs(kvm_context_t kvm, int 
 return r;
 }
 
+int kvm_get_mce_cap_supported(kvm_context_t kvm, uint64_t *mce_cap,
+  int *max_banks)
+{
+#ifdef KVM_CAP_MCE
+int r;
+
+r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
+if (r  0) {
+*max_banks = r;
+return ioctl(kvm-fd, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
+}
+#endif
+return -ENOSYS;
+}
+
+int kvm_setup_mce(kvm_context_t kvm, int vcpu, uint64_t *mcg_cap)
+{
+#ifdef KVM_CAP_MCE
+int r;
+
+r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
+if (r  0)
+return ioctl(kvm-vcpu_fd[vcpu], KVM_X86_SETUP_MCE, mcg_cap);
+#endif
+return -ENOSYS;
+}
+
+int kvm_set_mce(kvm_context_t kvm, int vcpu, struct kvm_x86_mce *m)
+{
+#ifdef KVM_CAP_MCE
+int r;
+
+r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
+if (r  0)
+return ioctl(kvm-vcpu_fd[vcpu], KVM_X86_SET_MCE, m);
+#endif
+return -ENOSYS;
+}
+
 static void print_seg(FILE *file, const char *name, struct kvm_segment *seg)
 {
fprintf(stderr,
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -27,6 +27,10 @@ typedef struct kvm_context *kvm_context_
 struct kvm_msr_list *kvm_get_msr_list(kvm_context_t);
 int kvm_get_msrs(kvm_context_t, int vcpu, struct kvm_msr_entry *msrs, int n);
 int kvm_set_msrs(kvm_context_t, int vcpu, struct kvm_msr_entry *msrs, int n);
+int kvm_get_mce_cap_supported(kvm_context_t, uint64_t *mce_cap, int 
*max_banks);
+int kvm_setup_mce(kvm_context_t, int vcpu, uint64_t *mcg_cap);
+struct kvm_x86_mce;
+int kvm_set_mce(kvm_context_t, int vcpu, struct kvm_x86_mce *mce);
 #endif
 
 /*!
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -566,6 +566,29 @@ int kvm_arch_qemu_init_env(CPUState *cen
do_cpuid_ent(cpuid_ent[cpuid_nent++], i, 0, copy);
 
 kvm_setup_cpuid2(kvm_context, cenv-cpu_index, cpuid_nent, cpuid_ent);
+
+#ifdef KVM_CAP_MCE
+if (((cenv-cpuid_version  8)0xF) = 6
+ (cenv-cpuid_features(CPUID_MCE|CPUID_MCA)) == 
(CPUID_MCE|CPUID_MCA)
+ kvm_check_extension(kvm_context, KVM_CAP_MCE)  0) {
+uint64_t mcg_cap;
+int banks;
+
+if (kvm_get_mce_cap_supported(kvm_context, mcg_cap, banks))
+perror(kvm_get_mce_cap_supported FAILED);
+else {
+if (banks  MCE_BANKS_DEF)
+banks = MCE_BANKS_DEF;
+mcg_cap = MCE_CAP_DEF;
+mcg_cap |= banks;
+if (kvm_setup_mce(kvm_context, cenv-cpu_index, mcg_cap))
+perror(kvm_setup_mce FAILED);
+else
+cenv-mcg_cap = mcg_cap;
+}
+}
+#endif
+
 return 0;
 }
 
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -233,4 +233,7 @@ static inline void cpu_synchronize_state
 }
 }
 
+void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+uint64_t mcg_status, uint64_t addr, uint64_t misc);
+
 #endif
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1442,6 +1442,11 @@ void cpu_inject_x86_mce(CPUState *cenv, 
 unsigned bank_num = mcg_cap  0xff;
 uint64_t *banks = cenv-mce_banks;
 
+if (kvm_enabled()) {
+kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc);
+return;
+}
+
 if (bank = bank_num || !(status  MCI_STATUS_VAL))
 return;
 
--- a/kvm/kernel/include/linux/kvm.h
+++ b/kvm/kernel/include/linux/kvm.h
@@ -455,6 +455,9 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#ifdef __KVM_HAVE_MCE
+#define KVM_CAP_MCE 31
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -494,6 +497,20 @@ struct kvm_irq_routing {
 
 #endif
 
+#ifdef KVM_CAP_MCE
+/* x86 MCE */
+struct kvm_x86_mce {
+   __u64 status;
+   __u64 addr;
+   __u64 misc;
+   __u64 mcg_status;
+   __u8 bank;
+   __u8 pad1;
+   __u16 pad2;
+   __u32 pad3;
+};
+#endif
+
 /*
  * ioctls for VM fds
  */
@@ -581,6 +598,10 @@ struct kvm_irq_routing {
 #define KVM_NMI   _IO(KVMIO,  0x9a)
 /* 

Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Stefan Weil
Anthony Liguori schrieb:
 Sorry this explanation is long winded, but this is a messy situation.

 In Linux, there isn't a very consistent policy about userspace kernel
 header inclusion.  On a typical Linux system, you're likely to find
 kernel headers in three places.

 glibc headers (/usr/include/{linux,asm})

 These headers are installed by glibc.  They very often are based on
 much older kernel versions than the kernel you have in your
 distribution.  For software that depends on these headers, very often
 this means that your software detects features being missing that are
 present on your kernel.  Furthermore, glibc only installs the headers
 it needs so very often certain headers have dependencies that aren't
 met.  A classic example is linux/compiler.h and the broken
 usbdevice_fs.h header that depends on it.  There are still
 distributions today that QEMU doesn't compile on because of this.

 Today, most of QEMU's code depends on these headers.

 /lib/modules/$(uname -r)/build

 These are the kernel headers that are installed as part of your
 kernel.  In general, this is a pretty good place to find the headers
 that are associated with the kernel version you're actually running
 on.  However, these headers are part of the kernel build tree and are
 not always guaranteed to be includable from userspace.

 random kernel tree

 Developers, in particular, like to point things at their random kernel
 trees.  In general though, relying on a full kernel source tree being
 available isn't a good idea.  Kernel headers change dramatically
 across versions too so it's very likely that we would need to have a
 lot of #ifdefs dependent on kernel versions, or some of the uglier
 work arounds we have in usb-linux.c.

 I think the best way to avoid #ifdefs and dependencies on
 broken/incomplete glibc headers is to include all of the Linux headers
 we need within QEMU.  The attached patch does just this.

 I think there's room for discussion about whether we really want to do
 this.  We could potentially depend on some more common glibc headers
 (like asm/types.h) while bringing in less reliable headers
 (if_tun.h/virtio*).  Including them all seems like the most robust
 solution to me though.

 Comments?

 Regards,

 Anthony Liguori

For Debian systems, those headers are installed by package linux-libc-dev.
There are also packages for cross compilation in emdebian
(linux-libc-dev-mips-cross, linux-libc-dev-powerpc-cross, ...).

Yes, those headers did not always match the features of the current kernel,
so --enable-kvm did not work. This is fixed now - there is a linux-libc-dev
2.6.29-3 which is up-to-date.

So, at the moment I see no need to fill the QEMU source tree with
linux header files.

For special needs the configure option (--kerneldir=PATH)
should be sufficient.

Regards

Stefan Weil

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM x86_64 with SR-IOV..?

2009-05-04 Thread Nicholas A. Bellinger
On Sun, 2009-05-03 at 22:28 -0700, Nicholas A. Bellinger wrote:
  On Mon, 2009-05-04 at 10:09 +0800, Sheng Yang wrote:
Greetings Sheng,
   
So, I have been trying the latest kvm-85 release on a v2.6.30-rc3
checkout from linux-2.6.git on a CentOS 5u3 x86_64 install on Intel
IOH-5520 based dual socket Nehalem board.  I have enabled DMAR and
Interrupt Remapping my KVM host using v2.6.30-rc3 and from what I can
tell, the KVM_CAP_* defines from libkvm are enabled with building kvm-85
after './configure --kerneldir=/usr/src/linux-2.6.git' and the PCI
passthrough code is being enabled in kvm-85/qemu/hw/device-assignment.c
AFAICT..
   
From there, I use the freshly installed qemu-x86_64-system binary to
   
start a Debian 5 x86_64 HVM (that previously had been moving network
packets under Xen for PCIe passthrough). I see the MSI-X interrupt
remapping working on the KVM host for the passed -pcidevice, and the
MMIO mappings from the qemu build that I also saw while using
Xen/qemu-dm built with PCI passthrough are there as well..
   
   
   Hi Nicholas
   
But while the KVM guest is booting, I see the following exception(s)
from qemu-x86_64-system for one of the VFs for a multi-function PCIe
device:
   
BUG: kvm_destroy_phys_mem: invalid parameters (slot=-1)
   
   This one is mostly harmless.
   
  
  Ok, good to know..  :-)
  
I try with one of the on-board e1000e ports (02:00.0) and I see the same
exception along with some MSI-X exceptions from qemu-x86_64-system in
KVM guest.. However, I am still able to see the e1000e and the other
vxge multi-function device with lspci, but I am unable to dhcp or ping
with the e1000e and VF from multi-function device fails to register the
MSI-X interrupt in the guest..
   
   Did you see the interrupt in the guest and host side?
  
  Ok, I am restarting the e1000e test with a fresh Fedora 11 install and
  KVM host kernel 2.6.29.1-111.fc11.x86_64.   After unbinding and
  attaching the e1000e single-function device at 02:00.0 to pci-stub with:
  
     echo "8086 10d3" > /sys/bus/pci/drivers/pci-stub/new_id
     echo 0000:02:00.0 > /sys/bus/pci/devices/0000:02:00.0/driver/unbind
     echo 0000:02:00.0 > /sys/bus/pci/drivers/pci-stub/bind
  
  I see the following in the KVM host kernel ring buffer:
  
 e1000e :02:00.0: PCI INT A disabled
 pci-stub :02:00.0: PCI INT A - GSI 17 (level, low) - IRQ 17
 pci-stub :02:00.0: irq 58 for MSI/MSI-X
  

Ok, I also noticed the following output in /proc/interrupts on KVM host
with dual Intel E5520 processors (16 CPUs)

[r...@barret ~]# cat /proc/interrupts | grep MSI
 55: 22  0  0  0  0  0  
0  0  0 94 436974  0  0  0  
0  0   PCI-MSI-edge  eth0-rx-0
 56: 27  0  0  0  0  0  
0  0  0 613054  0  0  15253  0  
0  0   PCI-MSI-edge  eth0-tx-0
 57:  3  0  0  0  0  0  
0  0  0  0  0  0  0  0  
0  0   PCI-MSI-edge  eth0
 58:521  0  0  0  5  0  
0  0  0  0  0  0  0  0  
0  0   PCI-MSI-edge  kvm_assigned_msi_device

eth0 is the other e1000e port at 03:00.0 that is in use on the KVM host,
and it looks like the other e1000e port at 02:00.0 has been setup to
kvm_assigned_msi_device on irq 58.

I also noticed the following after starting a KVM guest in host's ring
buffer (not sure if this has anything to do with -pcidevice usage)

kvm: 3428: cpu6 unhandled wrmsr: 0xc0010117 data 0
kvm: 3428: cpu5 unhandled wrmsr: 0xc0010117 data 0
kvm: 3428: cpu9 unhandled wrmsr: 0xc0010117 data 0
kvm: 3428: cpu1 unhandled wrmsr: 0xc0010117 data 0
kvm: 3428: cpu8 unhandled wrmsr: 0xc0010117 data 0
kvm: 3428: cpu2 unhandled wrmsr: 0xc0010117 data 0
kvm: 3428: cpu3 unhandled wrmsr: 0xc0010117 data 0
kvm: 3428: cpu4 unhandled wrmsr: 0xc0010117 data 0
kvm: 3428: cpu0 unhandled wrmsr: 0xc0010117 data 0


I think you can try on-
   board e1000e for MSI-X first. And please ensure correlated driver have 
   been 
   loaded correctly.
  
  nod..
  
And what do you mean by some MSI-X exceptions? Better with 
   the log.
  
  Ok, with the Fedora 11 installed qemu-kvm, I see the expected
  kvm_destroy_phys_mem() statements:
  
  #kvm-host qemu-kvm -m 2048 -smp 8 -pcidevice host=02:00.0 
  lenny64guest1-orig.img 
  BUG: kvm_destroy_phys_mem: invalid parameters (slot=-1)
  BUG: kvm_destroy_phys_mem: invalid parameters (slot=-1)
  
  However I still see the following in the KVM guest kernel ring buffer
  running v2.6.30-rc in the HVM guest.
  
  [5.523790] 

Re: [Qemu-devel] [PATCH] kvm: Add helpers for checking and requiring kvm extensions

2009-05-04 Thread Juan Quintela
Avi Kivity a...@redhat.com wrote:

Hi

 diff --git a/kvm-all.c b/kvm-all.c
 index 36659a9..1642a2a 100644
 --- a/kvm-all.c
 +++ b/kvm-all.c
 @@ -64,6 +64,30 @@ struct KVMState
  
  static KVMState *kvm_state;
  
 +int kvm_check_extension(int extension)
 +{
 +int ret;
 +
 +ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, extension);
 +if (ret  0) {
 +fprintf(stderr, KVM_CHECK_EXTENSION failed: %s\n, strerror(errno));
 +exit(1);
 +}
 +return ret;
 +}

Are you sure you want the exit(1) in this case?
With the exit() call, you are unable to check if one extension is
present at all.  And you check the return of the following code.
  s-coalesced_mmio = 0;
  #ifdef KVM_CAP_COALESCED_MMIO
 -ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
 -if (ret  0)
 -s-coalesced_mmio = ret;
 +s-coalesced_mmio = kvm_check_extension(KVM_CAP_COALESCED_MMIO);
  #endif

You can remove the ifdef at this point.

Later, Juan.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm-userspace broken?

2009-05-04 Thread Carlo Marcelo Arenas Belon
On Mon, May 04, 2009 at 09:40:03AM +0300, Avi Kivity wrote:
 Hans de Bruin wrote:
 Avi Kivity wrote:

 kvm-userspace.git has been retired; it's now playing golf in  
 git://git.kernel.org/pub/scm/virt/kvm/retired/kvm-userspace.git.  Use 
 git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git instead.

 The latest tarball on sourceforge is kvm-85, yet the clone I just made  
 from qemu-kvm 'git describes' itself as kvm-84-756-gf7d114d. Is that  
 right?

 Try 'git fetch --tags origin'

$ git tag | grep kvm-85 | head -1
kvm-85
$ git describe
kvm-84-773-g0925f80

easily reproducible with a fresh clone.

Carlo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Avi Kivity

Anthony Liguori wrote:

Sorry this explanation is long winded, but this is a messy situation.

In Linux, there isn't a very consistent policy about userspace kernel 
header inclusion.  On a typical Linux system, you're likely to find 
kernel headers in three places.


glibc headers (/usr/include/{linux,asm})

These headers are installed by glibc.  They very often are based on 
much older kernel versions than the kernel you have in your 
distribution.  For software that depends on these headers, very often 
this means that your software detects features being missing that are 
present on your kernel.  Furthermore, glibc only installs the headers 
it needs so very often certain headers have dependencies that aren't 
met.  A classic example is linux/compiler.h and the broken 
usbdevice_fs.h header that depends on it.  There are still 
distributions today that QEMU doesn't compile on because of this.


Today, most of QEMU's code depends on these headers.

/lib/modules/$(uname -r)/build

These are the kernel headers that are installed as part of your 
kernel.  In general, this is a pretty good place to find the headers 
that are associated with the kernel version you're actually running 
on.  However, these headers are part of the kernel build tree and are 
not always guaranteed to be includable from userspace.




I thought these were for external modules, not for userspace.


random kernel tree

Developers, in particular, like to point things at their random kernel 
trees.  In general though, relying on a full kernel source tree being 
available isn't a good idea.  Kernel headers change dramatically 
across versions too so it's very likely that we would need to have a 
lot of #ifdefs dependent on kernel versions, or some of the uglier 
work arounds we have in usb-linux.c.


I think the best way to avoid #ifdefs and dependencies on 
broken/incomplete glibc headers is to include all of the Linux headers 
we need within QEMU.  The attached patch does just this.


I think there's room for discussion about whether we really want to do 
this.  We could potentially depend on some more common glibc headers 
(like asm/types.h) while bringing in less reliable headers 
(if_tun.h/virtio*).  Including them all seems like the most robust 
solution to me though.


Comments? 


I think we need to use the output of 'make headers-install', which 
removes things like __user and CONFIG_*.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm-userspace broken?

2009-05-04 Thread Avi Kivity

Carlo Marcelo Arenas Belon wrote:

On Mon, May 04, 2009 at 09:40:03AM +0300, Avi Kivity wrote:
  

Hans de Bruin wrote:


Avi Kivity wrote:
  
kvm-userspace.git has been retired; it's now playing golf in  
git://git.kernel.org/pub/scm/virt/kvm/retired/kvm-userspace.git.  Use 
git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git instead.

The latest tarball on sourceforge is kvm-85, yet the clone I just made  
from qemu-kvm 'git describes' itself as kvm-84-756-gf7d114d. Is that  
right?
  

Try 'git fetch --tags origin'



$ git tag | grep kvm-85 | head -1
kvm-85
$ git describe
kvm-84-773-g0925f80

easily reproducible with a fresh clone.

  


Looks like the repository diverged after kvm-85rc6; not sure how that 
can happen.  Will merge it back up.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm-userspace broken?

2009-05-04 Thread Avi Kivity

Avi Kivity wrote:

Carlo Marcelo Arenas Belon wrote:

On Mon, May 04, 2009 at 09:40:03AM +0300, Avi Kivity wrote:
 

Hans de Bruin wrote:
   

Avi Kivity wrote:
 
kvm-userspace.git has been retired; it's now playing golf in  
git://git.kernel.org/pub/scm/virt/kvm/retired/kvm-userspace.git.  
Use git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git instead.

The latest tarball on sourceforge is kvm-85, yet the clone I just 
made from qemu-kvm 'git describes' itself as kvm-84-756-gf7d114d. 
Is that right?
  

Try 'git fetch --tags origin'



$ git tag | grep kvm-85 | head -1
kvm-85
$ git describe
kvm-84-773-g0925f80

easily reproducible with a fresh clone.

  


Looks like the repository diverged after kvm-85rc6; not sure how that 
can happen.  Will merge it back up.




Looks like the tag was set incorrectly; I fixed it up, new tag should be 
up when git.kernel.org catches up.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 03/04] qemu-kvm: Remove the dependency for phys_ram_base for ipf.c

2009-05-04 Thread Avi Kivity

Jes Sorensen wrote:

Zhang, Xiantao wrote:

Jes Sorensen wrote:
I still can't see the difference with the patch in Avi's tree except 
nvram stuff.  And I believe the global variable you mentioned should 
be only used for nvram. So I propose an incremental patch for that. :)


Hi,

Here is an incremental version of the patch. I think the differences
should be pretty obvious now :-)

It fixes the memcpy issues in the hob and nvram code and also cleans
up the interfaces a lot.

Avi, please add.


Added, thanks.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2786468 ] XP SP3 freeze at startup with kvm_intel kernel module v85

2009-05-04 Thread SourceForge.net
Bugs item #2786468, was opened at 2009-05-04 10:18
Message generated for change (Tracker Item Submitted) made by kubrick_fr
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2786468&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: intel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Kubrick (kubrick_fr)
Assigned to: Nobody/Anonymous (nobody)
Summary: XP SP3 freeze at startup with kvm_intel kernel module v85

Initial Comment:
Hi,

XP SP3 freezes at startup with kvm_intel kernel module v85. It freezes during 
the resolution change before displaying the Windows XP logo with the 
progress bar. The VM remains open, consuming full CPU, in a kind of 16/9 
display.

qemu-system-x86_64 v85 doesn't work with v85 kvm_intel kernel module but works 
with -no-kvm or v84 kvm_intel kernel module. Also, booting in safe mode works.

Kubrick.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2786468&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM x86_64 with SR-IOV..?

2009-05-04 Thread Sheng Yang
On Monday 04 May 2009 12:36:04 Nicholas A. Bellinger wrote:
 On Mon, 2009-05-04 at 10:09 +0800, Sheng Yang wrote:
  On Monday 04 May 2009 08:53:07 Nicholas A. Bellinger wrote:
   On Sat, 2009-05-02 at 18:22 +0800, Sheng Yang wrote:
On Thu, Apr 30, 2009 at 01:22:54PM -0700, Nicholas A. Bellinger wrote:
 Greetings KVM folks,

 I wondering if any information exists for doing SR-IOV on the new
 VT-d capable chipsets with KVM..?  From what I understand the
 patches for doing this with KVM are floating around, but I have
 been unable to find any user-level docs for actually making it all
 go against a upstream v2.6.30-rc3 code..

 So far I have been doing IOV testing with Xen 3.3 and 3.4.0-pre,
 and I am really hoping to be able to jump to KVM for
 single-function and and then multi-function SR-IOV.  I know that
 the VM migration stuff for IOV in Xen is up and running,  and I
 assume it is being worked in for KVM instance migration as well..? 
 This part is less important (at least for me :-) than getting a
 stable SR-IOV setup running under the KVM hypervisor..  Does anyone
 have any pointers for this..?

 Any comments or suggestions are appreciated!
   
Hi Nicholas
   
The patches are not floating around now. As you know, SR-IOV for
Linux have been in 2.6.30, so then you can use upstream KVM and
qemu-kvm(or recent released kvm-85) with 2.6.30-rc3 as host kernel.
And some time ago, there are several SRIOV related patches for
qemu-kvm, and now they all have been checked in.
   
And for KVM, the extra document is not necessary, for you can simple
assign a VF to guest like any other devices. And how to create VF is
specific for each device driver. So just create a VF then assign it
to KVM guest is fine.
  
   Greetings Sheng,
  
   So, I have been trying the latest kvm-85 release on a v2.6.30-rc3
   checkout from linux-2.6.git on a CentOS 5u3 x86_64 install on Intel
   IOH-5520 based dual socket Nehalem board.  I have enabled DMAR and
   Interrupt Remapping my KVM host using v2.6.30-rc3 and from what I can
   tell, the KVM_CAP_* defines from libkvm are enabled with building
   kvm-85 after './configure --kerneldir=/usr/src/linux-2.6.git' and the
   PCI passthrough code is being enabled in
   kvm-85/qemu/hw/device-assignment.c AFAICT..
  
   From there, I use the freshly installed qemu-x86_64-system binary to
  
   start a Debian 5 x86_64 HVM (that previously had been moving network
   packets under Xen for PCIe passthrough). I see the MSI-X interrupt
   remapping working on the KVM host for the passed -pcidevice, and the
   MMIO mappings from the qemu build that I also saw while using
   Xen/qemu-dm built with PCI passthrough are there as well..
 
  Hi Nicholas
 
   But while the KVM guest is booting, I see the following exception(s)
   from qemu-x86_64-system for one of the VFs for a multi-function PCIe
   device:
  
   BUG: kvm_destroy_phys_mem: invalid parameters (slot=-1)
 
  This one is mostly harmless.

 Ok, good to know..  :-)

   I try with one of the on-board e1000e ports (02:00.0) and I see the
   same exception along with some MSI-X exceptions from qemu-x86_64-system
   in KVM guest.. However, I am still able to see the e1000e and the other
   vxge multi-function device with lspci, but I am unable to dhcp or ping
   with the e1000e and VF from multi-function device fails to register the
   MSI-X interrupt in the guest..
 
  Did you see the interrupt in the guest and host side?

 Ok, I am restarting the e1000e test with a fresh Fedora 11 install and
 KVM host kernel 2.6.29.1-111.fc11.x86_64.   After unbinding and
 attaching the e1000e single-function device at 02:00.0 to pci-stub with:

echo "8086 10d3" > /sys/bus/pci/drivers/pci-stub/new_id
echo 0000:02:00.0 > /sys/bus/pci/devices/0000:02:00.0/driver/unbind
echo 0000:02:00.0 > /sys/bus/pci/drivers/pci-stub/bind

 I see the following the KVM host kernel ring buffer:

e1000e 0000:02:00.0: PCI INT A disabled
pci-stub 0000:02:00.0: PCI INT A -> GSI 17 (level, low) -> IRQ 17
pci-stub 0000:02:00.0: irq 58 for MSI/MSI-X

   I think you can try on-
  board e1000e for MSI-X first. And please ensure correlated driver have
  been loaded correctly.

 nod..

   And what do you mean by some MSI-X exceptions? Better with
  the log.

 Ok, with the Fedora 11 installed qemu-kvm, I see the expected
 kvm_destroy_phys_mem() statements:

 #kvm-host qemu-kvm -m 2048 -smp 8 -pcidevice host=02:00.0
 lenny64guest1-orig.img BUG: kvm_destroy_phys_mem: invalid parameters
 (slot=-1)
 BUG: kvm_destroy_phys_mem: invalid parameters (slot=-1)

 However I still see the following in the KVM guest kernel ring buffer
 running v2.6.30-rc in the HVM guest.

 [5.523790] ACPI: PCI Interrupt Link [LNKA] enabled at IRQ 10
 [5.524582] e1000e :00:05.0: PCI INT A - Link[LNKA] - GSI 10
 (level, high) - IRQ 10 [5.525710] e1000e :00:05.0: setting 

Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Edgar E. Iglesias
On Mon, May 04, 2009 at 08:51:21AM +0200, Stefan Weil wrote:
 Anthony Liguori schrieb:
  Sorry this explanation is long winded, but this is a messy situation.
 
  In Linux, there isn't a very consistent policy about userspace kernel
  header inclusion.  On a typical Linux system, you're likely to find
  kernel headers in three places.
 
  glibc headers (/usr/include/{linux,asm})
 
  These headers are installed by glibc.  They very often are based on
  much older kernel versions than the kernel you have in your
  distribution.  For software that depends on these headers, very often
  this means that your software detects features being missing that are
  present on your kernel.  Furthermore, glibc only installs the headers
  it needs so very often certain headers have dependencies that aren't
  met.  A classic example is linux/compiler.h and the broken
  usbdevice_fs.h header that depends on it.  There are still
  distributions today that QEMU doesn't compile on because of this.
 
  Today, most of QEMU's code depends on these headers.
 
  /lib/modules/$(uname -r)/build
 
  These are the kernel headers that are installed as part of your
  kernel.  In general, this is a pretty good place to find the headers
  that are associated with the kernel version you're actually running
  on.  However, these headers are part of the kernel build tree and are
  not always guaranteed to be includable from userspace.
 
  random kernel tree
 
  Developers, in particular, like to point things at their random kernel
  trees.  In general though, relying on a full kernel source tree being
  available isn't a good idea.  Kernel headers change dramatically
  across versions too so it's very likely that we would need to have a
  lot of #ifdefs dependent on kernel versions, or some of the uglier
  work arounds we have in usb-linux.c.
 
  I think the best way to avoid #ifdefs and dependencies on
  broken/incomplete glibc headers is to include all of the Linux headers
  we need within QEMU.  The attached patch does just this.
 
  I think there's room for discussion about whether we really want to do
  this.  We could potentially depend on some more common glibc headers
  (like asm/types.h) while bringing in less reliable headers
  (if_tun.h/virtio*).  Including them all seems like the most robust
  solution to me though.
 
  Comments?
 
  Regards,
 
  Anthony Liguori
 
 For Debian systems, those headers are installed by package linux-libc-dev.
 There are also packages for cross compilation in emdebian
 (linux-libc-dev-mips-cross, linux-libc-dev-powerpc-cross, ...).
 
 Yes, those headers did not always match the features of the current kernel,
 so --enable-kvm did not work. This is fixed now - there is a linux-libc-dev
 2.6.29-3 which is up-to-date.
 
 So, at the moment I see no need to fill the QEMU source tree with
 linux header files.

I agree. The kvm issue seems unfortunate and I don't have any suggestions
on how to avoid it in the future but for other issues, like restructured
header files or renamed struct members etc I think there is a risk we
become sloppy in keeping up to date with current practices.

I don't feel very strongly about it but my gut feeling tells me we
shouldn't be doing this.

Cheers
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: No disable_irq for MSI/MSI-X interrupt on device assignment

2009-05-04 Thread Avi Kivity

Sheng Yang wrote:

Disable interrupt at interrupt handler and enable it when guest ack is for
the level triggered interrupt, to prevent reinjected interrupt. MSI/MSI-X don't
need it.

One possible problem is multiply same vector interrupt injected between irq
handler and scheduled work handler would be merged as one for MSI/MSI-X.
But AFAIK, the drivers handle it well.

The patch fixed the oplin card performance issue(MSI-X performance is half of
MSI/INTx).

 
 	schedule_work(&assigned_dev->interrupt_work);
 
-	disable_irq_nosync(irq);

-   assigned_dev->host_irq_disabled = true;
+   if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+   disable_irq_nosync(irq);
+   assigned_dev->host_irq_disabled = true;
+   }
 
 out:

spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
  


I don't have the spin_lock_irqrestore() here.  The patch applies, but 
with fuzz.  Am I missing a patch?


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] fix apic_debug instances

2009-05-04 Thread Avi Kivity

Glauber Costa wrote:

Apparently nobody turned this on in a while...
setting apic_debug to something compilable, generates
some errors. This patch fixes it.

  


Applied, thanks.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: No disable_irq for MSI/MSI-X interrupt on device assignment

2009-05-04 Thread Sheng Yang
On Monday 04 May 2009 16:25:55 Avi Kivity wrote:
 Sheng Yang wrote:
  Disable interrupt at interrupt handler and enable it when guest ack is
  for the level triggered interrupt, to prevent reinjected interrupt.
  MSI/MSI-X don't need it.
 
  One possible problem is multiply same vector interrupt injected between
  irq handler and scheduled work handler would be merged as one for
  MSI/MSI-X. But AFAIK, the drivers handle it well.
 
  The patch fixed the oplin card performance issue(MSI-X performance is
  half of MSI/INTx).
 
 
  schedule_work(&assigned_dev->interrupt_work);
 
  -   disable_irq_nosync(irq);
  -   assigned_dev->host_irq_disabled = true;
  +   if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
  +   disable_irq_nosync(irq);
  +   assigned_dev->host_irq_disabled = true;
  +   }
 
   out:
  spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);

 I don't have the spin_lock_irqrestore() here.  The patch applies, but
 with fuzz.  Am I missing a patch?

Oh, it's Marcelo's patchset...

[patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race 
fix

-- 
regards
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] don't start cpu main loop while there is still init work to do.

2009-05-04 Thread Avi Kivity

Glauber Costa wrote:

As soon as we call kvm_init_vcpu(), we start the vcpu thread.
However, there are still things that have to be done, as soon
as the new CPUState is created. Examples include initializing the
apic, halting the cpu, etc.

Without this patch, it is possible that the cpu may want to start
using those things, before initializing them, leading to segfaults.
We introduce another state variable, initialized, meaning that
the cpu is already created, but not totally initialized,
to serialize it.

Before this patch:
(qemu) cpu_set X online => segfaults ~ 80 % of the time
After this patch:
(qemu) cpu_set X online => works.

  


Is it possible to move all those things to the vcpu thread, so it 
serializes naturally?


I'd like to avoid vcpu ioctls from more than one thread, in case we ever 
move to a syscall implementation.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm-userspace broken?

2009-05-04 Thread Carlo Marcelo Arenas Belon
On Mon, May 04, 2009 at 11:14:58AM +0300, Avi Kivity wrote:
 Avi Kivity wrote:
 Carlo Marcelo Arenas Belon wrote:
 On Mon, May 04, 2009 at 09:40:03AM +0300, Avi Kivity wrote:
  
 Hans de Bruin wrote:

 Avi Kivity wrote:
  
 kvm-userspace.git has been retired; it's now playing golf in   
 git://git.kernel.org/pub/scm/virt/kvm/retired/kvm-userspace.git. 
  Use git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git 
 instead.
 
 The latest tarball on sourceforge is kvm-85, yet the clone I just  
 made  from qemu-kvm 'git describes' itself as kvm-84-756-gf7d114d. 
 Is that  right?
   
 Try 'git fetch --tags origin'
 

 $ git tag | grep kvm-85 | head -1
 kvm-85
 $ git describe
 kvm-84-773-g0925f80

 easily reproducible with a fresh clone.

 Looks like the repository diverged after kvm-85rc6; not sure how that  
 can happen.  Will merge it back up.

 Looks like the tag was set incorrectly; I fixed it up, new tag should be  
 up when git.kernel.org catches up.

$ git fetch --tags
remote: Counting objects: 1, done.
remote: Total 1 (delta 0), reused 0 (delta 0)
Unpacking objects: 100% (1/1), done.
From git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm
 - [tag update]  kvm-85 - kvm-85
$ git describe
kvm-85-253-g0925f80

Carlo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] Present kvm with corret apic phys id.

2009-05-04 Thread Avi Kivity

Glauber Costa wrote:

KVM will shift the value in addr 0x20 (APIC_ID) by 24 bits before actually
using it. We currently load phys_id as s->id. After being shifted
by 24 bits, it will result in a meaningless value. We should really
be doing s->id << 24, which, after being shifted, will lead to the correct
value.

This is for the load function. save has the inverse problem.
  


Applied, thanks.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: No disable_irq for MSI/MSI-X interrupt on device assignment

2009-05-04 Thread Avi Kivity

Sheng Yang wrote:

On Monday 04 May 2009 16:25:55 Avi Kivity wrote:
  

Sheng Yang wrote:


Disable interrupt at interrupt handler and enable it when guest ack is
for the level triggered interrupt, to prevent reinjected interrupt.
MSI/MSI-X don't need it.

One possible problem is multiply same vector interrupt injected between
irq handler and scheduled work handler would be merged as one for
MSI/MSI-X. But AFAIK, the drivers handle it well.

The patch fixed the oplin card performance issue(MSI-X performance is
half of MSI/INTx).


schedule_work(&assigned_dev->interrupt_work);

-   disable_irq_nosync(irq);
-   assigned_dev->host_irq_disabled = true;
+   if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+   disable_irq_nosync(irq);
+   assigned_dev->host_irq_disabled = true;
+   }

 out:
spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
  

I don't have the spin_lock_irqrestore() here.  The patch applies, but
with fuzz.  Am I missing a patch?



Oh, it's Marcelo's patchset...

[patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx race 
fix
  


Okay, I am working my queue in reverse order :)

But please note dependencies on other patches for me.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: No disable_irq for MSI/MSI-X interrupt on device assignment

2009-05-04 Thread Sheng Yang
On Monday 04 May 2009 16:34:00 Avi Kivity wrote:
 Sheng Yang wrote:
  On Monday 04 May 2009 16:25:55 Avi Kivity wrote:
  Sheng Yang wrote:
  Disable interrupt at interrupt handler and enable it when guest ack is
  for the level triggered interrupt, to prevent reinjected interrupt.
  MSI/MSI-X don't need it.
 
  One possible problem is multiply same vector interrupt injected between
  irq handler and scheduled work handler would be merged as one for
  MSI/MSI-X. But AFAIK, the drivers handle it well.
 
  The patch fixed the oplin card performance issue(MSI-X performance is
  half of MSI/INTx).
 
 
schedule_work(&assigned_dev->interrupt_work);
 
  - disable_irq_nosync(irq);
  - assigned_dev->host_irq_disabled = true;
  + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
  + disable_irq_nosync(irq);
  + assigned_dev->host_irq_disabled = true;
  + }
 
   out:
spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
 
  I don't have the spin_lock_irqrestore() here.  The patch applies, but
  with fuzz.  Am I missing a patch?
 
  Oh, it's Marcelo's patchset...
 
  [patch 0/4] use smp_send_reschedule in vcpu_kick / assigned dev host intx
  race fix

 Okay, I am working my queue in reverse order :)

 But please note dependencies on other patches for me.

Yes, sorry, I totally forgot (mine is based on his patchset)... :(

-- 
regards
Yang, Sheng

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 04/04] qemu-kvm: other archs should maintain memory mapping also.

2009-05-04 Thread Avi Kivity

Jes Sorensen wrote:

Avi Kivity wrote:

Currently, TARGET_I386 is used to comment out the mapping mechanism
for other archs, but the mapping mechanism should be useful for other archs
to maintain the guest's memory mapping. 


Hollis, does this work for you?

If not, you can add a new define KVM_WANT_MAPPING or something, and 
define it for I386 and IA64.


Hi,

This is the must_use_alias patch mentioned in my previous email.




+int destroy_region_works = 0;
+
  


Global name, prefix with kvm_.  Does it actually need to be global?

 
Index: qemu-kvm/target-i386/qemu-kvm-arch.h

===
--- /dev/null
+++ qemu-kvm/target-i386/qemu-kvm-arch.h
@@ -0,0 +1,17 @@
+/*
+ * qemu/kvm x86 integration
+ *
+ * Copyright (C) 2006-2008 Qumranet Technologies
+ * Copyright (C) 2009 Silicon Graphics Inc.
+ *
+ * Licensed under the terms of the GNU GPL version 2 or higher.
+ */
+#ifndef QEMU_KVM_ARCH_H
+#define QEMU_KVM_ARCH_H
+
+extern int destroy_region_works;
+
+extern int kvm_arch_must_use_aliases_source(target_phys_addr_t addr);
+extern int kvm_arch_must_use_aliases_target(target_phys_addr_t addr);
  


The header depends on target_phys_addr_t, so it must include whatever 
defines it.



--- /dev/null
+++ qemu-kvm/target-ia64/qemu-kvm-arch.h
@@ -0,0 +1,22 @@
+/*
+ * qemu/kvm ia64 integration
+ *
+ * Copyright (C) 2006-2008 Qumranet Technologies
+ * Copyright (C) 2009 Silicon Graphics Inc.
+ *
+ * Licensed under the terms of the GNU GPL version 2 or higher.
+ */
+#ifndef QEMU_KVM_ARCH_H
+#define QEMU_KVM_ARCH_H
+
+static inline int kvm_arch_must_use_aliases_source(target_phys_addr_t addr)
+{
+return 0;
+}
+
+static inline int kvm_arch_must_use_aliases_target(target_phys_addr_t addr)
+{
+return 0;
+}
+
+#endif
  


Missing other archs...

Instead of duplicating this for every arch, you can have a #define that 
tells you if you want non-trivial arch definitions, and supply the 
trivial definitions in qemu-kvm.h.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 02/04] qemu-kvm: Add header files for ia64.

2009-05-04 Thread Avi Kivity

Jes Sorensen wrote:

Zhang, Xiantao wrote:

Hi, Jes
There should be no issue here.  You may refer to 
qemu-kvm/kvm/kernel/arch/x86/include/asm/kvm.h, and it also includes 
this stuff.  I remember this stuff was used to solve the issues when 
using kernel header files in userspace and the author should be Avi:-) 
You know, kernel headers may include CONFIG_$ARCH macros but they are 
meaningless in userspace, so we need a way to bridge the gap. But anyway 
we can do some cleanup or introduce new ways to solve the issue. Xiantao


Hi Xiantao,

I noticed it's in the x86 code as well - but I still think it's silly to
add to the ia64 tree when we don't need it.

Of course, Avi should fix up his headers too :-)



It's autogenerated; but we do need a cleaner solution.

qemu upstream is working on a more inclusive Linux header takeover; we 
can use that when it's done.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Avi Kivity

Anthony Liguori wrote:

Sorry this explanation is long winded, but this is a messy situation.

In Linux, there isn't a very consistent policy about userspace kernel 
header inclusion.  On a typical Linux system, you're likely to find 
kernel headers in three places.


glibc headers (/usr/include/{linux,asm})

These headers are installed by glibc.  They very often are based on 
much older kernel versions than the kernel you have in your 
distribution.  For software that depends on these headers, very often 
this means that your software detects features being missing that are 
present on your kernel.  Furthermore, glibc only installs the headers 
it needs so very often certain headers have dependencies that aren't 
met.  A classic example is linux/compiler.h and the broken 
usbdevice_fs.h header that depends on it.  There are still 
distributions today that QEMU doesn't compile on because of this.


Today, most of QEMU's code depends on these headers.

/lib/modules/$(uname -r)/build

These are the kernel headers that are installed as part of your 
kernel.  In general, this is a pretty good place to find the headers 
that are associated with the kernel version you're actually running 
on.  However, these headers are part of the kernel build tree and are 
not always guaranteed to be includable from userspace.


random kernel tree

Developers, in particular, like to point things at their random kernel 
trees.  In general though, relying on a full kernel source tree being 
available isn't a good idea.  Kernel headers change dramatically 
across versions too so it's very likely that we would need to have a 
lot of #ifdefs dependent on kernel versions, or some of the uglier 
work arounds we have in usb-linux.c.


I think the best way to avoid #ifdefs and dependencies on 
broken/incomplete glibc headers is to include all of the Linux headers 
we need within QEMU.  The attached patch does just this.


I think there's room for discussion about whether we really want to do 
this.  We could potentially depend on some more common glibc headers 
(like asm/types.h) while bringing in less reliable headers 
(if_tun.h/virtio*).  Including them all seems like the most robust 
solution to me though.


Comments?



Thinking again about it, this is not really necessary.

In general a distro provides kernel headers matched to the running 
kernel.  For example F10 provides 
kernel-headers-2.6.27.21-170.2.56.fc10.x86_64 to go along with 
kernel-2.6.27.21-170.2.56.fc10.x86_64.  So a user running a distro 
kernel (the majority, given that most people don't inflict pain upon 
themselves unnecessarily) will have exactly the features exported by the 
kernel.


If a user compiles their own kernel, they will also have the complete 
kernel sources.  We could use --kerneldir, perhaps requiring that the 
user do a 'make headers_install' first and point kerneldir to the result.


The only deviation for this is kvm, which also comes as an external 
kernel module and therefore cannot rely on the installed kernel 
headers.  We could make the external module package (kvm-kmod) supply 
its own set of headers and install them somewhere, or we can carry them 
in qemu (much more convenient).  But I don't think we need to carry such 
a large subset of the kernel headers (which is liable to change as 
kernel headers are added).


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC 0/8] virtio: add guest MSI-X support

2009-05-04 Thread Avi Kivity

Michael S. Tsirkin wrote:

So what I see is transports providing something like:

struct virtio_interrupt_mapping {
int virtqueue;
int interrupt;
};

map_vqs_to_interrupt(dev, struct virtio_interrupt_mapping *, int nvirtqueues);
unmap_vqs(dev);
  
  

Isn't that the same thing?  Please explain the flow.



So to map vq 0 to vector 0, vq 1 to vector 1 and vq 2 to vector 2 the driver 
would do:

struct virtio_interrupt_mapping mapping[3] = { {0, 0}, {1, 1}, {2, 2} };
vec = map_vqs_to_interrupt(dev, mapping, 3);
if (vec) {
  error handling
}

and then find_vq as usual.
  


Yes, that works.

Given that pci_enable_msix() can fail, we can put the retry loop in 
virtio-pci, and instead of a static mapping, supply a dynamic mapping:


   static void get_vq_interrupt(..., int nr_interrupts, int vq)
   {
   /* reserve interrupt 0 to config changes; round-robin vqs to 
interrupts */

   return 1 + (vq % (nr_interrupts - 1));
   }

   driver_init()
   {
   map_vqs_to_interrupt(dev, get_vq_interrupt);
   }

map_vqs_to_interrupts() would call get_vq_interrupt() for each vq, 
assuming the maximum nr_interrupts, and retry with smaller nr_interrupts 
on failure.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 04/04] qemu-kvm: other archs should maintain memory mapping also.

2009-05-04 Thread Jes Sorensen

Avi Kivity wrote:

Jes Sorensen wrote:

+int destroy_region_works = 0;


Global name, prefix with kvm_.  Does it actually need to be global?


Gone, now local to qemu-kvm-x86.c. I moved the initializer into
kvm_arch_create_context() instead.

The header depends on target_phys_addr_t, so it must include whatever 
defines it.


Added an #include "cpu-all.h" which defines it.


Missing other archs...

Instead of duplicating this for every arch, you can have a #define that 
tells you if you want non-trivial arch definitions, and supply the 
trivial definitions in qemu-kvm.h.


Done, I also added a PPC header file - which may or may not be wanted
at this point. You can just cut it out if you don't think it should be
added.

What do you think of this version then?

Cheers,
Jes


Move must_use_aliases_{source,target} functions to qemu-kvm-arch and
introduce qemu-kvm-arch.h header files, making it possible to inline
functions that are no-ops on a given architecture.

This removes a lot of ugly #ifdef TARGET_I386 from qemu-kvm.c

Signed-off-by: Jes Sorensen j...@sgi.com
---
 qemu-kvm-x86.c  |   25 +++-
 qemu-kvm.c  |   53 +++-
 qemu-kvm.h  |   13 ++
 target-i386/qemu-kvm-arch.h |   17 ++
 target-ia64/qemu-kvm-arch.h |   14 +++
 target-ppc/qemu-kvm-arch.h  |   14 +++
 6 files changed, 91 insertions(+), 45 deletions(-)

Index: qemu-kvm/qemu-kvm-x86.c
===
--- qemu-kvm.orig/qemu-kvm-x86.c
+++ qemu-kvm/qemu-kvm-x86.c
@@ -28,6 +28,8 @@
 
 static int lm_capable_kernel;
 
+static int destroy_region_works = 0;
+
 int kvm_qemu_create_memory_alias(uint64_t phys_start,
  uint64_t len,
  uint64_t target_phys)
@@ -57,7 +59,10 @@
 for (i = 0; i  kvm_msr_list-nmsrs; ++i)
 	if (kvm_msr_list-indices[i] == MSR_STAR)
 	kvm_has_msr_star = 1;
-	return 0;
+
+destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
+
+return 0;
 }
 
 static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index,
@@ -856,3 +861,21 @@
 struct ioperm_data *data = _data;
 ioperm(data-start_port, data-num, data-turn_on);
 }
+
+int kvm_arch_must_use_aliases_source(target_phys_addr_t addr)
+{
+if (destroy_region_works)
+return false;
+if (addr == 0xa || addr == 0xa8000)
+return true;
+return false;
+}
+
+int kvm_arch_must_use_aliases_target(target_phys_addr_t addr)
+{
+if (destroy_region_works)
+return false;
+if (addr = 0xe000  addr  0x1ull)
+return true;
+return false;
+}
Index: qemu-kvm/qemu-kvm.c
===
--- qemu-kvm.orig/qemu-kvm.c
+++ qemu-kvm/qemu-kvm.c
@@ -767,10 +767,6 @@
 return 0;
 }
 
-#ifdef TARGET_I386
-static int destroy_region_works = 0;
-#endif
-
 int kvm_qemu_create_context(void)
 {
 int r;
@@ -795,9 +791,6 @@
 return -1;
 }
 }
-#ifdef TARGET_I386
-destroy_region_works = kvm_destroy_memory_region_works(kvm_context);
-#endif
 
 if (kvm_irqchip  kvm_has_gsi_routing(kvm_context)) {
 kvm_clear_gsi_routes(kvm_context);
@@ -829,24 +822,6 @@
 }
 
 #ifdef TARGET_I386
-static int must_use_aliases_source(target_phys_addr_t addr)
-{
-if (destroy_region_works)
-return false;
-if (addr == 0xa || addr == 0xa8000)
-return true;
-return false;
-}
-
-static int must_use_aliases_target(target_phys_addr_t addr)
-{
-if (destroy_region_works)
-return false;
-if (addr = 0xe000  addr  0x1ull)
-return true;
-return false;
-}
-
 static struct mapping {
 target_phys_addr_t phys;
 ram_addr_t ram;
@@ -905,14 +880,13 @@
 area_flags = phys_offset  ~TARGET_PAGE_MASK;
 
 if (area_flags != IO_MEM_RAM) {
-#ifdef TARGET_I386
-if (must_use_aliases_source(start_addr)) {
+if (kvm_arch_must_use_aliases_source(start_addr)) {
 kvm_destroy_memory_alias(kvm_context, start_addr);
 return;
 }
-if (must_use_aliases_target(start_addr))
+if (kvm_arch_must_use_aliases_target(start_addr))
 return;
-#endif
+
 while (size  0) {
 p = find_mapping(start_addr);
 if (p) {
@@ -936,8 +910,7 @@
 if (area_flags = TLB_MMIO)
 return;
 
-#ifdef TARGET_I386
-if (must_use_aliases_source(start_addr)) {
+if (kvm_arch_must_use_aliases_source(start_addr)) {
 p = find_ram_mapping(phys_offset);
 if (p) {
 kvm_create_memory_alias(kvm_context, start_addr, size,
@@ -945,7 +918,6 @@
 }
 return;
 }
-#endif
 
 r = kvm_register_phys_mem(kvm_context, start_addr,
   qemu_get_ram_ptr(phys_offset),
@@ -1256,10 +1228,9 @@
 if (log)
 	

Re: [PATCH 1/2] Fix cross vendor migration issue with unusable bit

2009-05-04 Thread Avi Kivity

Andre Przywara wrote:

AMDs VMCB does not have an explicit unusable segment descriptor field,
so we emulate it by using not present. This has to be setup before
the fixups, because this field is used there.

  


Applied both, thanks.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM x86_64 with SR-IOV..?

2009-05-04 Thread Sheng Yang
On Monday 04 May 2009 17:11:59 Nicholas A. Bellinger wrote:
 On Mon, 2009-05-04 at 16:20 +0800, Sheng Yang wrote:
  On Monday 04 May 2009 12:36:04 Nicholas A. Bellinger wrote:
   On Mon, 2009-05-04 at 10:09 +0800, Sheng Yang wrote:
On Monday 04 May 2009 08:53:07 Nicholas A. Bellinger wrote:
 On Sat, 2009-05-02 at 18:22 +0800, Sheng Yang wrote:
  On Thu, Apr 30, 2009 at 01:22:54PM -0700, Nicholas A. Bellinger 
wrote:
   Greetings KVM folks,
  
   I wondering if any information exists for doing SR-IOV on the
   new VT-d capable chipsets with KVM..?  From what I understand
   the patches for doing this with KVM are floating around, but I
   have been unable to find any user-level docs for actually
   making it all go against a upstream v2.6.30-rc3 code..
  
   So far I have been doing IOV testing with Xen 3.3 and
   3.4.0-pre, and I am really hoping to be able to jump to KVM for
   single-function and and then multi-function SR-IOV.  I know
   that the VM migration stuff for IOV in Xen is up and running, 
   and I assume it is being worked in for KVM instance migration
   as well..? This part is less important (at least for me :-)
   than getting a stable SR-IOV setup running under the KVM
   hypervisor..  Does anyone have any pointers for this..?
  
   Any comments or suggestions are appreciated!
 
  Hi Nicholas
 
  The patches are not floating around now. As you know, SR-IOV for
  Linux have been in 2.6.30, so then you can use upstream KVM and
  qemu-kvm(or recent released kvm-85) with 2.6.30-rc3 as host
  kernel. And some time ago, there are several SRIOV related
  patches for qemu-kvm, and now they all have been checked in.
 
  And for KVM, the extra document is not necessary, for you can
  simple assign a VF to guest like any other devices. And how to
  create VF is specific for each device driver. So just create a VF
  then assign it to KVM guest is fine.

 Greetings Sheng,

 So, I have been trying the latest kvm-85 release on a v2.6.30-rc3
 checkout from linux-2.6.git on a CentOS 5u3 x86_64 install on Intel
 IOH-5520 based dual socket Nehalem board.  I have enabled DMAR and
 Interrupt Remapping my KVM host using v2.6.30-rc3 and from what I
 can tell, the KVM_CAP_* defines from libkvm are enabled with
 building kvm-85 after './configure
 --kerneldir=/usr/src/linux-2.6.git' and the PCI passthrough code is
 being enabled in
 kvm-85/qemu/hw/device-assignment.c AFAICT..

 From there, I use the freshly installed qemu-x86_64-system binary
  to

 start a Debian 5 x86_64 HVM (that previously had been moving
 network packets under Xen for PCIe passthrough). I see the MSI-X
 interrupt remapping working on the KVM host for the passed
 -pcidevice, and the MMIO mappings from the qemu build that I also
 saw while using Xen/qemu-dm built with PCI passthrough are there as
 well..
   
Hi Nicholas
   
 But while the KVM guest is booting, I see the following
 exception(s) from qemu-x86_64-system for one of the VFs for a
 multi-function PCIe device:

 BUG: kvm_destroy_phys_mem: invalid parameters (slot=-1)
   
This one is mostly harmless.
  
   Ok, good to know..  :-)
  
 I try with one of the on-board e1000e ports (02:00.0) and I see the
 same exception along with some MSI-X exceptions from
 qemu-x86_64-system in KVM guest.. However, I am still able to see
 the e1000e and the other vxge multi-function device with lspci, but
 I am unable to dhcp or ping with the e1000e and VF from
 multi-function device fails to register the MSI-X interrupt in the
 guest..
   
Did you see the interrupt in the guest and host side?
  
   Ok, I am restarting the e1000e test with a fresh Fedora 11 install and
   KVM host kernel 2.6.29.1-111.fc11.x86_64.   After unbinding and
   attaching the e1000e single-function device at 02:00.0 to pci-stub
   with:
  
  echo "8086 10d3" > /sys/bus/pci/drivers/pci-stub/new_id
  echo 0000:02:00.0 > /sys/bus/pci/devices/0000:02:00.0/driver/unbind
  echo 0000:02:00.0 > /sys/bus/pci/drivers/pci-stub/bind
  
   I see the following the KVM host kernel ring buffer:
  
  e1000e :02:00.0: PCI INT A disabled
  pci-stub :02:00.0: PCI INT A - GSI 17 (level, low) - IRQ 17
  pci-stub :02:00.0: irq 58 for MSI/MSI-X
  
 I think you can try on-
board e1000e for MSI-X first. And please ensure correlated driver
have been loaded correctly.
  
   nod..
  
 And what do you mean by some MSI-X exceptions? Better with
the log.
  
   Ok, with the Fedora 11 installed qemu-kvm, I see the expected
   kvm_destroy_phys_mem() statements:
  
   #kvm-host qemu-kvm -m 2048 -smp 8 -pcidevice host=02:00.0
   lenny64guest1-orig.img BUG: kvm_destroy_phys_mem: invalid parameters
   (slot=-1)
   BUG: 

Re: [PATCH 04/04] qemu-kvm: other archs should maintain memory mapping also.

2009-05-04 Thread Jes Sorensen

Avi Kivity wrote:

Avi Kivity wrote:

This is the one implementing the KVM_WANT_MAPPING change.

There is in fact a call to drop_mapping() outside any #ifdef (in 
kvm_cpu_register_physical_memory()).  I'm confused... maybe we should 
make this code unconditional.


Hi Avi,

I don't follow this - if you apply my patch
0005-qemu-kvm-arch-mapping.patch this is no longer the case.

Cheers,
Jes
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qemu-kvm: fix compiler warning

2009-05-04 Thread Avi Kivity

Michael S. Tsirkin wrote:

kvm-common.h:25:7: warning: __ia64__ is not defined
  


Went in by another route.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qemu-kvm: make clean should propagate into libkvm directory

2009-05-04 Thread Avi Kivity

Michael S. Tsirkin wrote:

make 'clean' target propagate into libkvm if it's enabled

  


Applied, thanks.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] qemu-kvm: make kvm_create_pit static

2009-05-04 Thread Avi Kivity

Michael S. Tsirkin wrote:

libkvm-x86.c:55: warning: no previous prototype for ‘kvm_create_pit’

  


Applied, thanks.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 04/04] qemu-kvm: other archs should maintain memory mapping also.

2009-05-04 Thread Avi Kivity

Avi Kivity wrote:

Jes Sorensen wrote:

Avi Kivity wrote:

Hollis, does this work for you?

If not, you can add a new define KVM_WANT_MAPPING or something, and 
define it for I386 and IA64.



Hi,

This is the one implementing the KVM_WANT_MAPPING change.

Cheers,
Jes




There is in fact a call to drop_mapping() outside any #ifdef (in 
kvm_cpu_register_physical_memory()).  I'm confused... maybe we should 
make this code unconditional.





--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-kmod: fix build on kernels with kvm trace set

2009-05-04 Thread Avi Kivity

Michael S. Tsirkin wrote:

CONFIG_KVM_TRACE in kernel conflicts with the definition
in external module. external-module-compat-comm.h tried
to work around this, but this didn't work as some
code still does #include <linux/autoconf.h>
directly.

Solve this differently by s/CONFIG_KVM_TRACE/CONFIG_KMOD_KVM_TRACE/
in awk. Had to tighten regular expressions in hack-module.awk
so that they don't trigger on kvm_host.h .
  


Applied, thanks.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM x86_64 with SR-IOV..?

2009-05-04 Thread Nicholas A. Bellinger
On Mon, 2009-05-04 at 17:49 +0800, Sheng Yang wrote:
 On Monday 04 May 2009 17:11:59 Nicholas A. Bellinger wrote:
  On Mon, 2009-05-04 at 16:20 +0800, Sheng Yang wrote:
   On Monday 04 May 2009 12:36:04 Nicholas A. Bellinger wrote:
On Mon, 2009-05-04 at 10:09 +0800, Sheng Yang wrote:
 On Monday 04 May 2009 08:53:07 Nicholas A. Bellinger wrote:
  On Sat, 2009-05-02 at 18:22 +0800, Sheng Yang wrote:
   On Thu, Apr 30, 2009 at 01:22:54PM -0700, Nicholas A. Bellinger 
 wrote:
Greetings KVM folks,
   
I wondering if any information exists for doing SR-IOV on the
new VT-d capable chipsets with KVM..?  From what I understand
the patches for doing this with KVM are floating around, but I
have been unable to find any user-level docs for actually
making it all go against a upstream v2.6.30-rc3 code..
   
So far I have been doing IOV testing with Xen 3.3 and
3.4.0-pre, and I am really hoping to be able to jump to KVM for
single-function and and then multi-function SR-IOV.  I know
that the VM migration stuff for IOV in Xen is up and running, 
and I assume it is being worked in for KVM instance migration
as well..? This part is less important (at least for me :-)
than getting a stable SR-IOV setup running under the KVM
hypervisor..  Does anyone have any pointers for this..?
   
Any comments or suggestions are appreciated!
  
   Hi Nicholas
  
   The patches are not floating around now. As you know, SR-IOV for
   Linux have been in 2.6.30, so then you can use upstream KVM and
   qemu-kvm(or recent released kvm-85) with 2.6.30-rc3 as host
   kernel. And some time ago, there are several SRIOV related
   patches for qemu-kvm, and now they all have been checked in.
  
   And for KVM, the extra document is not necessary, for you can
   simple assign a VF to guest like any other devices. And how to
   create VF is specific for each device driver. So just create a VF
   then assign it to KVM guest is fine.
 
  Greetings Sheng,
 
  So, I have been trying the latest kvm-85 release on a v2.6.30-rc3
  checkout from linux-2.6.git on a CentOS 5u3 x86_64 install on Intel
  IOH-5520 based dual socket Nehalem board.  I have enabled DMAR and
  Interrupt Remapping my KVM host using v2.6.30-rc3 and from what I
  can tell, the KVM_CAP_* defines from libkvm are enabled with
  building kvm-85 after './configure
  --kerneldir=/usr/src/linux-2.6.git' and the PCI passthrough code is
  being enabled in
  kvm-85/qemu/hw/device-assignment.c AFAICT..
 
  From there, I use the freshly installed qemu-x86_64-system binary
   to
 
  start a Debian 5 x86_64 HVM (that previously had been moving
  network packets under Xen for PCIe passthrough). I see the MSI-X
  interrupt remapping working on the KVM host for the passed
  -pcidevice, and the MMIO mappings from the qemu build that I also
  saw while using Xen/qemu-dm built with PCI passthrough are there as
  well..

 Hi Nicholas

  But while the KVM guest is booting, I see the following
  exception(s) from qemu-x86_64-system for one of the VFs for a
  multi-function PCIe device:
 
  BUG: kvm_destroy_phys_mem: invalid parameters (slot=-1)

 This one is mostly harmless.
   
Ok, good to know..  :-)
   
  I try with one of the on-board e1000e ports (02:00.0) and I see the
  same exception along with some MSI-X exceptions from
  qemu-x86_64-system in KVM guest.. However, I am still able to see
  the e1000e and the other vxge multi-function device with lspci, but
  I am unable to dhcp or ping with the e1000e and VF from
  multi-function device fails to register the MSI-X interrupt in the
  guest..

 Did you see the interrupt in the guest and host side?
   
Ok, I am restarting the e1000e test with a fresh Fedora 11 install and
KVM host kernel 2.6.29.1-111.fc11.x86_64.   After unbinding and
attaching the e1000e single-function device at 02:00.0 to pci-stub
with:
   
   echo "8086 10d3" > /sys/bus/pci/drivers/pci-stub/new_id
   echo 0000:02:00.0 > /sys/bus/pci/devices/0000:02:00.0/driver/unbind
   echo 0000:02:00.0 > /sys/bus/pci/drivers/pci-stub/bind
   
I see the following the KVM host kernel ring buffer:
   
   e1000e :02:00.0: PCI INT A disabled
   pci-stub :02:00.0: PCI INT A - GSI 17 (level, low) - IRQ 17
   pci-stub :02:00.0: irq 58 for MSI/MSI-X
   
  I think you can try on-
 board e1000e for MSI-X first. And please ensure correlated driver
 have been loaded correctly.
   
nod..
   
  And what do you mean by some MSI-X exceptions? Better with
 the log.
   
 Ok, with the Fedora 11 installed qemu-kvm, I see the expected
kvm_destroy_phys_mem() statements:
   

Re: [PATCH 1/8] virtio: add request_vqs/free_vqs operations

2009-05-04 Thread Rusty Russell
On Mon, 27 Apr 2009 10:01:53 pm Michael S. Tsirkin wrote:
 This adds 2 new optional virtio operations: request_vqs/free_vqs. They will be
 used for MSI support, because MSI needs to know the total number of vectors
 upfront.
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com

Hi Michael,

  Thanks for this work!  But this interface is horrible.  Either probe for the
number of vqs in virtio_pci, or change find_vq to

int (*find_vqs)(struct virtio_device *, unsigned max,
 struct virtqueue *vqs[]);

Thanks,
Rusty.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/8] virtio: add request_vqs/free_vqs operations

2009-05-04 Thread Michael S. Tsirkin
On Mon, May 04, 2009 at 09:02:20PM +0930, Rusty Russell wrote:
 On Mon, 27 Apr 2009 10:01:53 pm Michael S. Tsirkin wrote:
  This adds 2 new optional virtio operations: request_vqs/free_vqs. They will 
  be
  used for MSI support, because MSI needs to know the total number of vectors
  upfront.
  
  Signed-off-by: Michael S. Tsirkin m...@redhat.com
 
 Hi Michael,

Hi Rusty,

   Thanks for this work!  But this interface is horrible.  Either probe for the
 number of vqs in virtio_pci, or change find_vq to
 
   int (*find_vqs)(struct virtio_device *, unsigned max,
struct virtqueue *vqs[]);

I'm happier with the latter option: it's easy for a host to expose
support for a very large number of vqs, and I don't want them to
waste resources if guest does not use them.

Thanks for the feedback!

 Thanks,
 Rusty.

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Arnd Bergmann
On Sunday 03 May 2009, Anthony Liguori wrote:
 A classic example is linux/compiler.h and the broken usbdevice_fs.h
 header that depends on it.  There are still distributions today that
 QEMU doesn't compile on because of this.

Can you clarify this? I can't find any version of usbdevice_fs.h that
ever included linux/compiler.h (make headers_check would warn about that),
and the only construct used in there that comes from compiler.h is
the __user annotation, which gets stripped in 'make headers_install',
and has been since 2006.

  +# Linux kernel headers CFLAGS
 +if test -z $kerneldir ; then
 +linux_cflags=-I$source_path/linux
 +else
 +linux_cflags=-I$kerneldir/include
 +if test \( $cpu = i386 -o $cpu = x86_64 \) \
 +   -a -d $kerneldir/arch/x86/include ; then
 +   linux_cflags=$linux_cflags -I$kerneldir/arch/x86/include
 +elif test $cpu = ppc -a -d $kerneldir/arch/powerpc/include ;
 then +   linux_cflags=$linux_cflags -I$kerneldir/arch/powerpc/include
 +elif test -d $kerneldir/arch/$cpu/include ; then
 +   linux_cflags=$linux_cflags -I$kerneldir/arch/$cpu/include
 +fi
 +fi

arch/*/include is not the right place to look for user headers.
I think it would be better to assume that the user only points to
valid exported headers, so look for linux/version.h to check that
the files have been configured and look for the absence of
kvm_host.h to make sure that the user did not point to plain
kernel sources.

The exported headers already handle the asm/ links correctly, so
I think you never need to do anything architecture specific
like your fixup.sed.

 +CORE_HDRS=linux/types.h linux/posix_types.h linux/stddef.h linux/compiler.h
 +CORE_HDRS+=linux/byteorder/little_endian.h linux/byteorder/big_endian.h
 +CORE_HDRS+=linux/swab.h linux/ioctl.h
 +
 +CORE_HDRS+=asm-generic/int-ll64.h asm-generic/int-l64.h asm-generic/ioctl.h
 +
 +CORE_HDRS+=asm-x86/types.h asm-x86/posix_types.h
 +CORE_HDRS+=asm-x86/posix_types_32.h asm-x86/posix_types_64.h
 +CORE_HDRS+=asm-x86/byteorder.h asm-x86/swab.h asm-x86/ioctl.h
 +
 +CORE_HDRS+=asm-powerpc/types.h asm-powerpc/posix_types.h
 +CORE_HDRS+=asm-powerpc/byteorder.h asm-powerpc/swab.h asm-powerpc/ioctl.h
 +
 +CORE_HDRS+=asm-sparc/types.h asm-sparc/posix_types.h
 +CORE_HDRS+=asm-sparc/byteorder.h asm-sparc/swab.h asm-sparc/ioctl.h
 +CORE_HDRS+=asm-sparc/asi.h 
 +
 +CORE_HDRS+=asm-arm/types.h asm-arm/posix_types.h
 +CORE_HDRS+=asm-arm/byteorder.h asm-arm/swab.h asm-arm/ioctl.h
 +
 +CORE_HDRS+=asm-parisc/types.h asm-parisc/posix_types.h
 +CORE_HDRS+=asm-parisc/byteorder.h asm-parisc/swab.h asm-parisc/ioctl.h

I don't see the need to copy all the core headers. These should have
been working for ages, and hardly ever see changes that are relevant
to kvm. 

The exceptions are linux/stddef.h and linux/compiler.h, which are
not exported and should never be used outside of the kernel.

 +# Kernel Virtual Machine interface
 +KVM_HDRS=linux/kvm.h linux/kvm_para.h
 +KVM_HDRS+=asm-x86/kvm.h asm-x86/kvm_para.h
 +KVM_HDRS+=asm-powerpc/kvm.h asm-powerpc/kvm_para.h
 +
 +# VirtIO paravirtual IO framework
 +VIRTIO_HDRS=linux/virtio_config.h linux/virtio_net.h linux/virtio_blk.h
 +VIRTIO_HDRS+=linux/virtio_console.h linux/virtio_balloon.h

These should be copied into the qemu source tree, but not at configure
time. They should just reflect the latest upstream version. Qemu already
needs to handle older kernel versions at run time, and by having the
very latest version in the source tree, you can make sure that qemu
will run on any kernel version.

For asm/kvm.h and asm/kvm-para.h, you can have hard-coded files
multiplexing between the architectures, as you would otherwise
generate from your fixup.sed.

 +# tun/tap interfaces
 +TUN_HDRS=linux/if_tun.h linux/if_ether.h
 +
 +# timers
 +TIMER_HDRS=linux/rtc.h linux/hpet.h
 +
 +# USB pass through
 +USB_HDRS=linux/usbdevice_fs.h linux/magic.h
 +
 +# IDE/FD
 +DISK_HDRS=linux/cdrom.h linux/fd.h
 +
 +# Parallel port
 +PPORT_HDRS=linux/ppdev.h linux/parport.h

For all of these, I would again fall back on the distro-provided
headers. You might not get the latest versions, but at least you
can assume that any kernel that the distro provides will also
at least support the ABI from these headers.

Arnd 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: x86: Drop request_nmi from stats

2009-05-04 Thread Avi Kivity

Jan Kiszka wrote:

The stats entry request_nmi is no longer used as the related user space
interface was dropped. So clean it up.
  


Applied, thanks.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Arnd Bergmann
On Monday 04 May 2009, Mark McLoughlin wrote:
 Right, but if you e.g. try to build a newer qemu-kvm on F10, you
 currently need newer kvm kernel headers - IMHO, we should use #ifdef to
 allow newer qemu-kvm build with older kvm headers.

I think the kvm and virtio headers should just be shipped with
qemu-kvm in their latest versions, rather than relying on the
ones from the kernel. Everything else should come from the
distro-supplied glibc kernel headers.

Arnd 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Anthony Liguori

Stefan Weil wrote:

Anthony Liguori schrieb:
  


For Debian systems, those headers are installed by package linux-libc-dev.
There are also packages for cross compilation in emdebian
(linux-libc-dev-mips-cross, linux-libc-dev-powerpc-cross, ...).

Yes, those headers did not always match the features of the current kernel,
so --enable-kvm did not work. This is fixed now - there is a linux-libc-dev
2.6.29-3 which is up-to-date.

So, at the moment I see no need to fill the QEMU source tree with
linux header files.
  


We can not just rely on everyone who uses QEMU to use the latest version 
of Debian...


The fact is, linux-libc-dev is *not* meant for applications to use as 
the official kernel ABI.  We shouldn't depend on it.


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Anthony Liguori

Arnd Bergmann wrote:

On Monday 04 May 2009, Mark McLoughlin wrote:
  

Right, but if you e.g. try to build a newer qemu-kvm on F10, you
currently need newer kvm kernel headers - IMHO, we should use #ifdef to
allow newer qemu-kvm build with older kvm headers.



I think the kvm and virtio headers should just be shipped with
qemu-kvm in their latest versions, rather than relying on the
ones from the kernel. Everything else should come from the
distro-supplied glibc kernel headers.
  


Just to reiterate, because I should have mentioned it in the original 
note, we need virtio to be buildable on non-Linux systems so those 
headers must not depend on having distro glibc headers.


Regards,

Anthony Liguori


Arnd 
  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Anthony Liguori

Avi Kivity wrote:
At least on Fedora, kernel-headers is.  It is installed in 
/usr/include/linux and is synced (sorta) to the installed kernel.


It's not the case with Ubuntu.


Carrying a subset of kernel headers is a bit too much, IMO.


Carrying virtio, kvm, and if_tun would be sufficient IMO.  I think 
depending on /usr/include/linux is okay for kvm and if_tun, but virtio 
needs to be buildable without /usr/include/linux.


Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Avi Kivity

Anthony Liguori wrote:



I don't see the need to copy all the core headers. These should have
been working for ages, and hardly ever see changes that are relevant
to kvm.   


If we want to use virtio_*.h instead of duplicating the copies as we 
are now, then we need all of the core headers too or else it won't be 
able to compile on systems that do not have Linux libc headers (like 
win32).


qemu provides virtio, it doesn't consume it.  We can merge the virtio 
headers and remove the linuxisms.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Christoph Hellwig
On Mon, May 04, 2009 at 08:13:16AM -0500, Anthony Liguori wrote:
 The fact is, linux-libc-dev is *not* meant for applications to use as 
 the official kernel ABI.  We shouldn't depend on it.

Umm, it is.  That's exactly what it is for.  Note that the
name of the package varies depending on the distro, but those headers
in /usr/include/linux/ are the ABI for features not shimmed by libc.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Christoph Hellwig
On Mon, May 04, 2009 at 04:38:12PM +0300, Avi Kivity wrote:
 qemu provides virtio, it doesn't consume it.  We can merge the virtio 
 headers and remove the linuxisms.

Yeah.  virtio is an on-the-(virtual)-wire protocol, not a kernel ABI in
the traditional sense.  qemu should have its own definition.  For kernel
features qemu uses (mostly kvm, but also the scsi generic ioctl, for
example) it should just use the installed kernel headers, and not build
the feature if they are too old.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM-AUTOTEST] [PATCH] support for remote migration

2009-05-04 Thread yogi
Hello everyone,

I would like to resubmit the patch adding support for remote migration to
kvm-autotest, revised according to Michael Goldish's suggestions.

To use this patch, the following seven parameters should be added to the
existing migration test:

remote_dst = yes
hostip = localhost ip or name
remoteip = remote host ip or name
remuser = root
rempassword = password
qemu_path_dst = qemu binary path on remote host
image_dir_dst = images dir on remote host


For example:
- migrate:  install setup
type = migration
vms +=  dst
migration_test_command = help
kill_vm_on_error = yes
hostip = 192.168.1.2
remoteip = 192.168.1.3
remuser = root
rempassword = 123456
remote_dst = yes
qemu_path_dst = /tmp/kvm_autotest_root/qemu
image_dir_dst = /tmp/kvm_autotest_root/images

variants:

The parameter remote_dst = yes indicates that the VM dst should be
started on the remote host. If the parameters qemu_path_dst and
image_dir_dst are not set, it is assumed that the qemu binary and image
paths are the same on both the local and remote hosts.

 Regarding remote_login:
 
 - Why should remote_login return a session when it gets an unexpected login 
 prompt? If you get a login prompt doesn't that mean something went wrong? The 
 username is always provided in the ssh command line, so we shouldn't expect 
 to receive a login prompt -- or am I missing something? I am pretty confident 
 this is true in the general case, but maybe it's different when ssh keys have 
 been exchanged between the hosts.
 
 - I think it makes little sense to return a session object when you see a 
 login prompt because that session will be useless. You can't send any 
 commands to it because you don't have a shell prompt yet. Any command you 
 send will be interpreted as a username, and will most likely be the wrong 
 username.
 
 - When a guest is in the process of booting and we try to log into it, 
 remote_login sometimes fails because it gets an unexpected login prompt. This 
 is good, as far as I understand, because it means the guest isn't ready yet 
 (still booting). The next time remote_login attempts to log in, it usually 
 succeeds. If we consider an unexpected login prompt OK, we pass login 
 attempts that actually should have failed (and the resulting sessions will be 
 useless anyway).
 
I have removed this from the current patch, so the remote_login
function is now unchanged. I will recheck my machine configuration and
submit it as a new patch if necessary. I had exchanged ssh keys between
the hosts (both local and remote), but the login sessions seem to
terminate with "Got unexpected login prompt".
 Other things:
 
 - If I understand correctly, remote migration will only work if the remote 
 qemu binary path is exactly the same as the local one. Maybe we should 
 receive a qemu path parameter that will allow for some flexibility.
Updated the patch with this option by providing two new parameters,
qemu_path_dst and image_dir_dst.

 - In VM.make_qemu_command(), in the code that handles redirections, you add 
 'self.ssh_port = host_port'. I don't think this is correct because there can 
 be multiple redirections, unrelated to SSH, so you certainly shouldn't assume 
 that the only redirection is an SSH one. When you want the host port 
 redirected to the guest's SSH port, you should use 
 self.get_port(int(self.params.get(ssh_port))). This will also work if for 
 some reason 'ssh_port' changes while the guest is alive.

Yes, I should not have done that, so I also removed it from this patch.
 - It seems that the purpose of 'remote = dst' is to indicate to 'dst' that it 
 should be started as a remote VM. The preferred way to do this is to pass 
 something like 'remote_dst = yes' and then in VM.create() you can test for 
 params.get(remote) == yes. See Addressing objects in the wiki 
 (http://www.linux-kvm.org/page/KVM-Autotest/Parameters#Addressing_objects_.28VMs.2C_images.2C_NICs_etc.29).
 In general, any parameter you want to pass to a specific VM, you pass using 
 param_vmname = value, e.g. 'mem_dst = 128', and then in VM.create() the 
 parameter is accessible without the VM name extension (e.g. 
 self.params.get(mem) will equal 128).
updated the patch with the above suggestion

Thank you for your suggestions, Michael; they were very helpful (sorry I
took so long to reply -- I was traveling, so I was not able to reply and
resubmit the patch).

Thx
yogi

 kvm_tests.py |2 +-
 kvm_vm.py|   54 +++---
 2 files changed, 44 insertions(+), 12 deletions(-)


Signed-off-by: Yogananth Subramanian anant...@in.ibm.com
---
diff -aurp kvm-autotest.orgi/client/tests/kvm_runtest_2//kvm_tests.py kvm-autotest/client/tests/kvm_runtest_2//kvm_tests.py
--- kvm-autotest.orgi/client/tests/kvm_runtest_2//kvm_tests.py	2009-04-29 18:33:10.0 +
+++ kvm-autotest/client/tests/kvm_runtest_2//kvm_tests.py	

Re: [PATCH 1/2] don't start cpu main loop while there is still init work to do.

2009-05-04 Thread Glauber Costa
On Mon, May 04, 2009 at 11:30:58AM +0300, Avi Kivity wrote:
 Glauber Costa wrote:
 As soon as we call kvm_init_vcpu(), we start the vcpu thread.
 However, there is still things that has to be done, as soon
 as the new CPUState is created. Examples include initializing the
 apic, halting the cpu, etc.

 Without this patch, it is possible that the cpu may want to start
 using those things, before initializing them, leading to segfaults.
 We introduce another state variable, initialized, meaning that
 the cpu is already created, but not totally initialized,
 to serialize it.

 Before this patch:
 (qemu) cpu_set X online = segfaults ~ 80 % of the time
 After this patch:
 (qemu) cpu_set X online = works.

   

 Is it possible to move all those things to the vcpu thread, so it  
 serializes naturally?
Everything is possible. moving everything to inside cpu_x86_init would be best,
IMHO. We have to remember qemu will have the same problem when kvm gets in 
there.

However, we might as well remember that cpu_x86_init creates a x86 cpu. It does 
not
have to be a pc cpu. So initializing apic and the like inside cpu_x86_init 
could break
this separability. Of course, right now we don't do anything other than pc, so 
we might
not care. But theoretically...


 I'd like to avoid vcpu ioctls from more than one thread, in case we ever  
 move to a syscall implementation.

Although I don't see exactly what your point is here.
We're just adding a serialization point through pthread functions, not doing
any ioctl from
the outside.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Lennart Sorensen
On Mon, May 04, 2009 at 08:13:16AM -0500, Anthony Liguori wrote:
 We can not just rely on everyone who uses QEMU to use the latest version  
 of Debian...

 The fact is, linux-libc-dev is *not* meant for applications to use as  
 the official kernel ABI.  We shouldn't depend on it.

Actually I think that is exactly what it is intended to be and exactly
what you should do.

-- 
Len Sorensen
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] don't start cpu main loop while there is still init work to do.

2009-05-04 Thread Glauber Costa
On Mon, May 04, 2009 at 05:33:26PM +0300, Avi Kivity wrote:
 Glauber Costa wrote:
 I'd like to avoid vcpu ioctls from more than one thread, in case we 
 ever  move to a syscall implementation.
 

 Although I don't see exactly what's your point in here.
 We're just adding a serialization points through pthreads function, not 
 doing any ioctl from
 the outside.
   

 Doesn't the lapic creation call KVM_CREATE_LAPIC?
Oh yeah, that.

Maybe we could then move kvm_vcpu_init to the end of pc_new_cpu.

This way we don't break the separability of pc and x86 concepts. We would then 
issue the lapic creation
ioctl right after the vcpu is created.

How would you feel about it?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Paravirtualisation or not?

2009-05-04 Thread howard chen
Hey,

I am comparing Xen and KVM to see which one is suitable for my usage.


From the FAQ: 
http://www.linux-kvm.org/page/FAQ#What_is_the_difference_between_kvm_and_Xen.3F

It said:

 kvm does not support paravirtualization for cpu but may support 
 paravirtualization for device drivers to improve I/O performances.


Does it mean using paravirtualization is good? So Xen is faster if
we are running a modified OS which supports paravirtualization?


In fact, we have tried Xen, running CentOS 5.0 on the latest Intel Quad
core CPU. Performance is not bad, but we are already interested in trying
other solutions.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Paravirtualisation or not?

2009-05-04 Thread Javier Guerra
On Mon, May 4, 2009 at 9:49 AM, howard chen howac...@gmail.com wrote:
 On Mon, May 4, 2009 at 10:44 PM, Pasi Kärkkäinen pa...@iki.fi wrote:
 On Mon, May 04, 2009 at 10:40:00PM +0800, howard chen wrote:
 Yes, paravirtualization is good. If running KVM, use paravirtualized network
 and disk/block drivers for better performance.

 So does it mean generally Xen is more optimized than KVM for speed?


no

Xen started as paravirtualization-only, and later got full
virtualization capabilities, mainly to run windows guests.

KVM is full-virtualization-only.  if things stopped there, then yes,
Xen would be much faster than kvm.

but in almost all cases, the biggest bottleneck (by far) isn't the CPU,
it's I/O. adding paravirtualization drivers to a fully virtualized
guest brings it roughly to the same speed level as a PV guest.  that
makes kvm comparable to Xen in most workloads.

there are some real advantages of kvm:

- fewer context switches needed to make a block or packet go from guest
to hardware and vice versa

- paravirtualized drivers widely available both for Linux and Windows
(Xen's drivers on windows can be hard and/or expensive to get)

- tight work with the qemu/kernel guys makes big advances in throughput.
 i recall that virtio-net can go near 2Gbit with little tuning, almost
twice the best Xen numbers.

of course, there are also several hard, real advantages of Xen:

- the hypervisor's scheduler is more appropriate for dataserver
managers that sell VMs

- wider recognition from supporting companies (changing quickly)

several more for each side that i don't remember right now, i'm sure

-- 
Javier
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Avi Kivity

Anthony Liguori wrote:

Thinking again about it, this is not really necessary.

In general a distro provides kernel headers matched to the running 
kernel.  For example F10 provides 
kernel-headers-2.6.27.21-170.2.56.fc10.x86_64 to go along with 
kernel-2.6.27.21-170.2.56.fc10.x86_64.  So a user running a distro 
kernel (the majority, given that most people don't inflict pain upon 
themselves unnecessarily) will have exactly the features exported by 
the kernel.


kernel-headers is not usually installed by default.  




It is:


$ rpm -q --whatrequires kernel-headers
glibc-headers-2.9-3.x86_64


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Paravirtualisation or not?

2009-05-04 Thread howard chen
On Mon, May 4, 2009 at 10:44 PM, Pasi Kärkkäinen pa...@iki.fi wrote:
 On Mon, May 04, 2009 at 10:40:00PM +0800, howard chen wrote:
 Yes, paravirtualization is good. If running KVM, use paravirtualized network
 and disk/block drivers for better performance.

So does it mean generally Xen is more optimized than KVM for speed?

Thanks.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Edgar E. Iglesias
On Mon, May 04, 2009 at 08:15:58AM -0500, Anthony Liguori wrote:
 Edgar E. Iglesias wrote:
 I don't feel very strongly about it but my gut feeling tells me we
 shouldn't be doing this.
   

 We have to.  It's not just KVM, it's virtio, tun/tap, and as we add more 
 things to the Linux kernel to support QEMU, it'll just grow larger.

I'm not sure we have to. QEMU users that build from source can IMO
be expected to update kernel headers if required and if our configure
is explicit about it: a "You didn't get feature x because you have an
old kernel or old kernel headers" kind of warning.

Another alternative is to provide an option so users can specify
where to find alternative kernel-headers. IIRC I've seen
this approach in several other projects.

I agree with you that the compat ifdefs are annoying though...

 This is how applications are supposed to use kernel headers.  It's 
 unpleasant, but that's just the way Linux is today.

Do you mean that all apps using linux header files should bring those
in?

Cheers
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: vnc protocol broken

2009-05-04 Thread Dietmar Maurer
 I think this is a known problem in qemu mentioned here on qemu-devel
 
 http://article.gmane.org/gmane.comp.emulators.qemu/42321

Ok, it is fixed in the latest qemu git code.

- Dietmar

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Paravirtualisation or not?

2009-05-04 Thread Glauber Costa
On Mon, May 4, 2009 at 11:49 AM, howard chen howac...@gmail.com wrote:
 On Mon, May 4, 2009 at 10:44 PM, Pasi Kärkkäinen pa...@iki.fi wrote:
 On Mon, May 04, 2009 at 10:40:00PM +0800, howard chen wrote:
 Yes, paravirtualization is good. If running KVM, use paravirtualized network
 and disk/block drivers for better performance.

 So does it mean generally Xen is more optimized than KVM for speed?

No, no way.

Xen pv paravirtualizes everything. Most of those things are pv out of
necessity, not for any kind
of performance tweak. It runs on machines that do not provide
hardware virtualization, so
paravirtualization is your only option.

For solutions that use hardware virtualization (such as KVM and Xen
HV), virtualization is done
by the hardware, with the help of the VMM. For things in which there
is a performance/correctness
impact of using PV, like the clock, we do it.

If you are using a paravirtual clock, and specialized block/net
drivers, you are already taking advantage
of most of the speed benefits a PV solution can provide you with.


-- 
Glauber  Costa.
Free as in Freedom
http://glommer.net

The less confident you are, the more serious you have to act.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Anthony Liguori

Arnd Bergmann wrote:

On Sunday 03 May 2009, Anthony Liguori wrote:
  

A classic example is linux/compiler.h and the broken usbdevice_fs.h
header that depends on it.  There are still distributions today that
QEMU doesn't compile on because of this.



Can you clarify this? I can't find any version of usbdevice_fs.h that
ever included linux/compiler.h (make headers_check would warn about that),
and the only construct used in there that comes from compiler.h is
the __user annotation, which gets stripped in 'make headers_install',
and has been since 2006.
  


Distros that were released before 2006 certainly had this problem.  The 
issue is that usbdevice_fs.h depends on __user.



+CORE_HDRS=linux/types.h linux/posix_types.h linux/stddef.h linux/compiler.h
+CORE_HDRS+=linux/byteorder/little_endian.h linux/byteorder/big_endian.h
+CORE_HDRS+=linux/swab.h linux/ioctl.h
+
+CORE_HDRS+=asm-generic/int-ll64.h asm-generic/int-l64.h asm-generic/ioctl.h
+
+CORE_HDRS+=asm-x86/types.h asm-x86/posix_types.h
+CORE_HDRS+=asm-x86/posix_types_32.h asm-x86/posix_types_64.h
+CORE_HDRS+=asm-x86/byteorder.h asm-x86/swab.h asm-x86/ioctl.h
+
+CORE_HDRS+=asm-powerpc/types.h asm-powerpc/posix_types.h
+CORE_HDRS+=asm-powerpc/byteorder.h asm-powerpc/swab.h asm-powerpc/ioctl.h
+
+CORE_HDRS+=asm-sparc/types.h asm-sparc/posix_types.h
+CORE_HDRS+=asm-sparc/byteorder.h asm-sparc/swab.h asm-sparc/ioctl.h
+CORE_HDRS+=asm-sparc/asi.h 
+

+CORE_HDRS+=asm-arm/types.h asm-arm/posix_types.h
+CORE_HDRS+=asm-arm/byteorder.h asm-arm/swab.h asm-arm/ioctl.h
+
+CORE_HDRS+=asm-parisc/types.h asm-parisc/posix_types.h
+CORE_HDRS+=asm-parisc/byteorder.h asm-parisc/swab.h asm-parisc/ioctl.h



I don't see the need to copy all the core headers. These should have
been working for ages, and hardly ever see changes that are relevant
to kvm. 
  


If we want to use virtio_*.h instead of duplicating the copies as we are 
now, then we need all of the core headers too or else it won't be able 
to compile on systems that do not have Linux libc headers (like win32).



+# Kernel Virtual Machine interface
+KVM_HDRS=linux/kvm.h linux/kvm_para.h
+KVM_HDRS+=asm-x86/kvm.h asm-x86/kvm_para.h
+KVM_HDRS+=asm-powerpc/kvm.h asm-powerpc/kvm_para.h
+
+# VirtIO paravirtual IO framework
+VIRTIO_HDRS=linux/virtio_config.h linux/virtio_net.h linux/virtio_blk.h
+VIRTIO_HDRS+=linux/virtio_console.h linux/virtio_balloon.h



These should be copied into the qemu source tree, but not at configure
time. They should just reflect the latest upstream version. Qemu already
needs to handle older kernel versions at run time, and by having the
very latest version in the source tree, you can make sure that qemu
will run on any kernel version.
  


Yes, if it isn't clear, this Makefile is meant to be used by the 
maintainers to bring the headers into git.  I didn't post the headers 
because it would have made the note annoyingly long.


Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] don't start cpu main loop while there is still init work to do.

2009-05-04 Thread Avi Kivity

Glauber Costa wrote:

On Mon, May 04, 2009 at 05:33:26PM +0300, Avi Kivity wrote:
  

Glauber Costa wrote:

I'd like to avoid vcpu ioctls from more than one thread, in case we 
ever  move to a syscall implementation.



Although I don't see exactly what's your point in here.
We're just adding a serialization points through pthreads function, not doing 
any ioctl from
the outside.
  
  

Doesn't the lapic creation call KVM_CREATE_LAPIC?


Oh yeah, that.

Maybe we could then move kvm_vcpu_init to the end of pc_new_cpu.

This way we don't break the separability of pc and x86 concepts. We would then 
issue the lapic creation
ioctl right after the vcpu is created.

How would you feel about it


Like I said, I'd like to see the lapic creation come from the vcpu thread.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Avi Kivity

Mark McLoughlin wrote:

On Mon, 2009-05-04 at 12:08 +0300, Avi Kivity wrote:
  
In general a distro provides kernel headers matched to the running 
kernel.  For example F10 provides 
kernel-headers-2.6.27.21-170.2.56.fc10.x86_64 to go along with 
kernel-2.6.27.21-170.2.56.fc10.x86_64.  So a user running a distro 
kernel (the majority, given that most people don't inflict pain upon 
themselves unnecessarily) will have exactly the features exported by the 
kernel.



Right, but if you e.g. try to build a newer qemu-kvm on F10, you
currently need newer kvm kernel headers - IMHO, we should use #ifdef to
allow newer qemu-kvm build with older kvm headers.

  


qemu build against new headers should work fine on older hosts -- we 
discover features at runtime.  But I agree it's nice to be able to build 
against older headers.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Custom BIOS supported size

2009-05-04 Thread Cristi Magherusan

On Mon, 2009-05-04 at 16:48 +0300, Avi Kivity wrote:
 Cristi Magherusan wrote:
  Hello,
 
  Which is the maximum size supported for a custom BIOS image(eg.
  coreboot-based)? I tried some 256K coreboot BIOS images and seemed to
  work fine, but it blowed up with a 3MB image (which by the way works on
  qemu just fine). 

 
 256K is the maximum with kvm.  It can easily be increased, but we need a 
 small kernel change on Intel to store the real-mode TSS.
 
 
Hi Avi,

Thanks for your answer. What would be the upper bound up to which we can
extend it? May I somehow help in getting this done?

Best regards,
Cristi

-- 
Ing. Cristi Măgherușan, System/Network Engineer
Technical University of Cluj-Napoca, Romania
http://cc.utcluj.ro  +40264 401247

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Anthony Liguori

Edgar E. Iglesias wrote:

I don't feel very strongly about it but my gut feeling tells me we
shouldn't be doing this.
  


We have to.  It's not just KVM, it's virtio, tun/tap, and as we add more 
things to the Linux kernel to support QEMU, it'll just grow larger.


This is how applications are supposed to use kernel headers.  It's 
unpleasant, but that's just the way Linux is today.


Regards,

Anthony Liguori


Cheers
  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Custom BIOS supported size

2009-05-04 Thread Avi Kivity

Cristi Magherusan wrote:

Hello,

Which is the maximum size supported for a custom BIOS image(eg.
coreboot-based)? I tried some 256K coreboot BIOS images and seemed to
work fine, but it blowed up with a 3MB image (which by the way works on
qemu just fine). 
  


256K is the maximum with kvm.  It can easily be increased, but we need a 
small kernel change on Intel to store the real-mode TSS.





--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] don't start cpu main loop while there is still init work to do.

2009-05-04 Thread Avi Kivity

Glauber Costa wrote:
I'd like to avoid vcpu ioctls from more than one thread, in case we ever  
move to a syscall implementation.



Although I don't see exactly what's your point in here.
We're just adding a serialization points through pthreads function, not doing 
any ioctl from
the outside.
  


Doesn't the lapic creation call KVM_CREATE_LAPIC?

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Avi Kivity

Anthony Liguori wrote:

Stefan Weil wrote:

Anthony Liguori schrieb:
 
For Debian systems, those headers are installed by package 
linux-libc-dev.

There are also packages for cross compilation in emdebian
(linux-libc-dev-mips-cross, linux-libc-dev-powerpc-cross, ...).

Yes, those headers did not always match the features of the current 
kernel,
so --enable-kvm did not work. This is fixed now - there is a 
linux-libc-dev

2.6.29-3 which is up-to-date.

So, at the moment I see no need to fill the QEMU source tree with
linux header files.
  


We can not just rely on everyone who uses QEMU to use the latest 
version of Debian...


The fact is, linux-libc-dev is *not* meant for applications to use as 
the official kernel ABI.  We shouldn't depend on it.


At least on Fedora, kernel-headers is.  It is installed in 
/usr/include/linux and is synced (sorta) to the installed kernel.


Carrying a subset of kernel headers is a bit too much, IMO.

kvm is a special case since it is available externally.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Anthony Liguori

Avi Kivity wrote:

Anthony Liguori wrote:

Comments?



Thinking again about it, this is not really necessary.

In general a distro provides kernel headers matched to the running 
kernel.  For example F10 provides 
kernel-headers-2.6.27.21-170.2.56.fc10.x86_64 to go along with 
kernel-2.6.27.21-170.2.56.fc10.x86_64.  So a user running a distro 
kernel (the majority, given that most people don't inflict pain upon 
themselves unnecessarily) will have exactly the features exported by 
the kernel.


kernel-headers is not usually installed by default.  Also, I'd rather 
not deal with #ifdef code as we introduce new features like TUN_VNET_HDR.


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Paravirtualisation or not?

2009-05-04 Thread Pasi Kärkkäinen
On Mon, May 04, 2009 at 10:40:00PM +0800, howard chen wrote:
 Hey,
 
 I am comparing Xen and KVM to see which one is suitable for me usage.
 
 
 From the FAQ: 
 http://www.linux-kvm.org/page/FAQ#What_is_the_difference_between_kvm_and_Xen.3F
 
 It said:
 
  kvm does not support paravirtualization for cpu but may support 
  paravirtualization for device drivers to improve I/O performances.
 
 
 Do does it mean using paravirtualization is good? So Xen is faster if
 we can are running modified OS which support paravirtualization?


Yes, paravirtualization is good. If running KVM, use paravirtualized network
and disk/block drivers for better performance.
 
Xen paravirtual guests (domUs) use paravirtualized drivers out of the box.

 
 In fact, we have tried Xen, running CentOS 5.0 on latest Intel Quad
 core CPU. Performance is not bad, but already have interest in try
 other solutions.


You should be running CentOS 5.3, it has a lot of updates after 5.0.

-- Pasi
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [RFC] Bring in all the Linux headers we depend on in QEMU

2009-05-04 Thread Anthony Liguori

Avi Kivity wrote:

random kernel tree

Developers, in particular, like to point things at their random 
kernel trees.  In general though, relying on a full kernel source 
tree being available isn't a good idea.  Kernel headers change 
dramatically across versions too so it's very likely that we would 
need to have a lot of #ifdefs dependent on kernel versions, or some 
of the uglier work arounds we have in usb-linux.c.


I think the best way to avoid #ifdefs and dependencies on 
broken/incomplete glibc headers is to include all of the Linux 
headers we need within QEMU.  The attached patch does just this.


I think there's room for discussion about whether we really want to 
do this.  We could potentially depend on some more common glibc 
headers (like asm/types.h) while bringing in less reliable headers 
(if_tun.h/virtio*).  Including them all seems like the most robust 
solution to me though.


Comments? 


I think we need to use the output of 'make headers-install', which 
removes things like __user and CONFIG_*.


I was thinking about that as a possibility too.  We still need the same 
basic infrastructure though.


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Paravirtualisation or not?

2009-05-04 Thread Pantelis Koukousoulas
 - paravirtualized drivers widely available both for Linux and Windows
 (Xen's drivers on windows can be hard and/or expensive to get)

Well, Xen has GPL PV drivers for windows (at least for networking)
which KVM doesn't have. There is a promise
but no date attached to it.

If a set of drivers essentially implementing the virtio framework
(virtio_pci, virtio_ring, virtio queues) were available for
windows, that would be *really* neat.

But that is somewhat off topic :)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-04 Thread Simon Bienlein

Hello Jan,

thank you very much for processing my request.
I [1]patched the KVM sources from Lenny and created a new Debian package. 
When I boot the VM from the Lenny CD, there is no audible signal tone.
It does not make a difference whether I start the KVM with or without 
the option -no-kvm-pit. What do I have to do in order to test the new 
function?


Thanks in advance for the further help.

Simon

1. http://patchwork.kernel.org/patch/19687/
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC] x86 instruction decoder with userspace test code

2009-05-04 Thread Masami Hiramatsu
Hi,

I've rewritten the x86(-64) instruction decoder with instruction
attribute table and a generator according to Peter's comments.

Currently, an opcode map file (x86-opcode-map.txt) is based on opcode
maps in Intel(R) Software Developers Manual Vol.2: Appendix.A, and it
contains below two types of opcode tables.

1-byte/2-byte/3-byte opcodes, which have 256 elements, are
written as below;
---
Table: table-name
Referrer: escaped-name
opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 
2nd-mnemonic ...]
 (or)
opcode: escape # escaped-name
EndTable
---

Group opcodes, which have 8 elements, are written as below;
---
GrpTable: GrpXXX
reg:  mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic 
...]
EndTable
---

These opcode maps do NOT include SSE and most of FP opcodes,
because those opcodes are not used in the kernel.

The generator(gen-insn-attr-x86.awk) translates the opcode maps
into a file which defines instruction attribute tables. The instruction
attributes are defined in inat.h and inat.c.


I attached the insn decoder with a user space test, which was originally
written by Jim. You can test that the decoder can decode instruction lengths,
as follows:

 Pull all the attached files into a directory and have a go -- e.g.,
 $ make
 $ objdump -d vmlinux | awk -f distill.awk | ./test_get_len [x86_64]

Known issues:
- 0x9b is an instruction (fwait), but the objdump treats it as a
  prefix.  For example 9b df ... can be disassembled as
fstsw ...   // wait, then store status word
  or
fwait   // wait
fnstsw ...  // store status word without waiting
  and this instruction decoder decode 0x9b as an instruction.

  Anyway, according to Jim's investigation, the single-step stopped
  after the fwait, so it's no problem.

- Illegal instruction sequences (in some data/note sections), such
  as an x86_64 instruction that starts with 0x40, or a misplaced
  0x65 prefix. We can filter out those instructions which start
  with rex or include (bad).


I'll put x86-opcode-map.txt under arch/x86/lib, gen-insn-attr-x86.awk
under arch/x86/scripts/ and generate attribute tables at build time.

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhira...@redhat.com

test_get_len: test_get_len.c insn.c inat.c inat.h insn.h insn_x86_user.h 
inat-tables.c
$(CC) -Wall -g test_get_len.c insn.c inat.c -o test_get_len

inat-tables.c: gen-insn-attr-x86.awk x86-opcode-map.txt
awk -f gen-insn-attr-x86.awk x86-opcode-map.txt  $@

clean:
rm -f *.o

clobber: clean
rm -f test_get_len inat-tables.c
# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
# Distills the disassembly as follows:
# - Removes all lines except the disassembled instructions.
# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes
# into a single line.
#
# Each output line has three tab-separated fields:
#   address, hex bytes, mnemonic

BEGIN {
	# State of the instruction currently being accumulated; an empty
	# prev_addr means no instruction has been seen yet.
	prev_addr = ""
	prev_hex = ""
	prev_mnemonic = ""
}

# Only lines that start with "<hex address>:" are handled.
/^ *[0-9a-f]+:/ {
	if (split($0, field, "\t") < 3) {
		# This is a continuation of the same insn.
		# Fewer than three tab-separated fields means there is no
		# mnemonic, so append the extra hex bytes to the pending insn.
		prev_hex = prev_hex field[2]
	} else {
		# A new instruction starts here: flush the pending one first.
		if (prev_addr != "")
			printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
		prev_addr = field[1]
		prev_hex = field[2]
		prev_mnemonic = field[3]
	}
}

END {
	# Flush the last accumulated instruction, if any.
	if (prev_addr != "")
		printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
}
#!/bin/gawk -f

BEGIN {
print /* x86 opcode map generated from x86-opcode-map.txt */
print /* Do not change this code. */
ggid = 1
geid = 1

opnd_expr = ^[A-Za-z]
ext_expr = ^\\(
sep_expr = ^\\|$
group_expr = ^Grp[0-9]+A*

imm_expr = ^[IJAO][a-z]
imm_flag[Ib] = INAT_MAKE_IMM(INAT_IMM_BYTE)
imm_flag[Jb] = INAT_MAKE_IMM(INAT_IMM_BYTE)
imm_flag[Iw] = INAT_MAKE_IMM(INAT_IMM_WORD)
imm_flag[Id] = INAT_MAKE_IMM(INAT_IMM_DWORD)
imm_flag[Iq] = INAT_MAKE_IMM(INAT_IMM_QWORD)
imm_flag[Ap] = INAT_MAKE_IMM(INAT_IMM_PTR)
imm_flag[Iz] = INAT_MAKE_IMM(INAT_IMM_VWORD32)
imm_flag[Jz] = INAT_MAKE_IMM(INAT_IMM_VWORD32)
imm_flag[Iv] = INAT_MAKE_IMM(INAT_IMM_VWORD)
imm_flag[Ob] = INAT_MOFFSET
imm_flag[Ov] = INAT_MOFFSET

modrm_expr = ^([CDEGMNPQRSUVW][a-z]+|NTA|T[0-2])
force64_expr = \\([df]64\\)
rex_expr = ^REX(\\.[XRWB]+)*
fpu_expr = ^ESC # TODO

lprefix1_expr = \\(66\\)
delete lptable1
lprefix2_expr = \\(F2\\)
delete lptable2
lprefix3_expr = \\(F3\\)
delete lptable3
max_lprefix = 4

prefix_expr = \\(Prefix\\)
prefix_num[Operand-Size] = INAT_PFX_OPNDSZ
prefix_num[REPNE] = INAT_PFX_REPNE
prefix_num[REP/REPE] = INAT_PFX_REPE
prefix_num[LOCK] = 

virtio-net not working with the latest qemu-kvm git

2009-05-04 Thread Sridhar Samudrala
When i moved to the latest qemu-kvm git tree from kvm-85, i noticed that
networking stopped working between the host and the guest.
It started working when i put the device in promiscuous mode by running
tcpdump in background on the guest. 

After browsing through the recent patches, i found that the following commit
is causing the regression.

Remove stray GSO code from virtio_net (Mark McLoughlin)
http://git.kernel.org/?p=virt/kvm/qemu-kvm.git;a=commitdiff;h=559a8f45f34cc50d1a60b4f67a06614d506b2e01

The comment doesn't seem to match with the code that is removed with this patch.

Thanks
Sridhar

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] Revert Sync idcache after emualted DMA operations foria64

2009-05-04 Thread Hollis Blanchard
On Mon, 2009-05-04 at 09:39 +0800, Zhang, Xiantao wrote:
Could you explain why this patch breaks the powerpc build?
 qemu_sync_icache has the definition for non-ai64 case, so shoudn't
 break any arch-specific build.  

cutils.o: In function `qemu_iovec_from_buffer':
/home/hollisb/source/qemu-kvm.git/cutils.c:175: undefined reference to
`qemu_cache_conf'
/home/hollisb/source/qemu-kvm.git/cutils.c:171: undefined reference to
`qemu_cache_conf'
cutils.o: In function `qemu_iovec_from_buffer':
/home/hollisb/source/qemu-kvm.git/cache-utils.h:18: undefined reference
to `qemu_cache_conf'

However, to restate my point: the build error is not the biggest problem
with this patch. The bigger problems are all the other issues I've
repeatedly described. The build break is the icing on the cake.

-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvm] virtio-net not working with the latest qemu-kvm git

2009-05-04 Thread Alex Williamson
On Mon, 2009-05-04 at 09:50 -0700, Sridhar Samudrala wrote:
 When i moved to the latest qemu-kvm git tree from kvm-85, i noticed that
 networking stopped working between the host and the guest.
 It started working when i put the device in promiscuos mode by running
 tcpdump in background on the guest. 
 
 After browsing through the recent patches, i found that the following commit
 is causing the regression.
 
 Remove stray GSO code from virtio_net (Mark McLoughlin)
 http://git.kernel.org/?p=virt/kvm/qemu-kvm.git;a=commitdiff;h=559a8f45f34cc50d1a60b4f67a06614d506b2e01
 
 The comment doesn't seem to match with the code that is removed with this 
 patch.

Yep, I agree, the removed code is not bogus.  We have to skip the vnet
header to get to the ethernet header, which we do the filtering on.

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v4 0/2] irqfd

2009-05-04 Thread Gregory Haskins
(Applies to kvm.git:7da2e3ba, plus you will also need Davide Libenzi's
eventfd_file_create() patch, which you can find here:

http://www.mail-archive.com/kvm@vger.kernel.org/msg13923.html

You can find my complete tree with kvm.git, Davide's patch, and this series
here:

http://git.kernel.org/?p=linux/kernel/git/ghaskins/vbus/linux-2.6.git;a=shortlog;h=irqfd

--

irqfd, v4

This series implements a mechanism called irqfd.  It lets you create
an eventfd based file-descriptor to inject interrupts to a kvm guest. For
more details, please see the prologue for patch 2/2.

[ Changelog:

   v4:
*) Changed allocation model to create the new fd last, after
   we get past the last potential error point by using Davide's
   new eventfd_file_create interface (Al Viro, Davide Libenzi)
*) We no longer export sys_eventfd2() since it is replaced
   functionally with eventfd_file_create();
*) Rebased to kvm.git:7da2e3ba

   v3:
*) The kernel now allocates the eventfd (need to export sys_eventfd2)
*) Added a flags field for future expansion to kvm_irqfd()
*) We properly toggle the irq level 1+0.
*) We re-use the USERSPACE_SRC_ID instead of creating our own
*) Properly check for failures establishing a poll-table with eventfd
*) Fixed fd/file leaks on failure
*) Rebased to latest kvm.git::41b76d8d04

   v2:
*) Dropped notifier_chain based callbacks in favor of
   wait_queue_t::func and file::poll based callbacks (Thanks to
   Davide for the suggestion)

   v1:
*) Initial release



We do not have a user of this interface in this series, though note
future version of virtual-bus (v4 and above) will be based on this.

Note that this series requires userspace patches for qemu-kvm.git, v3, which
you can find here: http://patchwork.kernel.org/patch/20213/

-Greg

---

Gregory Haskins (2):
  kvm: add support for irqfd via eventfd-notification interface
  eventfd: export eventfd interfaces for module use


 arch/x86/kvm/Makefile|2 -
 arch/x86/kvm/x86.c   |1 
 fs/eventfd.c |4 +
 include/linux/kvm.h  |7 ++
 include/linux/kvm_host.h |4 +
 virt/kvm/irqfd.c |  159 ++
 virt/kvm/kvm_main.c  |   11 +++
 7 files changed, 187 insertions(+), 1 deletions(-)
 create mode 100644 virt/kvm/irqfd.c

-- 
Signature
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v4 1/2] eventfd: export eventfd interfaces for module use

2009-05-04 Thread Gregory Haskins
We will re-use eventfd for implementing irqfd later in the series, and the
irqfd users will potentially live in modules.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 fs/eventfd.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 0de6ebb..2e1c2ff 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,6 +16,7 @@
 #include linux/anon_inodes.h
 #include linux/eventfd.h
 #include linux/syscalls.h
+#include linux/module.h
 
 struct eventfd_ctx {
wait_queue_head_t wqh;
@@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n)
 
return n;
 }
+EXPORT_SYMBOL_GPL(eventfd_signal);
 
 static int eventfd_release(struct inode *inode, struct file *file)
 {
@@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd)
 
return file;
 }
+EXPORT_SYMBOL_GPL(eventfd_fget);
 
 struct file *eventfd_file_create(unsigned int count, int flags)
 {
@@ -225,6 +228,7 @@ struct file *eventfd_file_create(unsigned int count, int 
flags)
 
return file;
 }
+EXPORT_SYMBOL_GPL(eventfd_file_create);
 
 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 {

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v4 2/2] kvm: add support for irqfd via eventfd-notification interface

2009-05-04 Thread Gregory Haskins
KVM provides a complete virtual system environment for guests, including
support for injecting interrupts modeled after the real exception/interrupt
facilities present on the native platform (such as the IDT on x86).
Virtual interrupts can come from a variety of sources (emulated devices,
pass-through devices, etc) but all must be injected to the guest via
the KVM infrastructure.  This patch adds a new mechanism to inject a specific
interrupt to a guest using a decoupled eventfd mechanism:  Any legal signal
on the irqfd (using eventfd semantics from either userspace or kernel) will
translate into an injected interrupt in the guest at the next available
interrupt window.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 arch/x86/kvm/Makefile|2 -
 arch/x86/kvm/x86.c   |1 
 include/linux/kvm.h  |7 ++
 include/linux/kvm_host.h |4 +
 virt/kvm/irqfd.c |  159 ++
 virt/kvm/kvm_main.c  |   11 +++
 6 files changed, 183 insertions(+), 1 deletions(-)
 create mode 100644 virt/kvm/irqfd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b43c4ef..d5fff51 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,7 +3,7 @@
 #
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-coalesced_mmio.o irq_comm.o)
+coalesced_mmio.o irq_comm.o irqfd.o)
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2d7082c..699a407 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1027,6 +1027,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_ASSIGN_DEV_IRQ:
+   case KVM_CAP_IRQFD:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 3db5d8d..5e9b861 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -415,6 +415,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#define KVM_CAP_IRQFD 31
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -454,6 +455,11 @@ struct kvm_irq_routing {
 
 #endif
 
+struct kvm_irqfd {
+   __u32 gsi;
+   __u32 flags;
+};
+
 /*
  * ioctls for VM fds
  */
@@ -498,6 +504,7 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
+#define KVM_IRQFD  _IOW(KVMIO, 0x76, struct kvm_irqfd)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 095ebb6..6a8d1c1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,7 @@ struct kvm {
struct list_head vm_list;
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
+   struct list_head irqfds;
struct kvm_vm_stat stat;
struct kvm_arch arch;
atomic_t users_count;
@@ -524,4 +525,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 #endif
 
+int kvm_irqfd(struct kvm *kvm, int gsi, int flags);
+void kvm_irqfd_release(struct kvm *kvm);
+
 #endif
diff --git a/virt/kvm/irqfd.c b/virt/kvm/irqfd.c
new file mode 100644
index 000..6c82fcb
--- /dev/null
+++ b/virt/kvm/irqfd.c
@@ -0,0 +1,159 @@
+/*
+ * irqfd: Allows an eventfd to be used to inject an interrupt to the guest
+ *
+ * Credit goes to Avi Kivity for the original idea.
+ *
+ * Copyright 2009 Novell.  All Rights Reserved.
+ *
+ * Author:
+ * Gregory Haskins ghask...@novell.com
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include linux/kvm_host.h
+#include linux/eventfd.h
+#include linux/workqueue.h
+#include linux/syscalls.h
+#include linux/wait.h
+#include linux/poll.h
+#include linux/file.h
+#include linux/list.h
+
+struct _irqfd {
+   struct kvm   *kvm;
+   int   gsi;
+   struct file  *file;
+   struct list_head  list;
+   poll_tablept;
+   wait_queue_head_t*wqh;
+   wait_queue_t  wait;

[KVM PATCH v4 0/2] irqfd

2009-05-04 Thread Gregory Haskins
(Applies to kvm.git:7da2e3ba, plus you will also need Davide Libenzi's
eventfd_file_create() patch, which you can find here:

http://www.mail-archive.com/kvm@vger.kernel.org/msg13923.html

You can find my complete tree with kvm.git, Davide's patch, and this series
here:

http://git.kernel.org/?p=linux/kernel/git/ghaskins/vbus/linux-2.6.git;a=shortlog;h=irqfd

--

irqfd, v4

This series implements a mechanism called irqfd.  It lets you create
an eventfd based file-descriptor to inject interrupts to a kvm guest. For
more details, please see the prologue for patch 2/2.

[ Changelog:

   v4:
*) Changed allocation model to create the new fd last, after
   we get past the last potential error point by using Davide's
   new eventfd_file_create interface (Al Viro, Davide Libenzi)
*) We no longer export sys_eventfd2() since it is replaced
   functionally with eventfd_file_create();
*) Rebased to kvm.git:7da2e3ba

   v3:
*) The kernel now allocates the eventfd (need to export sys_eventfd2)
*) Added a flags field for future expansion to kvm_irqfd()
*) We properly toggle the irq level 1+0.
*) We re-use the USERSPACE_SRC_ID instead of creating our own
*) Properly check for failures establishing a poll-table with eventfd
*) Fixed fd/file leaks on failure
*) Rebased to latest kvm.git::41b76d8d04

   v2:
*) Dropped notifier_chain based callbacks in favor of
   wait_queue_t::func and file::poll based callbacks (Thanks to
   Davide for the suggestion)

   v1:
*) Initial release



We do not have a user of this interface in this series, though note
future version of virtual-bus (v4 and above) will be based on this.

Note that this series requires userspace patches for qemu-kvm.git, v3, which
you can find here: http://patchwork.kernel.org/patch/20213/

-Greg

---

Gregory Haskins (2):
  kvm: add support for irqfd via eventfd-notification interface
  eventfd: export eventfd interfaces for module use


 arch/x86/kvm/Makefile|2 -
 arch/x86/kvm/x86.c   |1 
 fs/eventfd.c |4 +
 include/linux/kvm.h  |7 ++
 include/linux/kvm_host.h |4 +
 virt/kvm/irqfd.c |  159 ++
 virt/kvm/kvm_main.c  |   11 +++
 7 files changed, 187 insertions(+), 1 deletions(-)
 create mode 100644 virt/kvm/irqfd.c


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v4 2/2] kvm: add support for irqfd via eventfd-notification interface

2009-05-04 Thread Gregory Haskins
KVM provides a complete virtual system environment for guests, including
support for injecting interrupts modeled after the real exception/interrupt
facilities present on the native platform (such as the IDT on x86).
Virtual interrupts can come from a variety of sources (emulated devices,
pass-through devices, etc) but all must be injected to the guest via
the KVM infrastructure.  This patch adds a new mechanism to inject a specific
interrupt to a guest using a decoupled eventfd mechanism:  Any legal signal
on the irqfd (using eventfd semantics from either userspace or kernel) will
translate into an injected interrupt in the guest at the next available
interrupt window.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 arch/x86/kvm/Makefile|2 -
 arch/x86/kvm/x86.c   |1 
 include/linux/kvm.h  |7 ++
 include/linux/kvm_host.h |4 +
 virt/kvm/irqfd.c |  159 ++
 virt/kvm/kvm_main.c  |   11 +++
 6 files changed, 183 insertions(+), 1 deletions(-)
 create mode 100644 virt/kvm/irqfd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b43c4ef..d5fff51 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,7 +3,7 @@
 #
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-coalesced_mmio.o irq_comm.o)
+coalesced_mmio.o irq_comm.o irqfd.o)
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2d7082c..699a407 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1027,6 +1027,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_ASSIGN_DEV_IRQ:
+   case KVM_CAP_IRQFD:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 3db5d8d..5e9b861 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -415,6 +415,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#define KVM_CAP_IRQFD 31
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -454,6 +455,11 @@ struct kvm_irq_routing {
 
 #endif
 
+struct kvm_irqfd {
+   __u32 gsi;
+   __u32 flags;
+};
+
 /*
  * ioctls for VM fds
  */
@@ -498,6 +504,7 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
+#define KVM_IRQFD  _IOW(KVMIO, 0x76, struct kvm_irqfd)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 095ebb6..6a8d1c1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,7 @@ struct kvm {
struct list_head vm_list;
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
+   struct list_head irqfds;
struct kvm_vm_stat stat;
struct kvm_arch arch;
atomic_t users_count;
@@ -524,4 +525,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 #endif
 
+int kvm_irqfd(struct kvm *kvm, int gsi, int flags);
+void kvm_irqfd_release(struct kvm *kvm);
+
 #endif
diff --git a/virt/kvm/irqfd.c b/virt/kvm/irqfd.c
new file mode 100644
index 000..6c82fcb
--- /dev/null
+++ b/virt/kvm/irqfd.c
@@ -0,0 +1,159 @@
+/*
+ * irqfd: Allows an eventfd to be used to inject an interrupt to the guest
+ *
+ * Credit goes to Avi Kivity for the original idea.
+ *
+ * Copyright 2009 Novell.  All Rights Reserved.
+ *
+ * Author:
+ * Gregory Haskins ghask...@novell.com
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include linux/kvm_host.h
+#include linux/eventfd.h
+#include linux/workqueue.h
+#include linux/syscalls.h
+#include linux/wait.h
+#include linux/poll.h
+#include linux/file.h
+#include linux/list.h
+
+struct _irqfd {
+   struct kvm   *kvm;
+   int   gsi;
+   struct file  *file;
+   struct list_head  list;
+   poll_tablept;
+   wait_queue_head_t*wqh;
+   wait_queue_t  wait;

[KVM PATCH v4 1/2] eventfd: export eventfd interfaces for module use

2009-05-04 Thread Gregory Haskins
We will re-use eventfd for implementing irqfd later in the series, and the
irqfd users will potentially live in modules.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 fs/eventfd.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 0de6ebb..2e1c2ff 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,6 +16,7 @@
 #include linux/anon_inodes.h
 #include linux/eventfd.h
 #include linux/syscalls.h
+#include linux/module.h
 
 struct eventfd_ctx {
wait_queue_head_t wqh;
@@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n)
 
return n;
 }
+EXPORT_SYMBOL_GPL(eventfd_signal);
 
 static int eventfd_release(struct inode *inode, struct file *file)
 {
@@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd)
 
return file;
 }
+EXPORT_SYMBOL_GPL(eventfd_fget);
 
 struct file *eventfd_file_create(unsigned int count, int flags)
 {
@@ -225,6 +228,7 @@ struct file *eventfd_file_create(unsigned int count, int 
flags)
 
return file;
 }
+EXPORT_SYMBOL_GPL(eventfd_file_create);
 
 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 {

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v4 0/2] irqfd

2009-05-04 Thread Gregory Haskins
Gregory Haskins wrote:
 (Applies to kvm.git:7da2e3ba, plus you will also need Davide Libenzi's
 eventfd_file_create() patch, which you can find here:
   

[ snip ]

Sorry about the double post of v4.  The first time through I
fat-fingered Al's and LKML's addresses so they were munged together.  I
tried to kill the mail before it went out, but it looks like I missed
it.  If replying, please be sure to use the second version of v4 (which
has a legit address for Al and LKML).

-Greg




signature.asc
Description: OpenPGP digital signature


Re: [KVM PATCH v4 0/2] irqfd

2009-05-04 Thread Gregory Haskins
Gregory Haskins wrote:
 (Applies to kvm.git:7da2e3ba, plus you will also need Davide Libenzi's
 eventfd_file_create() patch, which you can find here:
   
[snip]

I should also add that v4 is build-tested only.  I am in the middle of
refactoring virtual-bus, which is my only current test-harness for
this.  I will report back later this week how the testing goes once I
get something running again.

-Greg




signature.asc
Description: OpenPGP digital signature


Re: kvm-77 Excessive Disk Access causes real time clock hang!

2009-05-04 Thread Erik Rull

Hi Avi,

Avi Kivity wrote:

Erik Rull wrote:
The file system is the guest's business.  Instead of '-hda /dev/hda2', try

 -drive file=/dev/hda2,cache=none


great!
cache=off worked - none caused an error.

The Timing problem is still present but the XP system is now much more 
interactive during file access (copy / defrag,...)


I will try out the 84 kvm with the irq-reinjection.

Best regards,

Erik
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm-77 Excessive Disk Access causes real time clock hang!

2009-05-04 Thread Erik Rull

Hi Avi,

Avi Kivity wrote:

Erik Rull wrote:
The file system is the guest's business.  Instead of '-hda /dev/hda2', try

 -drive file=/dev/hda2,cache=none


great!
cache=off worked - none caused an error.

The Timing problem is still present but the XP system is now much more
interactive during file access (copy / defrag,...)

I will try out the 84 kvm with the irq-reinjection.

Best regards,

Erik

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM-AUTOTEST] [PATCH] support for remote migration

2009-05-04 Thread Michael Goldish
Thanks for the new patch. I'll comment on it later because I want to take some 
more time to review it.

The login prompt problem is my fault -- please see my comment below.

- yogi anant...@linux.vnet.ibm.com wrote:

 Hello everyone,
 
 I like to resubmit patch to add support for remote migration in
 kvm-autotest, based on Michael Goldish's suggestions.
 
 To use this patch the following seven parameters should be added to
 the
 existing migration test
 
 remote_dst = yes
 hostip = localhost ip or name
 remoteip = remote host ip or name
 remuser = root
 rempassword = password
 qemu_path_dst = qemu binary path on remote host
 image_dir_dst = images dir on remote host
 
 
 For example:
 - migrate:  install setup
 type = migration
 vms +=  dst
 migration_test_command = help
 kill_vm_on_error = yes
 hostip = 192.168.1.2
 remoteip = 192.168.1.3
 remuser = root
 rempassword = 123456
 remote_dst = yes
 qemu_path_dst = /tmp/kvm_autotest_root/qemu
 image_dir_dst = /tmp/kvm_autotest_root/images
 
 variants:
 
 The parameter remote_dst = yes, indicates that the VM dst should
 be
 started on the remote host.If the parameter qemu_path_dst and
 image_dir_dst, it is assumed tht the qemu binary images path is same
 on
 both local and remote host.
 
  Regarding remote_login:
  
  - Why should remote_login return a session when it gets an
 unexpected login prompt? If you get a login prompt doesn't that mean
 something went wrong? The username is always provided in the ssh
 command line, so we shouldn't expect to receive a login prompt -- or
 am I missing something? I am pretty confident this is true in the
 general case, but maybe it's different when ssh keys have been
 exchanged between the hosts.
  
  - I think it makes little sense to return a session object when you
 see a login prompt because that session will be useless. You can't
 send any commands to it because you don't have a shell prompt yet. Any
 command you send will be interpreted as a username, and will most
 likely be the wrong username.
  
  - When a guest is in the process of booting and we try to log into
 it, remote_login sometimes fails because it gets an unexpected login
 prompt. This is good, as far as I understand, because it means the
 guest isn't ready yet (still booting). The next time remote_login
 attempts to log in, it usually succeeds. If we consider an unexpected
 login prompt OK, we pass login attempts that actually should have
 failed (and the resulting sessions will be useless anyway).
  
 I have removed this from the current patch, so now the remote_login
 function is unchanged.I will recheck my machine configuration and
 submit
 it as new patch if necessary. I had exchanged ssh keys between the
 hosts(both local and remote hosts), but the login sessions seem to
 terminates with Got unexpected login prompt.  

It seems the problem is caused by a loose regular expression in 
kvm_utils.remote_login().
In the list of parameters to read_until_last_line_matches, you'll find 
something like [Ll]ogin:.
I put it there to match the telnet login prompt which indicates failure, but it 
also matches the
Last login: Mon May 4 ... from ... line, which appears when SSH login 
succeeds.
This regex should be made stricter, e.g. r"^[Ll]ogin:\s*$", which means it must 
appear at the beginning
of the line, and must be followed by nothing other than whitespace characters.

I'll commit a fix, which will also make the other regex's stricter as well, but 
it won't appear in the
public repository until Uri comes back from vacation.

Thanks,
Michael
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-04 Thread Jan Kiszka
Simon Bienlein wrote:
 Hello Jan,
 
 thank you very much for processing my request.
 I [1]patched the KVM sources from Lenny and created a new Debian packet.

Did you patch both kernel and user space? Note that my kernel patch was
against the kernel git repository, not against the external modules
delivered with kvm releases. Feasible to convert them, but maybe not
straightforward.

 When I boot the VM from the Lenny CD, there is no audible signal tone.
 It does not make a difference whether I start the KVM with or without
 the option -no-kvm-pit. What do I have to do in order to test the new
 function?

Hmm, I successfully tested with '-soundbw pcspk' + my patches or
-no-kvm-pit. There is probably a different, unrelated issue with your setup.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-04 Thread Jan Kiszka
Samuel Thibault wrote:
 Jan Kiszka, le Mon 04 May 2009 22:29:39 +0200, a écrit :
 When I boot the VM from the Lenny CD, there is no audible signal tone.
 Hmm, I successfully tested with '-soundbw pcspk' + my patches or
 -no-kvm-pit. There is probably a different, unrelated issue with your setup.
 
 Remember that the BIOS support for beeps is probably still missing.
 Simon, you should also test beeps from an installed Linux guest.
 

Yeah, I forgot... :)

Indeed, I only tested the console beep of a fully installed Linux guest.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] qemu-kvm: build system Add link to qemu

2009-05-04 Thread Jan Kiszka
Avi Kivity wrote:
 Jan Kiszka wrote:
 I'm getting closer to a working qemu-kvm, but there are still a few
 messy parts. The magic dance goes like this:

 cd qemu-kvm/kvm
 ln -s .. qemu(or apply patch below)
 ./configure -whatever
 make

 Still, this is unintuitive. As both top-level configure and Makefile
 already differ from upstream, I see no reason not tweaking them also in
 way that ./configure  make from the top-level directory behaves as
 expected again. May look into this later (and the other warnings the
 build threw at me), now I've to understand an ugly shadow page table
 inconsistency of kvm...

 Jan

 ---

 Subject: [PATCH] qemu-kvm: build system Add link to qemu

 Signed-off-by: Jan Kiszka jan.kis...@siemens.com
 ---
  kvm/qemu |1 +
  1 files changed, 1 insertions(+), 0 deletions(-)
  create mode 12 kvm/qemu

 diff --git a/kvm/qemu b/kvm/qemu
 new file mode 12
 index 000..a96aa0e
 --- /dev/null
 +++ b/kvm/qemu
 @@ -0,0 +1 @@
 +..
 \ No newline at end of file
   
 
 This shouldn't be needed.  Can you confirm this with current qemu-kvm.git?
 

Yes, long solved.

Jan



signature.asc
Description: OpenPGP digital signature


[PATCH 1/4] BIOS changes for configuring irq0-inti2 override

2009-05-04 Thread Beth Kon
These patches resolve the irq0-inti2 override issue, and get the hpet working
on kvm. 

Override and HPET changes are sent as a series because HPET depends on the 
override. Win2k8 expects the HPET interrupt on inti2, regardless of whether 
an override exists in the BIOS. And the HPET spec states that in legacy mode, 
timer interrupt is on inti2.

The irq0-inti2 override will always be used unless the kernel cannot do irq 
routing (i.e., compatibility with old kernels). So if the kernel is capable, 
userspace sets up irq0-inti2 via the irq routing interface, and adds the 
irq0-inti2 override to the MADT interrupt source override table, 
and the mp table (for the no-acpi case).

A couple of months ago, Marcelo was seeing RHEL5 guests complain of invalid
checksum with these patches, but later he couldn't reproduce it, and I'm not 
seeing it now. While all guests still need to be fully tested, everything 
appears to be in order.  I've tested on win2k864, win2k832, RHEL5.3 32 bit, 
and ubuntu 8.10 64 bit. 


Signed-off-by: Beth Kon e...@us.ibm.com


diff --git a/kvm/bios/rombios32.c b/kvm/bios/rombios32.c
index 8684987..ddfa828 100755
--- a/kvm/bios/rombios32.c
+++ b/kvm/bios/rombios32.c
@@ -445,6 +445,9 @@ uint32_t cpuid_ext_features;
 unsigned long ram_size;
 uint64_t ram_end;
 uint8_t bios_uuid[16];
+#ifdef BX_QEMU
+uint8_t irq0_override;
+#endif
 #ifdef BX_USE_EBDA_TABLES
 unsigned long ebda_cur_addr;
 #endif
@@ -477,6 +480,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
 #define QEMU_CFG_SIGNATURE  0x00
 #define QEMU_CFG_ID 0x01
 #define QEMU_CFG_UUID   0x02
+#define QEMU_CFG_IRQ0_OVERRIDE 0x0e
 
 int qemu_cfg_port;
 
@@ -518,6 +522,18 @@ void uuid_probe(void)
 memset(bios_uuid, 0, 16);
 }
 
+#ifdef BX_QEMU
+void irq0_override_probe(void)
+{
+if(qemu_cfg_port) {
+qemu_cfg_select(QEMU_CFG_IRQ0_OVERRIDE);
+qemu_cfg_read(irq0_override, 1);
+return;
+}
+memset(irq0_override, 0, 1);
+}
+#endif
+
 void cpu_probe(void)
 {
 uint32_t eax, ebx, ecx, edx;
@@ -1160,6 +1176,13 @@ static void mptable_init(void)
 
 /* irqs */
 for(i = 0; i  16; i++) {
+#ifdef BX_QEMU
+/* One entry per ioapic interrupt destination. Destination 2 is covered
+ * by irq0-inti2 override (i == 0). Source IRQ 2 is unused 
+ */
+if (irq0_override  i == 2)
+continue;
+#endif
 putb(q, 3); /* entry type = I/O interrupt */
 putb(q, 0); /* interrupt type = vectored interrupt */
 putb(q, 0); /* flags: po=0, el=0 */
@@ -1167,7 +1190,12 @@ static void mptable_init(void)
 putb(q, 0); /* source bus ID = ISA */
 putb(q, i); /* source bus IRQ */
 putb(q, ioapic_id); /* dest I/O APIC ID */
-putb(q, i); /* dest I/O APIC interrupt in */
+#ifdef BX_QEMU
+if (irq0_override  i == 0)
+putb(q, 2); /* dest I/O APIC interrupt in */
+else
+#endif
+putb(q, i); /* dest I/O APIC interrupt in */
 }
 /* patch length */
 len = q - mp_config_table;
@@ -1550,16 +1578,18 @@ void acpi_bios_init(void)
 
 addr = (addr + 7)  ~7;
 madt_addr = addr;
+madt = (void *)(addr);
 madt_size = sizeof(*madt) +
 sizeof(struct madt_processor_apic) * MAX_CPUS +
-#ifdef BX_QEMU
-sizeof(struct madt_io_apic) /* + sizeof(struct madt_int_override) */;
-#else
 sizeof(struct madt_io_apic);
+#ifdef BX_QEMU
+for (i = 0; i  16; i++)
+if (PCI_ISA_IRQ_MASK  (1U  i))
+madt_size += sizeof(struct madt_int_override);
+if (irq0_override)
+madt_size += sizeof(struct madt_int_override);
 #endif
-madt = (void *)(addr);
 addr += madt_size;
-
 #ifdef BX_QEMU
 #ifdef HPET_WORKS_IN_KVM
 addr = (addr + 7)  ~7;
@@ -1660,23 +1690,21 @@ void acpi_bios_init(void)
 io_apic-io_apic_id = smp_cpus;
 io_apic-address = cpu_to_le32(0xfec0);
 io_apic-interrupt = cpu_to_le32(0);
+int_override = (struct madt_int_override*)(io_apic + 1);
 #ifdef BX_QEMU
-#ifdef HPET_WORKS_IN_KVM
-io_apic++;
-
-int_override = (void *)io_apic;
-int_override-type = APIC_XRUPT_OVERRIDE;
-int_override-length = sizeof(*int_override);
-int_override-bus = cpu_to_le32(0);
-int_override-source = cpu_to_le32(0);
-int_override-gsi = cpu_to_le32(2);
-int_override-flags = cpu_to_le32(0);
-#endif
+if (irq0_override) {
+int_override = (void *)io_apic;
+int_override-type = APIC_XRUPT_OVERRIDE;
+int_override-length = sizeof(*int_override);
+int_override-bus = cpu_to_le32(0);
+int_override-source = cpu_to_le32(0);
+int_override-gsi = cpu_to_le32(2);
+int_override-flags = cpu_to_le32(0); /* conforms to bus 
specifications */
+int_override++;
+}
 #endif
-
-int_override = (struct madt_int_override*)(io_apic + 1);
 for ( i = 0; i  16; i++ ) {
-

[PATCH 2/4] Userspace changes for configuring irq0-inti2 override

2009-05-04 Thread Beth Kon
Signed-off-by: Beth Kon e...@us.ibm.com


diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index e1b19d7..bb74f38 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -279,6 +279,7 @@ void *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
 fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16);
 fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)nographic);
 fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
+fw_cfg_add_i16(s, FW_CFG_IRQ0_OVERRIDE, (uint16_t)irq0override);
 
 register_savevm(fw_cfg, -1, 1, fw_cfg_save, fw_cfg_load, s);
 qemu_register_reset(fw_cfg_reset, s);
diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
index f616ed2..498c1e3 100644
--- a/hw/fw_cfg.h
+++ b/hw/fw_cfg.h
@@ -15,6 +15,7 @@
 #define FW_CFG_INITRD_SIZE  0x0b
 #define FW_CFG_BOOT_DEVICE  0x0c
 #define FW_CFG_NUMA 0x0d
+#define FW_CFG_IRQ0_OVERRIDE0x0e
 #define FW_CFG_MAX_ENTRY0x10
 
 #define FW_CFG_WRITE_CHANNEL0x4000
diff --git a/hw/ioapic.c b/hw/ioapic.c
index 0b70cf6..2d77a2c 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -23,6 +23,7 @@
 
 #include hw.h
 #include pc.h
+#include sysemu.h
 #include qemu-timer.h
 #include host-utils.h
 
@@ -95,14 +96,13 @@ void ioapic_set_irq(void *opaque, int vector, int level)
 {
 IOAPICState *s = opaque;
 
-#if 0
 /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
  * to GSI 2.  GSI maps to ioapic 1-1.  This is not
  * the cleanest way of doing it but it should work. */
 
-if (vector == 0)
+if (vector == 0  irq0override) {
 vector = 2;
-#endif
+}
 
 if (vector = 0  vector  IOAPIC_NUM_PINS) {
 uint32_t mask = 1  vector;
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 68a9218..5b27179 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -814,9 +814,14 @@ int kvm_qemu_create_context(void)
 return r;
 }
 for (i = 0; i  24; ++i) {
-r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i);
-if (r  0)
+if (i == 0) {
+r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, 2);
+} else if (i != 2) {
+r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i);
+}
+if (r  0) {
 return r;
+}
 }
 kvm_commit_irq_routes(kvm_context);
 }
diff --git a/qemu-kvm.h b/qemu-kvm.h
index ca59af8..a836579 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -166,6 +166,7 @@ void qemu_kvm_cpu_stop(CPUState *env);
 #define kvm_enabled() (kvm_allowed)
 #define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context)
 #define qemu_kvm_pit_in_kernel() kvm_pit_in_kernel(kvm_context)
+#define qemu_kvm_has_gsi_routing() kvm_has_gsi_routing(kvm_context)
 #define kvm_has_sync_mmu() qemu_kvm_has_sync_mmu()
 void kvm_init_vcpu(CPUState *env);
 void kvm_load_tsc(CPUState *env);
diff --git a/sysemu.h b/sysemu.h
index e8dd381..a5f96f9 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -96,6 +96,7 @@ extern int graphic_width;
 extern int graphic_height;
 extern int graphic_depth;
 extern int nographic;
+extern int irq0override;
 extern const char *keyboard_layout;
 extern int win2k_install_hack;
 extern int rtc_td_hack;
diff --git a/vl.c b/vl.c
index 9ff4a5a..ee7f29a 100644
--- a/vl.c
+++ b/vl.c
@@ -207,6 +207,7 @@ static int vga_ram_size;
 enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
 static DisplayState *display_state;
 int nographic;
+int irq0override;
 static int curses;
 static int sdl;
 const char* keyboard_layout = NULL;
@@ -4599,6 +4600,7 @@ int main(int argc, char **argv, char **envp)
 vga_ram_size = VGA_RAM_SIZE;
 snapshot = 0;
 nographic = 0;
+irq0override = 1;
 curses = 0;
 kernel_filename = NULL;
 kernel_cmdline = ;
@@ -5682,8 +5684,14 @@ int main(int argc, char **argv, char **envp)
 }
 }
 
-if (kvm_enabled())
-   kvm_init_ap();
+if (kvm_enabled()) {
+   kvm_init_ap();
+#ifdef USE_KVM
+if (kvm_irqchip  !qemu_kvm_has_gsi_routing()) {
+   irq0override = 0;
+}
+#endif
+}
 
 machine-init(ram_size, vga_ram_size, boot_devices,
   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] BIOS changes for KVM HPET

2009-05-04 Thread Beth Kon
Signed-off-by: Beth Kon e...@us.ibm.com


diff --git a/kvm/bios/acpi-dsdt.dsl b/kvm/bios/acpi-dsdt.dsl
index c756fed..0e142be 100755
--- a/kvm/bios/acpi-dsdt.dsl
+++ b/kvm/bios/acpi-dsdt.dsl
@@ -308,7 +308,6 @@ DefinitionBlock (
 })
 }
 #ifdef BX_QEMU
-#ifdef HPET_WORKS_IN_KVM
 Device(HPET) {
 Name(_HID,  EISAID(PNP0103))
 Name(_UID, 0)
@@ -328,7 +327,6 @@ DefinitionBlock (
 })
 }
 #endif
-#endif
 }
 
 Scope(\_SB.PCI0) {
diff --git a/kvm/bios/rombios32.c b/kvm/bios/rombios32.c
index ddfa828..7441cd7 100755
--- a/kvm/bios/rombios32.c
+++ b/kvm/bios/rombios32.c
@@ -1293,7 +1293,7 @@ struct rsdt_descriptor_rev1
 {
ACPI_TABLE_HEADER_DEF   /* ACPI common table 
header */
 #ifdef BX_QEMU
-   uint32_t table_offset_entry [2]; /* Array 
of pointers to other */
+   uint32_t table_offset_entry [3]; /* Array 
of pointers to other */
 // uint32_t table_offset_entry [4]; /* Array 
of pointers to other */
 #else
uint32_t table_offset_entry [3]; /* Array 
of pointers to other */
@@ -1450,8 +1450,8 @@ struct acpi_20_generic_address {
 } __attribute__((__packed__));
 
 /*
- *  * HPET Description Table
- *   */
+ *  HPET Description Table
+ */
 struct acpi_20_hpet {
 ACPI_TABLE_HEADER_DEF   /* ACPI common table 
header */
 uint32_t   timer_block_id;
@@ -1591,13 +1591,11 @@ void acpi_bios_init(void)
 #endif
 addr += madt_size;
 #ifdef BX_QEMU
-#ifdef HPET_WORKS_IN_KVM
 addr = (addr + 7)  ~7;
 hpet_addr = addr;
 hpet = (void *)(addr);
 addr += sizeof(*hpet);
 #endif
-#endif
 
 acpi_tables_size = addr - base_addr;
 
@@ -1620,10 +1618,10 @@ void acpi_bios_init(void)
 memset(rsdt, 0, sizeof(*rsdt));
 rsdt-table_offset_entry[0] = cpu_to_le32(fadt_addr);
 rsdt-table_offset_entry[1] = cpu_to_le32(madt_addr);
-//rsdt-table_offset_entry[2] = cpu_to_le32(ssdt_addr);
 #ifdef BX_QEMU
-//rsdt-table_offset_entry[3] = cpu_to_le32(hpet_addr);
+rsdt-table_offset_entry[2] = cpu_to_le32(hpet_addr);
 #endif
+//rsdt-table_offset_entry[3] = cpu_to_le32(ssdt_addr);
 acpi_build_table_header((struct acpi_table_header *)rsdt,
 RSDT, sizeof(*rsdt), 1);
 
@@ -1723,7 +1721,6 @@ void acpi_bios_init(void)
 
 #ifdef BX_QEMU
 /* HPET */
-#ifdef HPET_WORKS_IN_KVM
 memset(hpet, 0, sizeof(*hpet));
 /* Note timer_block_id value must be kept in sync with value advertised by
  * emulated hpet
@@ -1733,7 +1730,6 @@ void acpi_bios_init(void)
 acpi_build_table_header((struct  acpi_table_header *)hpet,
  HPET, sizeof(*hpet), 1);
 #endif
-#endif
 
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] Userspace changes for KVM HPET

2009-05-04 Thread Beth Kon
Signed-off-by: Beth Kon e...@us.ibm.com


diff --git a/hw/hpet.c b/hw/hpet.c
index c7945ec..47c9f89 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -30,6 +30,7 @@
 #include console.h
 #include qemu-timer.h
 #include hpet_emul.h
+#include qemu-kvm.h
 
 //#define HPET_DEBUG
 #ifdef HPET_DEBUG
@@ -48,6 +49,43 @@ uint32_t hpet_in_legacy_mode(void)
 return 0;
 }
 
+static void hpet_kpit_enable(void)
+{
+struct kvm_pit_state ps;
+kvm_get_pit(kvm_context, ps);
+kvm_set_pit(kvm_context, ps);
+}
+
+static void hpet_kpit_disable(void)
+{
+struct kvm_pit_state ps;
+kvm_get_pit(kvm_context, ps);
+ps.channels[0].mode = 0xff;
+kvm_set_pit(kvm_context, ps);
+}
+
+static void hpet_legacy_enable(void)
+{
+if (qemu_kvm_pit_in_kernel()) {
+   hpet_kpit_disable();
+   dprintf(qemu: hpet disabled kernel pit\n);
+} else {
+   hpet_pit_disable();
+   dprintf(qemu: hpet disabled userspace pit\n);
+}
+}
+
+static void hpet_legacy_disable(void)
+{
+if (qemu_kvm_pit_in_kernel()) {
+   hpet_kpit_enable();
+   dprintf(qemu: hpet enabled kernel pit\n);
+} else {
+   hpet_pit_enable();
+   dprintf(qemu: hpet enabled userspace pit\n);
+}
+}
+
 static uint32_t timer_int_route(struct HPETTimer *timer)
 {
 uint32_t route;
@@ -475,9 +513,9 @@ static void hpet_ram_writel(void *opaque, 
target_phys_addr_t addr,
 }
 /* i8254 and RTC are disabled when HPET is in legacy mode */
 if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
-hpet_pit_disable();
+hpet_legacy_enable();
 } else if (deactivating_bit(old_val, new_val, 
HPET_CFG_LEGACY)) {
-hpet_pit_enable();
+hpet_legacy_disable();
 }
 break;
 case HPET_CFG + 4:
@@ -560,7 +598,7 @@ static void hpet_reset(void *opaque) {
  * hpet_reset is called due to system reset. At this point control must
  * be returned to pit until SW reenables hpet.
  */
-hpet_pit_enable();
+hpet_legacy_disable();
 count = 1;
 }
 
diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin
index d5d42f3..2503783 100644
Binary files a/pc-bios/bios.bin and b/pc-bios/bios.bin differ
diff --git a/vl.c b/vl.c
index 5eacd6a..1334344 100644
--- a/vl.c
+++ b/vl.c
@@ -5666,10 +5666,15 @@ int main(int argc, char **argv, char **envp)
 }
 
 if (kvm_enabled()) {
-   kvm_init_ap();
+kvm_init_ap();
 #ifdef USE_KVM
 if (kvm_irqchip  !qemu_kvm_has_gsi_routing()) {
-   irq0override = 0;
+irq0override = 0;
+/* if kernel can't do irq routing, interrupt source
+ * override 0-2 can not be set up as required by hpet,
+ * so disable hpet.
+ */
+no_hpet=1;
 }
 #endif
 }
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-04 Thread Samuel Thibault
Jan Kiszka, le Mon 04 May 2009 22:29:39 +0200, a écrit :
  When I boot the VM from the Lenny CD, there is no audible signal tone.
 
 Hmm, I successfully tested with '-soundhw pcspk' + my patches or
 -no-kvm-pit. There is probably a different, unrelated issue with your setup.

Remember that the BIOS support for beeps is probably still missing.
Simon, you should also test beeps from an installed Linux guest.

Samuel
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm.git now live

2009-05-04 Thread Hollis Blanchard
On Sat, 2009-05-02 at 10:52 +0300, Avi Kivity wrote:
 Hollis Blanchard wrote:
  In that case it's sufficient to have the build system use the upstream 
  kvm integration (CONFIG_KVM) rather than the qemu-kvm integration 
  (USE_KVM).
  
 
  OK, I give up... how is this supposed to work? Nobody ever sets
  CONFIG_KVM or KVM_UPSTREAM, but there are a couple tests for it. Glauber
  once sent a patch related to that, but I don't see how it helps.

 
 KVM_UPSTREAM is just a marker to let us know which parts of upstream 
 qemu/kvm integration conflict with qemu-kvm.git.

OK, so where do I define KVM_UPSTREAM?

Also, where do I define CONFIG_KVM? I would expect the configure script
to do that, but apparently it does not.

-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 04/04] qemu-kvm: other archs should maintain memory mappingalso.

2009-05-04 Thread Hollis Blanchard
On Mon, 2009-05-04 at 11:25 +0200, Jes Sorensen wrote:
 Avi Kivity wrote:
  Jes Sorensen wrote:
  +int destroy_region_works = 0;
  
  Global name, prefix with kvm_.  Does it actually need to be global?
 
 Gone, now local to qemu-kvm-x86.c. I moved the initializer into
 kvm_arch_create_context() instead.
 
  The header depends on target_phys_addr_t, so it must include whatever 
  defines it.
 
 Added an #include cpu-all.h which defines it.
 
  Missing other archs...
  
  Instead of duplicating this for every arch, you can have a #define that 
  tells you if you want non-trivial arch definitions, and supply the 
  trivial definitions in qemu-kvm.h.
 
 Done, I also added a PPC header file - which may or may not be wanted
 at this point. You can just cut it out if you don't think it should be
 added.

I don't understand the code being moved, but I guess I don't want it, so
your patch is fine with me.

(Wtf are those magic addresses? And not a single comment?? Aren't we
better than this?)

-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v4 1/2] eventfd: export eventfd interfaces for module use

2009-05-04 Thread Al Viro
On Mon, May 04, 2009 at 01:57:45PM -0400, Gregory Haskins wrote:
 @@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n)
  
   return n;
  }
 +EXPORT_SYMBOL_GPL(eventfd_signal);

perhaps, but...

 @@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd)
  
   return file;
  }
 +EXPORT_SYMBOL_GPL(eventfd_fget);

this one looks very odd.  Could you show legitimate users?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Linux x86 guest panics in skb_copy_bits

2009-05-04 Thread Marcelo Tosatti
Justin,

On Sun, May 03, 2009 at 11:40:47AM -0700, Justin Dossey wrote:
 Hi all,
 
 I have a pretty straightforward setup.
 
 Hypervisor:
 dual xeon e5205 running Gentoo Linux
 kernel 2.6.27 with virtio devices enabled
 kvm 84
 libvirt 0.5.1
 
 Guest:
 32-bit, virtio for nic and disk, qcow2.
 Linux 2.6.28.
 
 Network is bridged using tap and brctl.
 
 I'm running Apache on the guest.  Whenever I send enough data through
 the virtual NIC, I get a panic in skb_copy_bits.  I've tried using the
 e1000 driver instead of the virtio one, but that makes no difference.
 
 Has anyone else seen this behavior before?  I got this on 2.6.27 and 2.6.28.
 
 Here's a snippet:
 
 [280204.340016]  [c02253e2] panic+0x4e/0xea
 [280204.340016]  [c05906b9] oops_end+0x8f/0xa3
 [280204.340016]  [c0204e94] die+0x57/0x5f
 [280204.340016]  [c0592192] do_page_fault+0x605/0x6bc
 [280204.340016]  [c059010d] ? _spin_lock+0x15/0x18
 [280204.340016]  [c04bfad8] ? __qdisc_run+0xe6/0x1a7
 [280204.340016]  [c0591b8d] ? do_page_fault+0x0/0x6bc
 [280204.340016]  [c0590482] error_code+0x72/0x78
 [280204.340016]  [c04ace8c] ? skb_copy_bits+0x4f/0x1c4
 [280204.340016]  [c0215864] ? kvm_set_pte+0x26/0x29
 [280204.340016]  [c054889c] xdr_skb_read_bits+0x1f/0x37
 [280204.340016]  [c054872b] xdr_partial_copy_from_skb+
 0x117/0x16c
 [280204.340016]  [c0549ec4] xs_tcp_data_recv+0x245/0x3de
 [280204.340016]  [c054887d] ? xdr_skb_read_bits+0x0/0x37
 [280204.340016]  [c04e07d6] tcp_read_sock+0x8c/0x1e2
 [280204.340016]  [c0549c7f] ? xs_tcp_data_recv+0x0/0x3de
 [280204.340016]  [c054a5d1] xs_tcp_data_ready+0x54/0x64
 [280204.340016]  [c04e9469] tcp_rcv_established+0x524/0x7b7
 [280204.340016]  [c04ee4b2] tcp_v4_do_rcv+0x173/0x2dc

Seems to be an issue with paravirt mmu. Do you happen to have
CONFIG_DEBUG_PAGEALLOC turned on in your guests?



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM x86_64 with SR-IOV..?

2009-05-04 Thread Yu Zhao
Hi,

The VF also works in the host if the VF driver is programmed properly.
So it would be easier to develop the VF driver in the host and then
verify the VF driver in the guest.

BTW, I didn't see SR-IOV being enabled in your dmesg; did you select
CONFIG_PCI_IOV in the kernel .config?

Thanks,
Yu

On Mon, May 04, 2009 at 06:40:36PM +0800, Nicholas A. Bellinger wrote:
 On Mon, 2009-05-04 at 17:49 +0800, Sheng Yang wrote:
  On Monday 04 May 2009 17:11:59 Nicholas A. Bellinger wrote:
   On Mon, 2009-05-04 at 16:20 +0800, Sheng Yang wrote:
On Monday 04 May 2009 12:36:04 Nicholas A. Bellinger wrote:
 On Mon, 2009-05-04 at 10:09 +0800, Sheng Yang wrote:
  On Monday 04 May 2009 08:53:07 Nicholas A. Bellinger wrote:
   On Sat, 2009-05-02 at 18:22 +0800, Sheng Yang wrote:
On Thu, Apr 30, 2009 at 01:22:54PM -0700, Nicholas A. Bellinger
  wrote:
 Greetings KVM folks,

 I am wondering if any information exists for doing SR-IOV on the
 new VT-d capable chipsets with KVM..?  From what I understand
 the patches for doing this with KVM are floating around, but I
 have been unable to find any user-level docs for actually
 making it all go against a upstream v2.6.30-rc3 code..

 So far I have been doing IOV testing with Xen 3.3 and
 3.4.0-pre, and I am really hoping to be able to jump to KVM 
 for
 single-function and then multi-function SR-IOV.  I know
 that the VM migration stuff for IOV in Xen is up and running,
 and I assume it is being worked in for KVM instance migration
 as well..? This part is less important (at least for me :-)
 than getting a stable SR-IOV setup running under the KVM
 hypervisor..  Does anyone have any pointers for this..?

 Any comments or suggestions are appreciated!
   
Hi Nicholas
   
The patches are not floating around now. As you know, SR-IOV for
Linux have been in 2.6.30, so then you can use upstream KVM and
qemu-kvm(or recent released kvm-85) with 2.6.30-rc3 as host
kernel. And some time ago, there are several SRIOV related
patches for qemu-kvm, and now they all have been checked in.
   
And for KVM, the extra document is not necessary, for you can
simply assign a VF to the guest like any other device. And how to
create VF is specific for each device driver. So just create a 
VF
then assign it to KVM guest is fine.
  
   Greetings Sheng,
  
   So, I have been trying the latest kvm-85 release on a v2.6.30-rc3
   checkout from linux-2.6.git on a CentOS 5u3 x86_64 install on 
   Intel
   IOH-5520 based dual socket Nehalem board.  I have enabled DMAR and
   Interrupt Remapping my KVM host using v2.6.30-rc3 and from what I
   can tell, the KVM_CAP_* defines from libkvm are enabled with
   building kvm-85 after './configure
   --kerneldir=/usr/src/linux-2.6.git' and the PCI passthrough code 
   is
   being enabled in
   kvm-85/qemu/hw/device-assignment.c AFAICT..
  
   From there, I use the freshly installed qemu-x86_64-system binary
to
  
   start a Debian 5 x86_64 HVM (that previously had been moving
   network packets under Xen for PCIe passthrough). I see the MSI-X
   interrupt remapping working on the KVM host for the passed
   -pcidevice, and the MMIO mappings from the qemu build that I also
   saw while using Xen/qemu-dm built with PCI passthrough are there 
   as
   well..
 
  Hi Nicholas
 
   But while the KVM guest is booting, I see the following
   exception(s) from qemu-x86_64-system for one of the VFs for a
   multi-function PCIe device:
  
   BUG: kvm_destroy_phys_mem: invalid parameters (slot=-1)
 
  This one is mostly harmless.

 Ok, good to know..  :-)

   I try with one of the on-board e1000e ports (02:00.0) and I see 
   the
   same exception along with some MSI-X exceptions from
   qemu-x86_64-system in KVM guest.. However, I am still able to see
   the e1000e and the other vxge multi-function device with lspci, 
   but
   I am unable to dhcp or ping with the e1000e and VF from
   multi-function device fails to register the MSI-X interrupt in the
   guest..
 
  Did you see the interrupt in the guest and host side?

 Ok, I am restarting the e1000e test with a fresh Fedora 11 install and
 KVM host kernel 2.6.29.1-111.fc11.x86_64.   After unbinding and
 attaching the e1000e single-function device at 02:00.0 to pci-stub
 with:

echo 8086 10d3 > /sys/bus/pci/drivers/pci-stub/new_id
echo 0000:02:00.0 > /sys/bus/pci/devices/0000:02:00.0/driver/unbind
echo 0000:02:00.0 > /sys/bus/pci/drivers/pci-stub/bind

 I see the following the KVM host kernel ring buffer:
  

  1   2   >