[PATCH] KVM: ia64: Implement a uniform vps interface

2008-09-24 Thread Avi Kivity
From: Xiantao Zhang [EMAIL PROTECTED]

A uniform entry kvm_vps_entry is added for
vps_sync_write/read, vps_resume_handler/guest,
and branches to different PAL services according to the offset.

Signed-off-by: Anthony Xu [EMAIL PROTECTED]
Signed-off-by: Xiantao Zhang [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
index 13980d9..2cc41d1 100644
--- a/arch/ia64/kvm/kvm_minstate.h
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -50,27 +50,18 @@
 
 #define PAL_VSA_SYNC_READ  \
/* begin to call pal vps sync_read */   \
+{.mii; \
add r25 = VMM_VPD_BASE_OFFSET, r21; \
-   adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21;  /* entry point */\
+   nop 0x0;\
+   mov r24=ip; \
;;  \
+}  \
+{.mmb  \
+   add r24=0x20, r24;  \
ld8 r25 = [r25];  /* read vpd base */   \
-   ld8 r20 = [r20];\
-   ;;  \
-   add r20 = PAL_VPS_SYNC_READ,r20;\
-   ;;  \
-{ .mii;
\
-   nop 0x0;\
-   mov r24 = ip;   \
-   mov b0 = r20;   \
+   br.cond.sptk kvm_vps_sync_read; /*call the service*/\
;;  \
 }; \
-{ .mmb;
\
-   add r24 = 0x20, r24;\
-   nop 0x0;\
-   br.cond.sptk b0;/*  call the service */ \
-   ;;  \
-};
-
 
 
 #define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
index e4f15d6..f0bf0a8 100644
--- a/arch/ia64/kvm/optvfault.S
+++ b/arch/ia64/kvm/optvfault.S
@@ -20,6 +20,75 @@
 #define ACCE_MOV_TO_PSR
 #define ACCE_THASH
 
+ENTRY(kvm_vps_entry)
+   adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
+   ;;
+   ld8 r29 = [r29]
+   ;;
+   add r29 = r29, r30
+   ;;
+   mov b0 = r29
+   br.sptk.many b0
+END(kvm_vps_entry)
+
+/*
+ * Inputs:
+ * r24 : return address
+ * r25 : vpd
+ * r29 : scratch
+ *
+ */
+GLOBAL_ENTRY(kvm_vps_sync_read)
+   movl r30 = PAL_VPS_SYNC_READ
+   ;;
+   br.sptk.many kvm_vps_entry
+END(kvm_vps_sync_read)
+
+/*
+ * Inputs:
+ * r24 : return address
+ * r25 : vpd
+ * r29 : scratch
+ *
+ */
+GLOBAL_ENTRY(kvm_vps_sync_write)
+   movl r30 = PAL_VPS_SYNC_WRITE
+   ;;
+   br.sptk.many kvm_vps_entry
+END(kvm_vps_sync_write)
+
+/*
+ * Inputs:
+ * r23 : pr
+ * r24 : guest b0
+ * r25 : vpd
+ *
+ */
+GLOBAL_ENTRY(kvm_vps_resume_normal)
+   movl r30 = PAL_VPS_RESUME_NORMAL
+   ;;
+   mov pr=r23,-2
+   br.sptk.many kvm_vps_entry
+END(kvm_vps_resume_normal)
+
+/*
+ * Inputs:
+ * r23 : pr
+ * r24 : guest b0
+ * r25 : vpd
+ * r17 : isr
+ */
+GLOBAL_ENTRY(kvm_vps_resume_handler)
+   movl r30 = PAL_VPS_RESUME_HANDLER
+   ;;
+   ld8 r27=[r25]
+   shr r17=r17,IA64_ISR_IR_BIT
+   ;;
+   dep r27=r17,r27,63,1   // bit 63 of r27 indicate whether enable CFLE
+   mov pr=r23,-2
+   br.sptk.many kvm_vps_entry
+END(kvm_vps_resume_handler)
+
 //mov r1=ar3
 GLOBAL_ENTRY(kvm_asm_mov_from_ar)
 #ifndef ACCE_MOV_FROM_AR
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 5a33f7e..3417783 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -962,9 +962,9 @@ static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
 void vmm_transition(struct kvm_vcpu *vcpu)
 {
ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu-arch.vpd,
-   0, 0, 0, 0, 0, 0);
+   1, 0, 0, 0, 0, 0);
vmm_trampoline(vcpu-arch.guest, vcpu-arch.host);
ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu-arch.vpd,
-   0, 0, 0, 0, 0, 0);
+ 

[PATCH] kvm: libkvm: do not use mem_hole anymore.

2008-09-24 Thread Avi Kivity
From: Glauber Costa [EMAIL PROTECTED]

memory holes are totally evil. Right now they work for some basic tests,
but had never been stressed enough. Using memory holes leaves open questions 
like:

* what happens if an area being registered spans two slots?
* what happens if there is already data in the slots?

Also, the code behaves badly if the piece to be removed lies in the boundaries
of the current slot. Luckily, we don't really need it. Remove it, and make sure
we never hit it.

Signed-off-by: Glauber Costa [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index a850caa..c261053 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -439,74 +439,9 @@ int kvm_is_allocated_mem(kvm_context_t kvm, unsigned long 
phys_start,
return 0;
 }
 
-int kvm_create_mem_hole(kvm_context_t kvm, unsigned long phys_start,
-   unsigned long len)
-{
-   int slot;
-   int r;
-   struct kvm_userspace_memory_region rmslot;
-   struct kvm_userspace_memory_region newslot1;
-   struct kvm_userspace_memory_region newslot2;
-
-   len = (len + PAGE_SIZE - 1)  PAGE_MASK;
-
-   slot = get_intersecting_slot(phys_start);
-   /* no need to create hole, as there is already hole */
-   if (slot == -1)
-   return 0;
-
-   memset(rmslot, 0, sizeof(struct kvm_userspace_memory_region));
-   memset(newslot1, 0, sizeof(struct kvm_userspace_memory_region));
-   memset(newslot2, 0, sizeof(struct kvm_userspace_memory_region));
-
-   rmslot.guest_phys_addr = slots[slot].phys_addr;
-   rmslot.slot = slot;
-
-   newslot1.guest_phys_addr = slots[slot].phys_addr;
-   newslot1.memory_size = phys_start - slots[slot].phys_addr;
-   newslot1.slot = slot;
-   newslot1.userspace_addr = slots[slot].userspace_addr;
-   newslot1.flags = slots[slot].flags;
-
-   newslot2.guest_phys_addr = newslot1.guest_phys_addr +
-  newslot1.memory_size + len;
-   newslot2.memory_size = slots[slot].phys_addr +
-  slots[slot].len - newslot2.guest_phys_addr;
-   newslot2.userspace_addr = newslot1.userspace_addr +
- newslot1.memory_size;
-   newslot2.slot = get_free_slot(kvm);
-   newslot2.flags = newslot1.flags;
-
-   r = ioctl(kvm-vm_fd, KVM_SET_USER_MEMORY_REGION, rmslot);
-   if (r == -1) {
-   fprintf(stderr, kvm_create_mem_hole: %s\n, strerror(errno));
-   return -1;
-   }
-   free_slot(slot);
-
-   r = ioctl(kvm-vm_fd, KVM_SET_USER_MEMORY_REGION, newslot1);
-   if (r == -1) {
-   fprintf(stderr, kvm_create_mem_hole: %s\n, strerror(errno));
-   return -1;
-   }
-   register_slot(newslot1.slot, newslot1.guest_phys_addr,
- newslot1.memory_size, newslot1.userspace_addr,
- newslot1.flags);
-
-   r = ioctl(kvm-vm_fd, KVM_SET_USER_MEMORY_REGION, newslot2);
-   if (r == -1) {
-   fprintf(stderr, kvm_create_mem_hole: %s\n, strerror(errno));
-   return -1;
-   }
-   register_slot(newslot2.slot, newslot2.guest_phys_addr,
- newslot2.memory_size, newslot2.userspace_addr,
- newslot2.flags);
-   return 0;
-}
-
 int kvm_register_phys_mem(kvm_context_t kvm,
-   unsigned long phys_start, void *userspace_addr,
-   unsigned long len, int log)
+ unsigned long phys_start, void *userspace_addr,
+ unsigned long len, int log)
 {
 
struct kvm_userspace_memory_region memory = {
diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
index 79dd769..77fd903 100644
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -457,8 +457,6 @@ void kvm_destroy_phys_mem(kvm_context_t, unsigned long 
phys_start,
 int kvm_is_intersecting_mem(kvm_context_t kvm, unsigned long phys_start);
 int kvm_is_allocated_mem(kvm_context_t kvm, unsigned long phys_start,
 unsigned long len);
-int kvm_create_mem_hole(kvm_context_t kvm, unsigned long phys_start,
-   unsigned long len);
 int kvm_register_phys_mem(kvm_context_t kvm,
unsigned long phys_start, void *userspace_addr,
unsigned long len, int log);
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 00840df..3663d38 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -773,12 +773,13 @@ void kvm_cpu_register_physical_memory(target_phys_addr_t 
start_addr,
 r = kvm_is_allocated_mem(kvm_context, start_addr, size);
 if (r)
 return;
-r = kvm_is_intersecting_mem(kvm_context, start_addr);
-if (r)
-kvm_create_mem_hole(kvm_context, start_addr, size);
-r = kvm_register_phys_mem(kvm_context, start_addr,
-  phys_ram_base + phys_offset,
-  

[PATCH] kvm: qemu: register mmio slots

2008-09-24 Thread Avi Kivity
From: Glauber Costa [EMAIL PROTECTED]

By analysing phys_offset, we know whether a region is an mmio region
or not. If it is, we don't want to have kvm caring about it, so just
return.

Signed-off-by: Glauber Costa [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 1da253a..cfdf90f 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -780,6 +780,10 @@ void kvm_cpu_register_physical_memory(target_phys_addr_t 
start_addr,
 r = kvm_is_containing_region(kvm_context, start_addr, size);
 if (r)
 return;
+
+if (area_flags >= TLB_MMIO)
+return;
+
 r = kvm_register_phys_mem(kvm_context, start_addr,
   phys_ram_base + phys_offset,
   size, 0);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: qemu: unregister memory area depending on their flags

2008-09-24 Thread Avi Kivity
From: Glauber Costa [EMAIL PROTECTED]

Signed-off-by: Glauber Costa [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index f7a7fdd..88d3f5d 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -508,6 +508,18 @@ void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long 
phys_start,
free_slot(memory.slot);
 }
 
+void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, 
unsigned long size)
+{
+
+   int slot = get_container_slot(phys_addr, size);
+
+   if (slot != -1) {
+   DPRINTF(Unregistering memory region %llx (%lx)\n, phys_addr, 
size);
+   kvm_destroy_phys_mem(kvm, phys_addr, size);
+   return;
+   }
+}
+
 static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf)
 {
int r;
diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
index cb77c6c..14ea93b 100644
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -454,6 +454,9 @@ void *kvm_create_phys_mem(kvm_context_t, unsigned long 
phys_start,
  unsigned long len, int log, int writable);
 void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start, 
  unsigned long len);
+void kvm_unregister_memory_area(kvm_context_t, uint64_t phys_start,
+unsigned long len);
+
 int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_start, 
unsigned long size);
 int kvm_register_phys_mem(kvm_context_t kvm,
unsigned long phys_start, void *userspace_addr,
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 07cffef..1da253a 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -768,8 +768,15 @@ void kvm_cpu_register_physical_memory(target_phys_addr_t 
start_addr,
   unsigned long phys_offset)
 {
 int r = 0;
+unsigned long area_flags = phys_offset & ~TARGET_PAGE_MASK;
 
 phys_offset &= ~IO_MEM_ROM;
+
+if (area_flags == IO_MEM_UNASSIGNED) {
+kvm_unregister_memory_area(kvm_context, start_addr, size);
+return;
+}
+
 r = kvm_is_containing_region(kvm_context, start_addr, size);
 if (r)
 return;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Remaining passthrough/VT-d tasks list

2008-09-24 Thread Han, Weidong
Hi all,

The initial passthrough/VT-d patches have been in kvm, it's time to
enhance it, and push them into 2.6.28.

Following is the remaining passthrough/VT-d tasks list:

- Multiple devices assignment (WIP)
- MSI support (WIP)
- MTRR/PAT support of EPT (WIP)
- MTRR/PAT support of shadow (WIP)
- Basic FLR support (WIP)
(The above tasks are work in progress; some patches have been sent out,
others will be sent out in the near future)
- architecture independent (such as x86, IPF)
- Shared Interrupt support
- Add dummy driver to hide/unbind passthrough device from host
kernel

If I omitted some good features or you have some good proposals, please
feel free to add them to this list.
If you are interested in any of the tasks, please reply to this mail directly
and let others know your progress. I appreciate any effort from you!


Randy (Weidong)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Yang, Sheng
On Wednesday 24 September 2008 14:15:15 Han, Weidong wrote:
 Hi all,

 The initial passthrough/VT-d patches have been in kvm, it's time to enhance
 it, and push them into 2.6.28.


Some supplements:

 Following is the remaining passthrough/VT-d tasks list:

 - Multiple devices assignment (WIP)

Weidong is working on this.

 - MSI support (WIP)
 - MTRR/PAT support of EPT (WIP)
 - MTRR/PAT support of shadow (WIP)
 - Basic FLR support (WIP)

Above four are my works. All of them work now. But more job should be done to 
polish the patches. And the main part of Function Level Reset would be picked 
by linux-pci. 

Another thing is we would send out/update above patches before Sept. 28, and 
hope they can picked by 2.6.28 merge window.

Avi, what's your opinion? Of course we would work hard. :) But what's the 
deadline of merge window? 

 (Above tasks are working in process, some patches have been sent out,
 others will be sent out in near future) - architecture independent (such as
 x86, IPF)
 - Shared Interrupt support

I still don't know who would do this. It's very important for VT-d real 
usable. If nobody interested in it, I would pick it up, but after Oct. 6
(after National Holiday in China).

--
regards
Yang, Sheng

 - Add dummy driver to hide/unbind passthrough device from host
 kernel

 If I omit some good features or you have some good proposals, please feel
 free to add them to this list. If you are interest in any tasks, please
 reply the mail directly and let other guys to know your progress.
 Appreciate any effort from you!


 Randy (Weidong)


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Amit Shah
* On Wednesday 24 Sep 2008 13:21:25 Han, Weidong wrote:
 Amit Shah wrote:
 
 - Add dummy driver to hide/unbind passthrough device from
 host kernel

  This isn't needed; we currently don't assign the device to the guest
  if we find that a driver is already loaded. I intend to change it to
  failing guest start altogether in case we find a module already using
  a device. When a guest exits, we release all the structures and hence
  even unloading kvm is not needed to reclaim the device on the host
  side.

 This task needn't target 2.6.28. In the long term, we need it to make device
 assignment more user friendly.

How is the current scheme not user friendly? Or, how will adding a dummy 
driver be more user friendly?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Avi Kivity

Han, Weidong wrote:

Hi all,

The initial passthrough/VT-d patches have been in kvm, it's time to
enhance it, and push them into 2.6.28.

- Shared Interrupt support
  


Shared guest interrupts is a prerequisite for merging into mainline.  
Without this, device assignment is useless in anything but a benchmark 
scenario.  I won't push device assignment for 2.6.28 without it.


Shared host interrupts are a different matter; which one did you mean?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Han, Weidong
Amit Shah wrote:
 * On Wednesday 24 Sep 2008 13:21:25 Han, Weidong wrote:
 Amit Shah wrote:
 
 - Add dummy driver to hide/unbind passthrough device from
 host kernel
 
 This isn't needed; we currently don't assign the device to the guest
 if we find that a driver is already loaded. I intend to change it to
 failing guest start altogether in case we find a module already
 using a device. When a guest exits, we release all the structures
 and hence even unloading kvm is not needed to reclaim the device on
 the host side.
 
 This task needn't target 2.6.28. In the long term, we need it to make
 device assignment more user friendly.
 
 How is the current scheme not user friendly? Or, how will adding a
 dummy driver be more user friendly?

We had some discussion on this few months ago. Currently, users need to
remove device driver before assignment. If there are more than one same
type devices, removing driver makes them cannot work at the same time,
even though user just want to assign one of them to guest. Note that not
all drivers support unbind function. If we can provide a mechanism to
hide single device independently, e.g, implement a dummy driver to own
devices that user want to assign to guest. I think it's more friendly to
end user than remove/unbind driver manually.

Randy (Weidong)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Avi Kivity

Yang, Sheng wrote:



- MSI support (WIP)
- MTRR/PAT support of EPT (WIP)
- MTRR/PAT support of shadow (WIP)
- Basic FLR support (WIP)



Above four are my works. All of them work now. But more job should be done to 
polish the patches. And the main part of Function Level Reset would be picked 
by linux-pci. 

Another thing is we would send out/update above patches before Sept. 28, and 
hope they can picked by 2.6.28 merge window.


Avi, what's your opinion? Of course we would work hard. :) But what's the 
deadline of merge window? 

  


No one knows, but it's very unlikely these features will make it for 
2.6.28.  To be merged, it is not sufficient for the patches to be 
ready.  They have to undergo some testing in the field.




- Shared Interrupt support



I still don't know who would do this. It's very important for VT-d real 
usable. If nobody interested in it, I would pick it up, but after Oct. 6

(after National Holiday in China).
  


Shared host interrupts?  What's your plan here?  The polarity trick?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Avi Kivity

Han, Weidong wrote:

- Add dummy driver to hide/unbind passthrough device from host
kernel
  



Maybe this can be implemented at the modprobe/hotplug level.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Yang, Sheng
On Wednesday 24 September 2008 16:34:22 Avi Kivity wrote:
 Han, Weidong wrote:
  Hi all,
 
  The initial passthrough/VT-d patches have been in kvm, it's time to
  enhance it, and push them into 2.6.28.
 
- Shared Interrupt support

 Shared guest interrupts is a prerequisite for merging into mainline.
 Without this, device assignment is useless in anything but a benchmark
 scenario.  I won't push device assignment for 2.6.28 without it.

 Shared host interrupts are a different matter; which one did you mean?


Got confused...

I think we are talking about share host interrupts, that is pre-assigned 
device shared IRQ with other devices. 

Why share guest interrupts is a prerequisite... 

--
regards
Yang, Sheng

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Yang, Sheng
On Wednesday 24 September 2008 16:38:35 Avi Kivity wrote:
 Yang, Sheng wrote:
  - MSI support (WIP)
  - MTRR/PAT support of EPT (WIP)
  - MTRR/PAT support of shadow (WIP)
  - Basic FLR support (WIP)
 
  Above four are my works. All of them work now. But more job should be
  done to polish the patches. And the main part of Function Level Reset
  would be picked by linux-pci.
 
  Another thing is we would send out/update above patches before Sept. 28,
  and hope they can picked by 2.6.28 merge window.
 
  Avi, what's your opinion? Of course we would work hard. :) But what's the
  deadline of merge window?

 No one knows, but it's very unlikely these features will make it for
 2.6.28.  To be merged, it is not sufficient for the patches to be
 ready.  They have to undergo some testing in the field.

..

  - Shared Interrupt support
 
  I still don't know who would do this. It's very important for VT-d real
  usable. If nobody interested in it, I would pick it up, but after Oct. 6
  (after National Holiday in China).

 Shared host interrupts?  What's your plan here?  The polarity trick?

Yeah, share host interrupts. But haven't got the very clear idea yet. 

-- 
regards
Yang, Sheng


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Avi Kivity

Amit Shah wrote:

I'd say we have about 3 weeks to get things in.

  


How do you figure? 2.6.26 was released July 13, we're more than 2.5 
months later.


Furthermore, I'm not queueing untested patches for 2.6.28 at this time.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Han, Weidong
Avi Kivity wrote:
 Han, Weidong wrote:
  - Add dummy driver to hide/unbind passthrough device from host
 kernel 
 
 
 
 Maybe this can be implemented at the modprobe/hotplug level.

I think so.

Randy (Weidong)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Avi Kivity

Han, Weidong wrote:

We had some discussion on this few months ago. Currently, users need to
remove device driver before assignment. If there are more than one same
type devices, removing driver makes them cannot work at the same time,
even though user just want to assign one of them to guest. Note that not
all drivers support unbind function. If we can provide a mechanism to
hide single device independently, e.g, implement a dummy driver to own
devices that user want to assign to guest. I think it's more friendly to
end user than remove/unbind driver manually.
  


That's a good point -- multiple devices with the same driver.

We may need a kernel parameter as well, for built-in drivers.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Avi Kivity

Yang, Sheng wrote:

Shared guest interrupts is a prerequisite for merging into mainline.
Without this, device assignment is useless in anything but a benchmark
scenario.  I won't push device assignment for 2.6.28 without it.

Shared host interrupts are a different matter; which one did you mean?




Got confused...

I think we are talking about share host interrupts, that is pre-assigned 
device shared IRQ with other devices. 

Why share guest interrupts is a prerequisite... 
  


We only have three pci interrupts at this point (though this could be 
easily extended); if you start the guest with a non-trivial number of 
devices, you will have shared guest interrupts.


(of course, when I pointed this out during review, people said it could 
be done later, then forgot all about it)


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] VT-d: remove useless header inclusion

2008-09-24 Thread Avi Kivity

Han, Weidong wrote:

Currently #include <linux/intel-iommu.h> is not needed in
virt/kvm/kvm_main.c. What's more, this inclusion may result in
compilation errors on other architectures.

  


Applied, but please also fix intel-iommu.h to compile on all archs.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Implement an fd pool to get real AIO with posix-aio

2008-09-24 Thread Avi Kivity

Anthony Liguori wrote:

This patch implements a simple fd pool to allow many AIO requests with
posix-aio.  The result is significantly improved performance (identical to that
reported for linux-aio) for both cache=on and cache=off.

The fundamental problem with posix-aio is that it limits itself to one thread
per-file descriptor.  I don't know why this is, but this patch provides a simple
mechanism to work around this (duplicating the file descriptor).

This isn't a great solution, but it seems like a reasonable intermediate step
between posix-aio and a custom thread-pool to replace it.

 
+static int raw_fd_pool_get(BDRVRawState *s)

+{
+int i;
+
+for (i = 0; i  RAW_FD_POOL_SIZE; i++) {
+/* already in use */
+if (s-fd_pool[i] != -1)
+continue;
+
+/* try to dup file descriptor */
+s-fd_pool[i] = dup(s-fd);
+if (s-fd_pool[i] != -1)
+return s-fd_pool[i];
+}
+
+/* we couldn't dup the file descriptor so just use the main one */
+return s-fd;
+}
+
  


dup()ing the fd on each request is unnecessary work; would be better to 
cache the duped fd.


Of course, if this is just a stepping stone, it doesn't matter very much.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Yang, Sheng
On Wednesday 24 September 2008 16:53:15 Avi Kivity wrote:
 Yang, Sheng wrote:
  Shared guest interrupts is a prerequisite for merging into mainline.
  Without this, device assignment is useless in anything but a benchmark
  scenario.  I won't push device assignment for 2.6.28 without it.
 
  Shared host interrupts are a different matter; which one did you mean?
 
  Got confused...
 
  I think we are talking about share host interrupts, that is pre-assigned
  device shared IRQ with other devices.
 
  Why share guest interrupts is a prerequisite...

 We only have three pci interrupts at this point (though this could be
 easily extended); if you start the guest with a non-trivial number of
 devices, you will have shared guest interrupts.

 (of course, when I pointed this out during review, people said it could
 be done later, then forgot all about it)

.. 

I think it's a performance issue, not break it? How about do it like Xen side? 
Try best to avoid the share, extended the pci interrupts, improve hash 
algorithm. Is there anything else we can do?

-- 
regards
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] VT-d: remove useless header inclusion

2008-09-24 Thread Han, Weidong
Avi Kivity wrote:
 Han, Weidong wrote:
 Currently #include linux/intel-iommu.h is not needed in
 virt/kvm/kvm_main.c. What's more, this inclusion may result in
 compilation error in other architecture.
 
 
 
 Applied, but please also fix intel-iommu.h to compile on all archs.

Avi,

Current intel-iommu.h should compile on all archs. On linux-next,
they moved the __iommu_clflush_cache() definition to intel-iommu.h, which
results in it failing to compile on some archs, such as IA64.

Randy (Weidong)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Avi Kivity

Han, Weidong wrote:

Avi Kivity wrote:
  

Han, Weidong wrote:


- Add dummy driver to hide/unbind passthrough device from host
kernel 

  

Maybe this can be implemented at the modprobe/hotplug level.



I think so.
  


I'm not sure now -- after I saw the point about a driver binding to two 
devices.


Perhaps the deeper fix is to separate driver loading from binding to 
devices (or maybe it's separated already, but not exposed)?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] VT-d: remove useless header inclusion

2008-09-24 Thread Avi Kivity

Han, Weidong wrote:

Avi Kivity wrote:
  

Han, Weidong wrote:


Currently #include linux/intel-iommu.h is not needed in
virt/kvm/kvm_main.c. What's more, this inclusion may result in
compilation error in other architecture.


  

Applied, but please also fix intel-iommu.h to compile on all archs.



Avi,

Current intel-iommu.h should compile on all archs. On linux-next,
they moved the __iommu_clflush_cache() definition to intel-iommu.h, which
results in it failing to compile on some archs, such as IA64.
  


Well, it still wants fixing, even if it is in linux-next only.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch] cap code_gen_buffer_size on ia64

2008-09-24 Thread Jes Sorensen

Hi,

This one limits the code_gen_buffer_size on ia64, phys_mem_size/4
really gets out of hand when you boot say a 64GB guest.

Cheers,
Jes

Cap code_gen_buffer_size on ia64 - it quickly goes out of hand otherwise
when booting larger guests.

Signed-off-by: Jes Sorensen [EMAIL PROTECTED]

---
 qemu/exec.c |4 
 1 file changed, 4 insertions(+)

Index: kvm-userspace.git/qemu/exec.c
===
--- kvm-userspace.git.orig/qemu/exec.c
+++ kvm-userspace.git/qemu/exec.c
@@ -443,6 +443,10 @@
 start = (void *) 0x6000UL;
 if (code_gen_buffer_size > (512 * 1024 * 1024))
 code_gen_buffer_size = (512 * 1024 * 1024);
+#elif defined(__ia64__)
+   /* cap the mapping, don't want it totally out of hand */
+if (code_gen_buffer_size > (512 * 1024 * 1024))
+code_gen_buffer_size = (512 * 1024 * 1024);
 #endif
 code_gen_buffer = mmap(start, code_gen_buffer_size,
PROT_WRITE | PROT_READ | PROT_EXEC,


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Avi Kivity

Yang, Sheng wrote:

We only have three pci interrupts at this point (though this could be
easily extended); if you start the guest with a non-trivial number of
devices, you will have shared guest interrupts.

(of course, when I pointed this out during review, people said it could
be done later, then forgot all about it)


. 

I think it's a performance issue, not break it? How about do it like Xen side? 
Try best to avoid the share, extended the pci interrupts, improve hash 
algorithm. Is there anything else we can do?
  



Two separate issues:

1. only three guest pci interrupts

That's a performance issue, not correctness.  can be fixed by using gsi 
16-23 in APIC mode, and by adding another IOAPIC (so we can use gsi 
16-47).  Anthony Xu posted some patches for this, not sure where this 
stands, but it was the right approach.


2. shared guest pci interrupts

That's a correctness issue.  No matter how many interrupts we have, we 
may have sharing issues.  Of course with only three the issue is very 
pressing since we will get sharing with just a few devices.  Currently 
if two assigned devices share a guest interrupts, or if an emulated 
device shares an interrupt with an assigned device, things will break.


They need to be fixed independently.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] remove kvm_init_ap from qemu code.

2008-09-24 Thread Avi Kivity

Glauber Costa wrote:

Call it as a special case for cpu 0 creation. This removes
a piece of kvm code from raw qemu.
 
+void kvm_init_new_ap(int cpu, CPUState *env)

+{
+if (!cpu)
+kvm_init_ap();
+
+pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
+
+while (vcpu_info[cpu].created == 0)
+   qemu_cond_wait(&qemu_vcpu_cond);
+}
+
  


kvm_init_ap() is machine-level initialization.  It's hacky to call it 
from cpu-level initialization.


Do we have a machine-level initialization hook?


[btw, !x makes sense when x is a boolean, pointer, or count.  Then it 
means there is no x or there are no xs.  But when x is an index, !x 
means x equals 0, so you may as well write that.


and don't get me started on !strcmp()]

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Yang, Sheng
On Wednesday 24 September 2008 17:22:53 Avi Kivity wrote:
 Yang, Sheng wrote:
  We only have three pci interrupts at this point (though this could be
  easily extended); if you start the guest with a non-trivial number of
  devices, you will have shared guest interrupts.
 
  (of course, when I pointed this out during review, people said it could
  be done later, then forgot all about it)
 
  .
 
  I think it's a performance issue, not break it? How about do it like Xen
  side? Try best to avoid the share, extended the pci interrupts, improve
  hash algorithm. Is there anything else we can do?

 Two separate issues:

 1. only three guest pci interrupts

 That's a performance issue, not correctness.  can be fixed by using gsi
 16-23 in APIC mode, and by adding another IOAPIC (so we can use gsi
 16-47).  Anthony Xu posted some patches for this, not sure where this
 stands, but it was the right approach.

 2. shared guest pci interrupts

 That's a correctness issue.  No matter how many interrupts we have, we
 may have sharing issues.  Of course with only three the issue is very
 pressing since we will get sharing with just a few devices.  Currently
 if two assigned devices share a guest interrupts, or if an emulated
 device shares an interrupt with an assigned device, things will break.

 They need to be fixed independently.

About the second issue, I don't understand how it would break... Would you 
please give more details on this? It's a QEmu bug or IOAPIC bug?

-- 
regards
Yang, Sheng

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Amit Shah
* On Wednesday 24 Sep 2008 14:08:14 Han, Weidong wrote:
 Amit Shah wrote:
  * On Wednesday 24 Sep 2008 13:21:25 Han, Weidong wrote:
  Amit Shah wrote:
  - Add dummy driver to hide/unbind passthrough device from
  host kernel
 
  This isn't needed; we currently don't assign the device to the guest
  if we find that a driver is already loaded. I intend to change it to
  failing guest start altogether in case we find a module already
  using a device. When a guest exits, we release all the structures
  and hence even unloading kvm is not needed to reclaim the device on
  the host side.
 
  This task needn't target 2.6.28. For long term, we need it to make
  device assignment more user friendly.
 
  How is the current scheme not user friendly? Or, how will adding a
  dummy driver be more user friendly?

 We had some discussion on this few months ago. Currently, users need to
 remove device driver before assignment. If there are more than one same
 type devices, removing driver makes them cannot work at the same time,
 even though user just want to assign one of them to guest. Note that not
 all drivers support unbind function. If we can provide a mechanism to
 hide single device independently, e.g, implement a dummy driver to own
 devices that user want to assign to guest. I think it's more friendly to
 end user than remove/unbind driver manually.

This needs a change in the driver core and it definitely won't be solved by 
having a dummy device. We have to have a way to signal to modules that a 
particular device will now be owned by a different module, even if the 
current module thinks it is the sole owner.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Amit Shah
* On Wednesday 24 Sep 2008 14:16:47 Avi Kivity wrote:
 Amit Shah wrote:
  I'd say we have about 3 weeks to get things in.

 How do you figure? 2.6.26 was released July 13, we're more than 2.5
 months later.

A week for 2.6.28 to open and two weeks for the rc1 window.

 Furthermore, I'm not queueing untested patches for 2.6.28 at this time.

Of course, I'm not advocating this! If they're tested by Intel, we can push 
them in.

Amit
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source

2008-09-24 Thread Yang, Sheng
On Tuesday 23 September 2008 23:04:48 Jan Kiszka wrote:
 Yang, Sheng wrote:
  On Friday 19 September 2008 20:03:02 Jan Kiszka wrote:
  LINT0 of the LAPIC can be used to route PIT events as NMI watchdog
  ticks into the guest. This patch aligns the in-kernel irqchip emulation
  with the user space irqchip which already supports this feature. The
  trick is to route PIT interrupts to all LAPIC's LVT0 lines.
 
  Rebased patch and slightly polished patch originally posted by Sheng
  Yang.
 
  Signed-off-by: Sheng Yang [EMAIL PROTECTED]
 
  Thanks for pick up this patch again!
 
  Have you test some Windows guest with this watchdog? Last time I dropped
  it because it cause BSOD on some version of
  Windows(IRQ_NOT_EQUAL_OR_LESS). I don't remember the exactly situation
  there, but you may have a try.

 Not yet. I always tell my colleagues that I don't need Windows on my
 desktop, I just need a few VM images - for testing... :)

 I will try to dig out / generate some image and reproduce the issue you
 and Gleb see. Hope it will trigger here as well. Anything special
 required to make Windows use the NMI as watchdog?

I don't know if Windows use NMI watchdog. In fact, my original patch just 
cause Windows BSOD, and I think Windows don't use it(Linux NMI watchdog 
mechanism is a little tricky one)...

--
regards
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Avi Kivity

Amit Shah wrote:

* On Wednesday 24 Sep 2008 14:16:47 Avi Kivity wrote:
  

Amit Shah wrote:


I'd say we have about 3 weeks to get things in.
  

How do you figure? 2.6.26 was released July 13, we're more than 2.5
months later.



A week for 2.6.28 to open and two weeks for the rc1 window.

  

Furthermore, I'm not queueing untested patches for 2.6.28 at this time.



Of course, I'm not advocating this! If they're tested by Intel, we can push 
them in.
  


No, the patches have to be in my tree some time before the merge window 
opens.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] cap code_gen_buffer_size on ia64

2008-09-24 Thread Avi Kivity

Jes Sorensen wrote:

Hi,

This one limits the code_gen_buffer_size on ia64, phys_mem_size/4
really gets out of hand when you boot say a 64GB guest.



ia64 doesn't codegen; why not set it to zero?

(and the phys_ram_size / 4 heuristic is ridiculous; code size doesn't 
scale with guest size)


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] cap code_gen_buffer_size on ia64

2008-09-24 Thread Jes Sorensen

Avi Kivity wrote:

ia64 doesn't codegen; why not set it to zero?

(and the phys_ram_size / 4 heuristic is ridiculous; code size doesn't 
scale with guest size)


That works too - I didn't really know this part too well, but I hit
the problem that I was unable to allocate the space because of 64 bit
issues.

I'll whip up a patch to disable it for ia64.

Cheers,
Jes



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/5] bios: 4G updates

2008-09-24 Thread Avi Kivity

Alex Williamson wrote:

As requested, here's the follow-on to the 4G MTRR changes split into
functional bits.

[1/5] Rename variables to reflect what they're really reporting
[2/5] Reformat ram_probe() to match the rest of the code
[3/5] Add SMBIOS info for memory above 4G
[4/5] Fix the SMBIOS type 19 & 20 range end address
[5/5] Optional - switch default MTRR type to WB and only cover MMIO

I've taken some liberties renaming and reformatting, if we'd rather not
introduce too many extraneous changes, I can drop those.  The SMBIOS
changes seem to work up to 32767MB, then we hit a limitation in the type
17 table only providing 15bits for the size.  We might need to describe
multiple virtual DIMMs to get around that, but it's a separate issue.

The final patch is optional and switches over to make the variable MTRRs
only describe the MMIO hole, leaving the rest of the address space
default to WB.  I can't say I fully understand the implications of
hotplug memory for this scenario.  Let me know if there are comments.
  


The patches all look good, however renaming and reformatting will lead 
to merge headaches later on.  We haven't been good at working with bochs 
bios upstream.


Can you peek in bochs upstream and see if it's worth merging?  If not, 
I'll just merge these patches.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/9] coalesce mmio regions with an explicit call

2008-09-24 Thread Avi Kivity

Glauber Costa wrote:
You can't coalesce the registers which trigger device action.  You'll  
destroy latency and/or functionality.



which kills the goal of getting rid of explicit kvm code.

  


It's a fact that coalescing helps kvm but not qemu.


So maybe the solution here is to add calls in qemu to a memory
coalescing function that in the raw qemu / kqemu case just don't
do anything?
  


That's just word games.  s/kvm/qemu/ won't change the fact that this is 
a kvm specific hook.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch] do not allocate code_gen buffer on ia64

2008-09-24 Thread Jes Sorensen

Avi Kivity wrote:

Jes Sorensen wrote:

Hi,

This one limits the code_gen_buffer_size on ia64, phys_mem_size/4
really gets out of hand when you boot say a 64GB guest.


ia64 doesn't codegen; why not set it to zero?


How about this one then?

Jes
Do not allocate a code_gen buffer on ia64 given it doesn't support
code generation.

Signed-off-by: Jes Sorensen [EMAIL PROTECTED]

---
 qemu/exec.c |4 
 1 file changed, 4 insertions(+)

Index: kvm-userspace.git/qemu/exec.c
===
--- kvm-userspace.git.orig/qemu/exec.c
+++ kvm-userspace.git/qemu/exec.c
@@ -407,6 +407,10 @@
 
 static void code_gen_alloc(unsigned long tb_size)
 {
+#ifdef TARGET_IA64
+   return;
+#endif
+
 #ifdef USE_STATIC_CODE_GEN_BUFFER
 code_gen_buffer = static_code_gen_buffer;
 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;


Re: [patch] do not allocate code_gen buffer on ia64

2008-09-24 Thread Avi Kivity

Jes Sorensen wrote:

Avi Kivity wrote:

Jes Sorensen wrote:

Hi,

This one limits the code_gen_buffer_size on ia64, phys_mem_size/4
really gets out of hand when you boot say a 64GB guest.


ia64 doesn't codegen; why not set it to zero?


How about this one then?



Applied, thanks.  Note qemu uses 4 spaces for indentation.  Talk to 
your editor.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] do not allocate code_gen buffer on ia64

2008-09-24 Thread Jes Sorensen

Avi Kivity wrote:
Applied, thanks.  Note qemu uses 4 spaces for indentation.  Talk to 
your editor.


Even when it's a double indentation, ie 8 spaces?

Thats just plain sicko :-(

Jes

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: PIC: enhance IPI avoidance

2008-09-24 Thread Avi Kivity

Marcelo Tosatti wrote:

KVM: PIC: enhance IPI avoidance

The PIC code makes little effort to avoid kvm_vcpu_kick(), resulting in
unnecessary guest exits in some conditions.

For example, if the timer interrupt is routed through the IOAPIC, IRR
for IRQ 0 will get set but not cleared, since the APIC is handling the
acks.

This means that every time an interrupt < 16 is triggered, the priority
logic will find IRQ0 pending and send an IPI to vcpu0 (in case IRQ0 is
not masked, which is Linux's case).

Introduce a new variable isr_ack to represent the IRQ's for which the
guest has been signalled / cleared the ISR. Use it to avoid more than
one IPI per trigger-ack cycle, in addition to the avoidance when ISR is
set in get_priority().

Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]

 struct kvm_pic *kvm_create_pic(struct kvm *kvm)
Index: kvm/arch/x86/kvm/irq.h
===
--- kvm.orig/arch/x86/kvm/irq.h
+++ kvm/arch/x86/kvm/irq.h
@@ -42,6 +42,7 @@ struct kvm_kpic_state {
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
+   u8 isr_ack; /* interrupt ack detection */
u8 priority_add;/* highest irq priority */
u8 irq_base;
u8 read_reg_select;
  


Needs to be cleared by reset and by register load from userspace, no?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Han, Weidong
Amit Shah wrote:
 * On Wednesday 24 Sep 2008 14:08:14 Han, Weidong wrote:
 Amit Shah wrote:
 * On Wednesday 24 Sep 2008 13:21:25 Han, Weidong wrote:
 Amit Shah wrote:
 - Add dummy driver to hide/unbind passthrough device from
 host kernel
 
 This isn't needed; we currently don't assign the device to the
 guest if we find that a driver is already loaded. I intend to
 change it to failing guest start altogether in case we find a
 module already using a device. When a guest exits, we release all
 the structures and hence even unloading kvm is not needed to
 reclaim the device on the host side.
 
 This task needn't targe 2.6.28. For long term, we need it to make
 device assignment more user friendly.
 
 How is the current scheme not user friendly? Or, how will adding a
 dummy driver be more user friendly?
 
 We had some discussion on this few months ago. Currently, users need
 to remove device driver before assignment. If there are more than
 one same type devices, removing driver makes them cannot work at the
 same time, even though user just want to assign one of them to
 guest. Note that not all drivers support unbind function. If we can
 provide a mechanism to hide single device independently, e.g,
 implement a dummy driver to own devices that user want to assign to
 guest. I think it's more friendly to end user than remove/unbind
 driver manually. 
 
 This needs a change in the driver core and it definitely won't be
 solved by having a dummy device. We have to have a way to signal to
 modules that a particular device will now be owned by a different
 module, even if the current module thinks it is the sole owner.

The assigned devices are only owned by the dummy driver. Like Xen,
pciback owns the assignable devices via adding option
'pciback.hide=(bus:dev:func)' in grub, that means device(bus:dev:func)
driver won't be loaded. Then user can assign these hidden devices. 

Randy (Weidong)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Jan Kiszka
Yang, Sheng wrote:
 On Tuesday 23 September 2008 17:45:44 Gleb Natapov wrote:
 On Tue, Sep 23, 2008 at 05:42:02PM +0800, Yang, Sheng wrote:
 That is exactly what I am using. Run it with SMP hal and do
 hibernate.
 Oh... Finally found how to enable that hibernate option

 And this hibernate works on my virtual_nmi supported box, with smp hal
 and 2 cpus.
 However, for this hibernate won't success if there is no NMI support,
 maybe we can say it's not a regression...
 I am not saying it's a regression, but it would be nice to have it
 working :)

 Yeah, of course. :)

OK, I've a 2003 server up and running now, I'm able to reproduce

kvm_handle_exit: unexpected, valid vectoring info and exit reason is 0x9

but not via hibernate (it suspends and powers off normally, but then
hangs after resume), rather by manually injecting an NMI on CPU0.


After Windows' graphical installation phase I had a hanging guest. At
the same time I got

kvm_handle_exit: unexpected, valid vectoring info and exit reason is 0x9
kvm_handle_exit: Breaking out of NMI-blocked state on VCPU 0 after 1 s
timeout

in the kernel log as well. Something is borken. Will retest the
installation with vanilla KVM. Anyone any ideas on the task switch
thing? Just a false positive or an indication for the real problem in
that domain?

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Avi Kivity

Jan Kiszka wrote:

After Windows' graphical installation phase I had a hanging guest. At
the same time I got

kvm_handle_exit: unexpected, valid vectoring info and exit reason is 0x9
kvm_handle_exit: Breaking out of NMI-blocked state on VCPU 0 after 1 s
timeout

in the kernel log as well. Something is borken. Will retest the
installation with vanilla KVM. Anyone any ideas on the task switch
thing? Just a false positive or an indication for the real problem in
that domain?
  


Check the descriptor type for the NMI vector, that should show if it's 
real or not.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Guest crash with 2.6.27-rc6 (a different one)

2008-09-24 Thread Avi Kivity

Dan Smith wrote:

Hi,

After hitting the pvclock-related issue, I recompiled my 2.6.27-rc6
kernel without CONFIG_KVM_CLOCK.  It stays up far longer, but I see
the following guest crash when I stress it (with a source build):

  


Are you using 4K stacks?  If so, please try with 8K stacks and report.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Gleb Natapov
On Wed, Sep 24, 2008 at 02:40:18PM +0200, Jan Kiszka wrote:
 Yang, Sheng wrote:
  On Tuesday 23 September 2008 17:45:44 Gleb Natapov wrote:
  On Tue, Sep 23, 2008 at 05:42:02PM +0800, Yang, Sheng wrote:
  That is exactly what I am using. Run it with SMP hal and do
  hibernate.
  Oh... Finally found how to enable that hibernate option
 
  And this hibernate works on my virtual_nmi supported box, with smp hal
  and 2 cpus.
  However, for this hibernate won't success if there is no NMI support,
  maybe we can say it's not a regression...
  I am not saying it's a regression, but it would be nice to have it
  working :)
 
  Yeah, of course. :)
 
 OK, I've a 2003 server up and running now, I'm able to reproduce
 
 kvm_handle_exit: unexpected, valid vectoring info and exit reason is 0x9
 
 but not via hibernate (it suspends and powers off normally, but then
 hangs after resume), rather by manually injecting an NMI on CPU0.
 
I found out today that on regular windows 2003 the problem does not
exist (on hibernate at least). The image I have was used to run WLK
tests (windows logo kit) and this kit changes something in windows
kernel to do additional stuff after hibernation and that is where we
crash.

 
 After Windows' graphical installation phase I had a hanging guest. At
 the same time I got
 
 kvm_handle_exit: unexpected, valid vectoring info and exit reason is 0x9
 kvm_handle_exit: Breaking out of NMI-blocked state on VCPU 0 after 1 s
 timeout
 
 in the kernel log as well. Something is borken. Will retest the
 installation with vanilla KVM. Anyone any ideas on the task switch
 thing? Just a false positive or an indication for the real problem in
 that domain?
 
Nothing is broken IMO. The IDT entry for NMI is set up as task gate so
we get a task switch exit after NMI injection.

We should do something like this:

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 046a91b..860e66d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2826,10 +2826,20 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
unsigned long exit_qualification;
u16 tss_selector;
int reason;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
 
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
reason = (u32)exit_qualification >> 30;
+
+   if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
+   (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+   (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) == 
INTR_TYPE_NMI_INTR) {
+   vcpu->arch.nmi_injected = false;
+   vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+   printk(KERN_DEBUG "NMI cause task switch. No need to 
reinject\n");
+   }
tss_selector = exit_qualification;
 
return kvm_task_switch(vcpu, tss_selector, reason);
@@ -3002,7 +3012,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, 
struct kvm_vcpu *vcpu)
 
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
-   exit_reason != EXIT_REASON_EPT_VIOLATION))
+   exit_reason != EXIT_REASON_EPT_VIOLATION &&
+   exit_reason != EXIT_REASON_TASK_SWITCH))
printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
   "exit reason is 0x%x\n", __func__, exit_reason);
if (exit_reason < kvm_vmx_max_exit_handlers

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] do not allocate code_gen buffer on ia64

2008-09-24 Thread Jes Sorensen

Avi Kivity wrote:

Jes Sorensen wrote:

Avi Kivity wrote:
Applied, thanks.  Note qemu uses 4 spaces for indentation.  Talk to 
your editor.


Even when it's a double indentation, ie 8 spaces?

Yes.


:-(

Well then we really should add something like this to every file in
qemu, since this behavior is so non standard from what any normal editor
does per default.

/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */


Thats just plain sicko :-(


It's the only scheme that works 100% reliably.


Ehm, maybe, but then that makes 8 space (tab) indention work 110%
reliable :-)

Cheers,
Jes
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Jan Kiszka
Gleb Natapov wrote:
 On Wed, Sep 24, 2008 at 02:40:18PM +0200, Jan Kiszka wrote:
 Yang, Sheng wrote:
 On Tuesday 23 September 2008 17:45:44 Gleb Natapov wrote:
 On Tue, Sep 23, 2008 at 05:42:02PM +0800, Yang, Sheng wrote:
 That is exactly what I am using. Run it with SMP hal and do
 hibernate.
 Oh... Finally found how to enable that hibernate option

 And this hibernate works on my virtual_nmi supported box, with smp hal
 and 2 cpus.
 However, for this hibernate won't success if there is no NMI support,
 maybe we can say it's not a regression...
 I am not saying it's a regression, but it would be nice to have it
 working :)

 Yeah, of course. :)
 OK, I've a 2003 server up and running now, I'm able to reproduce

 kvm_handle_exit: unexpected, valid vectoring info and exit reason is 0x9

 but not via hibernate (it suspends and powers off normally, but then
 hangs after resume), rather by manually injecting an NMI on CPU0.

 I found out today that on regular windows 2003 the problem does not
 exist (on hibernate at least). The image I have was used to run WLK
 tests (windows logo kit) and this kit changes something in windows
 kernel to do additional stuff after hibernation and that is where we
 crash.

Ahh!

 
 After Windows' graphical installation phase I had a hanging guest. At
 the same time I got

 kvm_handle_exit: unexpected, valid vectoring info and exit reason is 0x9
 kvm_handle_exit: Breaking out of NMI-blocked state on VCPU 0 after 1 s
 timeout

 in the kernel log as well. Something is borken. Will retest the
 installation with vanilla KVM. Anyone any ideas on the task switch
 thing? Just a false positive or an indication for the real problem in
 that domain?

 Nothing is broken IMO. The IDT entry for NMI is set up as task gate so
 we get a task switch exit after NMI injection.

Yes, that's what I see here now as well.

 
 We should do something like this:
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 046a91b..860e66d 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2826,10 +2826,20 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, 
 struct kvm_run *kvm_run)
   unsigned long exit_qualification;
   u16 tss_selector;
   int reason;
 + struct vcpu_vmx *vmx = to_vmx(vcpu);
  
   exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
  
   reason = (u32)exit_qualification  30;
 +
 + if (reason == TASK_SWITCH_GATE  vmx-vcpu.arch.nmi_injected 
 + (vmx-idt_vectoring_info  VECTORING_INFO_VALID_MASK) 
 + (vmx-idt_vectoring_info  VECTORING_INFO_TYPE_MASK) == 
 INTR_TYPE_NMI_INTR) {
 + vcpu-arch.nmi_injected = false;
 + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 +   GUEST_INTR_STATE_NMI);
 + printk(KERN_DEBUGNMI cause task switch. No need to 
 reinject\n);
 + }

OK, I just think we are not supposed to set GUEST_INTR_STATE_NMI without
cpu_has_virtual_nmis(). Otherwise looks reasonable. Have you tested
this? Does it make your 2003 power-off?

   tss_selector = exit_qualification;
  
   return kvm_task_switch(vcpu, tss_selector, reason);
 @@ -3002,7 +3012,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, 
 struct kvm_vcpu *vcpu)
  
   if ((vectoring_info  VECTORING_INFO_VALID_MASK) 
   (exit_reason != EXIT_REASON_EXCEPTION_NMI 
 - exit_reason != EXIT_REASON_EPT_VIOLATION))
 + exit_reason != EXIT_REASON_EPT_VIOLATION 
 + exit_reason != EXIT_REASON_TASK_SWITCH))
   printk(KERN_WARNING %s: unexpected, valid vectoring info and 
  exit reason is 0x%x\n, __func__, exit_reason);

Dumping the vectoring info here as well would have accelerated the
debugging. I think we should add this.

   if (exit_reason  kvm_vmx_max_exit_handlers
 
 --
   Gleb.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Gleb Natapov
On Wed, Sep 24, 2008 at 02:56:40PM +0200, Jan Kiszka wrote:
  We should do something like this:
  
  diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
  index 046a91b..860e66d 100644
  --- a/arch/x86/kvm/vmx.c
  +++ b/arch/x86/kvm/vmx.c
  @@ -2826,10 +2826,20 @@ static int handle_task_switch(struct kvm_vcpu 
  *vcpu, struct kvm_run *kvm_run)
  unsigned long exit_qualification;
  u16 tss_selector;
  int reason;
  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
   
  exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
   
  reason = (u32)exit_qualification  30;
  +
  +   if (reason == TASK_SWITCH_GATE  vmx-vcpu.arch.nmi_injected 
  +   (vmx-idt_vectoring_info  VECTORING_INFO_VALID_MASK) 
  +   (vmx-idt_vectoring_info  VECTORING_INFO_TYPE_MASK) == 
  INTR_TYPE_NMI_INTR) {
  +   vcpu-arch.nmi_injected = false;
  +   vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
  + GUEST_INTR_STATE_NMI);
  +   printk(KERN_DEBUGNMI cause task switch. No need to 
  reinject\n);
  +   }
 
 OK, I just think we are not supposed to set GUEST_INTR_STATE_NMI without
 cpu_has_virtual_nmis(). Otherwise looks reasonable. Have you tested
 this? Does it make your 2003 power-off?
 
It does power-off, but hangs during reboot. Looking at it right now.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Jan Kiszka
Gleb Natapov wrote:
 On Wed, Sep 24, 2008 at 02:56:40PM +0200, Jan Kiszka wrote:
 We should do something like this:

 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 046a91b..860e66d 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2826,10 +2826,20 @@ static int handle_task_switch(struct kvm_vcpu 
 *vcpu, struct kvm_run *kvm_run)
 unsigned long exit_qualification;
 u16 tss_selector;
 int reason;
 +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  
 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
  
 reason = (u32)exit_qualification  30;
 +
 +   if (reason == TASK_SWITCH_GATE  vmx-vcpu.arch.nmi_injected 
 +   (vmx-idt_vectoring_info  VECTORING_INFO_VALID_MASK) 
 +   (vmx-idt_vectoring_info  VECTORING_INFO_TYPE_MASK) == 
 INTR_TYPE_NMI_INTR) {
 +   vcpu-arch.nmi_injected = false;
 +   vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 + GUEST_INTR_STATE_NMI);
 +   printk(KERN_DEBUGNMI cause task switch. No need to 
 reinject\n);
 +   }
 OK, I just think we are not supposed to set GUEST_INTR_STATE_NMI without
 cpu_has_virtual_nmis(). Otherwise looks reasonable. Have you tested
 this? Does it make your 2003 power-off?

 It does power-off, but hands during reboot. Looking at it right now.

After completing the RAM read-back from disk? This is where it hangs
here. I also briefly played with an XP image, and that one even
hard-rebooted at that point.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Gleb Natapov
On Wed, Sep 24, 2008 at 03:11:36PM +0200, Jan Kiszka wrote:
  OK, I just think we are not supposed to set GUEST_INTR_STATE_NMI without
  cpu_has_virtual_nmis(). Otherwise looks reasonable. Have you tested
  this? Does it make your 2003 power-off?
 
  It does power-off, but hands during reboot. Looking at it right now.
 
 Yeah, with your patch I'm getting a totally different Blue Screen on
 spurious manual NMI injection:
 
 Hardware Malfunction
 
 Call your hardware vendor for support
 
 8)
 
Cool!

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Gleb Natapov
On Wed, Sep 24, 2008 at 04:17:00PM +0300, Gleb Natapov wrote:
 On Wed, Sep 24, 2008 at 03:11:36PM +0200, Jan Kiszka wrote:
   OK, I just think we are not supposed to set GUEST_INTR_STATE_NMI without
   cpu_has_virtual_nmis(). Otherwise looks reasonable. Have you tested
   this? Does it make your 2003 power-off?
  
   It does power-off, but hands during reboot. Looking at it right now.
  
  Yeah, with your patch I'm getting a totally different Blue Screen on
  spurious manual NMI injection:
  
  Hardware Malfunction
  
  Call your hardware vendor for support
  
  8)
  
 Cool!
 
BTW I think this is also needed in case the CPU was reset before it had a
chance to handle the NMI:

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5927b79..106e16d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2234,6 +2234,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
}
 
vmx->vcpu.arch.rmode.active = 0;
+   vcpu->arch.nmi_pending = false;
+   vcpu->arch.nmi_injected = false;
 
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(&vmx->vcpu, 0);
--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Gleb Natapov
On Wed, Sep 24, 2008 at 03:08:04PM +0200, Jan Kiszka wrote:
  It does power-off, but hangs during reboot. Looking at it right now.
 
 After completing the RAM read-back from disk? This is where it hangs
Much earlier. BIOS hangs because CPU1 ignores SIPI.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Jan Kiszka
Gleb Natapov wrote:
 On Wed, Sep 24, 2008 at 03:08:04PM +0200, Jan Kiszka wrote:
 It does power-off, but hangs during reboot. Looking at it right now.
 After completing the RAM read-back from disk? This is where it hangs
 Much earlier. BIOS hangs because CPU1 ignores SIPI.

That sounds like an APIC state reset issue.

BTW, I'm getting tons of

Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x24
vcpu 1 received sipi with vector # 24
SIPI to vcpu 1 vector 0x24
SIPI to vcpu 1 vector 0x24
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x24

on my box with SMP guests (Linux and Windows). Do they all point to yet
incomplete emulations, or are they just far too verbose?

Jan
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Gleb Natapov
On Wed, Sep 24, 2008 at 03:33:13PM +0200, Jan Kiszka wrote:
 Gleb Natapov wrote:
  On Wed, Sep 24, 2008 at 03:08:04PM +0200, Jan Kiszka wrote:
  It does power-off, but hangs during reboot. Looking at it right now.
  After completing the RAM read-back from disk? This is where it hangs
  Much earlier. BIOS hangs because CPU1 ignores SIPI.
 
 That sounds like an APIC state reset issue.
 
 BTW, I'm getting tons of
 
 Ignoring de-assert INIT to vcpu 0
 Ignoring de-assert INIT to vcpu 1
 Ignoring de-assert INIT to vcpu 0
 Ignoring de-assert INIT to vcpu 1
 Ignoring de-assert INIT to vcpu 1
 SIPI to vcpu 1 vector 0x24
 vcpu 1 received sipi with vector # 24
 SIPI to vcpu 1 vector 0x24
 SIPI to vcpu 1 vector 0x24
 Ignoring de-assert INIT to vcpu 0
 Ignoring de-assert INIT to vcpu 1
 SIPI to vcpu 1 vector 0x24
 
 on my box with SMP guests (Linux and Windows). Do they all point to yet
 incomplete emulations, or are they just far too verbose?
 
Not sure about de-assert messages but others are fine.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Avi Kivity

Jan Kiszka wrote:

Gleb Natapov wrote:
  

On Wed, Sep 24, 2008 at 03:08:04PM +0200, Jan Kiszka wrote:


It does power-off, but hangs during reboot. Looking at it right now.


After completing the RAM read-back from disk? This is where it hangs
  

Much earlier. BIOS hangs because CPU1 ignores SIPI.



That sounds like an APIC state reset issue.

BTW, I'm getting tons of

Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x24
vcpu 1 received sipi with vector # 24
SIPI to vcpu 1 vector 0x24
SIPI to vcpu 1 vector 0x24
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x24

on my box with SMP guests (Linux and Windows). Do they all point to yet
incomplete emulations, or are they just far too verbose?
  


'Ignoring' sounds incomplete, but there's no real need to print it.  The 
rest is just noise.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Jan Kiszka
Gleb Natapov wrote:
 On Wed, Sep 24, 2008 at 04:17:00PM +0300, Gleb Natapov wrote:
 On Wed, Sep 24, 2008 at 03:11:36PM +0200, Jan Kiszka wrote:
 OK, I just think we are not supposed to set GUEST_INTR_STATE_NMI without
 cpu_has_virtual_nmis(). Otherwise looks reasonable. Have you tested
 this? Does it make your 2003 power-off?

 It does power-off, but hangs during reboot. Looking at it right now.
 Yeah, with your patch I'm getting a totally different Blue Screen on
 spurious manual NMI injection:

 Hardware Malfunction

 Call your hardware vendor for support

 8)

 Cool!

 BTW I have this is also needed in case CPU was reseted before it had a
 chance to handle NMI:
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 5927b79..106e16d 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2234,6 +2234,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
   }
  
   vmx-vcpu.arch.rmode.active = 0;
 + vcpu-arch.nmi_pending = false;
 + vcpu-arch.nmi_injected = false;
  
   vmx-vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
   kvm_set_cr8(vmx-vcpu, 0);

Good point. Same will be required for soft_vnmi_blocked. Will include
this as well as your other patch in an update of my NMI series.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/4] Allow enabling kvm_trace on external module

2008-09-24 Thread Avi Kivity

Eduardo Habkost wrote:

This series adds compat code to allow enabling kvm_trace when building
KVM as an external module on older kernels.

The most hackish part is the last patch, that adds --with-kvm-trace to
configure and adds an include to a generated file on kernel/x86/Kbuild. It
doesn't look pretty, so suggestions on how to make this better are
welcome.
  


Applied all, thanks.  We could improve Kbuild by having kernel/Makefile 
include config.mak and pass some variable to Kbuild somehow, but diving 
into Kbuild isn't my idea of a week well spent.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] relay_open() compat

2008-09-24 Thread Avi Kivity

Eduardo Habkost wrote:

@@ -73,6 +73,8 @@ BEGIN { split(INIT_WORK tsc_khz desc_struct ldttss_desc64 
desc_ptr  \
 }
 }
 
+{ sub(/relay_open/, kvm_relay_open) }

+
  


I moved this bit into the compat_apis variable.  Hope it still works.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Jan Kiszka
Gleb Natapov wrote:
 On Wed, Sep 24, 2008 at 03:08:04PM +0200, Jan Kiszka wrote:
 It does power-off, but hangs during reboot. Looking at it right now.
 After completing the RAM read-back from disk? This is where it hangs
 Much earlier. BIOS hangs because CPU1 ignores SIPI.

Apropos APIC: I also have this with Win2003 in my kernel log:

 apic write: bad size=1 fee00030

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Implement an fd pool to get real AIO with posix-aio

2008-09-24 Thread Anthony Liguori

Avi Kivity wrote:

Anthony Liguori wrote:

dup()ing the fd on each request is unnecessary work; would be better 
to cache the duped fd.


Yeah, I was concerned about this too.  Ryan reran the fio benchmark and 
the submission latency and completion latency were identical to the 
linux-aio patches.  That suggests that the overhead of dup() is lost in 
the noise.


Since this is simpler and keeps the number of open file descriptors as 
low as possible, I was happy about that.


Regards,

Anthony Liguori


Of course, if this is just a stepping stone, it doesn't matter very much.



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Avi Kivity

Jan Kiszka wrote:

Gleb Natapov wrote:
  

On Wed, Sep 24, 2008 at 03:08:04PM +0200, Jan Kiszka wrote:


It does power-off, but hangs during reboot. Looking at it right now.


After completing the RAM read-back from disk? This is where it hangs
  

Much earlier. BIOS hangs because CPU1 ignores SIPI.



Apropos APIC: I also have this with Win2003 in my kernel log:

 apic write: bad size=1 fee00030

  


Yes Windows ignores the specs here (which want 4-byte accesses).

We should probably drop this printk as well.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: PIC: enhance IPI avoidance

2008-09-24 Thread Marcelo Tosatti
On Wed, Sep 24, 2008 at 03:19:47PM +0300, Avi Kivity wrote:
 Index: kvm/arch/x86/kvm/irq.h
 ===
 --- kvm.orig/arch/x86/kvm/irq.h
 +++ kvm/arch/x86/kvm/irq.h
 @@ -42,6 +42,7 @@ struct kvm_kpic_state {
  u8 irr; /* interrupt request register */
  u8 imr; /* interrupt mask register */
  u8 isr; /* interrupt service register */
 +u8 isr_ack; /* interrupt ack detection */
  u8 priority_add;/* highest irq priority */
  u8 irq_base;
  u8 read_reg_select;
   

 Needs to be cleared by reset 

@@ -213,6 +214,7 @@ void kvm_pic_reset(struct kvm_kpic_state
s->irr = 0;
s->imr = 0;
s->isr = 0;
+   s->isr_ack = 0xff;
s->priority_add = 0;
s->irq_base = 0;
s->read_reg_select = 0;

 and by register load from userspace, no?

Isn't that the responsibility of the guest? Unacked IOAPIC interrupts are not
cleared on register load, are they?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Han, Weidong
Avi Kivity wrote:
 Amit Shah wrote:
 * On Wednesday 24 Sep 2008 14:16:47 Avi Kivity wrote:
 
 Amit Shah wrote:
 
 I'd say we have about 3 weeks to get things in.
 
 How do you figure? 2.6.26 was released July 13, we're more than 2.5
 months later. 
 
 
 A week for 2.6.28 to open and two weeks for the rc1 window.
 
 
 Furthermore, I'm not queueing untested patches for 2.6.28 at this
 time. 
 
 
 Of course, I'm not advocating this! If they're tested by Intel, we
 can push them in. 
 
 
 No, the patches have to be in my tree some time before the merge
 window opens.

I agree patches need sufficient testing before merge to mainline.
Anyway, let's try best to improve passthrough/VT-d code quality and make
it stable asap. 

Randy (Weidong)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-24 Thread Gleb Natapov
On Wed, Sep 24, 2008 at 04:02:36PM +0300, Gleb Natapov wrote:
 On Wed, Sep 24, 2008 at 02:56:40PM +0200, Jan Kiszka wrote:
   We should do something like this:
   
   diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
   index 046a91b..860e66d 100644
   --- a/arch/x86/kvm/vmx.c
   +++ b/arch/x86/kvm/vmx.c
   @@ -2826,10 +2826,20 @@ static int handle_task_switch(struct kvm_vcpu 
   *vcpu, struct kvm_run *kvm_run)
 unsigned long exit_qualification;
 u16 tss_selector;
 int reason;
   + struct vcpu_vmx *vmx = to_vmx(vcpu);

 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);

 reason = (u32)exit_qualification  30;
   +
   + if (reason == TASK_SWITCH_GATE  vmx-vcpu.arch.nmi_injected 
   + (vmx-idt_vectoring_info  VECTORING_INFO_VALID_MASK) 
   + (vmx-idt_vectoring_info  VECTORING_INFO_TYPE_MASK) == 
   INTR_TYPE_NMI_INTR) {
   + vcpu-arch.nmi_injected = false;
   + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
   +   GUEST_INTR_STATE_NMI);
   + printk(KERN_DEBUGNMI cause task switch. No need to 
   reinject\n);
   + }
  
  OK, I just think we are not supposed to set GUEST_INTR_STATE_NMI without
  cpu_has_virtual_nmis(). Otherwise looks reasonable. Have you tested
  this? Does it make your 2003 power-off?
  
 It does power-off, but hangs during reboot. Looking at it right now.
 
OK. The hang is a bug in qemu. The APIC reset function marks CPU 1 as halted
and the CPU never enters the kernel again.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: PIC: enhance IPI avoidance

2008-09-24 Thread Avi Kivity

Marcelo Tosatti wrote:
  

and by register load from userspace, no?



Isn't that the responsibility of the guest?


I'm talking about a restore to previous state scenario.  In this case we 
want to disable any IPI avoidance in case it avoids a needed IPI.



Unacked IOAPIC interrupts are not
cleared on register load, are they?

  


Good question.  I don't know if they should or shouldn't.  But that's a 
different question.  isr_ack is not guest visible, so nothing is lost 
from clearing it, but we can fail if we don't clear it.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/7] qemu: Include hw.h in qemu/hw/isa.h to fix compile issues

2008-09-24 Thread Anthony Liguori

Amit Shah wrote:

* On Tuesday 23 Sep 2008 21:43:44 Anthony Liguori wrote:
  

Amit Shah wrote:


Signed-off-by: Amit Shah [EMAIL PROTECTED]
---
 qemu/hw/isa.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/isa.h b/qemu/hw/isa.h
index 222e4f3..e4a1326 100644
--- a/qemu/hw/isa.h
+++ b/qemu/hw/isa.h
@@ -2,6 +2,8 @@
 #define HW_ISA_H
 /* ISA bus */

+#include hw.h
+
 extern target_phys_addr_t isa_mem_base;

 int register_ioport_read(int start, int length, int size,
  

What compile issues?



register_ioport_read* and register_ioport_write* functions cause a lot of 
this.
  


You could also address this by including hw.h before including isa.h.  
Basically, everything should include qemu-common.h and anything that 
implements emulated hardware should include hw.h before including 
anything else.  It's not perfect, but it's how things are right now.


Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/7] KVM/userspace: Device Assignment: Support for assigning PCI devices to guests

2008-09-24 Thread Anthony Liguori

Amit Shah wrote:

* On Tuesday 23 Sep 2008 22:00:32 Anthony Liguori wrote:
  

Amit Shah wrote:



  

diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index 72f3db8..40eb273 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -616,6 +616,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
 OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
+OBJS+= device-assignment.o
  

This needs to be conditional on at least linux hosts, but probably also
kvm support.



I didn't see any other file that's doing it. So I added this conditional in 
vl.c by having a #if defined(__linux__). That's how usb-linux.c does it as 
well. Is there a better way?
  


aio and compatfd currently do it this way.  block-raw-win32 and 
block-raw-posix are this way.  We're slowly moving things away from 
#ifdef #else #endif to conditional compilation.


Not the whole functionality needs kvm support. This should be able to work 
even without kvm (for example, when the guest is 1:1 mapped in the host 
address space).
  


KVM is needed for interrupt remapping though.  That's something I don't 
see happening for normal userspace any time soon.



+   /* FIXME: Add support for emulated MMIO for non-kvm guests */
+   if (kvm_enabled()) {
  

This doesn't work at all if kvm isn't enabled right?  You should
probably bail out in the init if kvm isn't enabled.  If this whole file
is included conditionally based on KVM support, then you don't have to
worry about using kvm_enabled() guards to conditionally compile out code.



Non-kvm support is currently broken and should be fixed, but that can happen 
after we get this merged.
  


But it would take bouncing interrupts to userspace?  I don't think that 
will ever happen upstream personally.  At any rate, there's no point in 
even trying to support something like that until progress is made 
upstream on this front.



I can temporarily add a check for kvm_enabled and bail out.

  

+   sprintf(dir, /sys/bus/pci/devices/:%02x:%02x.%x/,
+   r_bus, r_dev, r_func);
  

snprintf()



It's guarded by the %02x modifiers; so this doesn't depend on user input.
  


strcpy or sprintf should never be used.  It doesn't matter if it's safe 
in a particular instance.  There are safer functions to use (like snprintf).


All it takes is for someone to come along and change the /sys/bus path 
to be larger without adjusting the buffer size and everything goes to 
hell.  It's inherently brittle.



+   fprintf(stderr, Registered host PCI device %02x:%02x.%1x 
+   (\%s\) as guest device %02x:%02x.%1x\n,
+   r_bus, r_dev, r_func, e_dev_name,
+   pci_bus_num(e_bus), e_device, r_func);
  

Please don't fprintf() unconditionally.



OK; however, a vmdk file open does that so I thought it was alright to do it.
  


I obviously don't use vmdk or else I would have removed that by now :-)


A lot more checks are needed here to see if things can succeed.  We
definitely should bail out if they can't.



Bailing out is done in the out: label below. What else do  you think can fail? 
I've taken care of all the cases that do fail IMO.


  

+   return pci_dev;
+out:
+   pci_unregister_device(pci_dev-dev);
+   return NULL;
+}
  


  

+/*
+ * Syntax to assign device:
+ *
+ * -pcidevice dev=bus:dev.func,dma=dma
+ *
+ * Example:
+ * -pcidevice host=00:13.0,dma=pvdma
+ *
+ * dma can currently only be 'none' to disable iommu support.
  

Does it actually work if you disable iommu support?



If the guest is 1:1 mapped.
  


You mean with Andrea's reserved ram patches?


+#include sys/mman.h
  

Don't think this is needed here.



We use mmap(), so this is needed.
  


Ah.


+/* Initialize assigned devices */
+if (pci_enabled) {
+int r = -1;
+do {
+init_assigned_device(pci_bus, r);
  

Why pass r by reference instead of just returning it?  At any rate, you
should detect when this fails and gracefully terminate QEMU.



'r' is the count of the number of assigned devices -- mostly needed because we 
have the data stored in an array. If we migrate to a list, this can be 
relaxed.


ATM, I start the guest without assigning the device. I haven't figured out a 
way to gracefully terminate qemu yet.
  


In the case of hot plug, you fail the hot plug.  If you start with 
device assignment, just doing an exit would be sufficient.



+#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
+   case QEMU_OPTION_pcidevice:
+   add_assigned_device(optarg);
  

You should copy into an array, then in pc.c, iterate through the array
and call into add_assigned_device.



Is there any benefit in doing this? We're moving the iterate out of vl.c to 
pc.c and both will happen 

Re: [PATCH 6/7] KVM/userspace: Build vtd.c for Intel IOMMU support

2008-09-24 Thread Anthony Liguori

Amit Shah wrote:

* On Tuesday 23 Sep 2008 22:01:10 Anthony Liguori wrote:
  

Amit Shah wrote:


Signed-off-by: Amit Shah [EMAIL PROTECTED]
---
 kernel/x86/Kbuild |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/kernel/x86/Kbuild b/kernel/x86/Kbuild
index 8dc0483..a4cd00c 100644
--- a/kernel/x86/Kbuild
+++ b/kernel/x86/Kbuild
@@ -5,6 +5,9 @@ kvm-objs := kvm_main.o x86.o mmu.o x86_emulate.o
../anon_inodes.o irq.o i8259.o ifeq ($(CONFIG_KVM_TRACE),y)
 kvm-objs += kvm_trace.o
 endif
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 kvm-intel-objs := vmx.o vmx-debug.o ../external-module-compat.o
 kvm-amd-objs := svm.o ../external-module-compat.o
  

Where's the file come from?



Already in the kernel tree -- arch/x86/kvm/vtd.c
  


So this is independent of the rest of the series?  Any reason not to 
commit this Avi?


Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Anthony Liguori

Avi Kivity wrote:

Han, Weidong wrote:

- Add dummy driver to hide/unbind passthrough device from host
kernel
  



Maybe this can be implemented at the modprobe/hotplug level.


Wouldn't you just blacklist the devices in the host and call it a day?

Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] do not allocate code_gen buffer on ia64

2008-09-24 Thread Anthony Liguori

Avi Kivity wrote:

Jes Sorensen wrote:

Avi Kivity wrote:

Jes Sorensen wrote:

Hi,

This one limits the code_gen_buffer_size on ia64, phys_mem_size/4
really gets out of hand when you boot say a 64GB guest.


ia64 doesn't codegen; why not set it to zero?


How about this one then?



Applied, thanks.  Note qemu uses 4 spaces for indentation.  Talk to 
your editor.


It would seem better to replace #ifdef TARGET_IA64 with if (kvm_enabled()).

If QEMU ever got proper ia64 target support, that's going to be an ugly 
bug to find.


Regards,

Anthony Liguori


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: Remaining passthrough/VT-d tasks list

2008-09-24 Thread Dong, Eddie
Avi Kivity wrote:
 Han, Weidong wrote:
 Hi all,
 
 The initial passthrough/VT-d patches have been in kvm,
 it's time to enhance it, and push them into 2.6.28.
 
  - Shared Interrupt support
 
 
 Shared guest interrupts is a prerequisite for merging
 into mainline. Without this, device assignment is useless
 in anything but a benchmark scenario.  I won't push
 device assignment for 2.6.28 without it. 
 
 Shared host interrupts are a different matter; which one
 did you mean? 
 
Avi:
How about we think about it another way? The top usage model of IOMMU is
SR-IOV in my mind, at least for enterprise usage model. We are pushing
the SR-IOV patch for 2.6.28, and are continuously polishing the patch.
Even if it misses the 2.6.28 merge window (unlikely?), we could be able
to ask OSVs to take the SR-IOV patch separately before code freeze since
it is very small, but it is hard to ask for taking whole IOMMU patches.

In Xen side, IOMMU is there, MSI-x is there, so SR-IOV patch is
the only one missed to enable SR-IOV. In KVM side, very likely we can
get MSI patch down soon before chinese holiday, and we of course will
spend tons of effort in qualities too. Should we target this? If yes, we
put MSI patch and push 2.6.28 as 1st priority. We would be able to see
next major release of VMM using KVM have HW IO virtualization
technology: Close to native performance, no sacrifice of IO sharing,
minimal CPU utilization etc.
For those legacy PCI pass thru support, we can continue improve
it too.
Thanks, eddie
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch] do not allocate code_gen buffer on ia64

2008-09-24 Thread Avi Kivity

Anthony Liguori wrote:


It would seem better to replace #ifdef TARGET_IA64 with if 
(kvm_enabled()).




Right.  Committed.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/5] bios: 4G updates

2008-09-24 Thread Alex Williamson
On Wed, 2008-09-24 at 14:07 +0300, Avi Kivity wrote:
 
 The patches all look good, however renaming and reformatting will lead 
 to merge headaches later on.  We haven't been good at working with bochs 
 bios upstream.
 
 Can you peek in bochs upstream and see if it's worth merging?  If not, 
 I'll just merge these patches.

I'll take a look.  It seemed like they added support for putting the
ACPI processor objects in an SSDT last I checked, but the AML for their
processors is fairly trivial.  I'll see if there's anything else
worthwhile.  Thanks,

Alex

-- 
Alex Williamson HP Open Source  Linux Org.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM for Sparc?

2008-09-24 Thread Blue Swirl
On 9/23/08, David Miller [EMAIL PROTECTED] wrote:
 From: Blue Swirl [EMAIL PROTECTED]

 Date: Tue, 23 Sep 2008 18:28:06 +0300


   On 9/22/08, David Miller [EMAIL PROTECTED] wrote:

   As he mentioned, the V8 rett instruction causes problems on V9 chips.
   
 An opcode which was a V8 privileged instruction, rett, got reused as
 a non-privileged instruction in V9, for return.
  
   There are others: rdtbr/flushw and stdfq/stqf. Also any ASI 0x80
   accesses are unprivileged on V9, though that shouldn't be a problem
   since all ASIs used on V8 were 0x80. And of course MMUs are
   incompatible.


 Thanks for the list.  I sent a message to someone who I think might
  have been responsible for these architectural design decisions, letting
  them know what problems it is causing :-)


 So booting a 32-bit kernel on a 64-bit cpu is going to be challenging,
 at best.
  
   Maybe it would be possible to run V8 userspace with full speed
   acceleration on V9 and use translation only for kernel code?


 Yes, that should work.

  BTW, there is another area related to the ASIs.  Trap numbers.

  Even through V9, traps only up to 0x7f are valid.  But sun4v extended
  V9 to allow trap numbers = 0x80, mostly these are used for hypervisor
  calls.

  The trap number field of the instruction is just extended one more
  bit higher to accommodate this.

I see, also Qemu needs to use one more bit then. Does this mean that
even V8 code written specially may use these traps to call hypervisor?
Then we would need to catch these, maybe with some assistance from
the hypervisor.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM for Sparc?

2008-09-24 Thread Blue Swirl
On 9/23/08, David Miller [EMAIL PROTECTED] wrote:
 From: Blue Swirl [EMAIL PROTECTED]

 Date: Tue, 23 Sep 2008 18:34:12 +0300


   On 9/23/08, David Miller [EMAIL PROTECTED] wrote:

   Sun4v systems come with Sun's hypervisor.  Linux simply runs on top
 of that, whether as a host or a guest.
   
 The hypervisor source is opensource and we could technically make
 changes to it, but it isn't very practical.
  
   Do you mean OpenxVM? I think Sun has not published the hypervisor part yet.


 No, I mean OpenSPARC.  The full hypervisor source code for sun4v is in
  the tarball.

 http://www.opensparc.net/

  The code is there for both Niagara-T1 and Niagara-T2, for example for
  Niagara-T1 click on Get The Source -- OpenSPARC T1 -- Downloads

  Then on that page you want OpenSPARC T1 Download for Architecture and
  Performance Modeling Tools.  It includes the full hypervisor and even
  the openboot PROM source code.

I'm not sure, but I think that hypervisor is not a real hypervisor
like the PROM version but one specially designed for Legion emulator.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/4] Allow enabling kvm_trace on external module

2008-09-24 Thread Eduardo Habkost
On Wed, Sep 24, 2008 at 04:48:35PM +0300, Avi Kivity wrote:
 Eduardo Habkost wrote:
 This series adds compat code to allow enabling kvm_trace when building
 KVM as an external module on older kernels.

 The most hackish part is the last patch, that adds --with-kvm-trace to
 configure and adds an include to a generated file on kernel/x86/Kbuild. It
 doesn't look pretty, so suggestions on how to make this better are
 welcome.
   

 Applied all, thanks.  We could improve Kbuild by having kernel/Makefile  
 include config.mak and pass some variable to Kbuild somehow, but diving  
 into Kbuild isn't my idea of a week well spent.

Oops. I've just noticed I broke './configure --with-patched-kernel'. Fix below.

---
From: Eduardo Habkost [EMAIL PROTECTED]
Date: Wed, 24 Sep 2008 14:11:42 -0300
Subject: Always generate config.kbuild

When implementing --with-kvm-trace, I supposed make would never enter
the 'kernel' directory when compiling with --with-patched-kernel. I was
wrong and broke --with-patched-kernel.

Change configure to always generate config.kbuild on the kernel
directory. Otherwise make will explode on 'make header-sync', that runs
even when --with-patched-kernel was used.

Signed-off-by: Eduardo Habkost [EMAIL PROTECTED]
---
 configure |2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 78c2f9c..3b27364 100755
--- a/configure
+++ b/configure
@@ -137,8 +137,6 @@ LD=$cross_prefix$ld
 OBJCOPY=$cross_prefix$objcopy
 EOF
 
-if [ -n $want_module ];then
 cat EOF  kernel/config.kbuild
 CONFIG_KVM_TRACE=$kvm_trace
 EOF
-fi
-- 
1.5.5.GIT


-- 
Eduardo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM for Sparc?

2008-09-24 Thread Blue Swirl
On 9/24/08, Blue Swirl [EMAIL PROTECTED] wrote:
 On 9/23/08, David Miller [EMAIL PROTECTED] wrote:
   From: Blue Swirl [EMAIL PROTECTED]
  
   Date: Tue, 23 Sep 2008 18:28:06 +0300
  
  
 On 9/22/08, David Miller [EMAIL PROTECTED] wrote:
  
 As he mentioned, the V8 rett instruction causes problems on V9 chips.
 
   An opcode which was a V8 privileged instruction, rett, got reused 
 as
   a non-privileged instruction in V9, for return.

 There are others: rdtbr/flushw and stdfq/stqf. Also any ASI 0x80
 accesses are unprivileged on V9, though that shouldn't be a problem
 since all ASIs used on V8 were 0x80. And of course MMUs are
 incompatible.
  
  
   Thanks for the list.  I sent a message to someone who I think might
have been responsible for these architectural design decisions, letting
them know what problems it is causing :-)
  
  
   So booting a 32-bit kernel on a 64-bit cpu is going to be 
 challenging,
   at best.

 Maybe it would be possible to run V8 userspace with full speed
 acceleration on V9 and use translation only for kernel code?
  
  
   Yes, that should work.
  
BTW, there is another area related to the ASIs.  Trap numbers.
  
Even through V9, traps only up to 0x7f are valid.  But sun4v extended
V9 to allow trap numbers >= 0x80, mostly these are used for hypervisor
calls.
  
The trap number field of the instruction is just extended one more
bit higher to accommodate this.


 I see, also Qemu needs to use one more bit then. Does this mean that
  even V8 code written specially may use these traps to call hypervisor?
  Then we would need to catch these, maybe with the some assistance from
  the hypervisor.

Now I found the relevant part in the manuals. The extra sun4v bit is
not taken into account from user mode, so we can't catch privileged to
hyperprivileged mode traps easily.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM for Sparc?

2008-09-24 Thread David Miller
From: Blue Swirl [EMAIL PROTECTED]
Date: Wed, 24 Sep 2008 20:20:33 +0300

 I see, also Qemu needs to use one more bit then. Does this mean that
 even V8 code written specially may use these traps to call hypervisor?

No, V8 code should not set the extra bit.  Only V9 code on a processor
which is hypervisor capable should do this.

And the hypervisor calls can only be invoked from privileged mode.

This is all described in the UltraSPARC Architecture 2005 Specification
(Hyperprivileged Edition) at:

http://www.opensparc.net/opensparc-t1/index.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM for Sparc?

2008-09-24 Thread David Miller
From: Blue Swirl [EMAIL PROTECTED]
Date: Wed, 24 Sep 2008 20:22:45 +0300

 I'm not sure, but I think that hypervisor is not a real hypervisor
 like the PROM version but one specially designed for Legion emulator.

No, it's the real deal.

All the real hardware device support is there.

And Legion simulates a real, full Niagara system.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM for Sparc?

2008-09-24 Thread David Miller
From: Blue Swirl [EMAIL PROTECTED]
Date: Wed, 24 Sep 2008 21:06:21 +0300

 Now I found the relevant part in the manuals. The extra sun4v bit is
 not taken into account from user mode, so we can't catch privileged to
 hyperprivileged mode traps easily.

That's right, the top bit is ignored in user mode.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: PIC: enhance IPI avoidance

2008-09-24 Thread Marcelo Tosatti
On Wed, Sep 24, 2008 at 05:49:37PM +0300, Avi Kivity wrote:
 Marcelo Tosatti wrote:
   
 and by register load from userspace, no?
 

 Isn't that the responsibility of the guest?

 I'm talking about a restore to previous state scenario.  In this case we  
 want to disable any IPI avoidance in case it avoids a needed IPI.

 Unacked IOAPIC interrupts are not
 cleared on register load, are they?

   

 Good question.  I don't know if they should or shouldn't.  But that's a  
 different question.  isr_ack is not guest visible, so nothing is lost  
 from clearing it, but we can fail if we don't clear it.

True. Any other potential problems you can think of?

KVM: PIC: enhance IPI avoidance

The PIC code makes little effort to avoid kvm_vcpu_kick(), resulting in
unnecessary guest exits in some conditions.

For example, if the timer interrupt is routed through the IOAPIC, IRR
for IRQ 0 will get set but not cleared, since the APIC is handling the
acks.

This means that every time an interrupt < 16 is triggered, the priority
logic will find IRQ0 pending and send an IPI to vcpu0 (in case IRQ0 is
not masked, which is Linux's case).

Introduce a new variable isr_ack to represent the IRQ's for which the
guest has been signalled / cleared the ISR. Use it to avoid more than
one IPI per trigger-ack cycle, in addition to the avoidance when ISR is
set in get_priority().


Index: kvm/arch/x86/kvm/i8259.c
===
--- kvm.orig/arch/x86/kvm/i8259.c
+++ kvm/arch/x86/kvm/i8259.c
@@ -33,6 +33,14 @@
 static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 {
s-isr = ~(1  irq);
+   s-isr_ack |= (1  irq);
+}
+
+void kvm_pic_clear_isr_ack(struct kvm *kvm)
+{
+   struct kvm_pic *s = pic_irqchip(kvm);
+   s-pics[0].isr_ack = 0xff;
+   s-pics[1].isr_ack = 0xff;
 }
 
 /*
@@ -213,6 +221,7 @@ void kvm_pic_reset(struct kvm_kpic_state
s-irr = 0;
s-imr = 0;
s-isr = 0;
+   s-isr_ack = 0xff;
s-priority_add = 0;
s-irq_base = 0;
s-read_reg_select = 0;
@@ -444,10 +453,14 @@ static void pic_irq_request(void *opaque
 {
struct kvm *kvm = opaque;
struct kvm_vcpu *vcpu = kvm-vcpus[0];
+   struct kvm_pic *s = pic_irqchip(kvm);
+   int irq = pic_get_irq(s-pics[0]);
 
-   pic_irqchip(kvm)-output = level;
-   if (vcpu)
+   s-output = level;
+   if (vcpu  level  (s-pics[0].isr_ack  (1  irq))) {
+   s-pics[0].isr_ack = ~(1  irq);
kvm_vcpu_kick(vcpu);
+   }
 }
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm)
Index: kvm/arch/x86/kvm/irq.h
===
--- kvm.orig/arch/x86/kvm/irq.h
+++ kvm/arch/x86/kvm/irq.h
@@ -42,6 +42,7 @@ struct kvm_kpic_state {
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
+   u8 isr_ack; /* interrupt ack detection */
u8 priority_add;/* highest irq priority */
u8 irq_base;
u8 read_reg_select;
@@ -70,6 +71,7 @@ struct kvm_pic *kvm_create_pic(struct kv
 void kvm_pic_set_irq(void *opaque, int irq, int level);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
+void kvm_pic_clear_isr_ack(struct kvm *kvm);
 
 static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 {
Index: kvm/arch/x86/kvm/x86.c
===
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -3963,6 +3963,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct
pr_debug(Set back pending irq %d\n,
 pending_vec);
}
+   kvm_pic_clear_isr_ack(vcpu-kvm);
}
 
kvm_set_segment(vcpu, sregs-cs, VCPU_SREG_CS);
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


VPN connection from Windows 2000 guest to remote server.

2008-09-24 Thread Colin Alie
Good morning,

Is it possible to establish a PPTP connection from a guest running Windows 
2000 Professional SP4 to a remote machine running Windows Server 2003 using 
user-mode networking?

It appears to me that the problem is that GRE protocol packets are not being 
successfully transmitted.  I can create a PPTP connection to the VPN server 
from the host machine and, using iptraf on the external interface, I see GRE 
packets being transmitted.  However, when I initiate the connection from the 
guest machine, using iptraf, I see the initial connection using TCP to port 
1723 on the server but no GRE protocol packets are detected.

Thanks for your help.

Until next time, Colin.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/7] KVM/userspace: Device Assignment: Support for assigning PCI devices to guests

2008-09-24 Thread Yang, Sheng
On Tuesday 23 September 2008 22:54:53 Amit Shah wrote:
 From: Or Sagi [EMAIL PROTECTED]
 From: Nir Peleg [EMAIL PROTECTED]
 From: Amit Shah [EMAIL PROTECTED]
 From: Ben-Ami Yassour [EMAIL PROTECTED]
 From: Weidong Han [EMAIL PROTECTED]
 From: Glauber de Oliveira Costa [EMAIL PROTECTED]

 With this patch, we can assign a device on the host machine to a
 guest.

 A new command-line option, -pcidevice is added.
 For example, to invoke it for a device sitting at PCI bus:dev.fn
 04:08.0, use this:

 -pcidevice host=04:08.0

 * The host driver for the device, if any, is to be removed before
 assigning the device (else device assignment will fail).

 * A device that shares IRQ with another host device cannot currently
 be assigned.

 This works only with the in-kernel irqchip method; to use the
 userspace irqchip, a kernel module (irqhook) and some extra changes
 are needed.


Hi Amit

I am afraid I got this when trying to enable VT-d.

create_userspace_phys_mem: Invalid argument
assigned_dev_iomem_map: Error: create new mapping failed

Can you have a look at it? (The patch you sent to Weidong doesn't have this
problem.)

Thanks.
--
regards
Yang, Sheng

 Signed-off-by: Amit Shah [EMAIL PROTECTED]
 ---
  qemu/Makefile.target|1 +
  qemu/hw/device-assignment.c |  665
 +++ qemu/hw/device-assignment.h |  
 93 ++
  qemu/hw/pc.c|9 +
  qemu/hw/pci.c   |7 +
  qemu/vl.c   |   18 ++
  6 files changed, 793 insertions(+), 0 deletions(-)
  create mode 100644 qemu/hw/device-assignment.c
  create mode 100644 qemu/hw/device-assignment.h

 diff --git a/qemu/Makefile.target b/qemu/Makefile.target
 index 72f3db8..40eb273 100644
 --- a/qemu/Makefile.target
 +++ b/qemu/Makefile.target
 @@ -616,6 +616,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
  OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
  OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
  OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
 +OBJS+= device-assignment.o
  ifeq ($(USE_KVM_PIT), 1)
  OBJS+= i8254-kvm.o
  endif
 diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
 new file mode 100644
 index 000..e70daf2
 --- /dev/null
 +++ b/qemu/hw/device-assignment.c
 @@ -0,0 +1,665 @@
 +/*
 + * Copyright (c) 2007, Neocleus Corporation.
 + *
 + * This program is free software; you can redistribute it and/or modify it
 + * under the terms and conditions of the GNU General Public License,
 + * version 2, as published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope it will be useful, but WITHOUT
 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for + * more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 with + * this program; if not, write to the Free Software Foundation, Inc.,
 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA.
 + *
 + *
 + *  Assign a PCI device from the host to a guest VM.
 + *
 + *  Adapted for KVM by Qumranet.
 + *
 + *  Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
 + *  Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
 + *  Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
 + *  Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
 + */
 +#include stdio.h
 +#include sys/io.h
 +#include qemu-kvm.h
 +#include linux/kvm_para.h
 +#include device-assignment.h
 +
 +/* From linux/ioport.h */
 +#define IORESOURCE_IO  0x0100  /* Resource type */
 +#define IORESOURCE_MEM 0x0200
 +#define IORESOURCE_IRQ 0x0400
 +#define IORESOURCE_DMA 0x0800
 +#define IORESOURCE_PREFETCH0x1000  /* No side effects */
 +
 +/* #define DEVICE_ASSIGNMENT_DEBUG */
 +
 +#ifdef DEVICE_ASSIGNMENT_DEBUG
 +#define DEBUG(fmt, args...) fprintf(stderr, %s:  fmt, __func__ , ##
 args) +#else
 +#define DEBUG(fmt, args...)
 +#endif
 +
 +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
 +  uint32_t value)
 +{
 +   AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
 +   uint32_t r_pio = (unsigned long)r_access-r_virtbase
 +   + (addr - r_access-e_physbase);
 +
 +   if (r_access-debug  DEVICE_ASSIGNMENT_DEBUG_PIO) {
 +   fprintf(stderr, %s: r_pio=%08x e_physbase=%08x
 +r_virtbase=%08lx value=%08x\n,
 +   __func__, r_pio, (int)r_access-e_physbase,
 +   (unsigned long)r_access-r_virtbase, value);
 +   }
 +   iopl(3);
 +   outb(value, r_pio);
 +}
 +
 +static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
 +  uint32_t value)
 +{
 +   AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
 +   uint32_t r_pio = 

Re: [PATCH 5/7] KVM/userspace: Device Assignment: Support for assigning PCI devices to guests

2008-09-24 Thread Yang, Sheng
On Thursday 25 September 2008 12:54:46 Yang, Sheng wrote:
 On Tuesday 23 September 2008 22:54:53 Amit Shah wrote:
  From: Or Sagi [EMAIL PROTECTED]
  From: Nir Peleg [EMAIL PROTECTED]
  From: Amit Shah [EMAIL PROTECTED]
  From: Ben-Ami Yassour [EMAIL PROTECTED]
  From: Weidong Han [EMAIL PROTECTED]
  From: Glauber de Oliveira Costa [EMAIL PROTECTED]
 
  With this patch, we can assign a device on the host machine to a
  guest.
 
  A new command-line option, -pcidevice is added.
  For example, to invoke it for a device sitting at PCI bus:dev.fn
  04:08.0, use this:
 
  -pcidevice host=04:08.0
 
  * The host driver for the device, if any, is to be removed before
  assigning the device (else device assignment will fail).
 
  * A device that shares IRQ with another host device cannot currently
  be assigned.
 
  This works only with the in-kernel irqchip method; to use the
  userspace irqchip, a kernel module (irqhook) and some extra changes
  are needed.

 Hi Amit

 I am afraid I got this when trying to enable VT-d.

 create_userspace_phys_mem: Invalid argument
 assigned_dev_iomem_map: Error: create new mapping failed

 Can you have a look at it? (The patch you sent to Weidong doesn't have
 this problem.)

Oh, Weidong's patch "[PATCH] VT-d: Fix iommu map page for mmio pages" fixes it.
--
regards
Yang, Sheng

 Thanks.
 --
 regards
 Yang, Sheng

  Signed-off-by: Amit Shah [EMAIL PROTECTED]
  ---
   qemu/Makefile.target|1 +
   qemu/hw/device-assignment.c |  665
  +++ qemu/hw/device-assignment.h |
  93 ++
   qemu/hw/pc.c|9 +
   qemu/hw/pci.c   |7 +
   qemu/vl.c   |   18 ++
   6 files changed, 793 insertions(+), 0 deletions(-)
   create mode 100644 qemu/hw/device-assignment.c
   create mode 100644 qemu/hw/device-assignment.h
 
  diff --git a/qemu/Makefile.target b/qemu/Makefile.target
  index 72f3db8..40eb273 100644
  --- a/qemu/Makefile.target
  +++ b/qemu/Makefile.target
  @@ -616,6 +616,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
   OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
   OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
   OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
  +OBJS+= device-assignment.o
   ifeq ($(USE_KVM_PIT), 1)
   OBJS+= i8254-kvm.o
   endif
  diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
  new file mode 100644
  index 000..e70daf2
  --- /dev/null
  +++ b/qemu/hw/device-assignment.c
  @@ -0,0 +1,665 @@
  +/*
  + * Copyright (c) 2007, Neocleus Corporation.
  + *
  + * This program is free software; you can redistribute it and/or modify
  it + * under the terms and conditions of the GNU General Public License,
  + * version 2, as published by the Free Software Foundation.
  + *
  + * This program is distributed in the hope it will be useful, but
  WITHOUT + * ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for + * more details.
  + *
  + * You should have received a copy of the GNU General Public License
  along with + * this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307
  USA.
  + *
  + *
  + *  Assign a PCI device from the host to a guest VM.
  + *
  + *  Adapted for KVM by Qumranet.
  + *
  + *  Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
  + *  Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
  + *  Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
  + *  Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
  + */
  +#include stdio.h
  +#include sys/io.h
  +#include qemu-kvm.h
  +#include linux/kvm_para.h
  +#include device-assignment.h
  +
  +/* From linux/ioport.h */
  +#define IORESOURCE_IO  0x0100  /* Resource type */
  +#define IORESOURCE_MEM 0x0200
  +#define IORESOURCE_IRQ 0x0400
  +#define IORESOURCE_DMA 0x0800
  +#define IORESOURCE_PREFETCH0x1000  /* No side effects */
  +
  +/* #define DEVICE_ASSIGNMENT_DEBUG */
  +
  +#ifdef DEVICE_ASSIGNMENT_DEBUG
  +#define DEBUG(fmt, args...) fprintf(stderr, %s:  fmt, __func__ , ##
  args) +#else
  +#define DEBUG(fmt, args...)
  +#endif
  +
  +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
  +  uint32_t value)
  +{
  +   AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
  +   uint32_t r_pio = (unsigned long)r_access-r_virtbase
  +   + (addr - r_access-e_physbase);
  +
  +   if (r_access-debug  DEVICE_ASSIGNMENT_DEBUG_PIO) {
  +   fprintf(stderr, %s: r_pio=%08x e_physbase=%08x
  +r_virtbase=%08lx value=%08x\n,
  +   __func__, r_pio, (int)r_access-e_physbase,
  +   (unsigned long)r_access-r_virtbase, value);
  +