[PATCH] kvm: extboot: properly set int 0x13 return value

2008-12-02 Thread Avi Kivity
From: Glauber Costa [EMAIL PROTECTED]

Callers of int 0x13 usually rely on the carry flag being
clear/set to indicate the status of the interrupt execution.

However, our current code clears or sets the flags register directly,
which is totally useless. Whichever value it has will
be overwritten by the flags value from _before_ the interrupt, due to
the iret instruction.

This fixes a bug that prevents slackware (and possibly win2k, untested)
from booting.

Signed-off-by: Glauber Costa [EMAIL PROTECTED]
Acked-by: Anthony Liguori [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/extboot/extboot.S b/extboot/extboot.S
index 2630abb..e3d1adf 100644
--- a/extboot/extboot.S
+++ b/extboot/extboot.S
@@ -99,24 +99,24 @@ int19_handler:
 
 #define FLAGS_CF   0x01
 
-.macro clc
-   push %ax
-   pushf
-   pop %ax
-   and $(~FLAGS_CF), %ax
-   push %ax
-   popf
-   pop %ax
+/* The two macro below clear/set the carry flag to indicate the status
+ * of the interrupt execution. It is not enough to issue a clc/stc instruction,
+ * since the value of the flags register will be overwritten by whatever is
+ * in the stack frame
+ */
+.macro clc_stack
+   push %bp
+   mov %sp, %bp
+   /* 8 = 2 (bp, just pushed) + 2 (ip) + 3 (real mode interrupt frame) */
+   and $(~FLAGS_CF), 8(%bp)
+   pop %bp
 .endm
 
-.macro stc
-   push %ax
-   pushf
-   pop %ax
-   or $(FLAGS_CF), %ax
-   push %ax
-   popf
-   pop %ax
+.macro stc_stack
+   push %bp
+   /* 8 = 2 (bp, just pushed) + 2 (ip) + 3 (real mode interrupt frame) */
+   or $(FLAGS_CF), 8(%bp)
+   pop %bp
 .endm
 
 /* we clobber %bx */
@@ -292,7 +292,7 @@ mul32:  /* lo,  hi, lo, hi */
 
 disk_reset:
movb $0, %ah
-   clc
+   clc_stack
ret
 
 /* this really should be a function, not a macro but i'm lazy */
@@ -395,7 +395,7 @@ disk_reset:
pop %ax
 
mov $0, %ah
-   clc
+   clc_stack
ret
 .endm
 
@@ -454,12 +454,12 @@ read_disk_drive_parameters:
pop %bx
 
/* do this last since it's the most sensitive */
-   clc
+   clc_stack
ret
 
 alternate_disk_reset:
movb $0, %ah
-   clc
+   clc_stack
ret
 
 read_disk_drive_size:
@@ -498,21 +498,21 @@ read_disk_drive_size:
freea
pop %bx
 
-   clc
+   clc_stack
ret
 
 check_if_extensions_present:
mov $0x30, %ah
mov $0xAA55, %bx
mov $0x07, %cx
-   clc
+   clc_stack
ret
 
 .macro extended_read_write_sectors cmd
cmpb $10, 0(%si)
jg 1f
mov $1, %ah
-   stc
+   stc_stack
ret
 1:
push %ax
@@ -544,7 +544,7 @@ check_if_extensions_present:
pop %ax
 
mov $0, %ah
-   clc
+   clc_stack
ret
 .endm
 
@@ -612,12 +612,12 @@ get_extended_drive_parameters:
pop %ax
 
mov $0, %ah
-   clc
+   clc_stack
ret
 
 terminate_disk_emulation:
mov $1, %ah
-   stc
+   stc_stack
ret
 
 int13_handler:
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: fix handling of ACK from shared guest IRQ

2008-12-02 Thread Mark McLoughlin
If an assigned device shares a guest irq with an emulated
device then we currently interpret an ack generated by the
emulated device as originating from the assigned device,
leading to e.g. "Unbalanced enable for IRQ 4347" from the
enable_irq() in kvm_assigned_dev_ack_irq().

The fix is fairly simple - don't enable the physical device
irq unless it was previously disabled.

Of course, this can still lead to a situation where a
non-assigned device ACK can cause the physical device irq to
be reenabled before the device was serviced. However, being
level sensitive, the interrupt will merely be regenerated.

Signed-off-by: Mark McLoughlin [EMAIL PROTECTED]
---
 include/linux/kvm_host.h |1 +
 virt/kvm/kvm_main.c  |   15 ++-
 2 files changed, 15 insertions(+), 1 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8091a4d..eafabd5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -307,6 +307,7 @@ struct kvm_assigned_dev_kernel {
int host_busnr;
int host_devfn;
int host_irq;
+   bool host_irq_disabled;
int guest_irq;
struct msi_msg guest_msi;
 #define KVM_ASSIGNED_DEV_GUEST_INTX(1  0)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e41d39d..b6cd30a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -170,6 +170,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct 
work_struct *work)
KVM_ASSIGNED_DEV_GUEST_MSI) {
assigned_device_msi_dispatch(assigned_dev);
enable_irq(assigned_dev-host_irq);
+   assigned_dev-host_irq_disabled = false;
}
mutex_unlock(assigned_dev-kvm-lock);
kvm_put_kvm(assigned_dev-kvm);
@@ -181,8 +182,12 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void 
*dev_id)
(struct kvm_assigned_dev_kernel *) dev_id;
 
kvm_get_kvm(assigned_dev-kvm);
+
schedule_work(assigned_dev-interrupt_work);
+
disable_irq_nosync(irq);
+   assigned_dev-host_irq_disabled = true;
+
return IRQ_HANDLED;
 }
 
@@ -196,8 +201,16 @@ static void kvm_assigned_dev_ack_irq(struct 
kvm_irq_ack_notifier *kian)
 
dev = container_of(kian, struct kvm_assigned_dev_kernel,
   ack_notifier);
+
kvm_set_irq(dev-kvm, dev-irq_source_id, dev-guest_irq, 0);
-   enable_irq(dev-host_irq);
+
+   /* The guest irq may be shared so this ack may be
+* from another device.
+*/
+   if (dev-host_irq_disabled) {
+   enable_irq(dev-host_irq);
+   dev-host_irq_disabled = false;
+   }
 }
 
 static void kvm_free_assigned_irq(struct kvm *kvm,
-- 
1.5.4.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Avi Kivity

Avi Kivity wrote:
- something did a read-modify-write cycle on cr4 (which contains the 
svm enable bit) while kvm enabled that bit


Well, there are a couple of code paths that do this.  I'll look into it.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Avi Kivity

Avi Kivity wrote:

Avi Kivity wrote:
- something did a read-modify-write cycle on cr4 (which contains the 
svm enable bit) while kvm enabled that bit


Well, there are a couple of code paths that do this.  I'll look into it.



Sorry, that's EFER.  It could be something doing a read-modify-write on 
that.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Kvm: Qemu: save nvram

2008-12-02 Thread Avi Kivity

Daniel P. Berrange wrote:

On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote:
  
This patch saves the nvram. It saves the nvram when the -name argument is
specified, and the saved file is named after that argument. If the argument
is not specified, the nvram will not be saved.



I think we might be better off having an explicit command line arg for nvram
path rather than hardcoding the directory, because there may well be times
where you want to have nvram saved, but don't want to specify -name, and
vice versa.


  -nvram foo.data

could prepend a default directory of $localstatedir/lib/qemu/nvram, where
$localstatedir  is set from 'configure' script, or

  -nvram /some/path/foo.data

would use the explicit path given.
  


I prefer current directory if relative path is given.  Since we 
encourage running qemu as an unprivileged user, and we don't want a 
world-writable directory, each user will have to provide a private 
storage location.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/12] introduce linux/iommu.h for an iommu api

2008-12-02 Thread Joerg Roedel
This patch introduces the API to abstract the exported VT-d functions
for KVM into a generic API. This way the AMD IOMMU implementation can
plug into this API later.

Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 include/linux/iommu.h |  109 +
 1 files changed, 109 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/iommu.h

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
new file mode 100644
index 000..47e9ec8
--- /dev/null
+++ b/include/linux/iommu.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel [EMAIL PROTECTED]
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __LINUX_IOMMU_H
+#define __LINUX_IOMMU_H
+
+struct device;
+
+struct iommu_domain {
+   void *priv;
+};
+
+struct iommu_ops {
+   int (*domain_init)(struct iommu_domain *domain);
+   void (*domain_destroy)(struct iommu_domain *domain);
+   int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
+   void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+   int (*map)(struct iommu_domain *domain, unsigned long iova,
+  phys_addr_t paddr, size_t size, int prot);
+   void (*unmap)(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
+   phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
+   unsigned long iova);
+};
+
+#ifdef CONFIG_IOMMU_API
+
+extern void register_iommu(struct iommu_ops *ops);
+extern bool iommu_found(void);
+extern struct iommu_domain *iommu_domain_alloc(void);
+extern void iommu_domain_free(struct iommu_domain *domain);
+extern int iommu_attach_device(struct iommu_domain *domain,
+  struct device *dev);
+extern void iommu_detach_device(struct iommu_domain *domain,
+   struct device *dev);
+extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+  phys_addr_t paddr, size_t size, int prot);
+extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
+extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+ unsigned long iova);
+
+#else /* CONFIG_IOMMU_API */
+
+static inline void register_iommu(struct iommu_ops *ops)
+{
+}
+
+static inline bool iommu_found(void)
+{
+   return false;
+}
+
+static inline struct iommu_domain *iommu_domain_alloc(void)
+{
+   return NULL;
+}
+
+static inline void iommu_domain_free(struct iommu_domain *domain)
+{
+}
+
+static inline int iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+   return -ENODEV;
+}
+
+static inline void iommu_detach_device(struct iommu_domain *domain,
+  struct device *dev)
+{
+}
+
+static inline int iommu_map_range(struct iommu_domain *domain,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int prot)
+{
+   return -ENODEV;
+}
+
+static inline void iommu_unmap_range(struct iommu_domain *domain,
+unsigned long iova, size_t size)
+{
+}
+
+static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+unsigned long iova)
+{
+   return 0;
+}
+
+#endif /* CONFIG_IOMMU_API */
+
+#endif /* __LINUX_IOMMU_H */
-- 
1.5.6.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/12] add frontend implementation for the IOMMU API

2008-12-02 Thread Joerg Roedel
Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 drivers/base/iommu.c |  100 ++
 1 files changed, 100 insertions(+), 0 deletions(-)
 create mode 100644 drivers/base/iommu.c

diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
new file mode 100644
index 000..5e039d4
--- /dev/null
+++ b/drivers/base/iommu.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel [EMAIL PROTECTED]
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include linux/bug.h
+#include linux/types.h
+#include linux/errno.h
+#include linux/iommu.h
+
+static struct iommu_ops *iommu_ops;
+
+void register_iommu(struct iommu_ops *ops)
+{
+   if (iommu_ops)
+   BUG();
+
+   iommu_ops = ops;
+}
+
+bool iommu_found()
+{
+   return iommu_ops != NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_found);
+
+struct iommu_domain *iommu_domain_alloc(void)
+{
+   struct iommu_domain *domain;
+   int ret;
+
+   domain = kmalloc(sizeof(*domain), GFP_KERNEL);
+   if (!domain)
+   return NULL;
+
+   ret = iommu_ops-domain_init(domain);
+   if (ret)
+   goto out_free;
+
+   return domain;
+
+out_free:
+   kfree(domain);
+
+   return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_domain_alloc);
+
+void iommu_domain_free(struct iommu_domain *domain)
+{
+   iommu_ops-domain_destroy(domain);
+   kfree(domain);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_free);
+
+int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
+{
+   return iommu_ops-attach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_attach_device);
+
+void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
+{
+   iommu_ops-detach_dev(domain, dev);
+}
+EXPORT_SYMBOL_GPL(iommu_detach_device);
+
+int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+   phys_addr_t paddr, size_t size, int prot)
+{
+   return iommu_ops-map(domain, iova, paddr, size, prot);
+}
+EXPORT_SYMBOL_GPL(iommu_map_range);
+
+void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
+{
+   iommu_ops-unmap(domain, iova, size);
+}
+EXPORT_SYMBOL_GPL(iommu_unmap_range);
+
+phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+  unsigned long iova)
+{
+   return iommu_ops-iova_to_phys(domain, iova);
+}
+EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
-- 
1.5.6.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/12] KVM: rename vtd.c to iommu.c

2008-12-02 Thread Joerg Roedel
Impact: file renamed

The code in the vtd.c file can be reused for other IOMMUs as well. So
rename it to make it clear that it handles more than VT-d.

Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 arch/ia64/kvm/Makefile  |2 +-
 arch/x86/kvm/Makefile   |2 +-
 virt/kvm/{vtd.c => iommu.c} |0 
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename virt/kvm/{vtd.c => iommu.c} (100%)

diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 76464dc..cb69dfc 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -52,7 +52,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o 
ioapic.o \
coalesced_mmio.o irq_comm.o)
 
 ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c023435..00f46c2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -8,7 +8,7 @@ ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
 ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
diff --git a/virt/kvm/vtd.c b/virt/kvm/iommu.c
similarity index 100%
rename from virt/kvm/vtd.c
rename to virt/kvm/iommu.c
-- 
1.5.6.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Kvm: Qemu: save nvram

2008-12-02 Thread Daniel P. Berrange
On Tue, Dec 02, 2008 at 03:01:20PM +0200, Avi Kivity wrote:
 Daniel P. Berrange wrote:
 On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote:
   
 This patch saves the nvram. It saves the nvram when the -name argument is
 specified, and the saved file is named after that argument. If the argument
 is not specified, the nvram will not be saved.
 
 
 I think we might be better off having an explicit command line arg for 
 nvram
 path rather than hardcoding the directory, because there may well be times
 where you want to have nvram saved, but don't want to specify -name, and
 vice versa.
 
   -nvram foo.data
 
 could prepend a default directory of $localstatedir/lib/qemu/nvram, where
 $localstatedir  is set from 'configure' script, or
 
   -nvram /some/path/foo.data
 
 would use the explicit path given.
   
 
 I prefer current directory if relative path is given.  Since we 
 encourage running qemu as an unprivileged user, and we don't want a 
 world-writable directory, each user will have to provide a private 
 storage location.

Fine by me - avoids needing to embed any path in QEMU code at all then

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2138166 ] Vista guest fails to start on kvm-76

2008-12-02 Thread SourceForge.net
Bugs item #2138166, was opened at 2008-09-30 15:39
Message generated for change (Comment added) made by technologov
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2138166&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: qemu
Group: None
Status: Open
Resolution: Fixed
Priority: 5
Private: No
Submitted By: John Rousseau (johnrrousseau)
Assigned to: Nobody/Anonymous (nobody)
Summary: Vista guest fails to start on kvm-76

Initial Comment:
CPU: Intel(R) Core(TM)2 Duo CPU T7250  @ 2.00GHz
Build: kvm-76
Host kernel: 2.6.26.3-29.fc9.x86_64
Host arch: x86_64
Guest: Windows Vista Ultimate 64-bit
QEMU command: qemu-system-x86_64 -hda /home/jrr/vista-x86_64.img -m 2048M -net 
nic,vlan=0,macaddr=52:54:00:12:32:00 -net tap,vlan=0,ifname=tap0 -std-vga 
-full-screen -smp 2

I've been running this guest on this host with kvm-75 without difficulty. 
kvm-76, built the same way that kvm-75 was (and on the same machine), fails to 
start my guest. The guest window is up, but the guest fails to complete startup.

Command line output is:
kvm_create_phys_mem: File existsset_vram_mapping: cannot allocate memory: File 
exists
set_vram_mapping failed
kvm: get_dirty_pages returned -2

The last line repeats hundreds of times. 

--

Comment By: Technologov (technologov)
Date: 2008-12-02 15:39

Message:
Please close bug.

--

Comment By: John Rousseau (johnrrousseau)
Date: 2008-10-12 14:50

Message:
I've confirmed that this issue is resolved with kvm-77.

--

Comment By: Marco Menardi (markit)
Date: 2008-10-10 14:02

Message:
I've the same issue with my XP-32 guests, I've Debian64 sid, Phenom 9550,
kernel 2.6.26-1-amd64. Everything works like a charm with kvm-75 instead
(and I've had to revert to 75, of course). Any news? Would love to have
forecoming kvm77 with this blocking bug fixed.

--

Comment By: John Rousseau (johnrrousseau)
Date: 2008-10-03 03:06

Message:
kvm-2646c5.tar.gz: Worked fine
kvm-d558461.tar.gz: Failed (showed this bug)

I've never used git before, but if you teach me to fish...

I installed git, pulled the userspace and kernel trees, built kvm-75 and
kvm-76 and got the expected results, but when I did a bisect on kvm-75
(good) and kvm-76 (bad) I kept getting sparse trees that I couldn't build.
configure among other things was missing. What am I doing wrong?

Also, what should I be syncing my kernel tree to when I am bisecting the
userspace tree?

Thanks.

--

Comment By: Glauber de Oliveira Costa (glommer)
Date: 2008-10-02 19:27

Message:
Are you using git? If so, can you bisect to find out who the culprit is?

If not, I've managed to archive two strategic commits you should try:

http://glommer.net/kvm-2646c5.tar.gz  and
http://glommer.net/kvm-d558461.tar.gz

please report success or failure with them

thanks!

--

Comment By: John Rousseau (johnrrousseau)
Date: 2008-10-02 18:48

Message:
I applied the patch to kvm-76 and ran into basically the same problem. The
guest still hung during boot and I got the plume of kvm: get_dirty_pages
returned -2 errors, but the first message kvm_create_phys_mem: File
existsset_vram_mapping: cannot allocate memory:
File exists wasn't displayed.

--

Comment By: Glauber de Oliveira Costa (glommer)
Date: 2008-10-02 16:01

Message:
can you please test the patch at http://glommer.net/band-aid.patch ?

--

Comment By: Brian Jackson (iggy_cav)
Date: 2008-09-30 17:06

Message:
This was reported on the mailing list. It's a problem with sdl output. Not
specific to any guest. Until the problem is fixed, I'd suggest using vnc
output.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2138166&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2138166 ] Vista guest fails to start on kvm-76

2008-12-02 Thread SourceForge.net
Bugs item #2138166, was opened at 2008-09-30 08:39
Message generated for change (Settings changed) made by johnrrousseau
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2138166&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: qemu
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: John Rousseau (johnrrousseau)
Assigned to: Nobody/Anonymous (nobody)
Summary: Vista guest fails to start on kvm-76

Initial Comment:
CPU: Intel(R) Core(TM)2 Duo CPU T7250  @ 2.00GHz
Build: kvm-76
Host kernel: 2.6.26.3-29.fc9.x86_64
Host arch: x86_64
Guest: Windows Vista Ultimate 64-bit
QEMU command: qemu-system-x86_64 -hda /home/jrr/vista-x86_64.img -m 2048M -net 
nic,vlan=0,macaddr=52:54:00:12:32:00 -net tap,vlan=0,ifname=tap0 -std-vga 
-full-screen -smp 2

I've been running this guest on this host with kvm-75 without difficulty. 
kvm-76, built the same way that kvm-75 was (and on the same machine), fails to 
start my guest. The guest window is up, but the guest fails to complete startup.

Command line output is:
kvm_create_phys_mem: File existsset_vram_mapping: cannot allocate memory: File 
exists
set_vram_mapping failed
kvm: get_dirty_pages returned -2

The last line repeats hundreds of times. 

--

Comment By: Technologov (technologov)
Date: 2008-12-02 08:39

Message:
Please close bug.

--

Comment By: John Rousseau (johnrrousseau)
Date: 2008-10-12 08:50

Message:
I've confirmed that this issue is resolved with kvm-77.

--

Comment By: Marco Menardi (markit)
Date: 2008-10-10 08:02

Message:
I've the same issue with my XP-32 guests, I've Debian64 sid, Phenom 9550,
kernel 2.6.26-1-amd64. Everything works like a charm with kvm-75 instead
(and I've had to revert to 75, of course). Any news? Would love to have
forecoming kvm77 with this blocking bug fixed.

--

Comment By: John Rousseau (johnrrousseau)
Date: 2008-10-02 20:06

Message:
kvm-2646c5.tar.gz: Worked fine
kvm-d558461.tar.gz: Failed (showed this bug)

I've never used git before, but if you teach me to fish...

I installed git, pulled the userspace and kernel trees, built kvm-75 and
kvm-76 and got the expected results, but when I did a bisect on kvm-75
(good) and kvm-76 (bad) I kept getting sparse trees that I couldn't build.
configure among other things was missing. What am I doing wrong?

Also, what should I be syncing my kernel tree to when I am bisecting the
userspace tree?

Thanks.

--

Comment By: Glauber de Oliveira Costa (glommer)
Date: 2008-10-02 12:27

Message:
Are you using git? If so, can you bisect to find out who the culprit is?

If not, I've managed to archive two strategic commits you should try:

http://glommer.net/kvm-2646c5.tar.gz  and
http://glommer.net/kvm-d558461.tar.gz

please report success or failure with them

thanks!

--

Comment By: John Rousseau (johnrrousseau)
Date: 2008-10-02 11:48

Message:
I applied the patch to kvm-76 and ran into basically the same problem. The
guest still hung during boot and I got the plume of kvm: get_dirty_pages
returned -2 errors, but the first message kvm_create_phys_mem: File
existsset_vram_mapping: cannot allocate memory:
File exists wasn't displayed.

--

Comment By: Glauber de Oliveira Costa (glommer)
Date: 2008-10-02 09:01

Message:
can you please test the patch at http://glommer.net/band-aid.patch ?

--

Comment By: Brian Jackson (iggy_cav)
Date: 2008-09-30 10:06

Message:
This was reported on the mailing list. It's a problem with sdl output. Not
specific to any guest. Until the problem is fixed, I'd suggest using vnc
output.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2138166&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2353811 ] Solaris 10 guest unstable

2008-12-02 Thread SourceForge.net
Bugs item #2353811, was opened at 2008-11-27 17:44
Message generated for change (Comment added) made by technologov
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2353811&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: krwi (krwi)
Assigned to: Nobody/Anonymous (nobody)
Summary: Solaris 10 guest unstable

Initial Comment:
After several minutes of normal work the Solaris guest hangs for a few seconds.
Sometimes the system hangs completely and the kvm process must be killed. Besides this,
host CPU utilization is much higher than could be expected from guest usage.

Host:
2x Quad Core Opteron 8356
64GB RAM
OS: Gentoo 64bit
kernel: 2.6.26
KVM-79 (modules from package not from kernel)

Guest:
Solaris 10u5 started with command:
kvm -M pc -m 4096 -smp 2 -name despina -monitor pty -boot c -drive 
file=/dev/MDvg_SystemVM/despina,if=ide,index=0,boot=on -drive 
file=/dev/MDvg_DataVM/despina30,if=ide,index=1 -net 
nic,macaddr=00:16:3e:5f:64:10,vlan=0,model=e1000 -net 
tap,fd=38,script=,vlan=0,ifname=vnet8 -serial pty -parallel none

Using the -no-acpi switch doesn't help.

On the same host I am running several other guest systems including Win2003
Server sp2 32 and 64bit, WinXP sp2 32bit, Gentoo 64bit and Debian Lenny 32bit
without problems
like this.

--

Comment By: Technologov (technologov)
Date: 2008-12-02 15:46

Message:
From my testing, Yes, 64-bit Solaris guest is problematic, and requires
patching for KVM to work, but Solaris 32-bit guest should work fine.

Can you tell us which Solaris you use ? (32 or 64-bit)

-Alexey

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2353811&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2088475 ] OpenSuse10.2 can not be installed

2008-12-02 Thread SourceForge.net
Bugs item #2088475, was opened at 2008-09-02 11:37
Message generated for change (Comment added) made by technologov
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2088475&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Jiajun Xu (jiajun)
Assigned to: Nobody/Anonymous (nobody)
Summary: OpenSuse10.2 can not be installed

Initial Comment:
OpenSuse10.2 can not be installed on KVM. Installer will stop after loading 
ISOLinux.
It is against latest kvm commit, kvm.git 
:5b9207ec01681337786c7898ffc0165ec4e7c2e4
userspace.git :5f2a9719f105e29fbde4529cf919a5351b05da9a.


--

Comment By: Technologov (technologov)
Date: 2008-12-02 15:58

Message:
It crashed with old KVMs, but with newer ones it just gets stuck. It doesn't matter.

And yes, openSUSE 11.0 tested to work.

--

Comment By: Jiajun Xu (jiajun)
Date: 2008-10-16 17:23

Message:
From the bug description, opensuse11.0 should work?
And we did not see a guest crash during installation; the guest hangs when
loading grub and no error messages are printed.

--

Comment By: Technologov (technologov)
Date: 2008-10-16 17:04

Message:
Known issue:
https://sourceforge.net/tracker/index.php?func=detail&aid=1760424&group_id=180599&atid=893831

This bug is duplicate.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2088475&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[GIT PULL][RESEND] KVM updates for Linux 2.6.28-rc6

2008-12-02 Thread Avi Kivity

Linus, please pull some kvm fixes from repo and branch at:

  git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git 
kvm-updates/2.6.28


There are a couple of fixes for the out-of-sync mmu, a fix for a lost 
irq while injecting an nmi (which causes guests with an nmi watchdog to 
hang), as well as fixes for the ppc, s390, and ia64 kvm ports.


Avi Kivity (1):
  KVM: VMX: Fix interrupt loss during race with NMI

Christian Borntraeger (1):
  KVM: s390: Fix problem state handling in guest sigp handler

Hollis Blanchard (1):
  KVM: ppc: stop leaking host memory on VM exit

Marcelo Tosatti (2):
  KVM: MMU: fix sync of ptes addressed at owner pagetable
  KVM: MMU: avoid creation of unreachable pages in the shadow

Xiantao Zhang (2):
  KVM: ia64: Fix incorrect kbuild CFLAGS override
  KVM: ia64: Fix: Use correct calling convention for 
PAL_VPS_RESUME_HANDLER


 arch/ia64/kvm/Makefile |2 +-
 arch/ia64/kvm/optvfault.S  |   11 +++
 arch/powerpc/include/asm/kvm_ppc.h |2 ++
 arch/powerpc/kvm/44x_tlb.c |8 
 arch/powerpc/kvm/powerpc.c |1 +
 arch/s390/kvm/sigp.c   |5 +
 arch/x86/kvm/mmu.c |2 +-
 arch/x86/kvm/paging_tmpl.h |1 +
 arch/x86/kvm/vmx.c |4 +++-
 9 files changed, 29 insertions(+), 7 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2138166 ] Vista guest fails to start on kvm-76

2008-12-02 Thread SourceForge.net
Bugs item #2138166, was opened at 2008-09-30 15:39
Message generated for change (Comment added) made by technologov
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2138166&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: qemu
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: John Rousseau (johnrrousseau)
Assigned to: Nobody/Anonymous (nobody)
Summary: Vista guest fails to start on kvm-76

Initial Comment:
CPU: Intel(R) Core(TM)2 Duo CPU T7250  @ 2.00GHz
Build: kvm-76
Host kernel: 2.6.26.3-29.fc9.x86_64
Host arch: x86_64
Guest: Windows Vista Ultimate 64-bit
QEMU command: qemu-system-x86_64 -hda /home/jrr/vista-x86_64.img -m 2048M -net 
nic,vlan=0,macaddr=52:54:00:12:32:00 -net tap,vlan=0,ifname=tap0 -std-vga 
-full-screen -smp 2

I've been running this guest on this host with kvm-75 without difficulty. 
kvm-76, built the same way that kvm-75 was (and on the same machine), fails to 
start my guest. The guest window is up, but the guest fails to complete startup.

Command line output is:
kvm_create_phys_mem: File existsset_vram_mapping: cannot allocate memory: File 
exists
set_vram_mapping failed
kvm: get_dirty_pages returned -2

The last line repeats hundreds of times. 

--

Comment By: Technologov (technologov)
Date: 2008-12-02 16:02

Message:
Please close bug.

--

Comment By: Technologov (technologov)
Date: 2008-12-02 15:39

Message:
Please close bug.

--

Comment By: John Rousseau (johnrrousseau)
Date: 2008-10-12 14:50

Message:
I've confirmed that this issue is resolved with kvm-77.

--

Comment By: Marco Menardi (markit)
Date: 2008-10-10 14:02

Message:
I've the same issue with my XP-32 guests, I've Debian64 sid, Phenom 9550,
kernel 2.6.26-1-amd64. Everything works like a charm with kvm-75 instead
(and I've had to revert to 75, of course). Any news? Would love to have the
forthcoming kvm-77 with this blocking bug fixed.

--

Comment By: John Rousseau (johnrrousseau)
Date: 2008-10-03 03:06

Message:
kvm-2646c5.tar.gz: Worked fine
kvm-d558461.tar.gz: Failed (showed this bug)

I've never used git before, but if you teach me to fish...

I installed git, pulled the userspace and kernel trees, built kvm-75 and
kvm-76 and got the expected results, but when I did a bisect on kvm-75
(good) and kvm-76 (bad) I kept getting sparse trees that I couldn't build.
configure among other things was missing. What am I doing wrong?

Also, what should I be syncing my kernel tree to when I am bisecting the
userspace tree?

Thanks.

--

Comment By: Glauber de Oliveira Costa (glommer)
Date: 2008-10-02 19:27

Message:
Are you using git? If so, can you bisect to find out who the culprit is?

If not, I've managed to archive two strategic commits you should try:

http://glommer.net/kvm-2646c5.tar.gz  and
http://glommer.net/kvm-d558461.tar.gz

please report success or failure with them

thanks!

--

Comment By: John Rousseau (johnrrousseau)
Date: 2008-10-02 18:48

Message:
I applied the patch to kvm-76 and ran into basically the same problem. The
guest still hung during boot and I got the plume of kvm: get_dirty_pages
returned -2 errors, but the first message kvm_create_phys_mem: File
existsset_vram_mapping: cannot allocate memory:
File exists wasn't displayed.

--

Comment By: Glauber de Oliveira Costa (glommer)
Date: 2008-10-02 16:01

Message:
can you please test the patch at http://glommer.net/band-aid.patch ?

--

Comment By: Brian Jackson (iggy_cav)
Date: 2008-09-30 17:06

Message:
This was reported on the mailing list. It's a problem with sdl output. Not
specific to any guest. Until the problem is fixed, I'd suggest using vnc
output.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2138166&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/12] VT-d: Support multiple device assignment for KVM

2008-12-02 Thread Joerg Roedel
From: Weidong Han [EMAIL PROTECTED]

In order to support multiple device assignment for KVM, this patch makes
the following main changes:

- extend dmar_domain to own multiple devices from different iommus,
  use a bitmap of iommus to replace iommu pointer in dmar_domain.
- implement independent low level functions for kvm, so that native
  VT-d is not impacted.
- SAGAW capability may be different across iommus, that is to
  say the VT-d page table levels may be different among iommus.
  This patch uses a default agaw, and skips top levels of page
  tables for iommus which have a smaller agaw than the default.
- rename the APIs for kvm VT-d, make it more readable.

[Joerg: coding style cleanups]

Signed-off-by: Weidong Han [EMAIL PROTECTED]
Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 drivers/pci/dmar.c|   15 +-
 drivers/pci/intel-iommu.c |  696 ++--
 include/linux/dma_remapping.h |   21 +-
 include/linux/dmar.h  |2 +
 include/linux/intel-iommu.h   |   21 +-
 5 files changed, 636 insertions(+), 119 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 691b3ad..d54d3db 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -484,13 +484,14 @@ void __init detect_intel_iommu(void)
dmar_tbl = NULL;
 }
 
-
 int alloc_iommu(struct dmar_drhd_unit *drhd)
 {
struct intel_iommu *iommu;
int map_size;
u32 ver;
static int iommu_allocated = 0;
+   unsigned long sagaw;
+   int agaw;
 
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -506,6 +507,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu-cap = dmar_readq(iommu-reg + DMAR_CAP_REG);
iommu-ecap = dmar_readq(iommu-reg + DMAR_ECAP_REG);
 
+   /* set agaw, SAGAW may be different across iommus */
+   sagaw = cap_sagaw(iommu-cap);
+   for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+agaw = 0; agaw--)
+   if (test_bit(agaw, sagaw))
+   break;
+   if (agaw  0) {
+   printk(KERN_ERR IOMMU: unsupported sagaw %lx\n, sagaw);
+   goto error;
+   }
+   iommu-agaw = agaw;
+
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu-ecap),
cap_max_fault_reg_offset(iommu-cap));
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c8baa4..7f12852 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -50,8 +50,6 @@
 #define IOAPIC_RANGE_END   (0xfeef)
 #define IOVA_START_ADDR(0x1000)
 
-#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
-
 #define DOMAIN_MAX_ADDR(gaw) u64)1)  gaw) - 1)
 
 
@@ -64,6 +62,7 @@ struct deferred_flush_tables {
int next;
struct iova *iova[HIGH_WATER_MARK];
struct dmar_domain *domain[HIGH_WATER_MARK];
+   struct intel_iommu *iommu;
 };
 
 static struct deferred_flush_tables *deferred_flush;
@@ -184,6 +183,68 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova);
 }
 
+/* in native case, each domain is related to only one iommu */
+static struct intel_iommu *domain_get_only_iommu(struct dmar_domain *domain)
+{
+   struct dmar_drhd_unit *drhd;
+
+   for_each_drhd_unit(drhd) {
+   if (drhd-ignored)
+   continue;
+   if (test_bit(drhd-iommu-seq_id, domain-iommu_bmp))
+   return drhd-iommu;
+   }
+
+   return NULL;
+}
+
+static void domain_flush_cache(struct dmar_domain *domain,
+  void *addr, int size)
+{
+   struct intel_iommu *iommu;
+
+   if (domain-flags  DOMAIN_FLAG_VIRTUAL_MACHINE) {
+   struct dmar_drhd_unit *drhd;
+
+   for_each_drhd_unit(drhd) {
+   if (drhd-ignored)
+   continue;
+   iommu = drhd-iommu;
+
+   if (!test_bit(iommu-seq_id, domain-iommu_bmp))
+   continue;
+
+   if (!ecap_coherent(iommu-ecap))
+   clflush_cache_range(addr, size);
+   }
+   } else {
+   iommu = domain_get_only_iommu(domain);
+   if (iommu  !ecap_coherent(iommu-ecap))
+   clflush_cache_range(addr, size);
+   }
+}
+
+static struct intel_iommu *device_find_matched_iommu(u8 bus, u8 devfn)
+{
+   struct dmar_drhd_unit *drhd = NULL;
+   int i;
+
+   for_each_drhd_unit(drhd) {
+   if (drhd-ignored)
+   continue;
+
+   for (i = 0; i  drhd-devices_cnt; i++)
+   if (drhd-devices[i]-bus-number == bus 
+   drhd-devices[i]-devfn == devfn)
+   return drhd-iommu;
+
+   if (drhd-include_all)
+   return drhd-iommu;
+   

Re: [PATCH] extboot: properly set int 0x13 return value

2008-12-02 Thread Avi Kivity

Glauber Costa wrote:

Callers of int 0x13 usually rely on the carry flag being
clear/set to indicate the status of the interrupt execution.

However, our current code clears or sets the flags register,
which is totally useless. Whichever value it has will
be overwritten by the flags value from _before_ the interrupt, due to
the iret instruction.

This fixes a bug that prevents slackware (and possibly win2k, untested)
from booting.

  


Applied, thanks.

 
-.macro clc

-   push %ax
-   pushf
-   pop %ax
-   and $(~FLAGS_CF), %ax
-   push %ax
-   popf
-   pop %ax
  


Anthony, any reason you did not use the 'clc' instruction instead of a 
macro?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] extboot: properly set int 0x13 return value

2008-12-02 Thread Anthony Liguori

Avi Kivity wrote:

Applied, thanks.

 
-.macro clc

-push %ax
-pushf
-pop %ax
-and $(~FLAGS_CF), %ax
-push %ax
-popf
-pop %ax
  


Anthony, any reason you did not use the 'clc' instruction instead of a 
macro?


Propensity for pain?

I have no idea.  I assume I had a reason at the time.  Probably debug 
related.


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/13] [v3] Support VT-d multiple device assignment for KVM

2008-12-02 Thread Han, Weidong
This patchset supports VT-d multiple device assignment for KVM.

Main changes from V2 to V3:
- change intel iommu APIs so that they are consistent with Joerg's generic iommu APIs.
- split into a series of smaller patches for easier reviewing.

Regards,
Weidong
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/13] move page table handling utility functions

2008-12-02 Thread Han, Weidong
move page table handling utility functions from intel-iommu.c to 
dma_remapping.h, because some of them will be used in other .c files.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |   45 
 include/linux/dma_remapping.h |   46 -
 2 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 39c5e9d..a18e0b4 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -288,51 +288,6 @@ out:
spin_unlock_irqrestore(iommu-lock, flags);
 }
 
-/* page table handling */
-#define LEVEL_STRIDE   (9)
-#define LEVEL_MASK (((u64)1  LEVEL_STRIDE) - 1)
-
-static inline int agaw_to_level(int agaw)
-{
-   return agaw + 2;
-}
-
-static inline int agaw_to_width(int agaw)
-{
-   return 30 + agaw * LEVEL_STRIDE;
-
-}
-
-static inline int width_to_agaw(int width)
-{
-   return (width - 30) / LEVEL_STRIDE;
-}
-
-static inline unsigned int level_to_offset_bits(int level)
-{
-   return (12 + (level - 1) * LEVEL_STRIDE);
-}
-
-static inline int address_level_offset(u64 addr, int level)
-{
-   return ((addr  level_to_offset_bits(level))  LEVEL_MASK);
-}
-
-static inline u64 level_mask(int level)
-{
-   return ((u64)-1  level_to_offset_bits(level));
-}
-
-static inline u64 level_size(int level)
-{
-   return ((u64)1  level_to_offset_bits(level));
-}
-
-static inline u64 align_to_level(u64 addr, int level)
-{
-   return ((addr + level_size(level) - 1)  level_mask(level));
-}
-
 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
 {
int addr_width = agaw_to_width(domain-agaw);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 66f7887..eeb8243 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -13,6 +13,50 @@
 #define DMA_32BIT_PFN  IOVA_PFN(DMA_32BIT_MASK)
 #define DMA_64BIT_PFN  IOVA_PFN(DMA_64BIT_MASK)
 
+/* page table handling */
+#define VTD_LEVEL_STRIDE   (9)
+#define VTD_LEVEL_MASK (((u64)1  VTD_LEVEL_STRIDE) - 1)
+
+static inline int agaw_to_level(int agaw)
+{
+   return agaw + 2;
+}
+
+static inline int agaw_to_width(int agaw)
+{
+   return 30 + agaw * VTD_LEVEL_STRIDE;
+}
+
+static inline int width_to_agaw(int width)
+{
+   return (width - 30) / VTD_LEVEL_STRIDE;
+}
+
+static inline unsigned int level_to_offset_bits(int level)
+{
+   return 12 + (level - 1) * VTD_LEVEL_STRIDE;
+}
+
+static inline int address_level_offset(u64 addr, int level)
+{
+   return (addr  level_to_offset_bits(level))  VTD_LEVEL_MASK;
+}
+
+static inline u64 level_mask(int level)
+{
+   return (u64)-1  level_to_offset_bits(level);
+}
+
+static inline u64 level_size(int level)
+{
+   return (u64)1  level_to_offset_bits(level);
+}
+
+static inline u64 align_to_level(u64 addr, int level)
+{
+   return (addr + level_size(level) - 1)  level_mask(level);
+}
+
 
 /*
  * 0: Present
@@ -27,7 +71,7 @@ struct root_entry {
 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
 static inline bool root_present(struct root_entry *root)
 {
-   return (root-val  1);
+   return root-val  1;
 }
 static inline void set_root_present(struct root_entry *root)
 {
-- 
1.5.1


0002-move-page-table-handling-utility-functions.patch
Description: 0002-move-page-table-handling-utility-functions.patch


[PATCH 01/13] iommu bitmap instead of iommu pointer in dmar_domain

2008-12-02 Thread Han, Weidong
Support a dmar_domain owning multiple devices from different iommus, which are set 
in the iommu bitmap. Add function domain_get_iommu() to get the single iommu of a 
domain in native VT-d usage.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |  102 
 include/linux/dma_remapping.h |2 +-
 2 files changed, 72 insertions(+), 32 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c8baa4..39c5e9d 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -64,6 +64,7 @@ struct deferred_flush_tables {
int next;
struct iova *iova[HIGH_WATER_MARK];
struct dmar_domain *domain[HIGH_WATER_MARK];
+   struct intel_iommu *iommu;
 };
 
 static struct deferred_flush_tables *deferred_flush;
@@ -184,6 +185,21 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova);
 }
 
+/* in native case, each domain is related to only one iommu */
+static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
+{
+   struct dmar_drhd_unit *drhd;
+
+   for_each_drhd_unit(drhd) {
+   if (drhd-ignored)
+   continue;
+   if (test_bit(drhd-iommu-seq_id, domain-iommu_bmp))
+   return drhd-iommu;
+   }
+
+   return NULL;
+}
+
 /* Gets context entry for a given bus and devfn */
 static struct context_entry * device_to_context_entry(struct intel_iommu 
*iommu,
u8 bus, u8 devfn)
@@ -324,6 +340,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain 
*domain, u64 addr)
int level = agaw_to_level(domain-agaw);
int offset;
unsigned long flags;
+   struct intel_iommu *iommu = domain_get_iommu(domain);
 
BUG_ON(!domain-pgd);
 
@@ -347,7 +364,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain 
*domain, u64 addr)
flags);
return NULL;
}
-   __iommu_flush_cache(domain-iommu, tmp_page,
+   __iommu_flush_cache(iommu, tmp_page,
PAGE_SIZE);
dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
/*
@@ -356,7 +373,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain 
*domain, u64 addr)
 */
dma_set_pte_readable(*pte);
dma_set_pte_writable(*pte);
-   __iommu_flush_cache(domain-iommu, pte, sizeof(*pte));
+   __iommu_flush_cache(iommu, pte, sizeof(*pte));
}
parent = phys_to_virt(dma_pte_addr(*pte));
level--;
@@ -393,13 +410,14 @@ static struct dma_pte *dma_addr_level_pte(struct 
dmar_domain *domain, u64 addr,
 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
 {
struct dma_pte *pte = NULL;
+   struct intel_iommu *iommu = domain_get_iommu(domain);
 
/* get last level pte */
pte = dma_addr_level_pte(domain, addr, 1);
 
if (pte) {
dma_clear_pte(*pte);
-   __iommu_flush_cache(domain-iommu, pte, sizeof(*pte));
+   __iommu_flush_cache(iommu, pte, sizeof(*pte));
}
 }
 
@@ -428,6 +446,7 @@ static void dma_pte_free_pagetable(struct dmar_domain 
*domain,
int addr_width = agaw_to_width(domain-agaw);
struct dma_pte *pte;
int total = agaw_to_level(domain-agaw);
+   struct intel_iommu *iommu = domain_get_iommu(domain);
int level;
u64 tmp;
 
@@ -447,7 +466,7 @@ static void dma_pte_free_pagetable(struct dmar_domain 
*domain,
free_pgtable_page(
phys_to_virt(dma_pte_addr(*pte)));
dma_clear_pte(*pte);
-   __iommu_flush_cache(domain-iommu,
+   __iommu_flush_cache(iommu,
pte, sizeof(*pte));
}
tmp += level_size(level);
@@ -1006,7 +1025,8 @@ static struct dmar_domain * iommu_alloc_domain(struct 
intel_iommu *iommu)
 
set_bit(num, iommu-domain_ids);
domain-id = num;
-   domain-iommu = iommu;
+   memset(domain-iommu_bmp, 0, sizeof(unsigned long));
+   set_bit(iommu-seq_id, domain-iommu_bmp);
iommu-domains[num] = domain;
spin_unlock_irqrestore(iommu-lock, flags);
 
@@ -1016,10 +1036,12 @@ static struct dmar_domain * iommu_alloc_domain(struct 
intel_iommu *iommu)
 static void iommu_free_domain(struct dmar_domain *domain)
 {
unsigned long flags;
+   struct intel_iommu *iommu;
 
-   spin_lock_irqsave(domain-iommu-lock, flags);
-   clear_bit(domain-id, domain-iommu-domain_ids);
-   

[PATCH 03/13] set iommu agaw

2008-12-02 Thread Han, Weidong
agaw may be different across iommus.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/dmar.c|   14 ++
 include/linux/dma_remapping.h |2 ++
 include/linux/intel-iommu.h   |1 +
 3 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 691b3ad..ebcc7c2 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -491,6 +491,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
int map_size;
u32 ver;
static int iommu_allocated = 0;
+   unsigned long sagaw;
+   int agaw;
 
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -506,6 +508,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu-cap = dmar_readq(iommu-reg + DMAR_CAP_REG);
iommu-ecap = dmar_readq(iommu-reg + DMAR_ECAP_REG);
 
+   /* set agaw, SAGAW may be different across iommus */
+   sagaw = cap_sagaw(iommu-cap);
+   for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+agaw = 0; agaw--)
+   if (test_bit(agaw, sagaw))
+   break;
+   if (agaw  0) {
+   printk(KERN_ERR IOMMU: unsupported sagaw %lx\n, sagaw);
+   goto error;
+   }
+   iommu-agaw = agaw;
+
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu-ecap),
cap_max_fault_reg_offset(iommu-cap));
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index eeb8243..c9d99c9 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -1,6 +1,8 @@
 #ifndef _DMA_REMAPPING_H
 #define _DMA_REMAPPING_H
 
+#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
+
 /*
  * VT-d hardware uses 4KiB page size regardless of host page size.
  */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3d017cf..24a2945 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -299,6 +299,7 @@ struct intel_iommu {
struct dmar_domain **domains; /* ptr to domains */
spinlock_t  lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */
+   int agaw; /* agaw of this iommu */
 
unsigned int irq;
unsigned char name[7];/* Device Name */
-- 
1.5.1


0003-set-iommu-agaw.patch
Description: 0003-set-iommu-agaw.patch


[PATCH 04/13] iommu coherency

2008-12-02 Thread Han, Weidong
In dmar_domain, more than one iommu may be included in iommu_bmp. Because the 
coherency capability may differ across iommus, set the iommu_coherency variable to 
indicate whether iommu access is coherent or not. iommu access of a domain is 
coherent only when all related iommus in that dmar_domain are coherent.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |6 ++
 include/linux/dma_remapping.h |2 ++
 2 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index a18e0b4..fa1507b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -982,6 +982,12 @@ static struct dmar_domain * iommu_alloc_domain(struct 
intel_iommu *iommu)
domain-id = num;
memset(domain-iommu_bmp, 0, sizeof(unsigned long));
set_bit(iommu-seq_id, domain-iommu_bmp);
+
+   if (ecap_coherent(iommu-ecap))
+   domain-iommu_coherency = 1;
+   else
+   domain-iommu_coherency = 0;
+
iommu-domains[num] = domain;
spin_unlock_irqrestore(iommu-lock, flags);
 
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index c9d99c9..add2111 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -175,6 +175,8 @@ struct dmar_domain {
 
 #define DOMAIN_FLAG_MULTIPLE_DEVICES 1
int flags;
+
+   int iommu_coherency;/* iommu access is coherent or not */
 };
 
 /* PCI domain-device relationship */
-- 
1.5.1


0004-iommu-coherency.patch
Description: 0004-iommu-coherency.patch


[PATCH 05/13] add domain flag DOMAIN_FLAG_VIRTUAL_MACHINE

2008-12-02 Thread Han, Weidong
By default, one domain owns one device, like native VT-d usage.

For kvm VT-d usage, more than one device across iommus may be assigned to one 
domain; the flag DOMAIN_FLAG_VIRTUAL_MACHINE is for this usage.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |3 ++-
 include/linux/dma_remapping.h |   11 ++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index fa1507b..09a5150 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -989,6 +989,7 @@ static struct dmar_domain * iommu_alloc_domain(struct 
intel_iommu *iommu)
domain-iommu_coherency = 0;
 
iommu-domains[num] = domain;
+   domain-flags = 0;
spin_unlock_irqrestore(iommu-lock, flags);
 
return domain;
@@ -1387,7 +1388,7 @@ static struct dmar_domain *get_domain_for_dev(struct 
pci_dev *pdev, int gaw)
info-dev = NULL;
info-domain = domain;
/* This domain is shared by devices under p2p bridge */
-   domain-flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
+   domain-flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
 
/* pcie-to-pci bridge already has a domain, uses it */
found = NULL;
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index add2111..9e39c99 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -159,6 +159,16 @@ struct dma_pte {
 
 struct intel_iommu;
 
+/* domain flags, one domain owns one device by default */
+
+/* devices under the same p2p bridge are owned in one domain */
+#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES   (1  0)
+
+/* domain represents a virtual machine, more than one devices
+ * across iommus may be owned in one domain, e.g. kvm guest.
+ */
+#define DOMAIN_FLAG_VIRTUAL_MACHINE(1  1)
+
 struct dmar_domain {
int id; /* domain id */
unsigned long iommu_bmp;/* bitmap of iommus this domain uses*/
@@ -173,7 +183,6 @@ struct dmar_domain {
/* adjusted guest address width, 0 is level 2 30-bit */
int agaw;
 
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
int flags;
 
int iommu_coherency;/* iommu access is coherent or not */
-- 
1.5.1


0005-add-domain-flag-DOMAIN_FLAG_VIRTUAL_MACHINE.patch
Description: 0005-add-domain-flag-DOMAIN_FLAG_VIRTUAL_MACHINE.patch


[PATCH 06/13] add/remove domain device info for virtual machine VT-d

2008-12-02 Thread Han, Weidong
Separate the add/remove domain device info functions for virtual machine VT-d from 
native VT-d.
 
Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |  164 +++-
 include/linux/dma_remapping.h |1 +
 2 files changed, 160 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 09a5150..429aff4 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -200,6 +200,27 @@ static struct intel_iommu *domain_get_iommu(struct 
dmar_domain *domain)
return NULL;
 }
 
+static struct intel_iommu *device_find_matched_iommu(u8 bus, u8 devfn)
+{
+   struct dmar_drhd_unit *drhd = NULL;
+   int i;
+
+   for_each_drhd_unit(drhd) {
+   if (drhd-ignored)
+   continue;
+
+   for (i = 0; i  drhd-devices_cnt; i++)
+   if (drhd-devices[i]-bus-number == bus 
+   drhd-devices[i]-devfn == devfn)
+   return drhd-iommu;
+
+   if (drhd-include_all)
+   return drhd-iommu;
+   }
+
+   return NULL;
+}
+
 /* Gets context entry for a given bus and devfn */
 static struct context_entry * device_to_context_entry(struct intel_iommu 
*iommu,
u8 bus, u8 devfn)
@@ -934,7 +955,8 @@ void free_dmar_iommu(struct intel_iommu *iommu)
for (; i  cap_ndoms(iommu-cap); ) {
domain = iommu-domains[i];
clear_bit(i, iommu-domain_ids);
-   domain_exit(domain);
+   if (--domain-iommu_count == 0)
+   domain_exit(domain);
i = find_next_bit(iommu-domain_ids,
cap_ndoms(iommu-cap), i+1);
}
@@ -990,6 +1012,7 @@ static struct dmar_domain * iommu_alloc_domain(struct 
intel_iommu *iommu)
 
iommu-domains[num] = domain;
domain-flags = 0;
+   domain-iommu_count = 1;
spin_unlock_irqrestore(iommu-lock, flags);
 
return domain;
@@ -1269,9 +1292,12 @@ domain_page_mapping(struct dmar_domain *domain, 
dma_addr_t iova,
return 0;
 }
 
-static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+static void iommu_detach_dev(u8 bus, u8 devfn)
 {
-   struct intel_iommu *iommu = domain_get_iommu(domain);
+   struct intel_iommu *iommu = device_find_matched_iommu(bus, devfn);
+
+   if (!iommu)
+   return;
 
clear_context_table(iommu, bus, devfn);
iommu-flush.flush_context(iommu, 0, 0, 0,
@@ -1295,7 +1321,7 @@ static void domain_remove_dev_info(struct dmar_domain 
*domain)
info-dev-dev.archdata.iommu = NULL;
spin_unlock_irqrestore(device_domain_lock, flags);
 
-   detach_domain_for_dev(info-domain, info-bus, info-devfn);
+   iommu_detach_dev(info-bus, info-devfn);
free_devinfo_mem(info);
 
spin_lock_irqsave(device_domain_lock, flags);
@@ -2330,6 +2356,134 @@ int __init intel_iommu_init(void)
return 0;
 }
 
+/* Coherency capability may be different across iommus */
+static void domain_update_iommu_coherency(struct dmar_domain *domain)
+{
+   struct dmar_drhd_unit *drhd;
+
+   domain-iommu_coherency = 1;
+
+   for_each_drhd_unit(drhd) {
+   if (drhd-ignored)
+   continue;
+   if (test_bit(drhd-iommu-seq_id, domain-iommu_bmp)) {
+   if (!ecap_coherent(drhd-iommu-ecap)) {
+   domain-iommu_coherency = 0;
+   break;
+   }
+   }
+   }
+}
+
+static int vm_domain_add_dev_info(struct dmar_domain *domain,
+ struct pci_dev *pdev)
+{
+   struct device_domain_info *info;
+   unsigned long flags;
+
+   info = alloc_devinfo_mem();
+   if (!info)
+   return -ENOMEM;
+
+   info-bus = pdev-bus-number;
+   info-devfn = pdev-devfn;
+   info-dev = pdev;
+   info-domain = domain;
+
+   spin_lock_irqsave(device_domain_lock, flags);
+   list_add(info-link, domain-devices);
+   list_add(info-global, device_domain_list);
+   pdev-dev.archdata.iommu = info;
+   spin_unlock_irqrestore(device_domain_lock, flags);
+
+   return 0;
+}
+
+static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
+ struct pci_dev *pdev)
+{
+   struct device_domain_info *info;
+   struct intel_iommu *iommu;
+   unsigned long flags;
+   int found = 0;
+
+   iommu = device_find_matched_iommu(pdev-bus-number, pdev-devfn);
+
+   spin_lock_irqsave(device_domain_lock, flags);
+   while (!list_empty(domain-devices)) {
+   info = list_entry(domain-devices.next,
+   struct device_domain_info, link);
+   if (info-bus 

[PATCH 07/13] add domain_flush_cache

2008-12-02 Thread Han, Weidong
For some common low-level functions which will also be used for virtual machine 
usage, use domain_flush_cache instead of __iommu_flush_cache.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |   40 
 1 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 429aff4..b00a8f2 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -200,6 +200,13 @@ static struct intel_iommu *domain_get_iommu(struct 
dmar_domain *domain)
return NULL;
 }
 
+static void domain_flush_cache(struct dmar_domain *domain,
+  void *addr, int size)
+{
+   if (!domain-iommu_coherency)
+   clflush_cache_range(addr, size);
+}
+
 static struct intel_iommu *device_find_matched_iommu(u8 bus, u8 devfn)
 {
struct dmar_drhd_unit *drhd = NULL;
@@ -316,7 +323,6 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain 
*domain, u64 addr)
int level = agaw_to_level(domain-agaw);
int offset;
unsigned long flags;
-   struct intel_iommu *iommu = domain_get_iommu(domain);
 
BUG_ON(!domain-pgd);
 
@@ -340,8 +346,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain 
*domain, u64 addr)
flags);
return NULL;
}
-   __iommu_flush_cache(iommu, tmp_page,
-   PAGE_SIZE);
+   domain_flush_cache(domain, tmp_page, PAGE_SIZE);
dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
/*
 * high level table always sets r/w, last level page
@@ -349,7 +354,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain 
*domain, u64 addr)
 */
dma_set_pte_readable(*pte);
dma_set_pte_writable(*pte);
-   __iommu_flush_cache(iommu, pte, sizeof(*pte));
+   domain_flush_cache(domain, pte, sizeof(*pte));
}
parent = phys_to_virt(dma_pte_addr(*pte));
level--;
@@ -386,14 +391,13 @@ static struct dma_pte *dma_addr_level_pte(struct 
dmar_domain *domain, u64 addr,
 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
 {
struct dma_pte *pte = NULL;
-   struct intel_iommu *iommu = domain_get_iommu(domain);
 
/* get last level pte */
pte = dma_addr_level_pte(domain, addr, 1);
 
if (pte) {
dma_clear_pte(*pte);
-   __iommu_flush_cache(iommu, pte, sizeof(*pte));
+   domain_flush_cache(domain, pte, sizeof(*pte));
}
 }
 
@@ -422,7 +426,6 @@ static void dma_pte_free_pagetable(struct dmar_domain 
*domain,
int addr_width = agaw_to_width(domain-agaw);
struct dma_pte *pte;
int total = agaw_to_level(domain-agaw);
-   struct intel_iommu *iommu = domain_get_iommu(domain);
int level;
u64 tmp;
 
@@ -442,8 +445,7 @@ static void dma_pte_free_pagetable(struct dmar_domain 
*domain,
free_pgtable_page(
phys_to_virt(dma_pte_addr(*pte)));
dma_clear_pte(*pte);
-   __iommu_flush_cache(iommu,
-   pte, sizeof(*pte));
+   domain_flush_cache(domain, pte, sizeof(*pte));
}
tmp += level_size(level);
}
@@ -1158,12 +1160,16 @@ static int domain_context_mapping_one(struct 
dmar_domain *domain,
u8 bus, u8 devfn)
 {
struct context_entry *context;
-   struct intel_iommu *iommu = domain_get_iommu(domain);
+   struct intel_iommu *iommu;
unsigned long flags;
 
pr_debug(Set context mapping for %02x:%02x.%d\n,
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
BUG_ON(!domain-pgd);
+
+   iommu = device_find_matched_iommu(bus, devfn);
+   if (!iommu)
+   return -ENODEV;
context = device_to_context_entry(iommu, bus, devfn);
if (!context)
return -ENOMEM;
@@ -1225,12 +1231,15 @@ domain_context_mapping(struct dmar_domain *domain, 
struct pci_dev *pdev)
tmp-bus-number, tmp-devfn);
 }
 
-static int domain_context_mapped(struct dmar_domain *domain,
-   struct pci_dev *pdev)
+static int domain_context_mapped(struct pci_dev *pdev)
 {
int ret;
struct pci_dev *tmp, *parent;
-   struct intel_iommu *iommu = domain_get_iommu(domain);
+   struct intel_iommu *iommu;
+
+   iommu = device_find_matched_iommu(pdev-bus-number, pdev-devfn);
+   if (!iommu)
+   return -ENODEV;
 
ret = device_context_mapped(iommu,

[PATCH 08/13] allocation and free functions of virtual machine domain

2008-12-02 Thread Han, Weidong

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |  104 -
 1 files changed, 103 insertions(+), 1 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index b00a8f2..e96b3bc 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -947,6 +947,7 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 
 
 static void domain_exit(struct dmar_domain *domain);
+static void vm_domain_exit(struct dmar_domain *domain);
 
 void free_dmar_iommu(struct intel_iommu *iommu)
 {
@@ -957,8 +958,13 @@ void free_dmar_iommu(struct intel_iommu *iommu)
for (; i  cap_ndoms(iommu-cap); ) {
domain = iommu-domains[i];
clear_bit(i, iommu-domain_ids);
-   if (--domain-iommu_count == 0)
+
+   if (domain-flags  DOMAIN_FLAG_VIRTUAL_MACHINE) {
+   if (--domain-iommu_count == 0)
+   vm_domain_exit(domain);
+   } else
domain_exit(domain);
+
i = find_next_bit(iommu-domain_ids,
cap_ndoms(iommu-cap), i+1);
}
@@ -2492,6 +2498,102 @@ static void vm_domain_remove_all_dev_info(struct 
dmar_domain *domain)
spin_unlock_irqrestore(device_domain_lock, flags);
 }
 
+/* domain id for virtual machine, it won't be set in context */
+static unsigned long vm_domid;
+
+static struct dmar_domain *iommu_alloc_vm_domain(void)
+{
+   struct dmar_domain *domain;
+
+   domain = alloc_domain_mem();
+   if (!domain)
+   return NULL;
+
+   domain-id = vm_domid++;
+   domain-iommu_count = 0;
+   domain-iommu_coherency = 0;
+   memset(domain-iommu_bmp, 0, sizeof(unsigned long));
+   domain-flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
+
+   return domain;
+}
+
+static int vm_domain_init(struct dmar_domain *domain, int guest_width)
+{
+   int adjust_width;
+
+   init_iova_domain(domain-iovad, DMA_32BIT_PFN);
+   spin_lock_init(domain-mapping_lock);
+
+   domain_reserve_special_ranges(domain);
+
+   /* calculate AGAW */
+   domain-gaw = guest_width;
+   adjust_width = guestwidth_to_adjustwidth(guest_width);
+   domain-agaw = width_to_agaw(adjust_width);
+
+   INIT_LIST_HEAD(domain-devices);
+
+   /* always allocate the top pgd */
+   domain-pgd = (struct dma_pte *)alloc_pgtable_page();
+   if (!domain-pgd)
+   return -ENOMEM;
+   domain_flush_cache(domain, domain-pgd, PAGE_SIZE);
+   return 0;
+}
+
+static void iommu_free_vm_domain(struct dmar_domain *domain)
+{
+   unsigned long flags;
+   struct dmar_drhd_unit *drhd;
+   struct intel_iommu *iommu;
+   unsigned long i;
+   unsigned long ndomains;
+
+   for_each_drhd_unit(drhd) {
+   if (drhd-ignored)
+   continue;
+   iommu = drhd-iommu;
+
+   ndomains = cap_ndoms(iommu-cap);
+   i = find_first_bit(iommu-domain_ids, ndomains);
+   for (; i  ndomains; ) {
+   if (iommu-domains[i] == domain) {
+   spin_lock_irqsave(iommu-lock, flags);
+   clear_bit(i, iommu-domain_ids);
+   iommu-domains[i] = NULL;
+   spin_unlock_irqrestore(iommu-lock, flags);
+   break;
+   }
+   i = find_next_bit(iommu-domain_ids, ndomains, i+1);
+   }
+   }
+}
+
+static void vm_domain_exit(struct dmar_domain *domain)
+{
+   u64 end;
+
+   /* Domain 0 is reserved, so dont process it */
+   if (!domain)
+   return;
+
+   vm_domain_remove_all_dev_info(domain);
+   /* destroy iovas */
+   put_iova_domain(domain-iovad);
+   end = DOMAIN_MAX_ADDR(domain-gaw);
+   end = end  (~VTD_PAGE_MASK);
+
+   /* clear ptes */
+   dma_pte_clear_range(domain, 0, end);
+
+   /* free page tables */
+   dma_pte_free_pagetable(domain, 0, end);
+
+   iommu_free_vm_domain(domain);
+   free_domain_mem(domain);
+}
+
 void intel_iommu_domain_exit(struct dmar_domain *domain)
 {
u64 end;
-- 
1.5.1


0008-allocation-and-free-functions-of-virtual-machine-do.patch
Description: 0008-allocation-and-free-functions-of-virtual-machine-do.patch


[PATCH 09/13] change domain_context_mapping_one for virtual machine domain

2008-12-02 Thread Han, Weidong
vm_domid won't be set in the context entry, so find an available domain id for
a device from its iommu.

For a virtual machine domain, a default agaw will be set; skip the top levels
of page tables for any iommu which has a smaller agaw than the default.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |   57 ++--
 1 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index e96b3bc..3f987d7 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1168,6 +1168,11 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain,
struct context_entry *context;
struct intel_iommu *iommu;
unsigned long flags;
+   struct dma_pte *pgd;
+   unsigned long num;
+   unsigned long ndomains;
+   int id;
+   int agaw;
 
pr_debug(Set context mapping for %02x:%02x.%d\n,
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1185,9 +1190,55 @@ static int domain_context_mapping_one(struct dmar_domain 
*domain,
return 0;
}
 
-   context_set_domain_id(*context, domain-id);
-   context_set_address_width(*context, domain-agaw);
-   context_set_address_root(*context, virt_to_phys(domain-pgd));
+   id = domain-id;
+   pgd = domain-pgd;
+
+   if (domain-flags  DOMAIN_FLAG_VIRTUAL_MACHINE) {
+   int found = 0;
+
+   /* find an available domain id for this device in iommu */
+   ndomains = cap_ndoms(iommu-cap);
+   num = find_first_bit(iommu-domain_ids, ndomains);
+   for (; num  ndomains; ) {
+   if (iommu-domains[num] == domain) {
+   id = num;
+   found = 1;
+   break;
+   }
+   num = find_next_bit(iommu-domain_ids,
+   cap_ndoms(iommu-cap), num+1);
+   }
+
+   if (found == 0) {
+   num = find_first_zero_bit(iommu-domain_ids, ndomains);
+   if (num = ndomains) {
+   spin_unlock_irqrestore(iommu-lock, flags);
+   printk(KERN_ERR IOMMU: no free domain ids\n);
+   return -EFAULT;
+   }
+
+   set_bit(num, iommu-domain_ids);
+   iommu-domains[num] = domain;
+   id = num;
+   }
+
+   /* Skip top levels of page tables for
+* iommu which has less agaw than default.
+*/
+   for (agaw = domain-agaw; agaw != iommu-agaw; agaw--) {
+   pgd = phys_to_virt(dma_pte_addr(*pgd));
+   if (!dma_pte_present(*pgd)) {
+   spin_unlock_irqrestore(iommu-lock, flags);
+   return -ENOMEM;
+   }
+   }
+
+   set_bit(iommu-seq_id, domain-iommu_bmp);
+   }
+
+   context_set_domain_id(*context, id);
+   context_set_address_width(*context, iommu-agaw);
+   context_set_address_root(*context, virt_to_phys(pgd));
context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
context_set_fault_enable(*context);
context_set_present(*context);
-- 
1.5.1


0009-change-domain_context_mapping_one-for-virtual-machin.patch
Description: 0009-change-domain_context_mapping_one-for-virtual-machin.patch


[PATCH 10/13] change intel iommu APIs

2008-12-02 Thread Han, Weidong
These APIs will be used by kvm VT-d. The domain used by these APIs is virtual 
machine domain (domain flag is DOMAIN_FLAG_VIRTUAL_MACHINE).

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c   |  128 ---
 include/linux/intel-iommu.h |   20 ---
 2 files changed, 71 insertions(+), 77 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 3f987d7..0db77e2 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2645,93 +2645,87 @@ static void vm_domain_exit(struct dmar_domain *domain)
free_domain_mem(domain);
 }
 
-void intel_iommu_domain_exit(struct dmar_domain *domain)
+struct dmar_domain *intel_iommu_alloc_domain(void)
 {
-   u64 end;
-
-   /* Domain 0 is reserved, so dont process it */
-   if (!domain)
-   return;
-
-   end = DOMAIN_MAX_ADDR(domain-gaw);
-   end = end  (~VTD_PAGE_MASK);
-
-   /* clear ptes */
-   dma_pte_clear_range(domain, 0, end);
-
-   /* free page tables */
-   dma_pte_free_pagetable(domain, 0, end);
-
-   iommu_free_domain(domain);
-   free_domain_mem(domain);
-}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
-
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
-{
-   struct dmar_drhd_unit *drhd;
struct dmar_domain *domain;
-   struct intel_iommu *iommu;
 
-   drhd = dmar_find_matched_drhd_unit(pdev);
-   if (!drhd) {
-   printk(KERN_ERR intel_iommu_domain_alloc: drhd == NULL\n);
-   return NULL;
-   }
-
-   iommu = drhd-iommu;
-   if (!iommu) {
-   printk(KERN_ERR
-   intel_iommu_domain_alloc: iommu == NULL\n);
-   return NULL;
-   }
-   domain = iommu_alloc_domain(iommu);
+   domain = iommu_alloc_vm_domain();
if (!domain) {
printk(KERN_ERR
intel_iommu_domain_alloc: domain == NULL\n);
return NULL;
}
-   if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+   if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
printk(KERN_ERR
intel_iommu_domain_alloc: domain_init() failed\n);
-   intel_iommu_domain_exit(domain);
+   vm_domain_exit(domain);
return NULL;
}
+
return domain;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
 
-int intel_iommu_context_mapping(
-   struct dmar_domain *domain, struct pci_dev *pdev)
+void intel_iommu_free_domain(struct dmar_domain *domain)
 {
-   int rc;
-   rc = domain_context_mapping(domain, pdev);
-   return rc;
+   vm_domain_exit(domain);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
 
-int intel_iommu_page_mapping(
-   struct dmar_domain *domain, dma_addr_t iova,
-   u64 hpa, size_t size, int prot)
+int intel_iommu_attach_device(struct dmar_domain *domain,
+ struct pci_dev *pdev)
 {
-   int rc;
-   rc = domain_page_mapping(domain, iova, hpa, size, prot);
-   return rc;
+   int ret;
+
+   /* normally pdev is not mapped */
+   if (unlikely(domain_context_mapped(pdev))) {
+   struct dmar_domain *old_domain;
+
+   old_domain = find_domain(pdev);
+   if (old_domain) {
+   if (domain-flags  DOMAIN_FLAG_VIRTUAL_MACHINE)
+   vm_domain_remove_one_dev_info(old_domain, pdev);
+   else
+   domain_remove_dev_info(old_domain);
+   }
+   }
+
+   ret = domain_context_mapping(domain, pdev);
+   if (ret)
+   return ret;
+
+   ret = vm_domain_add_dev_info(domain, pdev);
+   return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_attach_device);
 
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+void intel_iommu_detach_device(struct dmar_domain *domain,
+  struct pci_dev *pdev)
 {
-   iommu_detach_dev(bus, devfn);
+   vm_domain_remove_one_dev_info(domain, pdev);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+EXPORT_SYMBOL_GPL(intel_iommu_detach_device);
 
-struct dmar_domain *
-intel_iommu_find_domain(struct pci_dev *pdev)
+int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
+   u64 hpa, size_t size, int prot)
 {
-   return find_domain(pdev);
+   int ret;
+   ret = domain_page_mapping(domain, iova, hpa, size, prot);
+   return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+EXPORT_SYMBOL_GPL(intel_iommu_map_address);
+
+void intel_iommu_unmap_address(struct dmar_domain *domain,
+  dma_addr_t iova, size_t size)
+{
+   dma_addr_t base;
+
+ 

[PATCH 13/13] KVM: support device assignment

2008-12-02 Thread Han, Weidong
Support device deassignment, which can be used for device hotplug.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 include/linux/kvm.h  |5 +
 include/linux/kvm_host.h |8 
 virt/kvm/kvm_main.c  |   42 ++
 virt/kvm/vtd.c   |   24 
 4 files changed, 79 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0997e6f..2904276 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -395,6 +395,9 @@ struct kvm_trace_rec {
 #if defined(CONFIG_X86)
 #define KVM_CAP_DEVICE_MSI 20
 #endif
+#if defined(CONFIG_X86)||defined(CONFIG_IA64)
+#define KVM_CAP_DEVICE_DEASSIGNMENT 21
+#endif
 
 /*
  * ioctls for VM fds
@@ -428,6 +431,8 @@ struct kvm_trace_rec {
   struct kvm_assigned_pci_dev)
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
struct kvm_assigned_irq)
+#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x71, \
+struct kvm_assigned_pci_dev)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index aeabd32..cb1d404 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -333,6 +333,8 @@ int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
 int kvm_assign_device(struct kvm *kvm,
  struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_deassign_device(struct kvm *kvm,
+   struct kvm_assigned_dev_kernel *assigned_dev);
 #else /* CONFIG_DMAR */
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
  gfn_t base_gfn,
@@ -356,6 +358,12 @@ static inline int kvm_assign_device(struct kvm *kvm,
 {
return 0;
 }
+
+static inline int kvm_deassign_device(struct kvm *kvm,
+   struct kvm_assigned_dev_kernel *assigned_dev)
+{
+   return 0;
+}
 #endif /* CONFIG_DMAR */
 
 static inline void kvm_guest_enter(void)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 43a5236..fe6aba0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -499,6 +499,35 @@ out_free:
 }
 #endif
 
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+   struct kvm_assigned_pci_dev *assigned_dev)
+{
+   int r = 0;
+   struct kvm_assigned_dev_kernel *match;
+
+   mutex_lock(kvm-lock);
+
+   match = kvm_find_assigned_dev(kvm-arch.assigned_dev_head,
+ assigned_dev-assigned_dev_id);
+   if (!match) {
+   printk(KERN_INFO %s: device hasn't been assigned before, 
+ so cannot be deassigned\n, __func__);
+   r = -EINVAL;
+   goto out;
+   }
+
+   if (assigned_dev-flags  KVM_DEV_ASSIGN_ENABLE_IOMMU)
+   kvm_deassign_device(kvm, match);
+
+   kvm_free_assigned_device(kvm, match);
+
+out:
+   mutex_unlock(kvm-lock);
+   return r;
+}
+#endif
+
 static inline int valid_vcpu(int n)
 {
return likely(n = 0  n  KVM_MAX_VCPUS);
@@ -1838,6 +1867,19 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
 #endif
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+   case KVM_DEASSIGN_PCI_DEVICE: {
+   struct kvm_assigned_pci_dev assigned_dev;
+
+   r = -EFAULT;
+   if (copy_from_user(assigned_dev, argp, sizeof assigned_dev))
+   goto out;
+   r = kvm_vm_ioctl_deassign_device(kvm, assigned_dev);
+   if (r)
+   goto out;
+   break;
+   }
+#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
}
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index 44bb58a..174ea1f 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -116,6 +116,30 @@ int kvm_assign_device(struct kvm *kvm,
return 0;
 }
 
+int kvm_deassign_device(struct kvm *kvm,
+   struct kvm_assigned_dev_kernel *assigned_dev)
+{
+   struct dmar_domain *domain = kvm-arch.intel_iommu_domain;
+   struct pci_dev *pdev = NULL;
+
+   /* check if iommu exists and in use */
+   if (!domain)
+   return 0;
+
+   pdev = assigned_dev-dev;
+   if (pdev == NULL)
+   return -ENODEV;
+
+   intel_iommu_detach_device(domain, pdev);
+
+   printk(KERN_DEBUG deassign device: host bdf = %x:%x:%x\n,
+   assigned_dev-host_busnr,
+   PCI_SLOT(assigned_dev-host_devfn),
+   PCI_FUNC(assigned_dev-host_devfn));
+
+   return 0;
+}
+
 int kvm_iommu_map_guest(struct kvm *kvm)
 {
int r;
-- 
1.5.1


0013-KVM-support-device-assignment.patch
Description: 0013-KVM-support-device-assignment.patch


[PATCH 12/13] KVM: use the new intel iommu APIs

2008-12-02 Thread Han, Weidong
The intel iommu APIs have been updated; use the new APIs.

In addition, change kvm_iommu_map_guest() to just create the domain, and let
kvm_assign_device() assign the device.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 include/linux/kvm_host.h |   15 +--
 virt/kvm/kvm_main.c  |7 +++-
 virt/kvm/vtd.c   |   98 ++
 3 files changed, 71 insertions(+), 49 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8091a4d..aeabd32 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -329,9 +329,10 @@ void kvm_free_irq_source_id(struct kvm *kvm, int 
irq_source_id);
 #ifdef CONFIG_DMAR
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
unsigned long npages);
-int kvm_iommu_map_guest(struct kvm *kvm,
-   struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
+int kvm_assign_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev);
 #else /* CONFIG_DMAR */
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
  gfn_t base_gfn,
@@ -340,9 +341,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm,
return 0;
 }
 
-static inline int kvm_iommu_map_guest(struct kvm *kvm,
- struct kvm_assigned_dev_kernel
- *assigned_dev)
+static inline int kvm_iommu_map_guest(struct kvm *kvm)
 {
return -ENODEV;
 }
@@ -351,6 +350,12 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
return 0;
 }
+
+static inline int kvm_assign_device(struct kvm *kvm,
+   struct kvm_assigned_dev_kernel *assigned_dev)
+{
+   return 0;
+}
 #endif /* CONFIG_DMAR */
 
 static inline void kvm_guest_enter(void)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8dab7ce..43a5236 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -472,7 +472,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(match-list, kvm-arch.assigned_dev_head);
 
if (assigned_dev-flags  KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-   r = kvm_iommu_map_guest(kvm, match);
+   if (!kvm-arch.intel_iommu_domain) {
+   r = kvm_iommu_map_guest(kvm);
+   if (r)
+   goto out_list_del;
+   }
+   r = kvm_assign_device(kvm, match);
if (r)
goto out_list_del;
}
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index a770874..44bb58a 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -45,20 +45,18 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 
for (i = 0; i  npages; i++) {
/* check if already mapped */
-   pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
-gfn_to_gpa(gfn));
-   if (pfn)
+   if (intel_iommu_iova_to_phys(domain,
+gfn_to_gpa(gfn)))
continue;
 
pfn = gfn_to_pfn(kvm, gfn);
-   r = intel_iommu_page_mapping(domain,
-gfn_to_gpa(gfn),
-pfn_to_hpa(pfn),
-PAGE_SIZE,
-DMA_PTE_READ |
-DMA_PTE_WRITE);
+   r = intel_iommu_map_address(domain,
+   gfn_to_gpa(gfn),
+   pfn_to_hpa(pfn),
+   PAGE_SIZE,
+   DMA_PTE_READ | DMA_PTE_WRITE);
if (r) {
-   printk(KERN_ERR kvm_iommu_map_pages:
+   printk(KERN_ERR kvm_iommu_map_address:
   iommu failed to map pfn=%lx\n, pfn);
goto unmap_pages;
}
@@ -86,50 +84,55 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
return r;
 }
 
-int kvm_iommu_map_guest(struct kvm *kvm,
-   struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_assign_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev)
 {
struct pci_dev *pdev = NULL;
+   struct dmar_domain *domain = kvm-arch.intel_iommu_domain;
int r;
 
-   if (!intel_iommu_found()) {
-   printk(KERN_ERR %s: intel iommu not found\n, __func__);
+   /* check if iommu exists and in use */
+   if (!domain)
+   return 0;
+
+   pdev = assigned_dev-dev;
+   if (pdev == NULL)
return -ENODEV;
+
+   r = intel_iommu_attach_device(domain, pdev);
+   if (r) {
+

[PATCH 2/2] kvm: set owner of cpu and vm file operations

2008-12-02 Thread Christian Borntraeger
There is a race between a close of the file descriptors and module
unload in the kvm module.

You can easily trigger this problem by applying this debug patch:
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -648,10 +648,14 @@ void kvm_free_physmem(struct kvm *kvm)
kvm_free_physmem_slot(kvm-memslots[i], NULL);
 }

+#include linux/delay.h
 static void kvm_destroy_vm(struct kvm *kvm)
 {
struct mm_struct *mm = kvm-mm;

+   printk(off1\n);
+   msleep(5000);
+   printk(off2\n);
spin_lock(kvm_lock);
list_del(kvm-vm_list);
spin_unlock(kvm_lock);

and killing the userspace, followed by an rmmod.

The problem is that kvm_destroy_vm can run while the module count
is 0. That means, you can remove the module while kvm_destroy_vm
is running. But kvm_destroy_vm is part of the module text. This
causes a kernel oops. The race exists without the msleep but is much
harder to trigger.

This patch requires the fix for anon_inodes (anon_inodes: use fops->owner
for module refcount).
With this patch, we can set the owner of all anonymous KVM inodes file
operations. The VFS will then control the KVM module refcount as long as there
is an open file. kvm_destroy_vm will be called by the release function of the
last closed file - before the VFS drops the module refcount. 

Signed-off-by: Christian Borntraeger [EMAIL PROTECTED]

---
 virt/kvm/kvm_main.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

Index: kvm/virt/kvm/kvm_main.c
===
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -1303,7 +1303,7 @@ static int kvm_vcpu_release(struct inode
return 0;
 }
 
-static const struct file_operations kvm_vcpu_fops = {
+static struct file_operations kvm_vcpu_fops = {
.release= kvm_vcpu_release,
.unlocked_ioctl = kvm_vcpu_ioctl,
.compat_ioctl   = kvm_vcpu_ioctl,
@@ -1697,7 +1697,7 @@ static int kvm_vm_mmap(struct file *file
return 0;
 }
 
-static const struct file_operations kvm_vm_fops = {
+static struct file_operations kvm_vm_fops = {
.release= kvm_vm_release,
.unlocked_ioctl = kvm_vm_ioctl,
.compat_ioctl   = kvm_vm_ioctl,
@@ -2061,6 +2061,8 @@ int kvm_init(void *opaque, unsigned int 
}
 
kvm_chardev_ops.owner = module;
+   kvm_vm_fops.owner = module;
+   kvm_vcpu_fops.owner = module;
 
r = misc_register(kvm_dev);
if (r) {
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Hangs

2008-12-02 Thread xming
The same guest did it again.

# uname -a
Linux spaceball 2.6.27.6 #1 SMP Fri Nov 14 11:51:10 CET 2008 i686 QEMU
Virtual CPU version 0.9.1 AuthenticAMD GNU/Linux

# date
Thu Dec 19 01:54:27 WET 1912

# uptime
 01:54:29 up 14666 days, 21:17, 12 users,  load average: 3.99, 3.97, 3.91

What can I do to provide more info?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/12] VT-d: register functions for the IOMMU API

2008-12-02 Thread Joerg Roedel
Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |   13 +
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 2e8b102..bb6f771 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -55,6 +55,7 @@
 
 
 static void flush_unmaps_timeout(unsigned long data);
+static struct iommu_ops intel_iommu_ops;
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
 
@@ -2429,6 +2430,9 @@ int __init intel_iommu_init(void)
init_timer(unmap_timer);
force_iommu = 1;
dma_ops = intel_dma_ops;
+
+   register_iommu(intel_iommu_ops);
+
return 0;
 }
 
@@ -2929,3 +2933,12 @@ static phys_addr_t intel_iommu_iova_to_phys(struct 
iommu_domain *domain,
return paddr;
 }
 
+static struct iommu_ops intel_iommu_ops = {
+   .domain_init= intel_iommu_domain_init,
+   .domain_destroy = intel_iommu_domain_destroy,
+   .attach_dev = intel_iommu_attach_device,
+   .detach_dev = intel_iommu_detach_device,
+   .map= intel_iommu_map,
+   .unmap  = intel_iommu_unmap,
+   .iova_to_phys   = intel_iommu_iova_to_phys,
+};
-- 
1.5.6.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/12] VT-d: adapt domain iova_to_phys function for IOMMU API

2008-12-02 Thread Joerg Roedel
Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c   |   15 ---
 include/linux/intel-iommu.h |2 --
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index ac22973..2e8b102 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2913,18 +2913,19 @@ int intel_iommu_found(void)
 }
 EXPORT_SYMBOL_GPL(intel_iommu_found);
 
-u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
+   unsigned long iova)
 {
+   struct dmar_domain *dmar_domain = domain-priv;
struct dma_pte *pte;
-   u64 pfn;
+   phys_addr_t paddr;
 
-   pfn = 0;
-   pte = addr_to_dma_pte(domain, iova);
+   paddr = 0;
+   pte = addr_to_dma_pte(dmar_domain, iova);
 
if (pte)
-   pfn = dma_pte_addr(*pte);
+   paddr = dma_pte_addr(*pte);
 
-   return pfn  VTD_PAGE_SHIFT;
+   return paddr;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index ac79a1c..469508f 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -337,8 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 
did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
-
 #ifdef CONFIG_DMAR
 int intel_iommu_found(void);
 #else /* CONFIG_DMAR */
-- 
1.5.6.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2318236 ] SCSI debug

2008-12-02 Thread SourceForge.net
Bugs item #2318236, was opened at 2008-11-20 13:41
Message generated for change (Comment added) made by ryandbair
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2318236&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: Duplicate
Priority: 5
Private: No
Submitted By: Ryan Bair (ryandbair)
Assigned to: Nobody/Anonymous (nobody)
Summary: SCSI debug

Initial Comment:
Here is the stdout with SCSI_DEBUG enabled. The guest is Windows Server 2003 R2 
x64 with an emulated scsi device being served from a 36GB raw file on a Debian 
Lenny host with KVM-79. I get the mentioned error on both quick and full format.

Let me know if there is anything else I can provide that would be of assistance.

--

Comment By: Ryan Bair (ryandbair)
Date: 2008-12-02 09:52

Message:
Sorry, I meant to reply to bug 2171940 but got a bit confused while
attempting to attach a file. 

I'm seeing the same issue as in that bug. Setup is extremely unstable with
SCSI drives and formatting the drive always results in an error saying that
the drive could not be formatted and it may be faulty. I've also tried
preformatting the drive, in that case it eventually gives a BSOD while
copying files with ntfs.sys being the faulting module. I've retried the
setup about 6 times and the results are consistent.

--

Comment By: Avi Kivity (avik)
Date: 2008-11-23 13:46

Message:
What exactly is the problem you're seeing?

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2318236&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/5] KVM: remove the IRQ ACK notifier assertions

2008-12-02 Thread Avi Kivity

Mark McLoughlin wrote:

We will obviously never pass a NULL struct kvm_irq_ack_notifier* to
these functions. They are always embedded in the assigned device
structure, so the assertions add nothing.

The irqchip_in_kernel() assertion is very out of place - clearly
this little abstraction needs to know nothing about the upper
layer details.
  


Applied all, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] anon_inodes: use fops->owner for module refcount

2008-12-02 Thread Christian Borntraeger

There is an imbalance for anonymous inodes. If the fops->owner field is set,
the module reference count of owner is decreased on release.
(filp_close --> __fput ---> fops_put)

On the other hand, anon_inode_getfd does not increase the module reference 
count of owner. This causes two problems:

- if owner is set, the module refcount goes negative
- if owner is not set, the module can be unloaded while code is running 

This patch changes anon_inode_getfd to be symmetric regarding fops->owner
handling.

I have checked all existing users of anon_inode_getfd. No one sets fops->owner,
that's why nobody has seen the module refcount go negative. The refcounting was
tested with a patched and unpatched KVM module (see patch 2/2). I also did an
epoll_open/close test.

Signed-off-by: Christian Borntraeger [EMAIL PROTECTED]
Reviewed-by: Davide Libenzi [EMAIL PROTECTED]
---
 fs/anon_inodes.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

Index: kvm/fs/anon_inodes.c
===
--- kvm.orig/fs/anon_inodes.c
+++ kvm/fs/anon_inodes.c
@@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, c
if (IS_ERR(anon_inode_inode))
return -ENODEV;
 
+   if (fops-owner  !try_module_get(fops-owner))
+   return -ENOENT;
+
error = get_unused_fd_flags(flags);
if (error  0)
-   return error;
+   goto err_module;
fd = error;
 
/*
@@ -128,6 +131,8 @@ err_dput:
dput(dentry);
 err_put_unused_fd:
put_unused_fd(fd);
+err_module:
+   module_put(fops-owner);
return error;
 }
 EXPORT_SYMBOL_GPL(anon_inode_getfd);
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] module_refcounting and anonymous inodes

2008-12-02 Thread Christian Borntraeger
Hello Avi,

here is the latest respin of my fixes for the kvm module unload problem:

[PATCH 1/2] anon_inodes: use fops->owner for module refcount
[PATCH 2/2] kvm: set owner of cpu and vm file operations

Both patches fix module reference counting problems and only matter for module
unload - nothing critical. 

Tested on s390 and x86_32.


Christian
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/12] VT-d: adapt domain map and unmap functions for IOMMU API

2008-12-02 Thread Joerg Roedel
Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c   |   22 +++---
 include/linux/intel-iommu.h |4 
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 62ae6b1..ac22973 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2864,20 +2864,21 @@ static void intel_iommu_detach_device(struct 
iommu_domain *domain,
vm_domain_remove_one_dev_info(dmar_domain, pdev);
 }
 
-int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot)
+static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova,
+  phys_addr_t hpa, size_t size, int prot)
 {
+   struct dmar_domain *dmar_domain = domain-priv;
u64 max_addr;
int addr_width;
int ret;
 
max_addr = (iova  VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
-   if (domain-max_addr  max_addr) {
+   if (dmar_domain-max_addr  max_addr) {
int min_agaw;
u64 end;
 
/* check if minimum agaw is sufficient for mapped address */
-   min_agaw = vm_domain_min_agaw(domain);
+   min_agaw = vm_domain_min_agaw(dmar_domain);
addr_width = agaw_to_width(min_agaw);
end = DOMAIN_MAX_ADDR(addr_width);
end = end  VTD_PAGE_MASK;
@@ -2887,25 +2888,24 @@ int intel_iommu_map_pages(struct dmar_domain *domain, 
dma_addr_t iova,
   __func__, min_agaw, max_addr);
return -EFAULT;
}
-   domain-max_addr = max_addr;
+   dmar_domain-max_addr = max_addr;
}
 
-   ret = domain_page_mapping(domain, iova, hpa, size, prot);
+   ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_map_pages);
 
-void intel_iommu_unmap_pages(struct dmar_domain *domain,
-dma_addr_t iova, size_t size)
+static void intel_iommu_unmap(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
 {
+   struct dmar_domain *dmar_domain = domain-priv;
dma_addr_t base;
 
/* The address might not be aligned */
base = iova  PAGE_MASK;
size = PAGE_ALIGN(size);
-   dma_pte_clear_range(domain, base, base + size);
+   dma_pte_clear_range(dmar_domain, base, base + size);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_unmap_pages);
 
 int intel_iommu_found(void)
 {
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 41d2a3b..ac79a1c 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -337,10 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 
did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot);
-void intel_iommu_unmap_pages(struct dmar_domain *domain,
-dma_addr_t iova, size_t size);
 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
 
 #ifdef CONFIG_DMAR
-- 
1.5.6.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2353510 ] Fedora 10 failures

2008-12-02 Thread SourceForge.net
Bugs item #2353510, was opened at 2008-11-27 14:46
Message generated for change (Comment added) made by technologov
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2353510&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Technologov (technologov)
Assigned to: Nobody/Anonymous (nobody)
Summary: Fedora 10 failures

Initial Comment:

Description:
Fedora 10 fails to install on KVM. (KVM-79)

The DVD version gets stuck near the end of the setup stage, when trying to install the GRUB
bootloader onto the HDD.
It didn't proceed within one hour, which indicates a stuck VM.

Sometimes it may get stuck earlier - during init or during early setup.

Live CD (32-bit) started fine on both Intel and AMD (except for a minor
rendering bug in the top menu).

Guest(s): Fedora 10 64-bit
Guest(s): Fedora 10 32-bit
Host(s): Fedora 7 64-bit, Intel, KVM-79
Host(s): Fedora 7 64-bit, AMD, KVM-79

Command: (for DVD)
qemu-kvm -cdrom /isos/linux/Fedora-10-x86_64-DVD.iso -m 512 -hda 
/vm/f10-64.qcow2  -boot d

*and* (for LiveCD)
qemu-kvm -cdrom /isos/linux/F10-i686-Live.iso -m 512

-Alexey, 27.11.2008.

--

Comment By: Technologov (technologov)
Date: 2008-12-02 12:39

Message:

I have opened a similar bug in the Fedora 10 Bugzilla:
https://bugzilla.redhat.com/show_bug.cgi?id=474116

-Alexey

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2353510&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: STOP error with virtio on KVM-79/2.6.18/Win2k3 x64 guest

2008-12-02 Thread Adrian Schmitz
> -----Original Message-----
> From: Dor Laor [mailto:[EMAIL PROTECTED]
> Sent: Monday, December 01, 2008 5:27 PM
> To: Adrian Schmitz
> Cc: kvm@vger.kernel.org; Avi Kivity
> Subject: Re: STOP error with virtio on KVM-79/2.6.18/Win2k3 x64 guest
>
> What driver version are you using? Version 2 is obsolete.
> I posted ver 3 a few months ago, Avi can you please upload it to
> sourceforge.
> My old public space was blocked so I'll send you a private attachment
> to
> test.
>
> Dor.

Okay, I received the version 3 drivers and installed them on my guest.
This fixed the STOP errors. I can now run the same iperf tests without
any crashes. 

The only problem I have now is that the virtio seems to be slower for me
than e1000. Running iperf between the guest machine and the bridge
interface on the host using the e1000 driver consistently gave
measurements of around 320 Mb/s. With the new virtio drivers, I'm
getting roughly 120 Mb/s. I'm not sure if I'm missing something simple.

I tried changing the connection rate setting in the guest from the
default 100M to 1G, but that didn't seem to help. I also tried using a
tcp window size of 16k instead of the guest os default 8k, but that
didn't make much of a difference, either. Below is some information
about my setup. Please let me know if there's any other info I can
provide, and thanks again for your help.

Host Dist:  CentOS 5.2
Host Kernel:2.6.18
Host Hardware:  Dual AMD Quad-Core, 8G memory
KVM Version:79 (modules and tools built and installed from source)
Guest OS:   Windows 2003 Server x64 SP2 (with all critical
updates)
Guest VCPUs:2
Guest memory:   4G
KVM command used: /usr/bin/kvm -S -M pc -m 4096 -smp 2 -name bth-host00
-uuid 84d941a5-126e-d001-6029-d7d434a7dad6 -monitor pty -localtime -boot
c -drive file=/dev/mapper/sys_bth-host00,if=ide,index=0,boot=on -drive
file=/isoimages/netkvm.iso,if=ide,media=cdrom,index=2 -net
nic,macaddr=00:16:3e:5d:a7:46,vlan=0,model=virtio -net
tap,fd=19,script=,vlan=0,ifname=vnet1 -serial none -parallel none -usb
-usbdevice tablet -vnc 127.0.0.1:1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/12] KVM: change KVM iommu.c to use IOMMU API

2008-12-02 Thread Joerg Roedel
Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 arch/ia64/kvm/Makefile  |2 +-
 arch/x86/include/asm/kvm_host.h |3 +-
 arch/x86/kvm/Makefile   |2 +-
 virt/kvm/iommu.c|   68 ---
 virt/kvm/kvm_main.c |2 +-
 5 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index cb69dfc..0bb99b7 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -51,7 +51,7 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_DMAR),y)
+ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f58f7eb..77f4afa 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -14,6 +14,7 @@
 #include linux/types.h
 #include linux/mm.h
 #include linux/mmu_notifier.h
+#include linux/iommu.h
 
 #include linux/kvm.h
 #include linux/kvm_para.h
@@ -356,7 +357,7 @@ struct kvm_arch{
 */
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
-   struct dmar_domain *intel_iommu_domain;
+   struct iommu_domain *iommu_domain;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 00f46c2..d3ec292 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,7 +7,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o 
ioapic.o \
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
-ifeq ($(CONFIG_DMAR),y)
+ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 832ee04..110c455 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -16,15 +16,18 @@
  *
  * Copyright (C) 2006-2008 Intel Corporation
  * Copyright IBM Corporation, 2008
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
  * Author: Allen M. Kay [EMAIL PROTECTED]
  * Author: Weidong Han [EMAIL PROTECTED]
  * Author: Ben-Ami Yassour [EMAIL PROTECTED]
+ * Author: Joerg Roedel [EMAIL PROTECTED]
  */
 
 #include linux/list.h
 #include linux/kvm_host.h
 #include linux/pci.h
 #include linux/dmar.h
+#include linux/iommu.h
 #include linux/intel-iommu.h
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm);
@@ -36,9 +39,9 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 {
gfn_t gfn = base_gfn;
pfn_t pfn;
-   int r = 0;
-   unsigned long i;
-   struct dmar_domain *domain = kvm-arch.intel_iommu_domain;
+   phys_addr_t paddr;
+   int i, r = 0;
+   struct iommu_domain *domain = kvm-arch.iommu_domain;
 
/* check if iommu exists and in use */
if (!domain)
@@ -46,18 +49,13 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 
for (i = 0; i  npages; i++) {
/* check if already mapped */
-   pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
-gfn_to_gpa(gfn));
-   if (pfn)
+   paddr = (pfn_t)iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
+   if (paddr)
kvm_iommu_put_pages(kvm, gfn, 1);
 
pfn = gfn_to_pfn(kvm, gfn);
-   r = intel_iommu_map_pages(domain,
- gfn_to_gpa(gfn),
- pfn_to_hpa(pfn),
- PAGE_SIZE,
- DMA_PTE_READ |
- DMA_PTE_WRITE);
+   r = iommu_map_range(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
+   PAGE_SIZE, DMA_PTE_READ | DMA_PTE_WRITE);
if (r) {
printk(KERN_ERR kvm_iommu_map_pages:
   iommu failed to map pfn=%lx\n, pfn);
@@ -91,7 +89,7 @@ int kvm_assign_device(struct kvm *kvm,
  struct kvm_assigned_dev_kernel *assigned_dev)
 {
struct pci_dev *pdev = NULL;
-   struct dmar_domain *domain = kvm-arch.intel_iommu_domain;
+   struct iommu_domain *domain = kvm-arch.iommu_domain;
int r;
 
/* check if iommu exists and in use */
@@ -102,7 +100,12 @@ int kvm_assign_device(struct kvm *kvm,
if (pdev == NULL)
return -ENODEV;
 
-   r = intel_iommu_assign_device(domain, pdev);
+   if (!iommu_found()) {
+   printk(KERN_ERR %s: No IOMMU found\n, __func__);
+   return -ENODEV;
+   }
+
+   r = iommu_attach_device(domain, pdev-dev);
if (r) {
printk(KERN_ERR assign device %x:%x.%x failed,
   pdev-bus-number,
@@ 

[ kvm-Bugs-2088475 ] OpenSuse10.2 can not be installed

2008-12-02 Thread SourceForge.net
Bugs item #2088475, was opened at 2008-09-02 11:37
Message generated for change (Comment added) made by technologov
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2088475&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Jiajun Xu (jiajun)
Assigned to: Nobody/Anonymous (nobody)
Summary: OpenSuse10.2 can not be installed

Initial Comment:
OpenSuse10.2 cannot be installed on KVM. The installer stops after loading
ISOLinux.
This is against the latest kvm commits: kvm.git
:5b9207ec01681337786c7898ffc0165ec4e7c2e4,
userspace.git :5f2a9719f105e29fbde4529cf919a5351b05da9a.


--

Comment By: Technologov (technologov)
Date: 2008-12-02 16:06

Message:
BTW: If you absolutely _must_ have openSUSE 10.2 there are workarounds that
allow you to install it anyway.

1. Install 10.2 using Qemu, then disable bootloader
-or-
2. Start VM, and press-n-hold shift during KVM's BIOS load.

--

Comment By: Technologov (technologov)
Date: 2008-12-02 15:58

Message:
It crashed with old KVMs, but with newer ones it just gets stuck. It doesn't matter.

And yes, openSUSE 11.0 has been tested and works.

--

Comment By: Jiajun Xu (jiajun)
Date: 2008-10-16 17:23

Message:
From the bug description, opensuse11.0 should work?
And we did not see a guest crash during installation; the guest hangs when
loading grub and no error messages are printed.

--

Comment By: Technologov (technologov)
Date: 2008-10-16 17:04

Message:
Known issue:
https://sourceforge.net/tracker/index.php?func=detail&aid=1760424&group_id=180599&atid=893831

This bug is a duplicate.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2088475&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/12] select IOMMU_API when DMAR and/or AMD_IOMMU is selected

2008-12-02 Thread Joerg Roedel
These two IOMMUs can implement the current version of this API. So
select the API if one or both of these IOMMU drivers is selected.

Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 arch/ia64/Kconfig |3 +++
 arch/x86/Kconfig  |3 +++
 drivers/base/Makefile |1 +
 3 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6bd91ed..6a7b0c9 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -687,3 +687,6 @@ config IRQ_PER_CPU
 
 config IOMMU_HELPER
def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
+config IOMMU_API
+   def_bool (DMAR)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7..b9f7187 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -580,6 +580,9 @@ config SWIOTLB
 config IOMMU_HELPER
def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
 
+config IOMMU_API
+   def_bool (AMD_IOMMU || DMAR)
+
 config MAXSMP
bool Configure Maximum number of SMP Processors and NUMA Nodes
depends on X86_64  SMP  BROKEN
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index c666373..b5b8ba5 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_FW_LOADER)   += firmware_class.o
 obj-$(CONFIG_NUMA) += node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
 obj-$(CONFIG_SMP)  += topology.o
+obj-$(CONFIG_IOMMU_API) += iommu.o
 ifeq ($(CONFIG_SYSFS),y)
 obj-$(CONFIG_MODULES)  += module.o
 endif
-- 
1.5.6.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm-userspace: Add missing KVM string in the signature of CPUID

2008-12-02 Thread Guillaume Thouvenin
This adds the missing KVM string to the CPUID signature. Without
it, signature[2] is not well defined.

Signed-off-by: Guillaume Thouvenin [EMAIL PROTECTED]

---
 qemu/qemu-kvm-x86.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu/qemu-kvm-x86.c b/qemu/qemu-kvm-x86.c
index 671b5b3..e9b200a 100644
--- a/qemu/qemu-kvm-x86.c
+++ b/qemu/qemu-kvm-x86.c
@@ -573,7 +573,7 @@ int kvm_arch_qemu_init_env(CPUState *cenv)
 
 #ifdef KVM_CPUID_SIGNATURE
 /* Paravirtualization CPUIDs */
-memcpy(signature, KVMKVMKVM, 12);
+memcpy(signature, KVMKVMKVMKVM, 12);
 pv_ent = cpuid_ent[cpuid_nent++];
 memset(pv_ent, 0, sizeof(*pv_ent));
 pv_ent-function = KVM_CPUID_SIGNATURE;
-- 
1.6.0.4.623.g171d7

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/12] VT-d: adapt domain init and destroy functions for IOMMU API

2008-12-02 Thread Joerg Roedel
Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c   |   30 +-
 include/linux/intel-iommu.h |2 --
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 7f12852..59b9cdb 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -35,6 +35,7 @@
 #include linux/mempool.h
 #include linux/timer.h
 #include linux/iova.h
+#include linux/iommu.h
 #include linux/intel-iommu.h
 #include asm/cacheflush.h
 #include asm/iommu.h
@@ -2779,32 +2780,34 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
return domain;
 }
 
-struct dmar_domain *intel_iommu_alloc_domain(void)
+static int intel_iommu_domain_init(struct iommu_domain *domain)
 {
-   struct dmar_domain *domain;
+   struct dmar_domain *dmar_domain;
 
-   domain = iommu_alloc_vm_domain();
-   if (!domain) {
+   dmar_domain = iommu_alloc_vm_domain();
+   if (!dmar_domain) {
printk(KERN_ERR
intel_iommu_domain_alloc: domain == NULL\n);
-   return NULL;
+   return -ENOMEM;
}
-   if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+   if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
printk(KERN_ERR
intel_iommu_domain_alloc: domain_init() failed\n);
-   vm_domain_exit(domain);
-   return NULL;
+   vm_domain_exit(dmar_domain);
+   return -ENOMEM;
}
+   domain-priv = dmar_domain;
 
-   return domain;
+   return 0;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
 
-void intel_iommu_free_domain(struct dmar_domain *domain)
+static void intel_iommu_domain_destroy(struct iommu_domain *domain)
 {
-   vm_domain_exit(domain);
+   struct dmar_domain *dmar_domain = domain-priv;
+
+   domain-priv = NULL;
+   vm_domain_exit(dmar_domain);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
 
 int intel_iommu_assign_device(struct dmar_domain *domain,
  struct pci_dev *pdev)
@@ -2922,3 +2925,4 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, 
u64 iova)
return pfn  VTD_PAGE_SHIFT;
 }
 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
+
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index c2f37b8..5a4ce23 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -337,8 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 
did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-struct dmar_domain *intel_iommu_alloc_domain(void);
-void intel_iommu_free_domain(struct dmar_domain *domain);
 int intel_iommu_assign_device(struct dmar_domain *domain,
  struct pci_dev *pdev);
 void intel_iommu_deassign_device(struct dmar_domain *domain,
-- 
1.5.6.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Kvm: Qemu: save nvram

2008-12-02 Thread Daniel P. Berrange
On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote:
> This patch saves the nvram. It saves the nvram when the -name argument is
> specified, and the saved file is named after that argument. If the argument
> is not specified, the nvram is not saved.

I think we might be better off having an explicit command line arg for the nvram
path rather than hardcoding the directory, because there may well be times
where you want to have nvram saved, but don't want to specify -name, and
vice versa.

  -nvram foo.data

could prepend a default directory of $localstatedir/lib/qemu/nvram, where
$localstatedir  is set from 'configure' script, or

  -nvram /some/path/foo.data

would use the explicit path given.

 diff --git a/qemu/target-ia64/firmware.h b/qemu/target-ia64/firmware.h
 index 553a9f9..71aef2a 100644
 --- a/qemu/target-ia64/firmware.h
 +++ b/qemu/target-ia64/firmware.h
 @@ -34,11 +34,27 @@

[..snip...]

 +#define NVRAM_DIR /usr/local/share/qemu/nvram/

This is definitely wrong. You cannot assume /usr/local as the install
prefix, and using '$prefix/share' violates the FHS. '$prefix/share' is
for read-only data that can be shared across machines, not variable runtime
state data. I'd expect it to be in $localstatedir/lib/qemu/nvram, which
would normally default to $prefix/var/lib/qemu/nvram, but for distro package
builds would typically be overridden to /var/lib/qemu/nvram.

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: splice() based interguest networking

2008-12-02 Thread Avi Kivity

Anthony Liguori wrote:


1) On TX, we vmsplice() from the sg buffer to one pipe.  This will end 
up being vmsplice_to_pipe() in the kernel which is zero-copy.




That implies we do the MAC address switching in userspace (or that this 
is a point-to-point protocol, which severely limits its usefulness).


I think we can still have one-copy interguest networking if we have 
proper skb destructors; and since we need that for the more important 
guest-external copyless support, we basically get it for free (if delayed).


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/12] KVM: change to use new APIs for kvm vtd

2008-12-02 Thread Joerg Roedel
From: Weidong Han [EMAIL PROTECTED]

This patch switches KVM VT-d over to the new APIs, and adds device
deassignment for hotplug.

[Joerg: coding style cleanups]

Signed-off-by: Weidong Han [EMAIL PROTECTED]
Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
---
 include/linux/kvm.h  |5 ++
 include/linux/kvm_host.h |   23 --
 virt/kvm/kvm_main.c  |   49 -
 virt/kvm/vtd.c   |  107 +++---
 4 files changed, 143 insertions(+), 41 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0997e6f..49432e9 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -395,6 +395,9 @@ struct kvm_trace_rec {
 #if defined(CONFIG_X86)
 #define KVM_CAP_DEVICE_MSI 20
 #endif
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+#define KVM_CAP_DEVICE_DEASSIGNMENT 21
+#endif
 
 /*
  * ioctls for VM fds
@@ -428,6 +431,8 @@ struct kvm_trace_rec {
   struct kvm_assigned_pci_dev)
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
struct kvm_assigned_irq)
+#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x71, \
+struct kvm_assigned_pci_dev)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8091a4d..cb1d404 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -329,9 +329,12 @@ void kvm_free_irq_source_id(struct kvm *kvm, int 
irq_source_id);
 #ifdef CONFIG_DMAR
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
unsigned long npages);
-int kvm_iommu_map_guest(struct kvm *kvm,
-   struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
+int kvm_assign_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_deassign_device(struct kvm *kvm,
+   struct kvm_assigned_dev_kernel *assigned_dev);
 #else /* CONFIG_DMAR */
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
  gfn_t base_gfn,
@@ -340,9 +343,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm,
return 0;
 }
 
-static inline int kvm_iommu_map_guest(struct kvm *kvm,
- struct kvm_assigned_dev_kernel
- *assigned_dev)
+static inline int kvm_iommu_map_guest(struct kvm *kvm)
 {
return -ENODEV;
 }
@@ -351,6 +352,18 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
return 0;
 }
+
+static inline int kvm_assign_device(struct kvm *kvm,
+   struct kvm_assigned_dev_kernel *assigned_dev)
+{
+   return 0;
+}
+
+static inline int kvm_deassign_device(struct kvm *kvm,
+   struct kvm_assigned_dev_kernel *assigned_dev)
+{
+   return 0;
+}
 #endif /* CONFIG_DMAR */
 
 static inline void kvm_guest_enter(void)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8dab7ce..fe6aba0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -472,7 +472,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(match-list, kvm-arch.assigned_dev_head);
 
if (assigned_dev-flags  KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-   r = kvm_iommu_map_guest(kvm, match);
+   if (!kvm-arch.intel_iommu_domain) {
+   r = kvm_iommu_map_guest(kvm);
+   if (r)
+   goto out_list_del;
+   }
+   r = kvm_assign_device(kvm, match);
if (r)
goto out_list_del;
}
@@ -494,6 +499,35 @@ out_free:
 }
 #endif
 
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+   struct kvm_assigned_pci_dev *assigned_dev)
+{
+   int r = 0;
+   struct kvm_assigned_dev_kernel *match;
+
+   mutex_lock(kvm-lock);
+
+   match = kvm_find_assigned_dev(kvm-arch.assigned_dev_head,
+ assigned_dev-assigned_dev_id);
+   if (!match) {
+   printk(KERN_INFO %s: device hasn't been assigned before, 
+ so cannot be deassigned\n, __func__);
+   r = -EINVAL;
+   goto out;
+   }
+
+   if (assigned_dev-flags  KVM_DEV_ASSIGN_ENABLE_IOMMU)
+   kvm_deassign_device(kvm, match);
+
+   kvm_free_assigned_device(kvm, match);
+
+out:
+   mutex_unlock(kvm-lock);
+   return r;
+}
+#endif
+
 static inline int valid_vcpu(int n)
 {
return likely(n = 0  n  KVM_MAX_VCPUS);
@@ -1833,6 +1867,19 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
 #endif
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+   case KVM_DEASSIGN_PCI_DEVICE: {
+   struct kvm_assigned_pci_dev assigned_dev;
+
+   r = -EFAULT;
+   if 

[PATCH 2/2] qemu: ppc: fix build warnings

2008-12-02 Thread Hollis Blanchard
Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]
---
 qemu/hw/device_tree.c |   14 +++---
 qemu/hw/device_tree.h |   12 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/qemu/hw/device_tree.c b/qemu/hw/device_tree.c
index e73129d..2621ff1 100644
--- a/qemu/hw/device_tree.c
+++ b/qemu/hw/device_tree.c
@@ -31,7 +31,7 @@
 /* This function reads device-tree property files that are of
  * a single cell size
  */
-uint32_t read_proc_dt_prop_cell(char *path_in_device_tree)
+uint32_t read_proc_dt_prop_cell(const char *path_in_device_tree)
 {
char *buf = NULL;
int i;
@@ -65,7 +65,7 @@ uint32_t read_proc_dt_prop_cell(char *path_in_device_tree)
 
 #ifdef CONFIG_LIBFDT
 /* support functions */
-static int get_offset_of_node(void *fdt, char *node_path)
+static int get_offset_of_node(void *fdt, const char *node_path)
 {
int node_offset;
node_offset = fdt_path_offset(fdt, node_path);
@@ -78,7 +78,7 @@ static int get_offset_of_node(void *fdt, char *node_path)
 }
 
 /* public functions */
-void *load_device_tree(char *filename_path, unsigned long load_addr)
+void *load_device_tree(const char *filename_path, unsigned long load_addr)
 {
int dt_file_size;
int dt_file_load_size;
@@ -134,7 +134,7 @@ fail:
return NULL;
 }
 
-void dump_device_tree_to_file(void *fdt, char *filename)
+void dump_device_tree_to_file(void *fdt, const char *filename)
 {
int fd;
fd = open(filename, O_RDWR|O_CREAT, O_RDWR);
@@ -148,7 +148,7 @@ void dump_device_tree_to_file(void *fdt, char *filename)
close(fd);
 }
 
-void dt_cell(void *fdt, char *node_path, char *property,
+void dt_cell(void *fdt, const char *node_path, const char *property,
uint32_t val)
 {
int offset;
@@ -163,7 +163,7 @@ void dt_cell(void *fdt, char *node_path, char *property,
 }
 
 /* This function is to manipulate a cell with multiple values */
-void dt_cell_multi(void *fdt, char *node_path, char *property,
+void dt_cell_multi(void *fdt, const char *node_path, const char *property,
uint32_t *val_array, int size)
 {
int offset;
@@ -177,7 +177,7 @@ void dt_cell_multi(void *fdt, char *node_path, char 
*property,
}
 }
 
-void dt_string(void *fdt, char *node_path, char *property,
+void dt_string(void *fdt, const char *node_path, const char *property,
char *string)
 {
int offset;
diff --git a/qemu/hw/device_tree.h b/qemu/hw/device_tree.h
index 05a81ef..a311309 100644
--- a/qemu/hw/device_tree.h
+++ b/qemu/hw/device_tree.h
@@ -11,16 +11,16 @@
  */
 
 /* device-tree proc support functions */
-uint32_t read_proc_dt_prop_cell(char *path_in_device_tree);
+uint32_t read_proc_dt_prop_cell(const char *path_in_device_tree);
 
 #ifdef CONFIG_LIBFDT
 /* device tree functions */
-void *load_device_tree(char *filename_path, target_ulong load_addr);
-void dump_device_tree_to_file(void *fdt, char *filename);
-void dt_cell(void *fdt, char *node_path, char *property,
+void *load_device_tree(const char *filename_path, target_ulong load_addr);
+void dump_device_tree_to_file(void *fdt, const char *filename);
+void dt_cell(void *fdt, const char *node_path, const char *property,
uint32_t val);
-void dt_cell_multi(void *fdt, char *node_path, char *property,
+void dt_cell_multi(void *fdt, const char *node_path, const char *property,
uint32_t *val_array, int size);
-void dt_string(void *fdt, char *node_path, char *property,
+void dt_string(void *fdt, const char *node_path, const char *property,
char *string);
 #endif
-- 
1.5.6.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


powerpc kvm-userspace build fixes

2008-12-02 Thread Hollis Blanchard

These patches fix the kvm-userspace qemu build after a recent merge with
upstream qemu.

I'm also seeing a build dependency issue with dyngen-opc.h that I don't see
upstream. I haven't sorted that out yet, but running
"make qemu/ppcemb-softmmu/dyngen-opc.h" first works around the problem.

-Hollis
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] qemu: ppc: fix build after qemu upstream changes

2008-12-02 Thread Hollis Blanchard
Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]
---
 qemu/hw/ppc440_bamboo.c |   37 +
 1 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c
index bf42245..79e4ea8 100644
--- a/qemu/hw/ppc440_bamboo.c
+++ b/qemu/hw/ppc440_bamboo.c
@@ -38,13 +38,14 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size,
qemu_irq *pic;
ppc4xx_pci_t *pci;
CPUState *env;
-   uint64_t ep=0;
-   uint64_t la=0;
-   int is_linux=1; /* Will assume allways is Linux for now */
-   target_long kernel_size=0;
-   target_ulong initrd_base=0;
-   target_long initrd_size=0;
-   target_ulong dt_base=0;
+uint64_t elf_entry;
+uint64_t elf_lowaddr;
+   target_ulong entry = 0;
+   target_ulong loadaddr = 0;
+   target_long kernel_size = 0;
+   target_ulong initrd_base = 0;
+   target_long initrd_size = 0;
+   target_ulong dt_base = 0;
void *fdt;
int ret;
int ram_stick_sizes[] = {25620, 12820, 6420,
@@ -105,20 +106,24 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size,
 
/* load kernel with uboot loader */
printf(%s: load kernel\n, __func__);
-   ret = load_uimage(kernel_filename, ep, la, kernel_size, is_linux);
-   if (ret  0)
-   ret = load_elf(kernel_filename, 0, ep, la, NULL);
-
-   if (ret  0) {
+   kernel_size = load_uimage(kernel_filename, entry, loadaddr, NULL);
+   if (kernel_size  0) {
+   kernel_size = load_elf(kernel_filename, 0, elf_entry, 
elf_lowaddr,
+  NULL);
+entry = elf_entry;
+loadaddr = elf_lowaddr;
+}
+
+   if (kernel_size  0) {
fprintf(stderr, qemu: could not load kernel '%s'\n,
kernel_filename);
exit(1);
}
-   printf(kernel is at guest address: 0x%lx\n, (unsigned long)la);
+   printf(kernel is at guest address: 0x%lx\n, (unsigned long)loadaddr);
 
/* load initrd */
if (initrd_filename) {
-   initrd_base = kernel_size + la;
+   initrd_base = kernel_size + loadaddr;
printf(%s: load initrd\n, __func__);
initrd_size = load_image(initrd_filename,
phys_ram_base + initrd_base);
@@ -156,7 +161,7 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size,
if (initrd_base)
dt_base = initrd_base + initrd_size;
else
-   dt_base = kernel_size + la;
+   dt_base = kernel_size + loadaddr;
 
fdt = load_device_tree(buf, (unsigned long)(phys_ram_base + dt_base));
if (fdt == NULL) {
@@ -188,7 +193,7 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size,
/* location of device tree in register */
env-gpr[3] = dt_base;
 #endif
-   env-nip = ep;
+   env-nip = entry;
}
 
if (pci) {
-- 
1.5.6.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/5] KVM: add KVM_USERSPACE_IRQ_SOURCE_ID assertions

2008-12-02 Thread Avi Kivity

Mark McLoughlin wrote:

Make sure kvm_request_irq_source_id() never returns
KVM_USERSPACE_IRQ_SOURCE_ID.

Likewise, check that kvm_free_irq_source_id() never accepts
KVM_USERSPACE_IRQ_SOURCE_ID.
  


An alternative way to do this is to drop the distinction 
KVM_USERSPACE_IRQ_SOURCE_ID has, and simply allocate it via the normal 
irq source id allocation API (and store it in struct kvm).  That's not 
worth the churn though.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Avi Kivity

Luis Henriques wrote:

On Sun, Nov 30, 2008 at 10:44:55PM +0200, Avi Kivity wrote:
  

Luis Henriques wrote:


No, I was not able to reproduce the issue.  Please let me know if you need some
more information on my system (.config, for instance).
  
  
Were you using some other virtualization product?  Were you running  
suspend/resume?



No for both questions.  However, I had compiled support for suspend (not sure if
this is what you mean by running suspend/resume) - This is a feature I used
only once or twice...
  


The underlying problem is that an svm instruction has been executed, but 
svm is disabled.  Since kvm enables svm unconditionally on all 
processors on startup, there are only a few paths that can potentially 
trigger this:


- another virtualization module turned svm off
- cpu hotadd/hotremove (suspend/resume triggers this)
- something did a read-modify-write cycle on cr4 (which contains the svm 
enable bit) while kvm enabled that bit

- core was turned off (does linux power management do that?)

Anything ring a bell?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: Qemu: push_nmi should be only used by I386 Arch.

2008-12-02 Thread Avi Kivity

Hollis Blanchard wrote:

Well, it happens, but I do wish that more people would use cscope or
even grep to find all users of a symbol.

  


That's reasonable.


I also wish that Avi would get his PPC box working so he could catch
build breaks like these. Cross-compilers would do as well.

  


I now have a build box somewhere.  It's now cloning the source 
repositories.  Once I start rejecting patches as "won't build", I hope 
people will be more careful.



Acked-by: Hollis Blanchard [EMAIL PROTECTED]
  


Applied, thanks Jan.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] add ksm kernel shared memory driver.

2008-12-02 Thread Chris Wright
* Alan Cox ([EMAIL PROTECTED]) wrote:
  +   r = !memcmp(old_digest, sha1_item-sha1val, SHA1_DIGEST_SIZE);
  +   mutex_unlock(sha1_lock);
  +   if (r) {
  +   char *old_addr, *new_addr;
  +   old_addr = kmap_atomic(oldpage, KM_USER0);
  +   new_addr = kmap_atomic(newpage, KM_USER1);
  +   r = !memcmp(old_addr+PAGEHASH_LEN, new_addr+PAGEHASH_LEN,
  +   PAGE_SIZE-PAGEHASH_LEN);
 
 NAK - this isn't guaranteed to be robust so you could end up merging
 different pages one provided by a malicious attacker.

I presume you're referring to the digest comparison.  While there's
theoretical concern of hash collision, it's mitigated by hmac(sha1)
so the attacker can't brute force for known collisions.

thanks,
-chris
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] add ksm kernel shared memory driver.

2008-12-02 Thread Alan Cox
On Tue, 2 Dec 2008 10:07:24 -0800
Chris Wright [EMAIL PROTECTED] wrote:

 * Alan Cox ([EMAIL PROTECTED]) wrote:
   + r = !memcmp(old_digest, sha1_item-sha1val, SHA1_DIGEST_SIZE);
   + mutex_unlock(sha1_lock);
   + if (r) {
   + char *old_addr, *new_addr;
   + old_addr = kmap_atomic(oldpage, KM_USER0);
   + new_addr = kmap_atomic(newpage, KM_USER1);
   + r = !memcmp(old_addr+PAGEHASH_LEN, new_addr+PAGEHASH_LEN,
   + PAGE_SIZE-PAGEHASH_LEN);
  
  NAK - this isn't guaranteed to be robust so you could end up merging
  different pages one provided by a malicious attacker.
 
 I presume you're referring to the digest comparison.  While there's
 theoretical concern of hash collision, it's mitigated by hmac(sha1)
 so the attacker can't brute force for known collisions.

Using current known techniques. A random collision is just as bad news.

This code simply isn't fit for the kernel.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[BUG] virtio-pci queue allocation not page-aligned

2008-12-02 Thread Hollis Blanchard
I just spent a number of hours tracking this one down, and I'm not too
thrilled about it. vp_find_vq() does the memory allocation for virtio
PCI rings, and it uses kzalloc() to do it. This is bad because the ring
memory *must* be page-aligned.

According to Anthony, at the time this code was written, various slab
allocators were checked and all happened to return page-aligned buffers.
So how did I hit a problem? I had enabled CONFIG_SLUB_DEBUG_ON while
investigating an unrelated problem, which offset the address by 64
bytes.

One option is to add a BUG_ON(addr & ~PAGE_MASK) to vp_find_vq(). That's
better than nothing, but still stinks.

Another is to use Kconfig to express that slab debugging breaks virtio.
Also pretty lame IMHO, will look pretty funny in the Kconfig file, and
that only solves today's problem. Another slab allocator or a change in
behavior of an existing allocator could mean that ordinary allocations
also become non-page-aligned.

Finally, we could use the interface intended for exactly this purpose:
the page allocator. If there's some problem with high memory, don't
allocate it with GFP_HIGHMEM.

-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Luis Henriques
On Tue, Dec 02, 2008 at 02:23:52PM +0200, Avi Kivity wrote:
 Luis Henriques wrote:
 On Sun, Nov 30, 2008 at 10:44:55PM +0200, Avi Kivity wrote:
   
 Luis Henriques wrote:
 
 No, I was not able to reproduce the issue.  Please let me know if you need 
 some
 more information on my system (.config, for instance).
 
 Were you using some other virtualization product?  Were you running   
 suspend/resume?
 

 No for both questions.  However, I had compiled support for suspend (not 
 sure if
 this is what you mean by running suspend/resume) - This is a feature I used
 only once or twice...
   

 The underlying problem is that an svm instruction has been executed, but  
 svm is disabled.  Since kvm enables svm unconditionally on all  
 processors on startup, there are only a few paths that can potentially  
 trigger this:

 - another virtualization module turned svm off
 - cpu hotadd/hotremove (suspend/resume triggers this)
 - something did a read-modify-write cycle on cr4 (which contains the svm  
 enable bit) while kvm enabled that bit
 - core was turned off (does linux power management do that?)

 Anything ring a bell?

Ok, I am not sure but there is a possibility of having the vboxdrv driver
loaded. _But_ I was not using it, i.e., I do not use VirtualBox.  In my
attempts to reproduce the issue, I tried to load this module but, unfortunately,
my distro has this package broken ATM (err... in fact, the problem is not the
distro but me - I am using an unstable version).

vboxdrv could be a problem if I was using it, but I believe it shouldn't cause
this if it is not being used... but it's just a guess.

-- 
Luis Henriques

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] virtio-pci queue allocation not page-aligned

2008-12-02 Thread Anthony Liguori

Hollis Blanchard wrote:

Finally, we could use the interface intended for exactly this purpose:
the page allocator. If there's some problem with high memory, don't
allocate it with GFP_HIGHMEM.
  


Can you work up a patch to do this?

Regards,

Anthony Liguori


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Avi Kivity

Luis Henriques wrote:

Ok, I am not sure but there is a possibility of having the vboxdrv driver
loaded. _But_ I was not using, i.e., I do not use VirtualBox.  In my
attempts to reproduce the issue, I tried to load this module but, unfortunatly,
my distro has this package broken ATM (err... in fact, the problem is not the
distro but me - I am using an unstable version).

vboxdrv could be a problem if I was using it, but I believe it shouldn't cause
this if it is not being used... but it's just a guess.
  


Let's keep an eye open on it.  If it reproduces, be sure to note what 
drivers are loaded.  Meanwhile, I don't recommend having different 
virtualization modules loaded concurrently.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Avi Kivity

Luis Henriques wrote:

Ok, I am not sure but there is a possibility of having the vboxdrv driver
loaded. _But_ I was not using, i.e., I do not use VirtualBox.  In my
attempts to reproduce the issue, I tried to load this module but, unfortunatly,
my distro has this package broken ATM (err... in fact, the problem is not the
distro but me - I am using an unstable version).

vboxdrv could be a problem if I was using it, but I believe it shouldn't cause
this if it is not being used... but it's just a guess.
  


Let's keep an eye open on it.  If it reproduces, be sure to note what 
drivers are loaded.  Meanwhile, I don't recommend having different 
virtualization modules loaded concurrently.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Luis Henriques
(I am CC'ing Steven Rostedt since he might be interested in this)

On Tue, Dec 02, 2008 at 07:09:14PM +, Luis Henriques wrote:
 On Tue, Dec 02, 2008 at 02:23:52PM +0200, Avi Kivity wrote:
  Luis Henriques wrote:
  On Sun, Nov 30, 2008 at 10:44:55PM +0200, Avi Kivity wrote:

  Luis Henriques wrote:
  
  No, I was not able to reproduce the issue.  Please let me know if you 
  need some
  more information on my system (.config, for instance).
  
  Were you using some other virtualization product?  Were you running   
  suspend/resume?
  
 
  No for both questions.  However, I had compiled support for suspend (not 
  sure if
  this is what you mean by running suspend/resume) - This is a feature I 
  used
  only once or twice...

 
  The underlying problem is that an svm instruction has been executed, but  
  svm is disabled.  Since kvm enables svm unconditionally on all  
  processors on startup, there are only a few paths that can potentially  
  trigger this:
 
  - another virtualization module turned svm off
  - cpu hotadd/hotremove (suspend/resume triggers this)
  - something did a read-modify-write cycle on cr4 (which contains the svm  
  enable bit) while kvm enabled that bit
  - core was turned off (does linux power management do that?)
 
  Anything ring a bell?
 
 Ok, I am not sure but there is a possibility of having the vboxdrv driver
 loaded. _But_ I was not using, i.e., I do not use VirtualBox.  In my
 attempts to reproduce the issue, I tried to load this module but, 
 unfortunatly,
 my distro has this package broken ATM (err... in fact, the problem is not the
 distro but me - I am using an unstable version).
 
 vboxdrv could be a problem if I was using it, but I believe it shouldn't cause
 this if it is not being used... but it's just a guess.

I have some other information to add to my previous email.  However, I do not
know whether it is related with my first bug report.

It looks like ftrace may stop the CPUs in some situations and I have been
playing with ftrace for some time.  So, here's what I just did:  started ftrace
with function tracer and then started kvm.  I got ugly crashes and apparently
quite easy to reproduce (I get complete freeze or immediate reboot).

I did not investigate this issue and, again, it may not be related to my
initial report but there's definitely something wrong here, right?

(just to refresh, I am using 2.6.28-rc6-7-ged31348 in x86_64 machine)

-- 
Luis Henriques

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Steven Rostedt
[ added Ingo too ]

On Tue, 2008-12-02 at 19:46 +, Luis Henriques wrote:
 (I am CC'ing to Steven Rostedt since he might be interested on this)
 
 On Tue, Dec 02, 2008 at 07:09:14PM +, Luis Henriques wrote:

 I have some other information to had to my previous email.  However, I do not
 know whether it is related with my first bug report.
 
 It looks like ftrace may stop the CPUs in some situations and I have been
 playing with ftrace for some time.  So, here's what I just did:  started 
 ftrace
 with function tracer and then started kvm.  I got ugly crashes and apparently
 quite easy to reproduce (I get complete freeze or immediate reboot).
 
 I did not investigated this issue and, again, it may not be related with my
 initial report but there's definitely something wrong here, right?
 
 (just to refresh, I am using 2.6.28-rc6-7-ged31348 in x86_64 machine)
 

Hi,

ftrace only stops the CPUs on start up or shutdown of the function
tracer (i.e. echo function > /debugfs/tracing/current_tracer).  It does
not stop the CPUs at any other time.

Now what ftrace does do, is to call a tracing function at pretty much
every function call in the kernel. In most places this is fine, but
there are some cases that this can be an issue. For example, we can not
trace suspend and resume because on resume smp_processor_id() is
undefined, which ftrace uses.

The following must be available without recursion for the function
tracer to work:

  local_irq_save/restore
  smp_processor_id
  preempt_enable/disable_notrace
  atomic_inc/dec

There could be other things that might be causing the crash. Do you have
a crash dump and config available?

Thanks,

-- Steve


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Avi Kivity

Steven Rostedt wrote:

The following must be available without recursion for the function
tracer to work:

  local_irq_save/restore
  smp_processor_id
  preempt_enable/disable_notrace
  atomic_inc/dec
  


In arch/x86/kvm/svm.c, function svm_vcpu_run(), everything between the 
vmrun instruction and the call to load_host_msrs() is executed without a 
live pda, so no smp_processor_id().  Could easily be fixed by 
rearranging things.



--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Steven Rostedt

On Tue, 2008-12-02 at 22:38 +0200, Avi Kivity wrote:
 Steven Rostedt wrote:
  The following must be available without recursion for the function
  tracer to work:
 
local_irq_save/restore
smp_processor_id
preempt_enable/disable_notrace
atomic_inc/dec

 
 In arch/x86/kvm/svm.c, function svm_vcpu_run(), everything between the 
 vmrun instruction and the call to load_host_msrs() is executed without a 
 live pda, so no smp_processor_id().  Could easily be fixed by 
 rearranging things.

That would be best, but if you have trouble, you could surround the
trouble area with a: tracing_stop(); tracing_start(); That will prevent
tracing within those locations. The function tracer will still be
called, but it will exit the function without doing anything else.

Note: that stops tracing on all CPUS, not just the CPU that called it.

-- Steve


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Avi Kivity

Steven Rostedt wrote:

 

In arch/x86/kvm/svm.c, function svm_vcpu_run(), everything between the 
vmrun instruction and the call to load_host_msrs() is executed without a 
live pda, so no smp_processor_id().  Could easily be fixed by 
rearranging things.



That would be best, but if you have trouble, you could surround the
trouble area with a: tracing_stop(); tracing_start(); That will prevent
tracing within those locations. The function tracer will still be
called, but it will exit the function without doing anything else.

Note: that stops tracing on all CPUS, not just the CPU that called it.
  


The vmrun instruction can execute for a long time (hours, if you have a 
dyntick kernel and no timers scheduled), so that's less than optimal.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Hangs

2008-12-02 Thread chris
On Tue, Dec 02, 2008 at 02:09:39PM +0200, Avi Kivity wrote:
 xming wrote:
 The same guest did it again.
 
 # uname -a
 Linux spaceball 2.6.27.6 #1 SMP Fri Nov 14 11:51:10 CET 2008 i686 QEMU
 Virtual CPU version 0.9.1 AuthenticAMD GNU/Linux
 
 # date
 Thu Dec 19 01:54:27 WET 1912
 
 # uptime
  01:54:29 up 14666 days, 21:17, 12 users,  load average: 3.99, 3.97, 3.91
 
 What can I do to provide more info?
   
 
 A way to reproduce would be best.  If you have access to multiple hosts, 
 try to isolate whether it happens only on amd or only on intel.
 
 -- 
 error compiling committee.c: too many arguments to function
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html


I have a way to reproduce my instance of the problem easily now.  I was trying
to build a new kernel on my guest, and found that depmod hangs guests every
time.
In my case, I only have an AMD processor - I don't have an Intel
host to try it on right now - but it happens on Ubuntu 8.04
and Ubuntu 8.10 guests, both using kvm-79 and the version of kvm that ships
with Ubuntu 8.10.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348

2008-12-02 Thread Steven Rostedt



On Tue, 2 Dec 2008, Luis Henriques wrote:
 
 Unfortunately, I have only my laptop (where the crash is occuring) and no
 serial port on it (I am not able to get any output from the console).  Do you
 have any suggestion on how to collect information on the crash?  I can try to
 configure Kdump to capture more info.

It is probably caused by what Avi mentioned. I guess the best you can do 
is wait for a patch from Avi and try that out.

Thanks,

-- Steve

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] add ksm kernel shared memory driver.

2008-12-02 Thread Chris Wright
* Alan Cox ([EMAIL PROTECTED]) wrote:
 On Tue, 2 Dec 2008 10:07:24 -0800
 Chris Wright [EMAIL PROTECTED] wrote:
  * Alan Cox ([EMAIL PROTECTED]) wrote:
+   r = !memcmp(old_digest, sha1_item-sha1val, SHA1_DIGEST_SIZE);
+   mutex_unlock(sha1_lock);
+   if (r) {
+   char *old_addr, *new_addr;
+   old_addr = kmap_atomic(oldpage, KM_USER0);
+   new_addr = kmap_atomic(newpage, KM_USER1);
+   r = !memcmp(old_addr+PAGEHASH_LEN, 
new_addr+PAGEHASH_LEN,
+   PAGE_SIZE-PAGEHASH_LEN);
   
   NAK - this isn't guaranteed to be robust so you could end up merging
   different pages one provided by a malicious attacker.
  
  I presume you're referring to the digest comparison.  While there's
  theoretical concern of hash collision, it's mitigated by hmac(sha1)
  so the attacker can't brute force for known collisions.
 
 Using current known techniques. A random collision is just as bad news.

And, just to clarify, your concern would extend to any digest based
comparison?  Or are you specifically concerned about sha1?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] add ksm kernel shared memory driver.

2008-12-02 Thread Jonathan Corbet
On Tue, 2 Dec 2008 13:24:11 -0800
Chris Wright [EMAIL PROTECTED] wrote:

  Using current known techniques. A random collision is just as bad
  news.  
 
 And, just to clarify, your concern would extend to any digest based
 comparison?  Or are you specifically concerned about sha1?

Wouldn't this issue just go away if the code simply compared the full
pages, rather than skipping the hashed 128 bytes at the beginning?
Given the cost of this whole operation (which, it seems, can involve
copying one of the pages before testing for equality), skipping the
comparison of 128 bytes seems like a bit of a premature optimization.

jon
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3 of 6] kvm: ppc: directly insert shadow mappings into the hardware TLB

2008-12-02 Thread Hollis Blanchard
Formerly, we used to maintain a per-vcpu shadow TLB and on every entry to the
guest would load this array into the hardware TLB. This consumed 1280 bytes of
memory (64 entries of 16 bytes plus a struct page pointer each), and also
required some assembly to loop over the array on every entry.

Instead of saving a copy in memory, we can just store shadow mappings directly
into the hardware TLB, accepting that the host kernel will clobber these as
part of the normal 440 TLB round robin. When we do that we need less than half
the memory, and we have decreased the exit handling time for all guest exits,
at the cost of increased number of TLB misses because the host overwrites some
guest entries.

These savings will be increased on processors with larger TLBs or which
implement intelligent flush instructions like tlbivax (which will avoid the
need to walk arrays in software).

In addition to that and to the code simplification, we have a greater chance of
leaving other host userspace mappings in the TLB, instead of forcing all
subsequent tasks to re-fault all their mappings.

Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]

diff --git a/arch/powerpc/include/asm/kvm_44x.h 
b/arch/powerpc/include/asm/kvm_44x.h
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -22,19 +22,25 @@
 
 #include linux/kvm_host.h
 
-/* XXX Can't include mmu-44x.h because it redefines struct mm_context. */
 #define PPC44x_TLB_SIZE 64
+
+/* If the guest is expecting it, this can be as large as we like; we'd just
+ * need to find some way of advertising it. */
+#define KVM44x_GUEST_TLB_SIZE 64
+
+struct kvmppc_44x_shadow_ref {
+   struct page *page;
+   u16 gtlb_index;
+   u8 writeable;
+   u8 tid;
+};
 
 struct kvmppc_vcpu_44x {
/* Unmodified copy of the guest's TLB. */
-   struct kvmppc_44x_tlbe guest_tlb[PPC44x_TLB_SIZE];
-   /* TLB that's actually used when the guest is running. */
-   struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
-   /* Pages which are referenced in the shadow TLB. */
-   struct page *shadow_pages[PPC44x_TLB_SIZE];
+   struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
 
-   /* Track which TLB entries we've modified in the current exit. */
-   u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+   /* References to guest pages in the hardware TLB. */
+   struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
 
struct kvm_vcpu vcpu;
 };
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -53,7 +53,8 @@ extern void kvmppc_emulate_dec(struct kv
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
-   u64 asid, u32 flags, u32 max_bytes);
+   u64 asid, u32 flags, u32 max_bytes,
+   unsigned int gtlb_idx);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -357,12 +357,6 @@ int main(void)
 #ifdef CONFIG_KVM
DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
 
-   DEFINE(VCPU_TO_44X, offsetof(struct kvmppc_vcpu_44x, vcpu));
-   DEFINE(VCPU44x_SHADOW_TLB,
-  offsetof(struct kvmppc_vcpu_44x, shadow_tlb));
-   DEFINE(VCPU44x_SHADOW_MOD,
-  offsetof(struct kvmppc_vcpu_44x, shadow_tlb_mod));
-
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -96,21 +96,14 @@ void kvmppc_core_load_guest_debugstate(s
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-   int i;
-
-   /* Mark every guest entry in the shadow TLB entry modified, so that they
-* will all be reloaded on the next vcpu run (instead of being
-* demand-faulted). */
-   for (i = 0; i = tlb_44x_hwater; i++)
-   kvmppc_tlbe_set_modified(vcpu, i);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-   /* Don't leave guest TLB entries resident when being de-scheduled. */
-   /* XXX It would be nice to differentiate between heavyweight exit and
-* sched_out here, since we could avoid the TLB flush for heavyweight
-* exits. */
+   /* XXX Since every guest uses TS=1 TID=0/1 mappings, we can't leave any 
TLB
+* entries around when we're descheduled, so we must completely flush 
the
+* TLB of all guest mappings. On the other 

[PATCH 4 of 6] kvm: ppc: save and restore guest mappings on context switch

2008-12-02 Thread Hollis Blanchard
Store shadow TLB entries in memory, but only use it on host context switch
(instead of every guest entry). This improves performance for most workloads on
440 by reducing the guest TLB miss rate.

Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]

diff --git a/arch/powerpc/include/asm/kvm_44x.h 
b/arch/powerpc/include/asm/kvm_44x.h
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -42,6 +42,10 @@ struct kvmppc_vcpu_44x {
/* References to guest pages in the hardware TLB. */
struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
 
+   /* State of the shadow TLB at guest context switch time. */
+   struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
+   u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+
struct kvm_vcpu vcpu;
 };
 
@@ -51,5 +55,7 @@ static inline struct kvmppc_vcpu_44x *to
 }
 
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
 
 #endif /* __ASM_44X_H__ */
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -96,15 +96,12 @@ void kvmppc_core_load_guest_debugstate(s
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+   kvmppc_44x_tlb_load(vcpu);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-   /* XXX Since every guest uses TS=1 TID=0/1 mappings, we can't leave any 
TLB
-* entries around when we're descheduled, so we must completely flush 
the
-* TLB of all guest mappings. On the other hand, if there is only one
-* guest, this flush is completely unnecessary. */
-   _tlbia();
+   kvmppc_44x_tlb_put(vcpu);
 }
 
 int kvmppc_core_check_processor_compat(void)
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -73,6 +73,25 @@ static inline void kvmppc_44x_tlbie(unsi
);
 }
 
+static inline void kvmppc_44x_tlbre(unsigned int index,
+struct kvmppc_44x_tlbe *tlbe)
+{
+   asm volatile(
+   tlbre %[word0], %[index], 0\n
+   mfspr %[tid], %[sprn_mmucr]\n
+   andi. %[tid], %[tid], 0xff\n
+   tlbre %[word1], %[index], 1\n
+   tlbre %[word2], %[index], 2\n
+   : [word0] =r(tlbe-word0),
+ [word1] =r(tlbe-word1),
+ [word2] =r(tlbe-word2),
+ [tid]   =r(tlbe-tid)
+   : [index] r(index),
+ [sprn_mmucr] i(SPRN_MMUCR)
+   : cc
+   );
+}
+
 static inline void kvmppc_44x_tlbwe(unsigned int index,
 struct kvmppc_44x_tlbe *stlbe)
 {
@@ -115,6 +134,44 @@ static u32 kvmppc_44x_tlb_shadow_attrib(
 
return attrib;
 }
+
+/* Load shadow TLB back into hardware. */
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
+{
+   struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+   int i;
+
+   for (i = 0; i = tlb_44x_hwater; i++) {
+   struct kvmppc_44x_tlbe *stlbe = vcpu_44x-shadow_tlb[i];
+
+   if (get_tlb_v(stlbe)  get_tlb_ts(stlbe))
+   kvmppc_44x_tlbwe(i, stlbe);
+   }
+}
+
+static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
+ unsigned int i)
+{
+   vcpu_44x-shadow_tlb_mod[i] = 1;
+}
+
+/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
+{
+   struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+   int i;
+
+   for (i = 0; i = tlb_44x_hwater; i++) {
+   struct kvmppc_44x_tlbe *stlbe = vcpu_44x-shadow_tlb[i];
+
+   if (vcpu_44x-shadow_tlb_mod[i])
+   kvmppc_44x_tlbre(i, stlbe);
+
+   if (get_tlb_v(stlbe)  get_tlb_ts(stlbe))
+   kvmppc_44x_tlbie(i);
+   }
+}
+
 
 /* Search the guest TLB for a matching entry. */
 int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
@@ -283,6 +340,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcp
ref-tid = stlbe.tid;
 
/* Insert shadow mapping into hardware TLB. */
+   kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
kvmppc_44x_tlbwe(victim, stlbe);
KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, 
stlbe.word1,
stlbe.word2, handler);
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2 of 6] powerpc/44x: declare tlb_44x_index for use in C code

2008-12-02 Thread Hollis Blanchard
KVM currently ignores the host's round robin TLB eviction selection, instead
maintaining its own TLB state and its own round robin index. However, by
participating in the normal 44x TLB selection, we can drop the alternate TLB
processing in KVM. This results in a significant performance improvement,
since that processing currently must be done on *every* guest exit.

Accordingly, KVM needs to be able to access and increment tlb_44x_index.
(KVM on 440 cannot be a module, so there is no need to export this symbol.)

Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]
Acked-by: Josh Boyer [EMAIL PROTECTED]

diff --git a/arch/powerpc/include/asm/mmu-44x.h 
b/arch/powerpc/include/asm/mmu-44x.h
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -56,6 +56,7 @@
 #ifndef __ASSEMBLY__
 
 extern unsigned int tlb_44x_hwater;
+extern unsigned int tlb_44x_index;
 
 typedef struct {
unsigned long id;
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1 of 6] kvm: ppc: support large host pages

2008-12-02 Thread Hollis Blanchard
KVM on 440 has always been able to handle large guest mappings with 4K host
pages -- we must, since the guest kernel uses 256MB mappings.

This patch makes KVM work when the host has large pages too (tested with 64K).

Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -52,8 +52,8 @@ extern int kvmppc_emulate_mmio(struct kv
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
-extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
-   u64 asid, u32 flags);
+extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+   u64 asid, u32 flags, u32 max_bytes);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -27,6 +27,13 @@
 #include asm/kvm_44x.h
 
 #include 44x_tlb.h
+
+#ifndef PPC44x_TLBE_SIZE
+#define PPC44x_TLBE_SIZE   PPC44x_TLB_4K
+#endif
+
+#define PAGE_SIZE_4K (112)
+#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
 
 #define PPC44x_TLB_UATTR_MASK \
(PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
@@ -179,15 +186,26 @@ void kvmppc_tlbe_set_modified(struct kvm
vcpu_44x-shadow_tlb_mod[i] = 1;
 }
 
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
-u32 flags)
+/**
+ * kvmppc_mmu_map -- create a host mapping for guest memory
+ *
+ * If the guest wanted a larger page than the host supports, only the first
+ * host page is mapped here and the rest are demand faulted.
+ *
+ * If the guest wanted a smaller page than the host page size, we map only the
+ * guest-size page (i.e. not a full host page mapping).
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
+u32 flags, u32 max_bytes)
 {
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct page *new_page;
struct kvmppc_44x_tlbe *stlbe;
hpa_t hpaddr;
+   gfn_t gfn;
unsigned int victim;
 
/* Future optimization: don't overwrite the TLB entry containing the
@@ -198,6 +216,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcp
stlbe = vcpu_44x-shadow_tlb[victim];
 
/* Get reference to new page. */
+   gfn = gpaddr  PAGE_SHIFT;
new_page = gfn_to_page(vcpu-kvm, gfn);
if (is_error_page(new_page)) {
printk(KERN_ERR Couldn't get guest page for gfn %lx!\n, gfn);
@@ -220,10 +239,25 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcp
stlbe-tid = !(asid  0xff);
 
/* Force TS=1 for all guest mappings. */
-   /* For now we hardcode 4KB mappings, but it will be important to
-* use host large pages in the future. */
-   stlbe-word0 = (gvaddr  PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
-  | PPC44x_TLB_4K;
+   stlbe-word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
+
+   if (max_bytes = PAGE_SIZE) {
+   /* Guest mapping is larger than or equal to host page size. We 
can use
+* a native host mapping. */
+   stlbe-word0 |= (gvaddr  PAGE_MASK) | PPC44x_TLBE_SIZE;
+   } else {
+   /* Guest mapping is smaller than host page size. We must 
restrict the
+* size of the mapping to be at most the smaller of the two, 
but for
+* simplicity we fall back to a 4K mapping (this is probably 
what the
+* guest is using anyways). */
+   stlbe-word0 |= (gvaddr  PAGE_MASK_4K) | PPC44x_TLB_4K;
+
+   /* 'hpaddr' is a host page, which is larger than the mapping 
we're
+* inserting here. To compensate, we must add the in-page 
offset to the
+* sub-page. */
+   hpaddr |= gpaddr  (PAGE_MASK ^ PAGE_MASK_4K);
+   }
+
stlbe-word1 = (hpaddr  0xfc00) | ((hpaddr  32)  0xf);
stlbe-word2 = kvmppc_44x_tlb_shadow_attrib(flags,
vcpu-arch.msr  MSR_PR);
@@ -322,10 +356,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcp
 int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 {
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-   u64 eaddr;
-   u64 raddr;
+   gva_t eaddr;
u64 asid;
-   u32 flags;
struct kvmppc_44x_tlbe *tlbe;
unsigned int index;
 
@@ -364,15 +396,22 @@ int 

[PATCH 6 of 6] kvm: ppc: mostly cosmetic updates to the exit timing accounting code

2008-12-02 Thread Hollis Blanchard
The only significant changes were to kvmppc_exit_timing_write() and
kvmppc_exit_timing_show(), both of which were dramatically simplified.

Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -102,9 +102,8 @@ enum kvm_exit_types {
__NUMBER_OF_KVM_EXIT_TYPES
 };
 
-#ifdef CONFIG_KVM_EXIT_TIMING
 /* allow access to big endian 32bit upper/lower parts and 64bit var */
-struct exit_timing {
+struct kvmppc_exit_timing {
union {
u64 tv64;
struct {
@@ -112,7 +111,6 @@ struct exit_timing {
} tv32;
};
 };
-#endif
 
 struct kvm_arch {
 };
@@ -174,8 +172,8 @@ struct kvm_vcpu_arch {
u32 dbcr1;
 
 #ifdef CONFIG_KVM_EXIT_TIMING
-   struct exit_timing timing_exit;
-   struct exit_timing timing_last_enter;
+   struct kvmppc_exit_timing timing_exit;
+   struct kvmppc_exit_timing timing_last_enter;
u32 last_exit_type;
u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -132,7 +132,7 @@ int kvmppc_core_emulate_op(struct kvm_ru
run-dcr.is_write = 0;
vcpu-arch.io_gpr = rt;
vcpu-arch.dcr_needed = 1;
-   account_exit(vcpu, DCR_EXITS);
+   kvmppc_account_exit(vcpu, DCR_EXITS);
emulated = EMULATE_DO_DCR;
}
 
@@ -152,7 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_ru
run-dcr.data = vcpu-arch.gpr[rs];
run-dcr.is_write = 1;
vcpu-arch.dcr_needed = 1;
-   account_exit(vcpu, DCR_EXITS);
+   kvmppc_account_exit(vcpu, DCR_EXITS);
emulated = EMULATE_DO_DCR;
}
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -202,7 +202,7 @@ int kvmppc_handle_exit(struct kvm_run *r
break;
 
case BOOKE_INTERRUPT_EXTERNAL:
-   account_exit(vcpu, EXT_INTR_EXITS);
+   kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
if (need_resched())
cond_resched();
r = RESUME_GUEST;
@@ -212,7 +212,7 @@ int kvmppc_handle_exit(struct kvm_run *r
/* Since we switched IVPR back to the host's value, the host
 * handled this interrupt the moment we enabled interrupts.
 * Now we just offer it a chance to reschedule the guest. */
-   account_exit(vcpu, DEC_EXITS);
+   kvmppc_account_exit(vcpu, DEC_EXITS);
if (need_resched())
cond_resched();
r = RESUME_GUEST;
@@ -225,7 +225,7 @@ int kvmppc_handle_exit(struct kvm_run *r
vcpu-arch.esr = vcpu-arch.fault_esr;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
r = RESUME_GUEST;
-   account_exit(vcpu, USR_PR_INST);
+   kvmppc_account_exit(vcpu, USR_PR_INST);
break;
}
 
@@ -233,7 +233,7 @@ int kvmppc_handle_exit(struct kvm_run *r
switch (er) {
case EMULATE_DONE:
/* don't overwrite subtypes, just account kvm_stats */
-   account_exit_stat(vcpu, EMULATED_INST_EXITS);
+   kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
/* Future optimization: only reload non-volatiles if
 * they were actually modified by emulation. */
r = RESUME_GUEST_NV;
@@ -259,7 +259,7 @@ int kvmppc_handle_exit(struct kvm_run *r
 
case BOOKE_INTERRUPT_FP_UNAVAIL:
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
-   account_exit(vcpu, FP_UNAVAIL);
+   kvmppc_account_exit(vcpu, FP_UNAVAIL);
r = RESUME_GUEST;
break;
 
@@ -267,20 +267,20 @@ int kvmppc_handle_exit(struct kvm_run *r
vcpu-arch.dear = vcpu-arch.fault_dear;
vcpu-arch.esr = vcpu-arch.fault_esr;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
-   account_exit(vcpu, DSI_EXITS);
+   kvmppc_account_exit(vcpu, DSI_EXITS);
r = RESUME_GUEST;
break;
 
case BOOKE_INTERRUPT_INST_STORAGE:

[PATCH 0 of 6] PowerPC KVM patches for 2.6.29

2008-12-02 Thread Hollis Blanchard
Hi Avi, here's the latest batch of PowerPC kernel patches.

The first set dramatically improve performance. Most importantly, we add
support for large host pages with KVM (i.e. PAGE_SHIFT > 12). (Large *guest*
pages have already been supported since day 1, since the guest kernel uses
them for the linear map.) Followup patches further improve performance by
changing how we manage the shadow TLB.

The last two add some accounting code to easily discover performance
bottlenecks. This is especially important since the 440 core lacks performance
monitoring hardware.

These patches, in conjunction with 64KB pages on guest and host, get us to 96%
of native performance for compute-bound workloads, which I'm pretty happy
with. See http://kvm.qumranet.com/kvmwiki/PowerPC_Exittimings for more
details (those statistics were gathered using the accounting patches).

These have been tested pretty thoroughly for several weeks. Please apply for
2.6.29. Thanks!

-Hollis
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5 of 6] kvm: ppc: Implement in-kernel exit timing statistics

2008-12-02 Thread Hollis Blanchard
Existing KVM statistics are either just counters (kvm_stat) reported for
KVM generally or trace-based approaches like kvm_trace.
For KVM on powerpc we had the need to track the timings of the different exit
types. While this could be achieved parsing data created with a kvm_trace
extension this adds too much overhead (at least on embedded PowerPC) slowing
down the workloads we wanted to measure.

Therefore this patch adds an in-kernel exit timing statistic to the powerpc kvm
code. This statistic is available per vm&vcpu under the kvm debugfs directory.
As this statistic adds low, but still some, overhead, it can be enabled via a
.config entry and should be off by default.

Since this patch touched all powerpc kvm_stat code anyway this code is now
merged and simplified together with the exit timing statistic code (still
working with exit timing disabled in .config).

Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED]
Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -71,6 +71,49 @@ struct kvmppc_44x_tlbe {
u32 word2;
 };
 
+enum kvm_exit_types {
+   MMIO_EXITS,
+   DCR_EXITS,
+   SIGNAL_EXITS,
+   ITLB_REAL_MISS_EXITS,
+   ITLB_VIRT_MISS_EXITS,
+   DTLB_REAL_MISS_EXITS,
+   DTLB_VIRT_MISS_EXITS,
+   SYSCALL_EXITS,
+   ISI_EXITS,
+   DSI_EXITS,
+   EMULATED_INST_EXITS,
+   EMULATED_MTMSRWE_EXITS,
+   EMULATED_WRTEE_EXITS,
+   EMULATED_MTSPR_EXITS,
+   EMULATED_MFSPR_EXITS,
+   EMULATED_MTMSR_EXITS,
+   EMULATED_MFMSR_EXITS,
+   EMULATED_TLBSX_EXITS,
+   EMULATED_TLBWE_EXITS,
+   EMULATED_RFI_EXITS,
+   DEC_EXITS,
+   EXT_INTR_EXITS,
+   HALT_WAKEUP,
+   USR_PR_INST,
+   FP_UNAVAIL,
+   DEBUG_EXITS,
+   TIMEINGUEST,
+   __NUMBER_OF_KVM_EXIT_TYPES
+};
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+/* allow access to big endian 32bit upper/lower parts and 64bit var */
+struct exit_timing {
+   union {
+   u64 tv64;
+   struct {
+   u32 tbu, tbl;
+   } tv32;
+   };
+};
+#endif
+
 struct kvm_arch {
 };
 
@@ -130,6 +173,19 @@ struct kvm_vcpu_arch {
u32 dbcr0;
u32 dbcr1;
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+   struct exit_timing timing_exit;
+   struct exit_timing timing_last_enter;
+   u32 last_exit_type;
+   u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
+   u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+   u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+   u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+   u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+   u64 timing_last_exit;
+   struct dentry *debugfs_exit_timing;
+#endif
+
u32 last_inst;
ulong fault_dear;
ulong fault_esr;
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -381,5 +381,16 @@ int main(void)
DEFINE(PTE_SHIFT, PTE_SHIFT);
 #endif
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+   DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
+   arch.timing_exit.tv32.tbu));
+   DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
+   arch.timing_exit.tv32.tbl));
+   DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
+   arch.timing_last_enter.tv32.tbu));
+   DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
+   arch.timing_last_enter.tv32.tbl));
+#endif
+
return 0;
 }
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -22,6 +22,7 @@
 #include asm/dcr-regs.h
 #include asm/disassemble.h
 #include asm/kvm_44x.h
+#include timing.h
 
 #include booke.h
 #include 44x_tlb.h
@@ -58,11 +59,11 @@ int kvmppc_core_emulate_op(struct kvm_ru
int ws;
 
switch (get_op(inst)) {
-
case OP_RFI:
switch (get_xop(inst)) {
case XOP_RFI:
kvmppc_emul_rfi(vcpu);
+   kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
*advance = 0;
break;
 
@@ -78,10 +79,12 @@ int kvmppc_core_emulate_op(struct kvm_ru
case XOP_MFMSR:
rt = get_rt(inst);
vcpu-arch.gpr[rt] = vcpu-arch.msr;
+   kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
break;
 
case XOP_MTMSR:
rs = get_rs(inst);
+   kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
  

Re: [BUG] virtio-pci queue allocation not page-aligned

2008-12-02 Thread Rusty Russell
On Wednesday 03 December 2008 05:38:21 Hollis Blanchard wrote:
 I just spent a number of hours tracking this one down, and I'm not too
 thrilled about it. vp_find_vq() does the memory allocation for virtio
 PCI rings, and it uses kzalloc() to do it. This is bad because the ring
 memory *must* be page-aligned.

 According to Anthony, at the time this code was written, various slab
 allocators were checked and all happened to return page-aligned buffers.
 So how did I hit a problem? I had enabled CONFIG_SLUB_DEBUG_ON while
 investigating an unrelated problem, which offset the address by 64
 bytes.

 One option is to add a BUG_ON(addr & ~PAGE_MASK) to vp_find_vq(). That's
 better than nothing, but still stinks.

It's a bug, we fix it.  I've complained before, but since there was no 
evidence of it actually breaking, I didn't push.

Prepare a patch, I'll try to get it in this release.

Thanks,
Rusty.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] add ksm kernel shared memory driver.

2008-12-02 Thread Alan Cox
On Tue, 2 Dec 2008 13:24:11 -0800
Chris Wright [EMAIL PROTECTED] wrote:

 * Alan Cox ([EMAIL PROTECTED]) wrote:
  On Tue, 2 Dec 2008 10:07:24 -0800
  Chris Wright [EMAIL PROTECTED] wrote:
   * Alan Cox ([EMAIL PROTECTED]) wrote:
 + r = !memcmp(old_digest, sha1_item-sha1val, SHA1_DIGEST_SIZE);
 + mutex_unlock(sha1_lock);
 + if (r) {
 + char *old_addr, *new_addr;
 + old_addr = kmap_atomic(oldpage, KM_USER0);
 + new_addr = kmap_atomic(newpage, KM_USER1);
 + r = !memcmp(old_addr+PAGEHASH_LEN, 
 new_addr+PAGEHASH_LEN,
 + PAGE_SIZE-PAGEHASH_LEN);

NAK - this isn't guaranteed to be robust so you could end up merging
different pages one provided by a malicious attacker.
   
   I presume you're referring to the digest comparison.  While there's
   theoretical concern of hash collision, it's mitigated by hmac(sha1)
   so the attacker can't brute force for known collisions.
  
  Using current known techniques. A random collision is just as bad news.
 
 And, just to clarify, your concern would extend to any digest based
 comparison?  Or are you specifically concerned about sha1?

Taken off list 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] virtio-pci queue allocation not page-aligned

2008-12-02 Thread Hollis Blanchard
On Wed, 2008-12-03 at 08:35 +1030, Rusty Russell wrote:
 On Wednesday 03 December 2008 05:38:21 Hollis Blanchard wrote:
  I just spent a number of hours tracking this one down, and I'm not too
  thrilled about it. vp_find_vq() does the memory allocation for virtio
  PCI rings, and it uses kzalloc() to do it. This is bad because the ring
  memory *must* be page-aligned.
 
  According to Anthony, at the time this code was written, various slab
  allocators were checked and all happened to return page-aligned buffers.
  So how did I hit a problem? I had enabled CONFIG_SLUB_DEBUG_ON while
  investigating an unrelated problem, which offset the address by 64
  bytes.
 
  One option is to add a BUG_ON(addr & ~PAGE_MASK) to vp_find_vq(). That's
  better than nothing, but still stinks.
 
 It's a bug, we fix it.  I've complained before, but since there was no 
 evidence of it actually breaking, I didn't push.
 
 Prepare a patch, I'll try to get it in this release.

virtio: ring queues must be page-aligned

kzalloc() does not guarantee page alignment, and in fact this broke when
I enabled CONFIG_SLUB_DEBUG_ON.

Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]
---
Tested with virtio-blk root filesystem.

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -237,7 +237,8 @@ static struct virtqueue *vp_find_vq(stru
info-queue_index = index;
info-num = num;
 
-   info-queue = kzalloc(PAGE_ALIGN(vring_size(num)), GFP_KERNEL);
+   info-queue = alloc_pages_exact(PAGE_ALIGN(vring_size(num)),
+   GFP_KERNEL|__GFP_ZERO);
if (info-queue == NULL) {
err = -ENOMEM;
goto out_info;


-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Hangs

2008-12-02 Thread xming
 I have a way to reproduce my instance of the problem easily now.   I was 
 trying
 to build a new kernel on my guest,  and found that depmod hangs guests every
 time.
   In my case, I only have an amd processor - I don't have an intel
 host to try it on, right now,  but it happens on Ubuntu 8.04
 and Ubuntu 8.10 guests, both using kvm-79 and the version of kvm that ships
 with ubuntu 8.10.

I have AMD too, vanilla kernel 2.6.27.6 and kvm-79 (although I have
this before 79).
the guest is SMP (UP guests hang too but less frequent).

depmod does not hang here (not reproducible). Heavy CPU + heavy IO on nfs mounts
triggers this on my side.

I have a very subjective feeling that it happens more frequently when
the host has
less uptime (freshly rebooted).
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Hangs

2008-12-02 Thread chris
Sounds like your configuration is very similar to mine.  
I'm also on a vanilla kernel (2.6.27.7 in my case) with kvm-79 and 
AMD processors.

You sparked my curiosity on the depmod -a issue, so I spent some time trying
it on different configurations.

I have two servers:
  * 1.8GHz AMD Opteron 2210 on an HP DL385G2,  AMD Opteron 2210
  * 2.5GHz AMD Athlon 4850e on a green build w/ a Gigabyte GA-MA74Gm-S2 

I can reproduce it on the Gigabyte build with ease.

On the HP DL385G2 server, I could reproduce it on one of my guests but only
once in every four times (and with multiple guests running, not sure if that
made a difference).

Like you, I also have had the feeling that my occasional hangs were more 
likely on a freshly rebooted host, but don't have anything really conclusive to prove it.

I'm afraid I don't have an Intel-based server around right now to see if this
is an AMD-only issue.  I might be able to scrounge up an HP DL380G5 
(with an Intel Core 2) but I'm not sure.

Chris


On Wed, Dec 03, 2008 at 12:01:32AM +0100, xming wrote:
  I have a way to reproduce my instance of the problem easily now.   I was 
  trying
  to build a new kernel on my guest,  and found that depmod hangs guests every
  time.
In my case, I only have an amd processor - I don't have an intel
  host to try it on, right now,  but it happens on Ubuntu 8.04
  and Ubuntu 8.10 guests, both using kvm-79 and the version of kvm that ships
  with ubuntu 8.10.
 
 I have AMD too, vanilla kernel 2.6.27.6 and kvm-79 (although I have
 this before 79).
 the guest is SMP (UP guests hang too but less frequent).
 
 depmod does not hang here (not reproducible). Heavy CPU + heavy IO on nfs 
 mounts
 triggers this on my side.
 
 I have a very subjective feeling that it happens more frequently when
 the host has
 less uptime (freshly rebooted).
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [SR-IOV driver example 0/3 resend] introduction

2008-12-02 Thread Jeff Kirsher
On Tue, Dec 2, 2008 at 1:27 AM, Yu Zhao [EMAIL PROTECTED] wrote:
 SR-IOV drivers of Intel 82576 NIC are available. There are two parts
 of the drivers: Physical Function driver and Virtual Function driver.
 The PF driver is based on the IGB driver and is used to control PF to
 allocate hardware specific resources and interface with the SR-IOV core.
 The VF driver is a new NIC driver that is same as the traditional PCI
 device driver. It works in both the host and the guest (Xen and KVM)
 environment.

 These two drivers are testing versions and they are *only* intended to
 show how to use SR-IOV API.

 Intel 82576 NIC specification can be found at:
 http://download.intel.com/design/network/datashts/82576_Datasheet_v2p1.pdf

 [SR-IOV driver example 0/3 resend] introduction
 [SR-IOV driver example 1/3 resend] PF driver: hardware specific operations
 [SR-IOV driver example 2/3 resend] PF driver: integrate with SR-IOV core
 [SR-IOV driver example 3/3 resend] VF driver: an independent PCI NIC driver
 --


First of all, we (e1000-devel) do support the SR-IOV API.

With that said, NAK on the driver changes.  We were not involved in
these changes and are currently working on a version of the drivers
that will make them acceptable for kernel inclusion.

-- 
Cheers,
Jeff
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] Kvm: Qemu: save nvram

2008-12-02 Thread Zhang, Xiantao
Daniel P. Berrange wrote:
 On Tue, Dec 02, 2008 at 03:01:20PM +0200, Avi Kivity wrote:
 Daniel P. Berrange wrote:
 On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote:
 
 This patch to save the nvram. It save the nvram by specify the arg
 of -name.And the saved file named by the arg. If do not specify
 the arg, it will not save the nvram 
 
 
 I think we might be better off having an explicit command line arg
 for nvram path rather than hardcoding the directory, because there
 may well be times where you want to have nvram saved, but don't
 want to specify -name, and vica-verca. 
 
  -nvram foo.data

If acceptable for upstream, it should be the best choice.  

 could prepend a default directory of $localstatedir/lib/qemu/nvram,
 where $localstatedir  is set from 'configure' script, or
 
  -nvram /some/path/foo.data
 
 would use the explicit path given.
 
 
 I prefer current directory if relative path is given.  Since we
 encourage running qemu as an unprivileged user, and we don't want a
 world-writable directory, each user will have to provide a private
 storage location.
 
 Fine by me - avoids needing to embed any path in QEMU code at all then

So we don't need a default path ? 
Yang, could you address Daniel and Avi's comments in next version ? 

Xiantao

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)

2008-12-02 Thread Greg KH
On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote:
 Hi,
 
 this patch series makes the current KVM device passthrough code generic
 enough so that other IOMMU implementation can also plug into this code.
 It works by factoring the functions Vt-d code exports to KVM into a
 generic interface which allows different backends.
 
 This is the second version of the patchset. The most important change to
 the previous version is that this patchset was rebased to the improved
 API from Han Weidong which supports multiple devices per IOMMU domain.
 
 For completeness, this series also includes the patches from Han with
 some cleanups. So this patchset can be applied on current avi/master
 tree.

Have you tried porting any of the current iommu controllers to this new
framework to see if it works properly for them?

thanks,

greg k-h
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] Kvm: Qemu: save nvram

2008-12-02 Thread Zhang, Yang
OK, I will save it in the current directory, and will not read nvram from a file 
if -nvram is not specified.

Best Regards
--yang
-Original Message-
From: Zhang, Xiantao 
Sent: 2008年12月3日 14:02
To: Daniel P. Berrange; Avi Kivity
Cc: Zhang, Yang; [EMAIL PROTECTED]; kvm@vger.kernel.org
Subject: RE: [PATCH] Kvm: Qemu: save nvram

Daniel P. Berrange wrote:
 On Tue, Dec 02, 2008 at 03:01:20PM +0200, Avi Kivity wrote:
 Daniel P. Berrange wrote:
 On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote:
 
 This patch to save the nvram. It save the nvram by specify the arg
 of -name.And the saved file named by the arg. If do not specify
 the arg, it will not save the nvram 
 
 
 I think we might be better off having an explicit command line arg
 for nvram path rather than hardcoding the directory, because there
 may well be times where you want to have nvram saved, but don't
 want to specify -name, and vica-verca. 
 
  -nvram foo.data

If acceptable for upstream, it should be the best choice.  

 could prepend a default directory of $localstatedir/lib/qemu/nvram,
 where $localstatedir  is set from 'configure' script, or
 
  -nvram /some/path/foo.data
 
 would use the explicit path given.
 
 
 I prefer current directory if relative path is given.  Since we
 encourage running qemu as an unprivileged user, and we don't want a
 world-writable directory, each user will have to provide a private
 storage location.
 
 Fine by me - avoids needing to embed any path in QEMU code at all then

So we don't need a default path ? 
Yang, could you address Daniel and Avi's comments in next version ? 

Xiantao



[PATCH] register page alignment memory for MMIO of assigned device

2008-12-02 Thread Han, Weidong
MMIO of an assigned device is registered as a memory slot. The size of a memory slot in 
KVM must be a multiple of the page size. But the MMIO of some devices (e.g. EHCI controller) 
is not a multiple of the page size, so registering these MMIOs fails, and thus device 
assignment fails. To solve this, we need to register target-page-aligned 
memory for these MMIOs.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 qemu/hw/device-assignment.c |   12 +---
 1 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index eaff09e..4a38a22 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -157,12 +157,18 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, 
int region_num,
 region-e_physbase = e_phys;
 region-e_size = e_size;
 
-if (!first_map)
-   kvm_destroy_phys_mem(kvm_context, old_ephys, old_esize);
+if (!first_map) {
+int slot = get_slot(old_ephys);
+if (slot != -1)
+   kvm_destroy_phys_mem(kvm_context, old_ephys,
+ TARGET_PAGE_ALIGN(old_esize));
+}
 
 if (e_size  0)
ret = kvm_register_phys_mem(kvm_context, e_phys,
-region-u.r_virtbase, e_size, 0);
+region-u.r_virtbase,
+TARGET_PAGE_ALIGN(e_size), 0);
+
 if (ret != 0) {
fprintf(stderr, %s: Error: create new mapping failed\n, __func__);
exit(1);
-- 
1.5.1


0001-register-page-alignment-memory-for-MMIO-of-assigned.patch
Description: 0001-register-page-alignment-memory-for-MMIO-of-assigned.patch


Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)

2008-12-02 Thread Joerg Roedel
On Tue, Dec 02, 2008 at 07:44:05PM -0800, Greg KH wrote:
 On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote:
  Hi,
  
  this patch series makes the current KVM device passthrough code generic
  enough so that other IOMMU implementation can also plug into this code.
  It works by factoring the functions Vt-d code exports to KVM into a
  generic interface which allows different backends.
  
  This is the second version of the patchset. The most important change to
  the previous version is that this patchset was rebased to the improved
  API from Han Weidong which supports multiple devices per IOMMU domain.
  
  For completeness, this series also includes the patches from Han with
  some cleanups. So this patchset can be applied on current avi/master
  tree.
 
 Have you tried porting any of the current iommu controllers to this new
 framework to see if it works properly for them?

It currently works for VT-d. I am also porting it to AMD IOMMU right now. With
some extensions (offset for start address, flags and size limitation) it
is also suitable for IOMMUs like GART or similar ones.

Joerg
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)

2008-12-02 Thread Greg KH
On Wed, Dec 03, 2008 at 08:50:49AM +0100, Joerg Roedel wrote:
 On Tue, Dec 02, 2008 at 07:44:05PM -0800, Greg KH wrote:
  On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote:
   Hi,
   
   this patch series makes the current KVM device passthrough code generic
   enough so that other IOMMU implementation can also plug into this code.
   It works by factoring the functions Vt-d code exports to KVM into a
   generic interface which allows different backends.
   
   This is the second version of the patchset. The most important change to
   the previous version is that this patchset was rebased to the improved
   API from Han Weidong which supports multiple devices per IOMMU domain.
   
   For completeness, this series also includes the patches from Han with
   some cleanups. So this patchset can be applied on current avi/master
   tree.
  
  Have you tried porting any of the current iommu controllers to this new
  framework to see if it works properly for them?
 
 It currently works for VT-d. I am also porting it to AMD IOMMU right now. With
 some extensions (offset for start address, flags and size limitation) it
 is also suitable for IOMMUs like GART or similar ones.

What about the Calgary chipset?

thanks,

greg k-h
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] qemu: ppc: fix build warnings

2008-12-02 Thread Hollis Blanchard
Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]
---
 qemu/hw/device_tree.c |   14 +++---
 qemu/hw/device_tree.h |   12 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/qemu/hw/device_tree.c b/qemu/hw/device_tree.c
index e73129d..2621ff1 100644
--- a/qemu/hw/device_tree.c
+++ b/qemu/hw/device_tree.c
@@ -31,7 +31,7 @@
 /* This function reads device-tree property files that are of
  * a single cell size
  */
-uint32_t read_proc_dt_prop_cell(char *path_in_device_tree)
+uint32_t read_proc_dt_prop_cell(const char *path_in_device_tree)
 {
char *buf = NULL;
int i;
@@ -65,7 +65,7 @@ uint32_t read_proc_dt_prop_cell(char *path_in_device_tree)
 
 #ifdef CONFIG_LIBFDT
 /* support functions */
-static int get_offset_of_node(void *fdt, char *node_path)
+static int get_offset_of_node(void *fdt, const char *node_path)
 {
int node_offset;
node_offset = fdt_path_offset(fdt, node_path);
@@ -78,7 +78,7 @@ static int get_offset_of_node(void *fdt, char *node_path)
 }
 
 /* public functions */
-void *load_device_tree(char *filename_path, unsigned long load_addr)
+void *load_device_tree(const char *filename_path, unsigned long load_addr)
 {
int dt_file_size;
int dt_file_load_size;
@@ -134,7 +134,7 @@ fail:
return NULL;
 }
 
-void dump_device_tree_to_file(void *fdt, char *filename)
+void dump_device_tree_to_file(void *fdt, const char *filename)
 {
int fd;
fd = open(filename, O_RDWR|O_CREAT, O_RDWR);
@@ -148,7 +148,7 @@ void dump_device_tree_to_file(void *fdt, char *filename)
close(fd);
 }
 
-void dt_cell(void *fdt, char *node_path, char *property,
+void dt_cell(void *fdt, const char *node_path, const char *property,
uint32_t val)
 {
int offset;
@@ -163,7 +163,7 @@ void dt_cell(void *fdt, char *node_path, char *property,
 }
 
 /* This function is to manipulate a cell with multiple values */
-void dt_cell_multi(void *fdt, char *node_path, char *property,
+void dt_cell_multi(void *fdt, const char *node_path, const char *property,
uint32_t *val_array, int size)
 {
int offset;
@@ -177,7 +177,7 @@ void dt_cell_multi(void *fdt, char *node_path, char 
*property,
}
 }
 
-void dt_string(void *fdt, char *node_path, char *property,
+void dt_string(void *fdt, const char *node_path, const char *property,
char *string)
 {
int offset;
diff --git a/qemu/hw/device_tree.h b/qemu/hw/device_tree.h
index 05a81ef..a311309 100644
--- a/qemu/hw/device_tree.h
+++ b/qemu/hw/device_tree.h
@@ -11,16 +11,16 @@
  */
 
 /* device-tree proc support functions */
-uint32_t read_proc_dt_prop_cell(char *path_in_device_tree);
+uint32_t read_proc_dt_prop_cell(const char *path_in_device_tree);
 
 #ifdef CONFIG_LIBFDT
 /* device tree functions */
-void *load_device_tree(char *filename_path, target_ulong load_addr);
-void dump_device_tree_to_file(void *fdt, char *filename);
-void dt_cell(void *fdt, char *node_path, char *property,
+void *load_device_tree(const char *filename_path, target_ulong load_addr);
+void dump_device_tree_to_file(void *fdt, const char *filename);
+void dt_cell(void *fdt, const char *node_path, const char *property,
uint32_t val);
-void dt_cell_multi(void *fdt, char *node_path, char *property,
+void dt_cell_multi(void *fdt, const char *node_path, const char *property,
uint32_t *val_array, int size);
-void dt_string(void *fdt, char *node_path, char *property,
+void dt_string(void *fdt, const char *node_path, const char *property,
char *string);
 #endif
-- 
1.5.6.5

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] qemu: ppc: fix build after qemu upstream changes

2008-12-02 Thread Hollis Blanchard
Signed-off-by: Hollis Blanchard [EMAIL PROTECTED]
---
 qemu/hw/ppc440_bamboo.c |   37 +
 1 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c
index bf42245..79e4ea8 100644
--- a/qemu/hw/ppc440_bamboo.c
+++ b/qemu/hw/ppc440_bamboo.c
@@ -38,13 +38,14 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size,
qemu_irq *pic;
ppc4xx_pci_t *pci;
CPUState *env;
-   uint64_t ep=0;
-   uint64_t la=0;
-   int is_linux=1; /* Will assume allways is Linux for now */
-   target_long kernel_size=0;
-   target_ulong initrd_base=0;
-   target_long initrd_size=0;
-   target_ulong dt_base=0;
+uint64_t elf_entry;
+uint64_t elf_lowaddr;
+   target_ulong entry = 0;
+   target_ulong loadaddr = 0;
+   target_long kernel_size = 0;
+   target_ulong initrd_base = 0;
+   target_long initrd_size = 0;
+   target_ulong dt_base = 0;
void *fdt;
int ret;
int ram_stick_sizes[] = {25620, 12820, 6420,
@@ -105,20 +106,24 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size,
 
/* load kernel with uboot loader */
printf(%s: load kernel\n, __func__);
-   ret = load_uimage(kernel_filename, ep, la, kernel_size, is_linux);
-   if (ret  0)
-   ret = load_elf(kernel_filename, 0, ep, la, NULL);
-
-   if (ret  0) {
+   kernel_size = load_uimage(kernel_filename, entry, loadaddr, NULL);
+   if (kernel_size  0) {
+   kernel_size = load_elf(kernel_filename, 0, elf_entry, 
elf_lowaddr,
+  NULL);
+entry = elf_entry;
+loadaddr = elf_lowaddr;
+}
+
+   if (kernel_size  0) {
fprintf(stderr, qemu: could not load kernel '%s'\n,
kernel_filename);
exit(1);
}
-   printf(kernel is at guest address: 0x%lx\n, (unsigned long)la);
+   printf(kernel is at guest address: 0x%lx\n, (unsigned long)loadaddr);
 
/* load initrd */
if (initrd_filename) {
-   initrd_base = kernel_size + la;
+   initrd_base = kernel_size + loadaddr;
printf(%s: load initrd\n, __func__);
initrd_size = load_image(initrd_filename,
phys_ram_base + initrd_base);
@@ -156,7 +161,7 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size,
if (initrd_base)
dt_base = initrd_base + initrd_size;
else
-   dt_base = kernel_size + la;
+   dt_base = kernel_size + loadaddr;
 
fdt = load_device_tree(buf, (unsigned long)(phys_ram_base + dt_base));
if (fdt == NULL) {
@@ -188,7 +193,7 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size,
/* location of device tree in register */
env-gpr[3] = dt_base;
 #endif
-   env-nip = ep;
+   env-nip = entry;
}
 
if (pci) {
-- 
1.5.6.5

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


powerpc kvm-userspace build fixes

2008-12-02 Thread Hollis Blanchard

These patches fix the kvm-userspace qemu build after a recent merge with
upstream qemu.

I'm also seeing a build dependency issue with dyngen-opc.h that I don't see
upstream. I haven't sorted that out yet, but running
"make qemu/ppcemb-softmmu/dyngen-opc.h" first works around the problem.

-Hollis
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html