Re: [Qemu-devel] [PATCH v2] Fix guest OS hang when 64bit PCI bar present

2013-02-24 Thread Alexey Korolev
>> This patch addresses the issue fully described here:
>> http://lists.nongnu.org/archive/html/qemu-devel/2013-02/msg01804.html
>> 
>> Linux kernels prior to 2.6.36 do not disable the PCI device during
>> enumeration process. Since lower and higher parts of a 64bit BAR
>> are programmed separately this leads to qemu receiving a request to occupy
>> a completely wrong address region for a short period of time.
>> We have found that the boot process screws up completely if kvm-apic range
>> is overlapped even for a short period of time (it is fine for other
>> regions though).
> 
> So the issue it that we hide the MSI target region from the device
> models for a "short periods of time"? How short is this? It would be
>good to fully understand what goes "completely wrong" to avoid that we
> miss to fix something else (IO-APIC, HPET) or even paper over a problem
> of the memory subsystem.
As far as I can understand the region corruption and recovery happens within
one call of pci_read_bases of linux kernel enumeration:
(http://lxr.linux.no/#linux+v2.6.21/drivers/pci/probe.c#L173)
Unfortunately I cannot even guess what is happening with kvm-apic-msi, or why
it does not tolerate overlaps. This is actually a still open qestion.

>> 
>> This patch raises the priority of the kvm-apic memory region, so it is
>> never pushed out by PCI devices. The patch is quite safe as it does not
>> touch memory manager.

I have fixed styling issues. Please use this version.

Signed-off-by: Alexey Korolev 
Signed-off-by: Michael S. Tsirkin 
---
 hw/sysbus.c   |   26 ++
 hw/sysbus.h   |2 ++
 target-i386/cpu.c |3 ++-
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/hw/sysbus.c b/hw/sysbus.c
index 6d9d1df..242eb1e 100644
--- a/hw/sysbus.c
+++ b/hw/sysbus.c
@@ -48,7 +48,8 @@ void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq 
irq)
 }
 }
 
-void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
+static void sysbus_mmio_map_common(SysBusDevice *dev, int n, hwaddr addr,
+   bool may_overlap, unsigned priority)
 {
 assert(n >= 0 && n < dev->num_mmio);
 
@@ -61,11 +62,28 @@ void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
 memory_region_del_subregion(get_system_memory(), dev->mmio[n].memory);
 }
 dev->mmio[n].addr = addr;
-memory_region_add_subregion(get_system_memory(),
-addr,
-dev->mmio[n].memory);
+if (may_overlap) {
+memory_region_add_subregion_overlap(get_system_memory(),
+addr,
+dev->mmio[n].memory,
+priority);
+} else {
+memory_region_add_subregion(get_system_memory(),
+addr,
+dev->mmio[n].memory);
+}
+}
+
+void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
+{
+sysbus_mmio_map_common(dev, n, addr, false, 0);
 }
 
+void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
+ unsigned priority)
+{
+sysbus_mmio_map_common(dev, n, addr, true, priority);
+}
 
 /* Request an IRQ source.  The actual IRQ object may be populated later.  */
 void sysbus_init_irq(SysBusDevice *dev, qemu_irq *p)
diff --git a/hw/sysbus.h b/hw/sysbus.h
index a7fcded..2100bd7 100644
--- a/hw/sysbus.h
+++ b/hw/sysbus.h
@@ -56,6 +56,8 @@ void sysbus_init_ioports(SysBusDevice *dev, pio_addr_t 
ioport, pio_addr_t size);
 
 void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq);
 void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr);
+void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
+ unsigned priority);
 void sysbus_add_memory(SysBusDevice *dev, hwaddr addr,
MemoryRegion *mem);
 void sysbus_add_memory_overlap(SysBusDevice *dev, hwaddr addr,
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5582e5f..4b72094 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2088,7 +2088,8 @@ static void x86_cpu_apic_init(X86CPU *cpu, Error **errp)
 /* NOTE: the APIC is directly connected to the CPU - it is not
on the global memory bus. */
 /* XXX: what if the base changes? */
-sysbus_mmio_map(SYS_BUS_DEVICE(env->apic_state), 0, MSI_ADDR_BASE);
+sysbus_mmio_map_overlap(SYS_BUS_DEVICE(env->apic_state), 0,
+MSI_ADDR_BASE, 1000);
 apic_mapped = 1;
 }
 }
-- 
1.7.9.5




Re: [Qemu-devel] [PATCH v2] Fix guest OS hang when 64bit PCI bar present

2013-02-23 Thread Michael S. Tsirkin
On Fri, Feb 22, 2013 at 04:58:44PM +1300, Alexey Korolev wrote:
> This patch addresses the issue fully described here:
> http://lists.nongnu.org/archive/html/qemu-devel/2013-02/msg01804.html
> 
> Linux kernels prior to 2.6.36 do not disable the PCI device during
> enumeration process. Since lower and higher parts of a 64bit BAR
> are programmed separately this leads to qemu receiving a request to occupy
> a completely wrong address region for a short period of time.
> We have found that the boot process screws up completely if kvm-apic range
> is overlapped even for a short period of time (it is fine for other
> regions though).
> 
> This patch raises the priority of the kvm-apic memory region, so it is
> never pushed out by PCI devices. The patch is quite safe as it does not
> touch memory manager.
> 
> Signed-off-by: Alexey Korolev 
> Signed-off-by: Michael S. Tsirkin 

I agree we need to look closer at the modeling, but
this looks like the right thing to do for 1.4 stable branch.
So let's take it.

Acked-by: Michael S. Tsirkin 


> ---
>  hw/sysbus.c   |   27 +++
>  hw/sysbus.h   |2 ++
>  target-i386/cpu.c |3 ++-
>  3 files changed, 27 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/sysbus.c b/hw/sysbus.c
> index 6d9d1df..50c7232 100644
> --- a/hw/sysbus.c
> +++ b/hw/sysbus.c
> @@ -48,7 +48,8 @@ void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq 
> irq)
>  }
>  }
>  
> -void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
> +static void sysbus_mmio_map_common(SysBusDevice *dev, int n, hwaddr addr,
> +   bool may_overlap, unsigned priority)
>  {
>  assert(n >= 0 && n < dev->num_mmio);
>  
> @@ -61,11 +62,29 @@ void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr 
> addr)
>  memory_region_del_subregion(get_system_memory(), 
> dev->mmio[n].memory);
>  }
>  dev->mmio[n].addr = addr;
> -memory_region_add_subregion(get_system_memory(),
> -addr,
> -dev->mmio[n].memory);
> +if (may_overlap) {
> +memory_region_add_subregion_overlap(get_system_memory(),
> +addr,
> +dev->mmio[n].memory,
> +priority);
> +}
> +else {
> +memory_region_add_subregion(get_system_memory(),
> +addr,
> +dev->mmio[n].memory);
> +}
>  }
>  
> +void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
> +{
> +sysbus_mmio_map_common(dev, n, addr, false, 0);
> +}
> +
> +void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
> + unsigned priority)
> +{
> +sysbus_mmio_map_common(dev, n, addr, true, priority);
> +}
>  
>  /* Request an IRQ source.  The actual IRQ object may be populated later.  */
>  void sysbus_init_irq(SysBusDevice *dev, qemu_irq *p)
> diff --git a/hw/sysbus.h b/hw/sysbus.h
> index a7fcded..2100bd7 100644
> --- a/hw/sysbus.h
> +++ b/hw/sysbus.h
> @@ -56,6 +56,8 @@ void sysbus_init_ioports(SysBusDevice *dev, pio_addr_t 
> ioport, pio_addr_t size);
>  
>  void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq);
>  void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr);
> +void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
> + unsigned priority);
>  void sysbus_add_memory(SysBusDevice *dev, hwaddr addr,
> MemoryRegion *mem);
>  void sysbus_add_memory_overlap(SysBusDevice *dev, hwaddr addr,
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index dfcf86e..4cf27eb 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -2078,7 +2078,8 @@ static void x86_cpu_apic_init(X86CPU *cpu, Error **errp)
>  /* NOTE: the APIC is directly connected to the CPU - it is not
> on the global memory bus. */
>  /* XXX: what if the base changes? */
> -sysbus_mmio_map(SYS_BUS_DEVICE(env->apic_state), 0, MSI_ADDR_BASE);
> +sysbus_mmio_map_overlap(SYS_BUS_DEVICE(env->apic_state), 0,
> +MSI_ADDR_BASE, 0x1000);
>  apic_mapped = 1;
>  }
>  }
> -- 
> 1.7.9.5



Re: [Qemu-devel] [PATCH v2] Fix guest OS hang when 64bit PCI bar present

2013-02-22 Thread Jan Kiszka
On 2013-02-22 04:58, Alexey Korolev wrote:
> This patch addresses the issue fully described here:
> http://lists.nongnu.org/archive/html/qemu-devel/2013-02/msg01804.html
> 
> Linux kernels prior to 2.6.36 do not disable the PCI device during
> enumeration process. Since lower and higher parts of a 64bit BAR
> are programmed separately this leads to qemu receiving a request to occupy
> a completely wrong address region for a short period of time.
> We have found that the boot process screws up completely if kvm-apic range
> is overlapped even for a short period of time (it is fine for other
> regions though).

So the issue it that we hide the MSI target region from the device
models for a "short periods of time"? How short is this? It would be
good to fully understand what goes "completely wrong" to avoid that we
miss to fix something else (IO-APIC, HPET) or even paper over a problem
of the memory subsystem.

> 
> This patch raises the priority of the kvm-apic memory region, so it is
> never pushed out by PCI devices. The patch is quite safe as it does not
> touch memory manager.
> 
> Signed-off-by: Alexey Korolev 
> Signed-off-by: Michael S. Tsirkin 
> ---
>  hw/sysbus.c   |   27 +++
>  hw/sysbus.h   |2 ++
>  target-i386/cpu.c |3 ++-
>  3 files changed, 27 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/sysbus.c b/hw/sysbus.c
> index 6d9d1df..50c7232 100644
> --- a/hw/sysbus.c
> +++ b/hw/sysbus.c
> @@ -48,7 +48,8 @@ void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq 
> irq)
>  }
>  }
>  
> -void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
> +static void sysbus_mmio_map_common(SysBusDevice *dev, int n, hwaddr addr,
> +   bool may_overlap, unsigned priority)
>  {
>  assert(n >= 0 && n < dev->num_mmio);
>  
> @@ -61,11 +62,29 @@ void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr 
> addr)
>  memory_region_del_subregion(get_system_memory(), 
> dev->mmio[n].memory);
>  }
>  dev->mmio[n].addr = addr;
> -memory_region_add_subregion(get_system_memory(),
> -addr,
> -dev->mmio[n].memory);
> +if (may_overlap) {
> +memory_region_add_subregion_overlap(get_system_memory(),
> +addr,
> +dev->mmio[n].memory,
> +priority);
> +}
> +else {

Coding style: "} else {"

> +memory_region_add_subregion(get_system_memory(),
> +addr,
> +dev->mmio[n].memory);
> +}
>  }
>  
> +void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
> +{
> +sysbus_mmio_map_common(dev, n, addr, false, 0);
> +}
> +
> +void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
> + unsigned priority)
> +{
> +sysbus_mmio_map_common(dev, n, addr, true, priority);
> +}
>  
>  /* Request an IRQ source.  The actual IRQ object may be populated later.  */
>  void sysbus_init_irq(SysBusDevice *dev, qemu_irq *p)
> diff --git a/hw/sysbus.h b/hw/sysbus.h
> index a7fcded..2100bd7 100644
> --- a/hw/sysbus.h
> +++ b/hw/sysbus.h
> @@ -56,6 +56,8 @@ void sysbus_init_ioports(SysBusDevice *dev, pio_addr_t 
> ioport, pio_addr_t size);
>  
>  void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq);
>  void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr);
> +void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
> + unsigned priority);
>  void sysbus_add_memory(SysBusDevice *dev, hwaddr addr,
> MemoryRegion *mem);
>  void sysbus_add_memory_overlap(SysBusDevice *dev, hwaddr addr,
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index dfcf86e..4cf27eb 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -2078,7 +2078,8 @@ static void x86_cpu_apic_init(X86CPU *cpu, Error **errp)
>  /* NOTE: the APIC is directly connected to the CPU - it is not
> on the global memory bus. */
>  /* XXX: what if the base changes? */
> -sysbus_mmio_map(SYS_BUS_DEVICE(env->apic_state), 0, MSI_ADDR_BASE);
> +sysbus_mmio_map_overlap(SYS_BUS_DEVICE(env->apic_state), 0,
> +MSI_ADDR_BASE, 0x1000);
  ^^
Minor, but "0x" makes this look like an address.

>  apic_mapped = 1;
>  }
>  }
> 

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [PATCH v2] Fix guest OS hang when 64bit PCI bar present

2013-02-22 Thread Peter Maydell
On 22 February 2013 03:58, Alexey Korolev  wrote:
> This patch addresses the issue fully described here:
> http://lists.nongnu.org/archive/html/qemu-devel/2013-02/msg01804.html
>
> Linux kernels prior to 2.6.36 do not disable the PCI device during
> enumeration process. Since lower and higher parts of a 64bit BAR
> are programmed separately this leads to qemu receiving a request to occupy
> a completely wrong address region for a short period of time.
> We have found that the boot process screws up completely if kvm-apic range
> is overlapped even for a short period of time (it is fine for other
> regions though).
>
> This patch raises the priority of the kvm-apic memory region, so it is
> never pushed out by PCI devices. The patch is quite safe as it does not
> touch memory manager.
>
> Signed-off-by: Alexey Korolev 
> Signed-off-by: Michael S. Tsirkin 

Reviewed-by: Peter Maydell 
as far as the sysbus changes go. I can't comment on whether the
change to the PC emulation is correct or not but it looks
vaguely plausible.

thanks
-- PMM



[Qemu-devel] [PATCH v2] Fix guest OS hang when 64bit PCI bar present

2013-02-21 Thread Alexey Korolev
This patch addresses the issue fully described here:
http://lists.nongnu.org/archive/html/qemu-devel/2013-02/msg01804.html

Linux kernels prior to 2.6.36 do not disable the PCI device during
enumeration process. Since lower and higher parts of a 64bit BAR
are programmed separately this leads to qemu receiving a request to occupy
a completely wrong address region for a short period of time.
We have found that the boot process screws up completely if kvm-apic range
is overlapped even for a short period of time (it is fine for other
regions though).

This patch raises the priority of the kvm-apic memory region, so it is
never pushed out by PCI devices. The patch is quite safe as it does not
touch memory manager.

Signed-off-by: Alexey Korolev 
Signed-off-by: Michael S. Tsirkin 
---
 hw/sysbus.c   |   27 +++
 hw/sysbus.h   |2 ++
 target-i386/cpu.c |3 ++-
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/hw/sysbus.c b/hw/sysbus.c
index 6d9d1df..50c7232 100644
--- a/hw/sysbus.c
+++ b/hw/sysbus.c
@@ -48,7 +48,8 @@ void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq 
irq)
 }
 }
 
-void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
+static void sysbus_mmio_map_common(SysBusDevice *dev, int n, hwaddr addr,
+   bool may_overlap, unsigned priority)
 {
 assert(n >= 0 && n < dev->num_mmio);
 
@@ -61,11 +62,29 @@ void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
 memory_region_del_subregion(get_system_memory(), dev->mmio[n].memory);
 }
 dev->mmio[n].addr = addr;
-memory_region_add_subregion(get_system_memory(),
-addr,
-dev->mmio[n].memory);
+if (may_overlap) {
+memory_region_add_subregion_overlap(get_system_memory(),
+addr,
+dev->mmio[n].memory,
+priority);
+}
+else {
+memory_region_add_subregion(get_system_memory(),
+addr,
+dev->mmio[n].memory);
+}
 }
 
+void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
+{
+sysbus_mmio_map_common(dev, n, addr, false, 0);
+}
+
+void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
+ unsigned priority)
+{
+sysbus_mmio_map_common(dev, n, addr, true, priority);
+}
 
 /* Request an IRQ source.  The actual IRQ object may be populated later.  */
 void sysbus_init_irq(SysBusDevice *dev, qemu_irq *p)
diff --git a/hw/sysbus.h b/hw/sysbus.h
index a7fcded..2100bd7 100644
--- a/hw/sysbus.h
+++ b/hw/sysbus.h
@@ -56,6 +56,8 @@ void sysbus_init_ioports(SysBusDevice *dev, pio_addr_t 
ioport, pio_addr_t size);
 
 void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq);
 void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr);
+void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
+ unsigned priority);
 void sysbus_add_memory(SysBusDevice *dev, hwaddr addr,
MemoryRegion *mem);
 void sysbus_add_memory_overlap(SysBusDevice *dev, hwaddr addr,
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index dfcf86e..4cf27eb 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2078,7 +2078,8 @@ static void x86_cpu_apic_init(X86CPU *cpu, Error **errp)
 /* NOTE: the APIC is directly connected to the CPU - it is not
on the global memory bus. */
 /* XXX: what if the base changes? */
-sysbus_mmio_map(SYS_BUS_DEVICE(env->apic_state), 0, MSI_ADDR_BASE);
+sysbus_mmio_map_overlap(SYS_BUS_DEVICE(env->apic_state), 0,
+MSI_ADDR_BASE, 0x1000);
 apic_mapped = 1;
 }
 }
-- 
1.7.9.5