Re: [RFC][PATCH 1/2] Make irq0-inti2 override in BIOS configurable from userspace

2009-01-24 Thread Marcelo Tosatti
Hi Beth,

On Thu, Jan 22, 2009 at 10:29:36PM -0600, Beth Kon wrote:
 This series of patches (nearly) resolves the irq0-inti2 override issue, and 
 gets the hpet working on kvm with and
 without the in-kernel irqchip (i.e., it disables userspace and in-kernel pit 
 as needed).
 
 - irq0-inti2
   The resolution was to always use the override unless the kernel cannot do 
 irq routing (i.e., compatibility with old
   kernels). So qemu checks whether the kernel is capable of irq routing. If 
 so, qemu tells kvm to route irq0 to 
   inti2 via the irq routing interface, and tells bios to add the irq0-inti2 
 override to the MADT interrupt source 
   override table, and to the mp table (for the non-acpi case). The only 
 outstanding problem here is that when I set 
   acpi=off on the kernel boot line, the boot fails. Apparently linux does not 
 like the way I implemented the override 
   for the mp table in rombios32.c. Since I am pressed for time at the moment, 
 I'm putting this patch set out for comments 
   in hopes that someone else may immediately see the problem. Otherwise  I'll 
 keep looking into it as time permits.

The mptable is not being initialized as you intented because
irq0override is read from fw cfg after mptable initialization (oops).

Also the kernel irq ack notifiers lack knowledge about the routing
table, assuming identity mapping.

See attached patches (qemu-kvm.c change avoids the creation of two
routing entries for pin 2 pointing to different gsi's, 0 and 2).

RHEL5 guests complain about Invalid checksum, so there's still
something wrong in either acpi or mptable setup. I'll try to look
into this next week if nobody beats me.

BIOS-e820: fffbc000 - 0001 (reserved)
DMI 2.4 present.
   ERROR: Invalid checksum
No NUMA configuration found

Great work Beth, thanks!


Index: kvm-userspace.tip/bios/rombios32.c
===
--- kvm-userspace.tip.orig/bios/rombios32.c
+++ kvm-userspace.tip/bios/rombios32.c
@@ -2299,14 +2299,14 @@ void rombios32_init(uint32_t *s3_resume_
 
 if (bios_table_cur_addr != 0) {
 
+irq0_override_probe();
+
 mptable_init();
 
 uuid_probe();
 
 smbios_init();
  
-irq0_override_probe();
-
 if (acpi_enabled)
 acpi_bios_init();
 
Index: kvm-userspace.tip/qemu/qemu-kvm.c
===
--- kvm-userspace.tip.orig/qemu/qemu-kvm.c
+++ kvm-userspace.tip/qemu/qemu-kvm.c
@@ -817,7 +817,7 @@ int kvm_qemu_create_context(void)
 for (i = 0; i  24; ++i) {
 if (i == 0)
 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, 2);
-else
+else if (i != 2)
 r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i);
 if (r  0)
 return r;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0..9316037 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -49,7 +49,8 @@ static void pic_unlock(struct kvm_pic *s)
spin_unlock(s-lock);
 
while (acks) {
-   kvm_notify_acked_irq(kvm, __ffs(acks));
+   kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
+__ffs(acks));
acks = acks - 1;
}
 
@@ -232,7 +233,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
}
pic_update_irq(s);
pic_unlock(s);
-   kvm_notify_acked_irq(kvm, irq);
+   kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
 
return intno;
 }
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee..9f59318 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -32,6 +32,8 @@
 #include lapic.h
 
 #define PIC_NUM_PINS 16
+#define SELECT_PIC(irq) \
+   ((irq)  8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
 
 struct kvm;
 struct kvm_vcpu;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce285e0..c03a0a9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -352,7 +352,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int 
irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
 void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian);
 void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index e85a2bc..1c986ac 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -293,20 +293,20 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int 
irq, int level)
}
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi,
+static void 

[RFC][PATCH 1/2] Make irq0-inti2 override in BIOS configurable from userspace

2009-01-22 Thread Beth Kon
This series of patches (nearly) resolves the irq0-inti2 override issue, and 
gets the hpet working on kvm with and
without the in-kernel irqchip (i.e., it disables userspace and in-kernel pit as 
needed).

- irq0-inti2
  The resolution was to always use the override unless the kernel cannot do irq 
routing (i.e., compatibility with old
  kernels). So qemu checks whether the kernel is capable of irq routing. If so, 
qemu tells kvm to route irq0 to 
  inti2 via the irq routing interface, and tells bios to add the irq0-inti2 
override to the MADT interrupt source 
  override table, and to the mp table (for the non-acpi case). The only 
outstanding problem here is that when I set 
  acpi=off on the kernel boot line, the boot fails. Apparently linux does not 
like the way I implemented the override 
  for the mp table in rombios32.c. Since I am pressed for time at the moment, 
I'm putting this patch set out for comments 
  in hopes that someone else may immediately see the problem. Otherwise  I'll 
keep looking into it as time permits.

- hpet
  The hpet works with and without in-kernel irqchip. And many thanks to Marcelo 
for finding a bios corruption bug that
  was the primary source of win2k864 problems. Now the hpet works on linux 
(ubuntu 8.0.4), win2k832. On win2k864 it works
  with the in-kernel irqchip but is broken (i.e.,black screen) when 
-no-kvm-irqchip is specified. Though I found that 
  it is also broken when I remove these 2 patches, so it appears to have 
nothing to do with hpet or irq routing. Needs 
  more looking into.


Signed-off-by: Beth Kon e...@us.ibm.com
---
 bios/Makefile|2 +-
 bios/rombios32.c |   40 
 qemu/hw/apic.c   |5 ++---
 qemu/hw/fw_cfg.c |1 +
 qemu/hw/fw_cfg.h |1 +
 qemu/qemu-kvm.c  |5 -
 qemu/sysemu.h|1 +
 qemu/vl.c|   10 --
 8 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/bios/Makefile b/bios/Makefile
index 2d1f40d..374d70e 100644
--- a/bios/Makefile
+++ b/bios/Makefile
@@ -48,7 +48,7 @@ LIBS =  -lm
 RANLIB = ranlib
 
 BCC = bcc
-GCC = gcc $(CFLAGS)
+GCC = gcc $(CFLAGS) -fno-stack-protector
 HOST_CC = gcc
 AS86 = as86
 
diff --git a/bios/rombios32.c b/bios/rombios32.c
index 9d2eaaa..84f15fb 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -443,6 +443,7 @@ uint32_t cpuid_ext_features;
 unsigned long ram_size;
 uint64_t ram_end;
 uint8_t bios_uuid[16];
+uint8_t irq0_override;
 #ifdef BX_USE_EBDA_TABLES
 unsigned long ebda_cur_addr;
 #endif
@@ -475,6 +476,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
 #define QEMU_CFG_SIGNATURE  0x00
 #define QEMU_CFG_ID 0x01
 #define QEMU_CFG_UUID   0x02
+#define QEMU_CFG_IRQ0_OVERRIDE 0x07
 
 int qemu_cfg_port;
 
@@ -516,6 +518,18 @@ void uuid_probe(void)
 memset(bios_uuid, 0, 16);
 }
 
+void irq0_override_probe(void)
+{
+#ifdef BX_QEMU
+if(qemu_cfg_port) {
+qemu_cfg_select(QEMU_CFG_IRQ0_OVERRIDE);
+qemu_cfg_read(irq0_override, 1);
+return;
+}
+#endif
+memset(irq0_override, 0, 1);
+}
+
 void cpu_probe(void)
 {
 uint32_t eax, ebx, ecx, edx;
@@ -1149,6 +1163,8 @@ static void mptable_init(void)
 
 /* irqs */
 for(i = 0; i  16; i++) {
+if (irq0_override  i == 2)
+continue;
 putb(q, 3); /* entry type = I/O interrupt */
 putb(q, 0); /* interrupt type = vectored interrupt */
 putb(q, 0); /* flags: po=0, el=0 */
@@ -1156,7 +1172,10 @@ static void mptable_init(void)
 putb(q, 0); /* source bus ID = ISA */
 putb(q, i); /* source bus IRQ */
 putb(q, ioapic_id); /* dest I/O APIC ID */
-putb(q, i); /* dest I/O APIC interrupt in */
+if (irq0_override  i == 0)
+putb(q, 2); /* dest I/O APIC interrupt in */
+else 
+putb(q, i); /* dest I/O APIC interrupt in */
 }
 /* patch length */
 len = q - mp_config_table;
@@ -1505,6 +1524,11 @@ void acpi_bios_init(void)
 sizeof(struct madt_processor_apic) * MAX_CPUS +
 sizeof(struct madt_io_apic);
 madt = (void *)(addr);
+for (i = 0; i  16; i++)
+if (PCI_ISA_IRQ_MASK  (1U  i))
+madt_size += sizeof(struct madt_intsrcovr);
+if (irq0_override)
+madt_size += sizeof(struct madt_intsrcovr);
 addr += madt_size;
 
 acpi_tables_size = addr - base_addr;
@@ -1594,8 +1618,15 @@ void acpi_bios_init(void)
 io_apic-interrupt = cpu_to_le32(0);
 
 intsrcovr = (struct madt_intsrcovr*)(io_apic + 1);
-for ( i = 0; i  16; i++ ) {
-if ( PCI_ISA_IRQ_MASK  (1U  i) ) {
+for (i = 0; i  16; i++) {
+if (irq0_override  i == 0) {
+memset(intsrcovr, 0, sizeof(*intsrcovr));
+intsrcovr-type   = APIC_XRUPT_OVERRIDE;
+intsrcovr-length = sizeof(*intsrcovr);
+intsrcovr-source = i;
+intsrcovr-gsi= 2;
+intsrcovr-flags  = 0;  //conforms