[Xen-devel][Qemu][e1000] fix a TSE bug

2008-07-07 Thread Xu, Anthony
Qemu e1000 NIC didn't work well for guest windows on xen/ia64.
After investigation, it's a e1000 TSE bug.
Previously, all data descriptors used TSE context descriptor by default,
It's not correct, per spec, data descriptor uses TSE bit to indicate
whether use TSE,
Legacy data descripter never use TSE.
This patch fixed this bug.

Signed-off-by; Anthony Xu < [EMAIL PROTECTED] >



diff -r 11318234588e tools/ioemu/hw/e1000.c
--- a/tools/ioemu/hw/e1000.cThu Jun 19 12:48:04 2008 +0900
+++ b/tools/ioemu/hw/e1000.cMon Jul 07 16:23:42 2008 +0800
@@ -103,6 +103,7 @@ typedef struct E1000State_st {
 char tse;
 char ip;
 char tcp;
+char cptse; //current packet tse bit
 } tx;
 
 struct {
@@ -306,7 +307,7 @@ xmit_seg(E1000State *s)
 unsigned int frames = s->tx.tso_frames, css, sofar, n;
 struct e1000_tx *tp = &s->tx;
 
-if (tp->tse) {
+if (tp->tse && tp->cptse) {
 css = tp->ipcss;
 DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
frames, tp->size, css);
@@ -380,37 +381,49 @@ process_tx_desc(E1000State *s, struct e1
 tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
 }
 return;
-} else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D))
+} else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)){
+// data descriptor
 tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
+tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
+} else
+// legacy descriptor
+tp->cptse = 0;
 
 addr = le64_to_cpu(dp->buffer_addr);
-if (tp->tse) {
+if (tp->tse && tp->cptse) {
 hdr = tp->hdr_len;
 msh = hdr + tp->mss;
+do {
+bytes = split_size;
+if (tp->size + bytes > msh)
+bytes = msh - tp->size;
+cpu_physical_memory_read(addr, tp->data + tp->size, bytes);
+if ((sz = tp->size + bytes) >= hdr && tp->size < hdr)
+memmove(tp->header, tp->data, hdr);
+tp->size = sz;
+addr += bytes;
+if (sz == msh) {
+xmit_seg(s);
+memmove(tp->data, tp->header, hdr);
+tp->size = hdr;
+}
+} while (split_size -= bytes);
+} else if (!tp->tse && tp->cptse) {
+// context descriptor TSE is not set, while data descriptor TSE
is set
+DBGOUT(TXERR, "TCP segmentaion Error\n");
+} else {
+cpu_physical_memory_read(addr, tp->data + tp->size,
split_size);
+tp->size += split_size;
 }
-do {
-bytes = split_size;
-if (tp->size + bytes > msh)
-bytes = msh - tp->size;
-cpu_physical_memory_read(addr, tp->data + tp->size, bytes);
-if ((sz = tp->size + bytes) >= hdr && tp->size < hdr)
-memmove(tp->header, tp->data, hdr);
-tp->size = sz;
-addr += bytes;
-if (sz == msh) {
-xmit_seg(s);
-memmove(tp->data, tp->header, hdr);
-tp->size = hdr;
-}
-} while (split_size -= bytes);
 
 if (!(txd_lower & E1000_TXD_CMD_EOP))
 return;
-if (tp->size > hdr)
+if (!(tp->tse && tp->cptse && tp->size < hdr))
 xmit_seg(s);
 tp->tso_frames = 0;
 tp->sum_needed = 0;
 tp->size = 0;
+tp->cptse = 0;
 }
 
 static uint32_t


qemu-e1000.patch
Description: qemu-e1000.patch


RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel

2008-07-02 Thread Xu, Anthony
Hi Jes,

You can use this one, I'm considering how to make it more generic per
AVI comment.

Anthony

Jes Sorensen wrote:
> Hi Anthony,
> 
> Looking back through this thread - do you happen to have an updated
> patch for the irq problem? If not, could you let me know which is your
> latest version and I will try and look at it.
> 
> Cheers,
> Jes


0002-Irq-assignment.patch
Description: 0002-Irq-assignment.patch


RE: [PATCH] make qemu compile for kvm/ia64

2008-06-19 Thread Xu, Anthony
Thanks for your comments
It is the revised one


>From 1a30adfc5ded3608ac2f09499b42234cf7d54a19 Mon Sep 17 00:00:00 2001
From: Anthony Xu  <[EMAIL PROTECTED]>
Date: Fri, 20 Jun 2008 10:45:13 -0400
Subject: [PATCH] Make qemu compile for kvm-ia64

Since merging with Qemu upsteram, it can't be compiled
for kvm-ia64

Signed-off-by: Anthony Xu <[EMAIL PROTECTED]>
---
 qemu/Makefile.target |8 +++-
 qemu/cpu-exec.c  |2 ++
 qemu/exec.c  |2 ++
 qemu/target-ia64/cpu.h   |3 ---
 qemu/target-ia64/exec.h  |5 +
 qemu/target-ia64/fake-exec.c |6 ++
 6 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index ac5eda1..a1491d0 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -201,8 +201,10 @@ ifdef CONFIG_DYNGEN_OP
 LIBOBJS+=op.o
 endif
 # TCG code generator
+ifneq ($(TARGET_ARCH), ia64)
 LIBOBJS+= tcg/tcg.o tcg/tcg-dyngen.o tcg/tcg-runtime.o
 CPPFLAGS+=-I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/$(ARCH)
+endif
 ifeq ($(ARCH),sparc64)
 CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc
 endif
@@ -239,7 +241,11 @@ LIBOBJS+= qemu-kvm-powerpc.o
 endif
 endif
 
-LIBOBJS+= op_helper.o helper.o
+LIBOBJS+= op_helper.o 
+
+ifneq ($(TARGET_ARCH), ia64)
+LIBOBJS+= helper.o
+endif
 
 ifeq ($(TARGET_BASE_ARCH), arm)
 LIBOBJS+= neon_helper.o iwmmxt_helper.o
diff --git a/qemu/cpu-exec.c b/qemu/cpu-exec.c
index 252927f..e8d1cce 100644
--- a/qemu/cpu-exec.c
+++ b/qemu/cpu-exec.c
@@ -21,7 +21,9 @@
 #define CPU_NO_GLOBAL_REGS
 #include "exec.h"
 #include "disas.h"
+#if !defined(TARGET_IA64)
 #include "tcg.h"
+#endif
 
 #if !defined(CONFIG_SOFTMMU)
 #undef EAX
diff --git a/qemu/exec.c b/qemu/exec.c
index 37a956b..544b963 100644
--- a/qemu/exec.c
+++ b/qemu/exec.c
@@ -37,7 +37,9 @@
 #include "exec-all.h"
 #include "qemu-common.h"
 
+#if !defined(TARGET_IA64)
 #include "tcg.h"
+#endif
 #include "qemu-kvm.h"
 
 #if defined(CONFIG_USER_ONLY)
diff --git a/qemu/target-ia64/cpu.h b/qemu/target-ia64/cpu.h
index f8e5e8a..12718f5 100644
--- a/qemu/target-ia64/cpu.h
+++ b/qemu/target-ia64/cpu.h
@@ -43,9 +43,6 @@
 #include "softfloat.h"
 typedef struct CPUIA64State {
 CPU_COMMON;
-/* exception/interrupt handling */
-   jmp_buf jmp_env;
-   int exception_index;
 
int interrupt_request;
int user_mode_only;
diff --git a/qemu/target-ia64/exec.h b/qemu/target-ia64/exec.h
index 155cfa6..db1149a 100644
--- a/qemu/target-ia64/exec.h
+++ b/qemu/target-ia64/exec.h
@@ -26,6 +26,8 @@ uint32_t T0;
 uint32_t T1;
 uint32_t T2;
 
+#define tcg_qemu_tb_exec(tb_ptr) 0
+
 static inline void env_to_regs(void)
 {
 }
@@ -34,6 +36,9 @@ static inline void regs_to_env(void)
 {
 }
 
+void tcg_dump_info(FILE *f,
+   int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
+
 void cpu_lock(void);
 void cpu_unlock(void);
 void cpu_loop_exit(void);
diff --git a/qemu/target-ia64/fake-exec.c b/qemu/target-ia64/fake-exec.c
index 0be4ffd..b6bd2cf 100644
--- a/qemu/target-ia64/fake-exec.c
+++ b/qemu/target-ia64/fake-exec.c
@@ -33,6 +33,12 @@ int cpu_ia64_gen_code(CPUState *env, TranslationBlock
*tb, int *gen_code_size_pt
 return 0;
 }
 
+void tcg_dump_info(FILE *f,
+   int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+{
+return;
+}
+
 void flush_icache_range(unsigned long start, unsigned long stop)
 {
 while (start < stop) {
-- 
1.5.5






Avi Kivity wrote:
> Xu, Anthony wrote:
>> From 7b2c2612b21b895cd14e632fea845c03b6e1dedc Mon Sep 17 00:00:00
>> 2001 From: Anthony Xu  <[EMAIL PROTECTED]>
>> Date: Thu, 29 May 2008 13:14:56 -0400
>> Subject: [PATCH] Make qemu compile for kvm-ia64
>> 
>> Since merging with Qemu upsteram, it can't be compiled
>> for kvm-ia64
>> 
>> @@ -620,7 +622,10 @@ int cpu_exec(CPUState *env1)
>>  env = cpu_single_env;
>>  #define env cpu_single_env
>>  #endif
>> +
>> +#if !defined(TARGET_IA64)
>>  next_tb = tcg_qemu_tb_exec(tc_ptr); +#endif
>> 
> 
> Instead of sprinkling all those ifdefs, how about proving stubs
> instead?  It will reduce the merge overhead, and will be easier to fix
> if/when tcg can target ia64.


0001-Make-qemu-compile-for-kvm-ia64.patch
Description: 0001-Make-qemu-compile-for-kvm-ia64.patch


[PATCH] irq assignment

2008-06-17 Thread Xu, Anthony
>From 67806cc642cb666fb59fec60115fee37b80ecb3a Mon Sep 17 00:00:00 2001
From: Anthony Xu <[EMAIL PROTECTED]>
Date: Wed, 18 Jun 2008 14:32:46 -0400
Subject: [PATCH] Irq assignment

1. use bimodal _PRT
2. pci device can use irq > 15, reduce interrupt sharing
3. test by running linux guest in kvm-ia64, kvm-i32(w/ wo/ -no-kvm)

signed-off-by: Anthony Xu <[EMAIL PROTECTED]>
---
 bios/acpi-dsdt.dsl |   59
+++-
 qemu/hw/apic.c |   22 ++-
 qemu/hw/ipf.c  |   24 +
 qemu/hw/pc.c   |2 +-
 qemu/hw/pc.h   |4 +++
 qemu/hw/pci.c  |8 +++
 6 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
index d1bfa2c..62dd612 100755
--- a/bios/acpi-dsdt.dsl
+++ b/bios/acpi-dsdt.dsl
@@ -89,6 +89,12 @@ DefinitionBlock (
 }
 }
 
+Name (PICD, 0)
+
+Method(_PIC, 1)
+{
+Store(Arg0, PICD)
+}
 
 /* PCI Bus definition */
 Scope(\_SB) {
@@ -96,7 +102,15 @@ DefinitionBlock (
 Name (_HID, EisaId ("PNP0A03"))
 Name (_ADR, 0x00)
 Name (_UID, 1)
-Name(_PRT, Package() {
+
+Method (_PRT,0) {
+If (PICD) {
+Return (PRTA)
+}
+Return (PRTP)
+}
+
+Name (PRTP, Package() {
 /* PCI IRQ routing table, example from ACPI 2.0a
specification,
section 6.2.8.1 */
 /* Note: we provide the same info as the PCI routing
@@ -147,6 +161,49 @@ DefinitionBlock (
prt_slot3(0x001f),
 })
 
+Name (PRTA, Package() {
+/* IOAPIC use fixed connection */
+
+#define aprt_slot(nr, irq) \
+   Package() { nr##, 0, 0, irq }, \
+   Package() { nr##, 1, 0, irq }, \
+   Package() { nr##, 2, 0, irq }, \
+   Package() { nr##, 3, 0, irq }
+
+   aprt_slot(0x, 16),
+   aprt_slot(0x0001, 17),
+   aprt_slot(0x0002, 18),
+   aprt_slot(0x0003, 19),
+   aprt_slot(0x0004, 20),
+   aprt_slot(0x0005, 21),
+   aprt_slot(0x0006, 22),
+   aprt_slot(0x0007, 23),
+   aprt_slot(0x0008, 16),
+   aprt_slot(0x0009, 17),
+   aprt_slot(0x000a, 18),
+   aprt_slot(0x000b, 19),
+   aprt_slot(0x000c, 20),
+   aprt_slot(0x000d, 21),
+   aprt_slot(0x000e, 22),
+   aprt_slot(0x000f, 23),
+   aprt_slot(0x0010, 16),
+   aprt_slot(0x0011, 17),
+   aprt_slot(0x0012, 18),
+   aprt_slot(0x0013, 19),
+   aprt_slot(0x0014, 20),
+   aprt_slot(0x0015, 21),
+   aprt_slot(0x0016, 22),
+   aprt_slot(0x0017, 23),
+   aprt_slot(0x0018, 16),
+   aprt_slot(0x0019, 17),
+   aprt_slot(0x001a, 18),
+   aprt_slot(0x001b, 19),
+   aprt_slot(0x001c, 20),
+   aprt_slot(0x001d, 21),
+   aprt_slot(0x001e, 22),
+   aprt_slot(0x001f, 23),
+})
+
 OperationRegion(PCST, SystemIO, 0xae00, 0x08)
 Field (PCST, DWordAcc, NoLock, WriteAsZeros)
{
diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c
index 4ebf1ff..98bf3ad 100644
--- a/qemu/hw/apic.c
+++ b/qemu/hw/apic.c
@@ -1053,10 +1053,30 @@ static void ioapic_service(IOAPICState *s)
 }
 }
 
+int ioapic_map_irq(int devfn, int irq_num)
+{
+int irq;
+irq = ((devfn >> 3) & 7) + 16;
+return irq;
+}
+
+static int ioapic_irq_count[IOAPIC_NUM_PINS];
+
 void ioapic_set_irq(void *opaque, int vector, int level)
 {
 IOAPICState *s = opaque;
-
+if( vector >= 16 ){
+ if( level )
+ ioapic_irq_count[vector] += 1;
+ else
+ ioapic_irq_count[vector] -= 1;
+ level = (ioapic_irq_count[vector] != 0);
+}
+#ifdef KVM_CAP_IRQCHIP
+if (kvm_enabled())
+   if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0))
+   return;
+#endif
 if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
 uint32_t mask = 1 << vector;
 uint64_t entry = s->ioredtbl[vector];
diff --git a/qemu/hw/ipf.c b/qemu/hw/ipf.c
index b11e328..dabd7cc 100644
--- a/qemu/hw/ipf.c
+++ b/qemu/hw/ipf.c
@@ -672,3 +672,27 @@ QEMUMachine ipf_machine = {
 ipf_init_pci,
 VGA_RAM_SIZE + VGA_RAM_SIZE,
 };
+
+#define IOAPIC_NUM_PINS 48
+static int ioapic_irq_count[IOAPIC_NUM_PINS];
+
+int ioapic_map_irq(int devfn, int irq_num)
+{
+int irq, dev;
+dev = devfn >> 3;
+irq = dev << 2) + (dev >> 3) + irq_num) & 31) + 16);
+return irq;
+}
+
+void ioapic_set_irq(void *opaque, int vector, int level)
+{
+if( level )
+ioapic_irq_count[vector] += 1;
+else
+ioapic_irq_count[vector] -= 1;
+
+if (kvm_enabled())
+  

[PATCH] make qemu compile for kvm/ia64

2008-06-17 Thread Xu, Anthony
>From 7b2c2612b21b895cd14e632fea845c03b6e1dedc Mon Sep 17 00:00:00 2001
From: Anthony Xu  <[EMAIL PROTECTED]>
Date: Thu, 29 May 2008 13:14:56 -0400
Subject: [PATCH] Make qemu compile for kvm-ia64

Since merging with Qemu upsteram, it can't be compiled
for kvm-ia64

Signed-off-by: Anthony Xu < [EMAIL PROTECTED] >
---
 qemu/Makefile.target   |8 +++-
 qemu/cpu-exec.c|5 +
 qemu/exec.c|4 
 qemu/target-ia64/cpu.h |3 ---
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index ac5eda1..a1491d0 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -201,8 +201,10 @@ ifdef CONFIG_DYNGEN_OP
 LIBOBJS+=op.o
 endif
 # TCG code generator
+ifneq ($(TARGET_ARCH), ia64)
 LIBOBJS+= tcg/tcg.o tcg/tcg-dyngen.o tcg/tcg-runtime.o
 CPPFLAGS+=-I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/$(ARCH)
+endif
 ifeq ($(ARCH),sparc64)
 CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc
 endif
@@ -239,7 +241,11 @@ LIBOBJS+= qemu-kvm-powerpc.o
 endif
 endif
 
-LIBOBJS+= op_helper.o helper.o
+LIBOBJS+= op_helper.o 
+
+ifneq ($(TARGET_ARCH), ia64)
+LIBOBJS+= helper.o
+endif
 
 ifeq ($(TARGET_BASE_ARCH), arm)
 LIBOBJS+= neon_helper.o iwmmxt_helper.o
diff --git a/qemu/cpu-exec.c b/qemu/cpu-exec.c
index 252927f..b61e9aa 100644
--- a/qemu/cpu-exec.c
+++ b/qemu/cpu-exec.c
@@ -21,7 +21,9 @@
 #define CPU_NO_GLOBAL_REGS
 #include "exec.h"
 #include "disas.h"
+#if !defined(TARGET_IA64)
 #include "tcg.h"
+#endif
 
 #if !defined(CONFIG_SOFTMMU)
 #undef EAX
@@ -620,7 +622,10 @@ int cpu_exec(CPUState *env1)
 env = cpu_single_env;
 #define env cpu_single_env
 #endif
+
+#if !defined(TARGET_IA64)
 next_tb = tcg_qemu_tb_exec(tc_ptr);
+#endif
 env->current_tb = NULL;
 /* reset soft MMU for next block (it can currently
only be set by a memory fault) */
diff --git a/qemu/exec.c b/qemu/exec.c
index 37a956b..3dd49b5 100644
--- a/qemu/exec.c
+++ b/qemu/exec.c
@@ -37,7 +37,9 @@
 #include "exec-all.h"
 #include "qemu-common.h"
 
+#if !defined(TARGET_IA64)
 #include "tcg.h"
+#endif
 #include "qemu-kvm.h"
 
 #if defined(CONFIG_USER_ONLY)
@@ -3197,7 +3199,9 @@ void dump_exec_info(FILE *f,
 cpu_fprintf(f, "TB flush count  %d\n", tb_flush_count);
 cpu_fprintf(f, "TB invalidate count %d\n",
tb_phys_invalidate_count);
 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
+#if !defined(TARGET_IA64)
 tcg_dump_info(f, cpu_fprintf);
+#endif
 }
 
 #if !defined(CONFIG_USER_ONLY)
diff --git a/qemu/target-ia64/cpu.h b/qemu/target-ia64/cpu.h
index f8e5e8a..12718f5 100644
--- a/qemu/target-ia64/cpu.h
+++ b/qemu/target-ia64/cpu.h
@@ -43,9 +43,6 @@
 #include "softfloat.h"
 typedef struct CPUIA64State {
 CPU_COMMON;
-/* exception/interrupt handling */
-   jmp_buf jmp_env;
-   int exception_index;
 
int interrupt_request;
int user_mode_only;
-- 
1.5.5


0001-Make-qemu-compile-for-kvm-ia64.patch
Description: 0001-Make-qemu-compile-for-kvm-ia64.patch


RE: [RFC] kvm irq assignment

2008-06-15 Thread Xu, Anthony
> This does look like common code to me, so it might be a good idea to
> share it with ia32. AFAIK apics are the same on ia32 and ia64.
> [Anthony] I wanted to do the same thing as what you said, while
> currently KVM/ia64 doesn't include apic.c file, I tried to add
> apic.c, but failed to complie   
> I'll try to make it work.

Further investigation show there are many ia32 specific codes in apic.c file.
It may incur a lot of modifications if we want to share apic.c with IA64.

For example,
s->apicbase = (val & 0xf000) |
(s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
/* if disabled, cannot be enabled again */
if (!(val & MSR_IA32_APICBASE_ENABLE)) {
s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
env->cpuid_features &= ~CPUID_APIC;
s->spurious_vec &= ~APIC_SV_ENABLE;

BTW IA64 is using SAPIC not APIC, there are some difference.
For KVM/IA64, only two functions ioapic_map_irq and ioapic_set_irq are needed, 
and
Native qemu doesn't support guest IA64 platform yet.

So, the simple way is to put these two functions in ia64 specific file, ipf.c


Thanks,
Anthony


Xu, Anthony wrote:
> 
> 
> From: Alexander Graf [mailto:[EMAIL PROTECTED]
> Sent: 2008年6月13日 22:31
> To: Xu, Anthony
> Cc: Avi Kivity; Marcelo Tosatti; Jes Sorensen; kvm@vger.kernel.org;
> [EMAIL PROTECTED] 
> Subject: Re: [RFC] kvm irq assignment
> 
> 
> 
> On Jun 12, 2008, at 11:38 PM, Xu, Anthony wrote:
> 
> 
>   Hi Avi and all
> 
>   This is the revised one,
> 
>   All PCI devices send interrupt to both PIC and IOAPIC,
>   a). When PIC is enabled and IOAPIC is disabled,  all redirect
>   entries in IOAPIC are masked.
>   B) When PIC is disabled and IPAPIC is enabled, link entry bit7 is
>   set, means this link entry is disable.
>   Guest OS need to guarantee PIC and IOAPIC are not enabled in the same
>   time. Otherwise cause many suspicious interrupt to guest.
> 
> 
> At boottime the IOAPIC is completely masked, while the PIC is
> completely unmasked. Somewhere during bootup ACPI compliant OSs can
> call _PIC to switch to IOAPIC mode and somehow deactivate the PIC
> lines and activate IOAPIC lines for the devices they are interested
> in.
> 
> I sent a tool called "apicdump" to this list some time ago that
> allows you to read the PIC and IOAPIC from within Linux. 
> 
> 
> [Anthony] nice tool, I'll try that.
> 
> 
> 
>   Test by running guest linux in kvm/ia32 and kvm/ia64.
> 
> 
>   Thanks,
>   Anthony
> 
> 
> 
>   diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
>   index 21fc76a..e12fd66 100755
>   --- a/bios/acpi-dsdt.dsl
>   +++ b/bios/acpi-dsdt.dsl
> 
> 
> ACPI changes look fine to me.
> 
> 
> 
> 
> 
> 
> 
> [snip]
> 
> 
>   diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c
>   index a14cab2..c3014fa 100644
>   --- a/qemu/hw/apic.c
>   +++ b/qemu/hw/apic.c
>   @@ -1053,9 +1053,25 @@ static void ioapic_service(IOAPICState *s)
>   }
>   }
> 
>   +int ioapic_map_irq(int devfn, int irq_num)
>   +{
>   +int irq;
>   +irq = ((devfn >> 3) & 7) + 16;
>   +return irq;
>   +}
>   +#ifdef KVM_CAP_IRQCHIP
>   +static int ioapic_irq_count[IOAPIC_NUM_PINS];
>   +#endif
>   +
>   void ioapic_set_irq(void *opaque, int vector, int level)
>   {
>   IOAPICState *s = opaque;
>   +#ifdef KVM_CAP_IRQCHIP
>   +ioapic_irq_count[vector] += level;
>   +if (kvm_enabled())
>   + if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0))
>   +return;
>   +#endif
> 
>   if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
>   uint32_t mask = 1 << vector;
>   diff --git a/qemu/hw/ipf.c b/qemu/hw/ipf.c
>   index b11e328..4761463 100644
>   --- a/qemu/hw/ipf.c
>   +++ b/qemu/hw/ipf.c
>   @@ -672,3 +672,23 @@ QEMUMachine ipf_machine = {
>   ipf_init_pci,
>   VGA_RAM_SIZE + VGA_RAM_SIZE,
>   };
>   +
>   +#define IOAPIC_NUM_PINS 48
>   +static int ioapic_irq_count[IOAPIC_NUM_PINS];
>   +
>   +int ioapic_map_irq(int devfn, int irq_num)
>   +{
>   +int irq, dev;
>   +dev = devfn >> 3;
>   +irq = dev << 2) + (dev >> 3) + irq_num) & 31) + 16);
>   +return irq;
>   +}
> 
> 
> Why does this look different from the one in apic.c?
> [Anthony] it is used for ia64,  I would like to

RE: [RFC] kvm irq assignment

2008-06-15 Thread Xu, Anthony
>> I think you should avoid any changes to pci.c. Perhaps create a new
>> ioapic_and_pic_map / ioapic_and_pic_set pair of functions and change
>> pc.c to use that instead of piix_set_irq.
> I'll consider how to do this
> 

But pci.c includes below code section, which implements irq_num mapping
through interrupt link, While ioapic_set_irq doesn't
need this modification.
Seems, it's unavoidable to modify pci.c

Thanks,
Anthony



pci_dev->irq_state[irq_num] = level;
for (;;) {
bus = pci_dev->bus;
irq_num = bus->map_irq(pci_dev, irq_num);
if (bus->set_irq)
        break;
pci_dev = bus->parent_dev;


Xu, Anthony wrote:
> Marcelo Tosatti wrote:
>> Hi Anthony,
>> 
>> On Fri, Jun 13, 2008 at 02:38:08PM +0800, Xu, Anthony wrote:
>>> Hi Avi and all
>>> 
>>> This is the revised one,
>>> 
>>> All PCI devices send interrupt to both PIC and IOAPIC,
>>> a). When PIC is enabled and IOAPIC is disabled,  all redirect
>>> entries in IOAPIC are masked. B) When PIC is disabled and IPAPIC is
>>> enabled, link entry bit7 is set, means this link entry is disable.
>>> Guest OS need to guarantee PIC and IOAPIC are not enabled in the
>>> same time. Otherwise cause many suspicious interrupt to guest.
>>> 
>>> Test by running guest linux in kvm/ia32 and kvm/ia64.
>> 
>> Interrupt sharing is stable under Linux, PCI hotplug is happy, and
>> Windows is happy. Ship it!
> Thanks, I will
> 
>> 
>> I had to apply your patch by hand, your mailer eats newlines and
>> other nasty things, please fix that (or send attached patches).
>> Sorry, I'll attach patch. 
> 
>> 
>>> 
>>> +Name (PICD, 0)
>>> 
>>> -/* PCI Bus definition */
>>> +Method(_PIC, 1)
>>> +{
>>> +Store(Arg0, PICD)
>>> +}
>>> +
>>> +/*PCI Bus definition */
>> 
>> Why did you take off the space before the "P" of PCI? Before you ask
>> me, no, I don't have anything better to do :) typo
> 
>> 
>>>  Scope(\_SB) {
>>>  Device(PCI0) {
>>>  Name (_HID, EisaId ("PNP0A03"))
>>>  Name (_ADR, 0x00)
>>>  Name (_UID, 1)
>>> -Name(_PRT, Package() {
>>> +
>>> +Method(_PRT,0){
>>> +If(PICD){
>> 
>> Put some spaces there too.
> Okay
> 
>> 
>>> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
>>> index a23a466..f96fbb5 100644
>>> --- a/qemu/hw/pci.c
>>> +++ b/qemu/hw/pci.c
>>> @@ -27,6 +27,8 @@
>>>  #include "net.h"
>>>  #include "pc.h"
>>> 
>>> +#include "qemu-kvm.h"
>>> +
>>>  //#define DEBUG_PCI
>>> 
>>>  struct PCIBus {
>>> @@ -534,12 +536,18 @@ static void pci_set_irq(void *opaque, int
>>>  irq_num, int level) PCIDevice *pci_dev = (PCIDevice *)opaque; 
>>>  PCIBus *bus; int change;
>>> -
>>> +#ifdef KVM_CAP_IRQCHIP
>>> +int irq;
>>> +#endif
>>>  change = level - pci_dev->irq_state[irq_num];
>>>  if (!change)
>>>  return;
>>> 
>>>  pci_dev->irq_state[irq_num] = level;
>>> +#ifdef KVM_CAP_IRQCHIP
>>> +irq = ioapic_map_irq(pci_dev->devfn, irq_num);
>>> +ioapic_set_irq(opaque, irq, change);
>>> +#endif
>> 
>> I think you should avoid any changes to pci.c. Perhaps create a new
>> ioapic_and_pic_map / ioapic_and_pic_set pair of functions and change
>> pc.c to use that instead of piix_set_irq.
> I'll consider how to do this
> 
>> 
>> Other than that (and KVM_CAP_IRQCHIP mentioned by Avi, along with
>> making sure this works with "-no-kvm") looks great. Agree
> 
>> 
>> Regarding the non-PIIX link devices I mentioned, that can be done
>> later if necessary.
> Agree, if we need to support dynamic irq, or support multiple IOAPIC.
> 
> 
> 
> 
> Anthony
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [RFC] kvm irq assignment

2008-06-15 Thread Xu, Anthony
Avi Kivity wrote:
> Xu, Anthony wrote:
>> Hi Avi and all
>> 
>> This is the revised one,
>> 
>> All PCI devices send interrupt to both PIC and IOAPIC,
>> a). When PIC is enabled and IOAPIC is disabled,  all redirect
>> entries in IOAPIC are masked. B) When PIC is disabled and IPAPIC is
>> enabled, link entry bit7 is set, means this link entry is disable.
>> Guest OS need to guarantee PIC and IOAPIC are not enabled in the same
>> time. Otherwise cause many suspicious interrupt to guest.
>> 
>> Test by running guest linux in kvm/ia32 and kvm/ia64.
>> 
>> 
>> 
> 
> Looks good.
> 
>> diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c
>> index a14cab2..c3014fa 100644
>> --- a/qemu/hw/apic.c
>> +++ b/qemu/hw/apic.c
>> @@ -1053,9 +1053,25 @@ static void ioapic_service(IOAPICState *s)   
>>  } }
>> 
>> +int ioapic_map_irq(int devfn, int irq_num)
>> +{
>> +int irq;
>> +irq = ((devfn >> 3) & 7) + 16;
>> +return irq;
>> +}
>> +#ifdef KVM_CAP_IRQCHIP
>> +static int ioapic_irq_count[IOAPIC_NUM_PINS];
>> +#endif
>> +
>>  void ioapic_set_irq(void *opaque, int vector, int level)  {
>>  IOAPICState *s = opaque;
>> +#ifdef KVM_CAP_IRQCHIP
>> +ioapic_irq_count[vector] += level;
>> +if (kvm_enabled())
>> +if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0)) +

>> return; +#endif
>> 
> 
> I think this can be done more cleanly using the qemu_irq
> infrastructure. qem_irq_invert can do the inversion, and to get the
> "irq fork", you can have a qemu_irq instance that forwards its
> argument to two other qemu_irq instances.
I'm thinking how to make it clean and generic

> 
> There shouldn't be any #ifdef KVM_CAP_IRQCHIPs in there - it should
> work for plain qemu as well (well, if we fix qemu ioapic polarity
> code). 
Agree

> 
>> diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c
>> index 90cb3a6..96316ca 100644
>> --- a/qemu/hw/piix_pci.c
>> +++ b/qemu/hw/piix_pci.c
>> @@ -225,6 +226,9 @@ static void piix3_set_irq(qemu_irq *pic, int
>>  irq_num, int level) /* now we change the pic irq level
>>  according to the piix irq mappings */ /* XXX: optimize */
>>  pic_irq = piix3_dev->config[0x60 + irq_num];
>> +/* if bit7 set 1, this link is disabled */
>> +if (pic_irq & 0x80)
>> +return;
>> 
> 
> Already caught by the test below... hacky.
Didn't see that, thanks

> 
>>  if (pic_irq < 16) {
>>  /* The pic level is the logical OR of all the PCI irqs
>> mapped to it */ 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [RFC] kvm irq assignment

2008-06-15 Thread Xu, Anthony
Marcelo Tosatti wrote:
> Hi Anthony,
> 
> On Fri, Jun 13, 2008 at 02:38:08PM +0800, Xu, Anthony wrote:
>> Hi Avi and all
>> 
>> This is the revised one,
>> 
>> All PCI devices send interrupt to both PIC and IOAPIC,
>> a). When PIC is enabled and IOAPIC is disabled,  all redirect
>> entries in IOAPIC are masked. B) When PIC is disabled and IPAPIC is
>> enabled, link entry bit7 is set, means this link entry is disable.
>> Guest OS need to guarantee PIC and IOAPIC are not enabled in the same
>> time. Otherwise cause many suspicious interrupt to guest.
>> 
>> Test by running guest linux in kvm/ia32 and kvm/ia64.
> 
> Interrupt sharing is stable under Linux, PCI hotplug is happy, and
> Windows is happy. Ship it!
Thanks, I will

> 
> I had to apply your patch by hand, your mailer eats newlines and other
> nasty things, please fix that (or send attached patches).
Sorry, I'll attach patch.

> 
>> 
>> +Name (PICD, 0)
>> 
>> -/* PCI Bus definition */
>> +Method(_PIC, 1)
>> +{
>> +Store(Arg0, PICD)
>> +}
>> +
>> +/*PCI Bus definition */
> 
> Why did you take off the space before the "P" of PCI? Before you ask
> me, no, I don't have anything better to do :)
typo

> 
>>  Scope(\_SB) {
>>  Device(PCI0) {
>>  Name (_HID, EisaId ("PNP0A03"))
>>  Name (_ADR, 0x00)
>>  Name (_UID, 1)
>> -Name(_PRT, Package() {
>> +
>> +Method(_PRT,0){
>> +If(PICD){
> 
> Put some spaces there too.
Okay

> 
>> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
>> index a23a466..f96fbb5 100644
>> --- a/qemu/hw/pci.c
>> +++ b/qemu/hw/pci.c
>> @@ -27,6 +27,8 @@
>>  #include "net.h"
>>  #include "pc.h"
>> 
>> +#include "qemu-kvm.h"
>> +
>>  //#define DEBUG_PCI
>> 
>>  struct PCIBus {
>> @@ -534,12 +536,18 @@ static void pci_set_irq(void *opaque, int
>>  irq_num, int level) PCIDevice *pci_dev = (PCIDevice *)opaque;
>>  PCIBus *bus;
>>  int change;
>> -
>> +#ifdef KVM_CAP_IRQCHIP
>> +int irq;
>> +#endif
>>  change = level - pci_dev->irq_state[irq_num];
>>  if (!change)
>>  return;
>> 
>>  pci_dev->irq_state[irq_num] = level;
>> +#ifdef KVM_CAP_IRQCHIP
>> +irq = ioapic_map_irq(pci_dev->devfn, irq_num);
>> +ioapic_set_irq(opaque, irq, change);
>> +#endif
> 
> I think you should avoid any changes to pci.c. Perhaps create a new
> ioapic_and_pic_map / ioapic_and_pic_set pair of functions and change
> pc.c to use that instead of piix_set_irq.
I'll consider how to do this

> 
> Other than that (and KVM_CAP_IRQCHIP mentioned by Avi, along with
> making sure this works with "-no-kvm") looks great.
Agree

> 
> Regarding the non-PIIX link devices I mentioned, that can be done
> later if necessary.
Agree, if we need to support dynamic irq, or support multiple IOAPIC.




Anthony
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [RFC] kvm irq assignment

2008-06-15 Thread Xu, Anthony
 



From: Alexander Graf [mailto:[EMAIL PROTECTED] 
Sent: 2008年6月13日 22:31
To: Xu, Anthony
Cc: Avi Kivity; Marcelo Tosatti; Jes Sorensen; kvm@vger.kernel.org; [EMAIL 
PROTECTED]
Subject: Re: [RFC] kvm irq assignment



On Jun 12, 2008, at 11:38 PM, Xu, Anthony wrote:


Hi Avi and all

This is the revised one,

All PCI devices send interrupt to both PIC and IOAPIC,  
a). When PIC is enabled and IOAPIC is disabled,  all redirect entries in
IOAPIC are masked.
B) When PIC is disabled and IPAPIC is enabled, link entry bit7 is set,
means this link entry is disable.
Guest OS need to guarantee PIC and IOAPIC are not enabled in the same
time. Otherwise cause many suspicious interrupt to guest.


At boottime the IOAPIC is completely masked, while the PIC is completely 
unmasked. Somewhere during bootup ACPI compliant OSs can call _PIC to switch to 
IOAPIC mode and somehow deactivate the PIC lines and activate IOAPIC lines for 
the devices they are interested in.

I sent a tool called "apicdump" to this list some time ago that allows you to 
read the PIC and IOAPIC from within Linux.


[Anthony] nice tool, I'll try that.



Test by running guest linux in kvm/ia32 and kvm/ia64.


Thanks,
Anthony



diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
index 21fc76a..e12fd66 100755
--- a/bios/acpi-dsdt.dsl
+++ b/bios/acpi-dsdt.dsl


ACPI changes look fine to me.







[snip]


diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c
index a14cab2..c3014fa 100644
--- a/qemu/hw/apic.c
+++ b/qemu/hw/apic.c
@@ -1053,9 +1053,25 @@ static void ioapic_service(IOAPICState *s)
}
}

+int ioapic_map_irq(int devfn, int irq_num)
+{
+int irq;
+irq = ((devfn >> 3) & 7) + 16;
+return irq;
+}
+#ifdef KVM_CAP_IRQCHIP
+static int ioapic_irq_count[IOAPIC_NUM_PINS];
+#endif
+
void ioapic_set_irq(void *opaque, int vector, int level)
{
IOAPICState *s = opaque;
+#ifdef KVM_CAP_IRQCHIP
+ioapic_irq_count[vector] += level;
+if (kvm_enabled())
+ if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0))
+return;
+#endif

if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
uint32_t mask = 1 << vector;
diff --git a/qemu/hw/ipf.c b/qemu/hw/ipf.c
index b11e328..4761463 100644
--- a/qemu/hw/ipf.c
+++ b/qemu/hw/ipf.c
@@ -672,3 +672,23 @@ QEMUMachine ipf_machine = {
ipf_init_pci,
VGA_RAM_SIZE + VGA_RAM_SIZE,
};
+
+#define IOAPIC_NUM_PINS 48
+static int ioapic_irq_count[IOAPIC_NUM_PINS];
+
+int ioapic_map_irq(int devfn, int irq_num)
+{
+int irq, dev;
+dev = devfn >> 3;
+irq = dev << 2) + (dev >> 3) + irq_num) & 31) + 16);
+return irq;
+}


Why does this look different from the one in apic.c?
[Anthony] it is used for ia64,  I would like to use same guest Firmware of 
XEN/ia64 in KVM/IA64,
I "copy" the mapping from XEN/IA64.




+
+void ioapic_set_irq(void *opaque, int vector, int level)
+{
+ioapic_irq_count[vector] += level;
+if (kvm_enabled())
+ if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0))
+return;
+}
+


This does look like common code to me, so it might be a good idea to share it 
with ia32. AFAIK apics are the same on ia32 and ia64.
[Anthony] I wanted to do the same thing as what you said, while currently 
KVM/ia64 doesn't include apic.c file, I tried to add apic.c, but failed to 
complie
I'll try to make it work.




diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h
index c284bf1..ef09a78 100644
--- a/qemu/hw/pc.h
+++ b/qemu/hw/pc.h
@@ -47,6 +47,7 @@ int apic_accept_pic_intr(CPUState *env);
void apic_local_deliver(CPUState *env, int vector);
int apic_get_interrupt(CPUState *env);
IOAPICState *ioapic_init(void);
+int ioapic_map_irq(int devfn, int irq_num);
void ioapic_set_irq(void *opaque, int vector, int level);

/* i8254.c */
diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
index a23a466..f96fbb5 100644
--- a/qemu/hw/pci.c
+++ b/qemu/hw/pci.c
@@ -27,6 +27,8 @@
#include "net.h"
#include "pc.h"

+#include "qemu-kvm.h"
+
//#define DEBUG_PCI

struct PCIBus {
@@ -534,12 +536,18 @@ stati

RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel

2008-06-15 Thread Xu, Anthony
Marcelo Tosatti wrote:
> On Fri, Jun 13, 2008 at 12:15:23AM +0800, Xu, Anthony wrote:
>>> I think it would be better to avoid static PCI pin -> IOAPIC pin
>>> assignments, if PCI link devices can be used (allowing the OS to
>>> route IRQ's as it wishes to).
>> Seems PCI link device only support irq-pin < 16,
>> IOAPIC pin 16~23 can not be used.
>> 
>> 
>> 
>>> 
>>> Take a look at http://www.microsoft.com/whdc/archive/acpi-mp.mspx.
>>> It seems cleaner to use "bimodal link nodes" (using the parlance
>>> from URL above) instead of "bimodal _PRT" as your present GSI patch
>>> is using. 
>> Bimodal _PRT is a great idea, I never thought of it before, thanks.
>> 
>> While in PIIX platform there are only 4 PCI link entries, how can we
>> introduce more? Where to put these added entries?
>> still in ISA bridge configure space.
> 
> Would have to write an ACPI-IOAPIC "IRQ router" to replace PIIX. It
> would be queried via a SystemIO region, so QEMU can know what IRQ
> has been assigned to a particular slot/func (OS can then change IRQ
> assignment via link device _SRS method).
> 
> That seems to be necessary for dynamic IRQ assignment of
> slots/function once you have more than one IOAPIC (note we can also
> assign one IRQ to each function inside each slot, currently there's
> one IRQ per _slot_). 

Agree, it is necessary for dynamic IRQ assignment.



> 
>> Another concern is, can this link use irq-pin > 15?
>> In the example ASL code in the web page you provided, they use
>> irq-pin <=15
> 
> Sure it can, as long as the OS has notified its not using PIIX's PIC
> (via the _PIC method).
Good.


Anthony
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [RFC] kvm irq assignment

2008-06-12 Thread Xu, Anthony
q_count[vector] == 0))
+   return;
+#endif
 
 if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
 uint32_t mask = 1 << vector;
diff --git a/qemu/hw/ipf.c b/qemu/hw/ipf.c
index b11e328..4761463 100644
--- a/qemu/hw/ipf.c
+++ b/qemu/hw/ipf.c
@@ -672,3 +672,23 @@ QEMUMachine ipf_machine = {
 ipf_init_pci,
 VGA_RAM_SIZE + VGA_RAM_SIZE,
 };
+
+#define IOAPIC_NUM_PINS 48
+static int ioapic_irq_count[IOAPIC_NUM_PINS];
+
+int ioapic_map_irq(int devfn, int irq_num)
+{
+int irq, dev;
+dev = devfn >> 3;
+irq = dev << 2) + (dev >> 3) + irq_num) & 31) + 16);
+return irq;
+}
+
+void ioapic_set_irq(void *opaque, int vector, int level)
+{
+ioapic_irq_count[vector] += level;
+if (kvm_enabled())
+   if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0))
+   return;
+}
+
diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h
index c284bf1..ef09a78 100644
--- a/qemu/hw/pc.h
+++ b/qemu/hw/pc.h
@@ -47,6 +47,7 @@ int apic_accept_pic_intr(CPUState *env);
 void apic_local_deliver(CPUState *env, int vector);
 int apic_get_interrupt(CPUState *env);
 IOAPICState *ioapic_init(void);
+int ioapic_map_irq(int devfn, int irq_num);
 void ioapic_set_irq(void *opaque, int vector, int level);
 
 /* i8254.c */
diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
index a23a466..f96fbb5 100644
--- a/qemu/hw/pci.c
+++ b/qemu/hw/pci.c
@@ -27,6 +27,8 @@
 #include "net.h"
 #include "pc.h"
 
+#include "qemu-kvm.h"
+
 //#define DEBUG_PCI
 
 struct PCIBus {
@@ -534,12 +536,18 @@ static void pci_set_irq(void *opaque, int irq_num,
int level)
 PCIDevice *pci_dev = (PCIDevice *)opaque;
 PCIBus *bus;
 int change;
-
+#ifdef KVM_CAP_IRQCHIP
+int irq;
+#endif 
 change = level - pci_dev->irq_state[irq_num];
 if (!change)
 return;
 
 pci_dev->irq_state[irq_num] = level;
+#ifdef KVM_CAP_IRQCHIP
+irq = ioapic_map_irq(pci_dev->devfn, irq_num);
+ioapic_set_irq(opaque, irq, change);
+#endif
 for (;;) {
 bus = pci_dev->bus;
 irq_num = bus->map_irq(pci_dev, irq_num);
diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c
index 90cb3a6..96316ca 100644
--- a/qemu/hw/piix_pci.c
+++ b/qemu/hw/piix_pci.c
@@ -225,6 +226,9 @@ static void piix3_set_irq(qemu_irq *pic, int
irq_num, int level)
 /* now we change the pic irq level according to the piix irq
mappings */
 /* XXX: optimize */
 pic_irq = piix3_dev->config[0x60 + irq_num];
+/* if bit7 set 1, this link is disabled */
+if (pic_irq & 0x80)  
+return;
 if (pic_irq < 16) {
 /* The pic level is the logical OR of all the PCI irqs mapped
to it */





Xu, Anthony wrote:
> Avi Kivity wrote:
>> Xu, Anthony wrote:
>>> Hi all,
>>> Thanks for your comments.
>>> 
>>> I made this new patch based on your comments
>>> 
>>> 1. use bimodal _PRT, to take advantage of IOAPIC pin 16~23
>>> the mapping is simple,  slot  ->  (slot&7)+16 IOAPIC pin,
>>> someone may provide good mapping ?
>>> 
>> 
>> I think it's fine. If we find a better one later, or if we add
>> another ioapic, we can easily change it since the bios and qemu are
>> shipped as a unit. 
>> 
>>> 2. use ISA-bridge configure space 0x64 byte as a communication
>>> mechansim. When guest BIOS invokes _PIC, the value is passed to
>>>  qemu through byte 0x64. qemu know whether it is PIC
>>> mode and APIC mode by checking byte 0x64. 
>>> 3. pci_slot_get_pirq and piix3_set_irq adopt different operation
>>> based on PIC mode/APIC mode 
>>> 
>> 
>> I'm not sure how real hardware works, but I _think_ that it routes
>> irqs unconditionally to both the legacy path and directly to the
>> ioapic. So for example if slot 5 asserts an interrupt, we map it
>> through the pci link mapping and generate an active high interrupt to
>> one of {5, 10, 11} (both pic and ioapic), and simultaneously an
>> active low interrupt to ioapic pin 21.
> I think what you described is correct.
> 
> 
>> 
>> The _PIC method should disable the link interrupts if ioapic mode is
>> disabled.
> Typo!  If ioapic mode is enabled.
> 
> From x86 BIOS, OS disable link interrupt through link device _DIS
> mothod. 
> 
> 
>> 
>> This removes the need for communication between the bios and qemu.
>> Agree 
> 
>> 
>>> 
>>> +/* APIC and PIC flag */
>>> +OperationRegion (P40D, PCI_Config, 0x64, 0x01) +
>>> 
>> 
>> This is actually SERIRQC, serial irq control.
>> 
>>> +
>>> +#ifdef KVM_CAP_IRQCHIP
>>> 
>> 
>> This should be unconditional.
>> 
>

RE: [RFC] kvm irq assignment

2008-06-12 Thread Xu, Anthony
Avi Kivity wrote:
> Xu, Anthony wrote:
>> Hi all,
>> Thanks for your comments.
>> 
>> I made this new patch based on your comments
>> 
>> 1. use bimodal _PRT, to take advantage of IOAPIC pin 16~23
>>  the mapping is simple,  slot  ->  (slot&7)+16 IOAPIC pin,
>> someone may provide good mapping ?
>> 
> 
> I think it's fine. If we find a better one later, or if we add another
> ioapic, we can easily change it since the bios and qemu are shipped
> as a unit.
> 
>> 2. use ISA-bridge configure space 0x64 byte as a communication
>>  mechansim. When guest BIOS invokes _PIC, the value is passed to
qemu
>> through byte 0x64.
>>  qemu know whether it is PIC mode and APIC mode by
>> checking byte 0x64. 
>> 3. pci_slot_get_pirq and piix3_set_irq adopt different operation
>> based on PIC mode/APIC mode 
>> 
> 
> I'm not sure how real hardware works, but I _think_ that it routes
> irqs unconditionally to both the legacy path and directly to the
> ioapic. So for example if slot 5 asserts an interrupt, we map it
> through the pci link mapping and generate an active high interrupt to
> one of {5, 10, 11} (both pic and ioapic), and simultaneously an
> active low interrupt to ioapic pin 21.
I think what you described is correct.


> 
> The _PIC method should disable the link interrupts if ioapic mode is
> disabled.
Typo!  If ioapic mode is enabled.

>From x86 BIOS, OS disable link interrupt through link device _DIS
mothod.


> 
> This removes the need for communication between the bios and qemu.
Agree

> 
>> 
>> +/* APIC and PIC flag */
>> +OperationRegion (P40D, PCI_Config, 0x64, 0x01) +
>> 
> 
> This is actually SERIRQC, serial irq control.
> 
>> +
>> +#ifdef KVM_CAP_IRQCHIP
>> 
> 
> This should be unconditional.
> 
>> +static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) +{
>> +int slot_addend;
>> +if( piix3_dev->config[0x64])  // APIC mode
>> +return ((pci_dev->devfn >> 3) & 7)+16;
>> +else {  // PIC mode
>> +slot_addend = (pci_dev->devfn >> 3) - 1;
>> +return (irq_num + slot_addend) & 3;
>> +}
>> +}
>> 
> 
> What I'm suggesting is to "fork" the interrupt into two lines, one
> legacy path and the ioapic path.

I'll try this way.

Anthony
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC] kvm irq assignment

2008-06-12 Thread Xu, Anthony
Hi all,
Thanks for your comments.

I made this new patch based on your comments

1. use bimodal _PRT, to take advantage of IOAPIC pin 16~23
the mapping is simple,  slot  ->  (slot&7)+16 IOAPIC pin,
someone may provide good mapping ?
2. use ISA-bridge configure space 0x64 byte as a communication
mechansim.
When guest BIOS invokes _PIC, the value is passed to qemu
through byte 0x64.
 qemu know whether it is PIC mode and APIC mode by checking
byte 0x64.
3. pci_slot_get_pirq and piix3_set_irq adopt different operation based
on PIC mode/APIC mode
4. All pci devices are still using active high level-triggered
interrupt, while IOAPIC pin 16~23 use active low level-triggered
interrupt.
   piix3_set_irq is responsible to reserve the level if it is in APIC
mode.
5. interrupt sharing for PCI devices is still supported


Test by runing guest linux, NIC is using IOAPIC pin > 15
  0: 980211IO-APIC-edge  timer
  1:179IO-APIC-edge  i8042
  8:  0IO-APIC-edge  rtc
  9:  0   IO-APIC-level  acpi
 12:289IO-APIC-edge  i8042
 14:   3266IO-APIC-edge  ide0
 15:  13489IO-APIC-edge  ide1
185:485   IO-APIC-level  eth0

Didn't try guest linux only with PIC, I think it works, because the path
for PIC is not changed at all.


Please comment!

Thanks,
Anthony






diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
index 21fc76a..64219f2 100755
--- a/bios/acpi-dsdt.dsl
+++ b/bios/acpi-dsdt.dsl
@@ -201,14 +201,24 @@ DefinitionBlock (
 }
 }
 
+Name (PICD, 0)
 
-/* PCI Bus definition */
+
+/*PCI Bus definition */
 Scope(\_SB) {
 Device(PCI0) {
 Name (_HID, EisaId ("PNP0A03"))
 Name (_ADR, 0x00)
 Name (_UID, 1)
-Name(_PRT, Package() {
+
+Method(_PRT,0){
+If(PICD){
+Return(PRTA)
+}
+Return(PRTP)
+}
+
+Name(PRTP, Package() {
 /* PCI IRQ routing table, example from ACPI 2.0a
specification,
section 6.2.8.1 */
 /* Note: we provide the same info as the PCI routing
@@ -407,6 +417,202 @@ DefinitionBlock (
 Package() {0x001f, 3, LNKB, 0},
 })
 
+Name(PRTA, Package() {
+/* IOAPIC use fixed connection */
+
+// PCI Slot 0
+Package() {0x, 0, 0, 16},
+Package() {0x, 1, 0, 16},
+Package() {0x, 2, 0, 16},
+Package() {0x, 3, 0, 16},
+
+// PCI Slot 1
+Package() {0x0001, 0, 0, 17},
+Package() {0x0001, 1, 0, 17},
+Package() {0x0001, 2, 0, 17},
+Package() {0x0001, 3, 0, 17},
+
+// PCI Slot 2
+Package() {0x0002, 0, 0, 18},
+Package() {0x0002, 1, 0, 18},
+Package() {0x0002, 2, 0, 18},
+Package() {0x0002, 3, 0, 18},
+
+// PCI Slot 3
+Package() {0x0003, 0, 0, 19},
+Package() {0x0003, 1, 0, 19},
+Package() {0x0003, 2, 0, 19},
+Package() {0x0003, 3, 0, 19},
+
+// PCI Slot 4
+Package() {0x0004, 0, 0, 20},
+Package() {0x0004, 1, 0, 20},
+Package() {0x0004, 2, 0, 20},
+Package() {0x0004, 3, 0, 20},
+
+// PCI Slot 5
+Package() {0x0005, 0, 0, 21},
+Package() {0x0005, 1, 0, 21},
+Package() {0x0005, 2, 0, 21},
+Package() {0x0005, 3, 0, 21},
+
+// PCI Slot 6
+Package() {0x0006, 0, 0, 22},
+Package() {0x0006, 1, 0, 22},
+Package() {0x0006, 2, 0, 22},
+Package() {0x0006, 3, 0, 22},
+
+// PCI Slot 7
+Package() {0x0007, 0, 0, 23},
+Package() {0x0007, 1, 0, 23},
+Package() {0x0007, 2, 0, 23},
+Package() {0x0007, 3, 0, 23},
+
+// PCI Slot 8
+Package() {0x0008, 0, 0, 16},
+Package() {0x0008, 1, 0, 16},
+Package() {0x0008, 2, 0, 16},
+Package() {0x0008, 3, 0, 16},
+
+// PCI Slot 9
+Package() {0x0009, 0, 0, 17},
+Package() {0x0009, 1, 0, 17},
+Package() {0x0009, 2, 0, 17},
+Package() {0x0009, 3, 0, 17},
+
+// PCI Slot 10
+Package() {0x000a, 0, 0, 18},
+Package() {0x000a, 1, 0, 18},
+Package() {0x000a, 2, 0, 18},
+Package() {0x000a, 3, 

RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel

2008-06-12 Thread Xu, Anthony
Avi Kivity wrote:
> Xu, Anthony wrote:
>> Thanks for comments
>> 
>> Basically we are on the same page, while I didn't find your patch
>> about irq assignment, can you post it in this thread again, thx?
>> Below patch makes all PCI devices use level-trigger , active low
>> interrupt, it worked well when running linux guest, I didn't try
>> windows guest yet. (didn't have windows image in hand)
>> 
>> Please comment!
>> 
>> If this is acceptabled, we can figure out how to use IOAPIC in
>> kvm/ia32 based on this. Which will reduce irq sharing dramatically.
>> 
>> 
>> Thanks,
>> Anthony
>> 
>> 
>> 
>> diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
>> index 21fc76a..4b5e824 100755
>> --- a/bios/acpi-dsdt.dsl
>> +++ b/bios/acpi-dsdt.dsl
>> @@ -974,7 +974,7 @@ DefinitionBlock (
>>  Name(_HID, EISAID("PNP0C0F")) // PCI interrupt
>>  link Name(_UID, 1)
>>  Name(_PRS, ResourceTemplate(){
>> -Interrupt (, Level, ActiveHigh, Shared)
>> +Interrupt (, Level, ActiveLow, Shared)
>>  { 5, 10, 11 }
>> 
> 
> I think this will fail for guests which use the PIC, since the PIC is
> always active high.
> 
> For x86 the interrupts will have to be active high since that's how
> piix works.

Understand this concern.


Anthony
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel

2008-06-12 Thread Xu, Anthony
Marcelo Tosatti wrote:
> On Tue, Jun 10, 2008 at 03:57:30PM +0800, Xu, Anthony wrote:
>> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
>> index a23a466..df0ea33 100644
>> --- a/qemu/hw/pci.c
>> +++ b/qemu/hw/pci.c
>> @@ -548,7 +548,7 @@ static void pci_set_irq(void *opaque, int
>>  irq_num, int level) pci_dev = bus->parent_dev;
>>  }
>>  bus->irq_count[irq_num] += change;
>> -bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num]
>> != 0); +bus->set_irq(bus->irq_opaque, irq_num,
>>  !(bus->irq_count[irq_num] != 0)); }
> 
> Ideally this should detect if the PCI interrupts are active low or
> high and act accordingly (so that older BIOSes still work and no
> assumptions are made). Will probably have to be done anyway for
> merging into QEMU. 

As I mentioned before, all PCI devices in Qemu used active high
level-triggerred interrupt.
While IOAPIC pin with fixed connection to slot uses active low
level-triggerred interrupt by default.
Seems we need to find a place to reverse it.



> 
> One way of doing it would be to talk to ACPI via a new SystemIO
> region, but there must be an easier way.  
Good point, qemu needs to know whether it is in PIC mode or in APIC
mode.
We need define the communication mechanism, SystemIO region is one
choice.


Thanks,
Anthony
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel

2008-06-12 Thread Xu, Anthony
Marcelo Tosatti wrote:
> On Wed, Jun 11, 2008 at 07:24:09AM -0700, Alexander Graf wrote:
>> 
>> On Jun 10, 2008, at 12:57 AM, Xu, Anthony wrote:
>> 
>>> Thanks for comments
>>> 
>>> Basically we are on the same page, while I didn't find your patch
>>> about irq assignment, can you post it in this thread again, thx?
>> 
>> I'll attach it to this mail.
>> 
>>> Below patch makes all PCI devices use level-trigger , active low
>>> interrupt, it worked well when running linux guest, I didn't try
>>> windows guest yet. (didn't have windows image in hand)
>>> 
>>> Please comment!
>>> 
>>> If this is acceptabled, we can figure out how to use IOAPIC in
>>> kvm/ia32 based on this. Which will reduce irq sharing dramatically.
>>> 
>>> 
>>> Thanks,
>>> Anthony
>>> 
>>> 
>>> 
>>> diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
>>> index 21fc76a..4b5e824 100755
>>> --- a/bios/acpi-dsdt.dsl
>>> +++ b/bios/acpi-dsdt.dsl
>>> @@ -974,7 +974,7 @@ DefinitionBlock (
>>> Name(_HID, EISAID("PNP0C0F")) // PCI interrupt
>>> link Name(_UID, 1) Name(_PRS,
>>> ResourceTemplate(){ -Interrupt (, Level,
>>> ActiveHigh, Shared) +Interrupt (, Level,
>>> ActiveLow, Shared) 
>> 
>> This looks pretty much correct to me ;-). You might also want to add
>> the GSI functionality I have in my patch. The only thing we have not
>> discussed so far is, how do interrupts get routed when _PIC is not
>> set to 1, aka the "boot case"?
> 
> Hi Alexander, Anthony,
> 
> I think it would be better to avoid static PCI pin -> IOAPIC pin
> assignments, if PCI link devices can be used (allowing the OS to route
> IRQ's as it wishes to).
Seems PCI link device only support irq-pin < 16,
IOAPIC pin 16~23 can not be used.



> 
> Take a look at http://www.microsoft.com/whdc/archive/acpi-mp.mspx. It
> seems cleaner to use "bimodal link nodes" (using the parlance from URL
> above) instead of "bimodal _PRT" as your present GSI patch is using.
Bimodal _PRT is a great idea, I never thought of it before, thanks.

While in PIIX platform there are only 4 PCI link entries, how can we
introduce more? Where to put these added entries?
still in ISA bridge configure space.

Another concern is, can this link use irq-pin > 15?
In the example ASL code in the web page you provided, they use irq-pin
<=15


- Anthony




> 
> My current inclination is:
> 
> - Move the PCI interrupt link device code to a method in a separate
>   file (with arguments such as _UID, IRQ list, etc).
> - Create separate PCI interrupt link devices for each PCI pin of each
>   slot (see the example table at end of URL).
> - Assign all available IRQ's covered by the single IOAPIC (0-24) in
>   the interrupt list for these interrupt link devices.
> 
> I'll try Anthony's patch with Windows.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel

2008-06-12 Thread Xu, Anthony
Alexander Graf wrote:
> On Jun 10, 2008, at 12:57 AM, Xu, Anthony wrote:
> 
>> Thanks for comments
>> 
>> Basically we are on the same page, while I didn't find your patch
>> about irq assignment, can you post it in this thread again, thx?
> 
> I'll attach it to this mail.
This patch is stilling use legacy LNKA~LNKD for ioapic,
As you said irq-pins > 15  are not used.


> 
>> Below patch makes all PCI devices use level-trigger , active low
>> interrupt, it worked well when running linux guest, I didn't try
>> windows guest yet.
>> (didn't have windows image in hand)
>> 
>> Please comment!
>> 
>> If this is acceptabled, we can figure out how to use IOAPIC in kvm/
>> ia32 based on this. Which will reduce irq sharing dramatically.
>> 
>> 
>> Thanks,
>> Anthony
>> 
>> 
>> 
>> diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
>> index 21fc76a..4b5e824 100755
>> --- a/bios/acpi-dsdt.dsl
>> +++ b/bios/acpi-dsdt.dsl
>> @@ -974,7 +974,7 @@ DefinitionBlock (
>> Name(_HID, EISAID("PNP0C0F")) // PCI interrupt
>> link Name(_UID, 1)
>> Name(_PRS, ResourceTemplate(){
>> -Interrupt (, Level, ActiveHigh, Shared)
>> +Interrupt (, Level, ActiveLow, Shared)
> 
> This looks pretty much correct to me ;-). You might also want to add
> the GSI functionality I have in my patch. The only thing we have not
> discussed so far is, how do interrupts get routed when _PIC is not set
> to 1, aka the "boot case"?

Here Avi is correct,
PIC only support activehigh level-triggered interrupt. From spec, PCI
device uses activelow level-triggered interrupt.
I guess it is interrupt Link to reverse it.


> 
>> 
>> { 5, 10, 11 }
>> })
>> Method (_STA, 0, NotSerialized)
> 
> [...snip...]
> 
>> 
>> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
>> index a23a466..df0ea33 100644
>> --- a/qemu/hw/pci.c
>> +++ b/qemu/hw/pci.c
>> @@ -548,7 +548,7 @@ static void pci_set_irq(void *opaque, int
>> irq_num, int level) pci_dev = bus->parent_dev;
>> }
>> bus->irq_count[irq_num] += change;
>> -bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num]
>> != 0); +bus->set_irq(bus->irq_opaque, irq_num, !(bus-
>>> irq_count[irq_num] !=
>> 0));
>> }
> 
> I don't think this is the right place to do it. Probably it would be a
> better idea to have either the APIC emulation know that the levels are
> reversed or make every device trigger ActiveLow interrupts.

Maybe it not a right place:-)
Making every device trigger activelow interrupt introduce a  lot of
modifications, it's last resort.
APIC emulation is inside kernel now, it is not a good idea to introduce
qemu-special piece of code into kernel as Avi mentioned.


Thanks.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] do not use extra env field.

2008-06-10 Thread Xu, Anthony
Avi Kivity wrote:
> Glauber Costa wrote:
>> There's no need to polute the already poluted CPUState
>> with "ready_for_interrupt_injection". We can compute it
>> in the few times we use it, and be fine.
>> 
> 
>>  #define CPUState CPUIA64State
>> diff --git a/qemu/target-ia64/op_helper.c
>> b/qemu/target-ia64/op_helper.c index 4969ef7..f582023 100644 ---
>> a/qemu/target-ia64/op_helper.c +++ b/qemu/target-ia64/op_helper.c
>> @@ -36,7 +36,6 @@ CPUState *cpu_ia64_init(char *cpu_model){ 
>>  cpu_reset(env); if (kvm_enabled()) {
>>  kvm_qemu_init_env(env);
>> -env->ready_for_interrupt_injection = 1;
>>  kvm_init_new_ap(env->cpu_index, env);
>>  }
>>  return env;
>> 
> 
> This bit means that this patch is not an identity transformation.
> Anthony/Xiantao, can you confirm that this patch is safe for ia64?

Give a simple test
This patch is safe for ia64.

Anthony
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel

2008-06-10 Thread Xu, Anthony
Thanks for comments

Basically we are on the same page, while I didn't find your patch about
irq assignment, can you post it in this thread again, thx?
Below patch makes all PCI devices use level-trigger , active low
interrupt, it worked well when running linux guest, I didn't try windows
guest yet.
(didn't have windows image in hand)

Please comment!

If this is acceptabled, we can figure out how to use IOAPIC in kvm/ia32
based on this. Which will reduce irq sharing dramatically.


Thanks,
Anthony



diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
index 21fc76a..4b5e824 100755
--- a/bios/acpi-dsdt.dsl
+++ b/bios/acpi-dsdt.dsl
@@ -974,7 +974,7 @@ DefinitionBlock (
 Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link
 Name(_UID, 1)
 Name(_PRS, ResourceTemplate(){
-Interrupt (, Level, ActiveHigh, Shared)
+Interrupt (, Level, ActiveLow, Shared)
 { 5, 10, 11 }
 })
 Method (_STA, 0, NotSerialized)
@@ -1019,7 +1019,7 @@ DefinitionBlock (
 Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link
 Name(_UID, 2)
 Name(_PRS, ResourceTemplate(){
-Interrupt (, Level, ActiveHigh, Shared)
+Interrupt (, Level, ActiveLow, Shared)
 { 5, 10, 11 }
 })
 Method (_STA, 0, NotSerialized)
@@ -1064,7 +1064,7 @@ DefinitionBlock (
 Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link
 Name(_UID, 3)
 Name(_PRS, ResourceTemplate(){
-Interrupt (, Level, ActiveHigh, Shared)
+Interrupt (, Level, ActiveLow, Shared)
 { 5, 10, 11 }
 })
 Method (_STA, 0, NotSerialized)
@@ -1109,7 +1109,7 @@ DefinitionBlock (
 Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link
 Name(_UID, 4)
 Name(_PRS, ResourceTemplate(){
-Interrupt (, Level, ActiveHigh, Shared)
+Interrupt (, Level, ActiveLow, Shared)
 { 5, 10, 11 }
 })
 Method (_STA, 0, NotSerialized)
diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
index a23a466..df0ea33 100644
--- a/qemu/hw/pci.c
+++ b/qemu/hw/pci.c
@@ -548,7 +548,7 @@ static void pci_set_irq(void *opaque, int irq_num,
int level)
 pci_dev = bus->parent_dev;
 }
 bus->irq_count[irq_num] += change;
-bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] !=
0);
+bus->set_irq(bus->irq_opaque, irq_num, !(bus->irq_count[irq_num] !=
0));
 }

 /***********/



Alexander Graf wrote:
> On Jun 10, 2008, at 8:33 AM, Xu, Anthony wrote:
> 
>> Avi Kivity wrote:
>>> 
>>> I suggest modifying the firmware to report the interrupts as active
>>> high.  Since Xen does not emulate polarity, the change will not
>>> affect it and the firmware can continue to be shared.  I'd also
>>> recommend fixing Xen to emulate the polarity correctly, if possible.
>> 
>> Thanks for your comments
>> I agree modifying common code is not a good method.
>> 
>> While your suggestion seems be infeasible too.
>> According to acpi spec, only irq <=15 can be configured, such as
>> trigger level, polarity.
>> For irq >15 , means connect to IOAPIC directly, it can't be
>> configured, it must be level triger, active low.
>> 
>> I can't find any mechanism in firmware to configure irqs (> 15).
>> Please enlighten me if you have.
> 
> 
> You can change the defaults on that IOAPIC-wise. IIRC this was in the
> MADT, but I'd have to check.
> 
>> From some experimental, Firmware both in IA64 and IA32 doesn't
>> program IOAPIC, It's Guest OS to program.
>> Guest OS gets PRT first,
>> If the PCI device connected to IOAPIC pin directly, looks like below
>>/* Device 1, INTA - INTD */
>>Package(){0x0001, 0, 0, 20},
>>Package(){0x0001, 1, 0, 21},
>>Package(){0x0001, 2, 0, 22},
>>Package(){0x0001, 3, 0, 23},
>> Guest OS configures this IOAPIC pin with level triger, active low
>> unconditionally.
> 
> Yes, the Guest OS reads the PRT and configures the IOAPIC and LAPIC
> accordingly.
> 
>> If the PCI device connected to IOAPIC pin through interrupt link, the
>> irq attribute(level, polarity) is decided by interrupt link
>> attribute, Below is the interrupt link attribute in kvm/ia32
>>Device

RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel

2008-06-09 Thread Xu, Anthony
Avi Kivity wrote:
> 
> I suggest modifying the firmware to report the interrupts as active
> high.  Since Xen does not emulate polarity, the change will not affect
> it and the firmware can continue to be shared.  I'd also recommend
> fixing Xen to emulate the polarity correctly, if possible.

Thanks for your comments
I agree modifying common code is not a good method.

While your suggestion seems be infeasible too.
According to acpi spec, only irq <=15 can be configured, such as trigger
level, polarity.
For irq >15 , means connect to IOAPIC directly, it can't be configured,
it must be level triger, active low.
I can't find any mechanism in firmware to configure irqs (> 15). Please
enlighten me if you have.

>From some experimental, Firmware both in IA64 and IA32 doesn't program
IOAPIC,
It's Guest OS to program.
Guest OS gets PRT first,
If the PCI device connected to IOAPIC pin directly, looks like below
/* Device 1, INTA - INTD */
Package(){0x0001, 0, 0, 20},
Package(){0x0001, 1, 0, 21},
Package(){0x0001, 2, 0, 22},
Package(){0x0001, 3, 0, 23},
Guest OS configures this IOAPIC pin with level triger, active low
unconditionally.

If the PCI device connected to IOAPIC pin through interrupt link, the
irq attribute(level, polarity) is decided by interrupt link attribute, 
Below is the interrupt link attribute in kvm/ia32
Device(LNKA){
Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link
Name(_UID, 1)
Name(_PRS, ResourceTemplate(){
Interrupt (, Level, ActiveHigh, Shared)
{ 5, 10, 11 }
})
It's defined as level trigger, activehigh, that's the reason why pci
device worked well in kvm/ia32.

I think below scheme is feasible,
1. all PCI devices in Qemu uses level trigger, active low interrupt.
(not include ide, even though it is a PCI device, it uses legacy
interrupt mechanism)

2. in Guest Firmware, all PCI devices' interrupts are configured as
level trigger, active low 
   for KVM/IA32 Guest firmware, just a little modifications
Name(_PRS, ResourceTemplate(){
Interrupt (, Level, ActiveHigh, Shared)--> Interrupt
(, Level, ActiveLow, Shared)


There are some modifications in Qemu, But I think it's a worthwhile,
it's a thoroghly solution both for KVM/IA32 and KVM/IA64.

- Thanks
Anthony


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm-ia64 irq assignment 1/2 kernel

2008-06-06 Thread Xu, Anthony
In kvm-ia64, we use the same guest firmware (GFW)as in Xen, 
GFW uses PRT to present PCI interrupt routing, all PCI devices'
interrupt pins
connect to IOAPIC, which doesn't match with kvm-ia64 Qemu PCI interrupt
routing.

This patch modify Qemu PCI interupt routing code to match with GFW, 
Then PCI devices in qemu can work in kvm-ia64, for exmaple, NIC


Signed-off-by: Anthony Xu < [EMAIL PROTECTED] >


diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 99a1736..80c116c 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -272,7 +272,11 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic,
int irq, int level)

if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
-   level ^= entry.fields.polarity;
+// polarity of all devices in qemu is active high
+//  regardless of ioapic setting
+
+// level ^= entry.fields.polarity;
+
if (!level)
ioapic->irr &= ~mask;
else {
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm-ia64 irq assignment 2/2 userspace

2008-06-06 Thread Xu, Anthony
In kvm-ia64, PCI devices use 48-pin virtual IOAPIC to deliver interrup.


Signed-off-by: Anthony Xu < [EMAIL PROTECTED] >

diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c
index 90cb3a6..797ece7 100644
--- a/qemu/hw/piix_pci.c
+++ b/qemu/hw/piix_pci.c
@@ -28,6 +28,7 @@

 typedef uint32_t pci_addr_t;
 #include "pci_host.h"
+#include "qemu-kvm.h"

 typedef PCIHostState I440FXState;

@@ -51,6 +52,11 @@ static void piix3_set_irq(qemu_irq *pic, int irq_num,
int level);
 static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
 {
 int slot_addend;
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_IA64)
+int dev;
+dev = pci_dev->devfn >> 3;
+return (dev) << 2) + ((dev) >> 3) + (irq_num)) & 31) + 16);
+#endif
 slot_addend = (pci_dev->devfn >> 3) - 1;
 return (irq_num + slot_addend) & 3;
 }
@@ -171,12 +177,18 @@ static int i440fx_load(QEMUFile* f, void *opaque,
int version_id)

 PCIBus *i440fx_init(PCIDevice **pi440fx_state, qemu_irq *pic)
 {
+int nirq;
 PCIBus *b;
 PCIDevice *d;
 I440FXState *s;

+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_IA64)
+nirq = 48;
+#else
+nirq = 4;
+#endif
 s = qemu_mallocz(sizeof(I440FXState));
-b = pci_register_bus(piix3_set_irq, pci_slot_get_pirq, pic, 0, 4);
+b = pci_register_bus(piix3_set_irq, pci_slot_get_pirq, pic, 0,
nirq);
 s->bus = b;

 register_ioport_write(0xcf8, 4, 4, i440fx_addr_writel, s);
@@ -220,6 +232,11 @@ static void piix3_set_irq(qemu_irq *pic, int
irq_num, int level)
 {
 int i, pic_irq, pic_level;

+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_IA64)
+if(kvm_enabled())
+kvm_set_irq(irq_num, level);
+return;
+#endif
 pci_irq_levels[irq_num] = level;

 /* now we change the pic irq level according to the piix irq
mappings */
 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [kvm-ia64-devel] IRQ assignment

2008-05-21 Thread Xu, Anthony
Avi Kivity wrote:
> 
> x86 and ia64 have different DSDTs, so I don't see the need for dynamic
> generation.  The x86 DSDT can return different routing tables
> depending 
> on whether one or two ioapics are present (this can be detected at
> runtime). 

Okay, use fixed DSDT

- Anthony
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [kvm-ia64-devel] IRQ assignment

2008-05-21 Thread Xu, Anthony
Avi Kivity wrote:
> 
> With 24 free pins, that's fine.  With 8 free pins, less so.  We'll
> need to mix in more high bits.
> 
> I guess we need to increase the number of pins on x86 too.
> 
>> If use this method, we can share same IA64 guest BIOS between
>> XEN/IA64 and KVM/IA64. 
>> 
> 
> You can use this method for ia64, and we'll have a different function
> for x86 (perhaps two functions, if we later increase the number of
> pins to 48 (or even more); the DSDT will need to select the
> appropriate routing table according to what's present on the
> hardware). 
It can work if X86 and ia64 implement different "->map" function.

Use this kind of "fixed" algorithm may waste IOAPIC interrupt pin, due
to not every pci device will use up 4 irq.

Is it possible to let qemu dynamically build route table in DSDT?

Anthony


> 
> --
> error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html