[Xen-devel][Qemu][e1000] fix a TSE bug
Qemu e1000 NIC didn't work well for guest windows on xen/ia64. After investigation, it's a e1000 TSE bug. Previously, all data descriptors used TSE context descriptor by default, It's not correct, per spec, data descriptor uses TSE bit to indicate whether use TSE, Legacy data descripter never use TSE. This patch fixed this bug. Signed-off-by; Anthony Xu < [EMAIL PROTECTED] > diff -r 11318234588e tools/ioemu/hw/e1000.c --- a/tools/ioemu/hw/e1000.cThu Jun 19 12:48:04 2008 +0900 +++ b/tools/ioemu/hw/e1000.cMon Jul 07 16:23:42 2008 +0800 @@ -103,6 +103,7 @@ typedef struct E1000State_st { char tse; char ip; char tcp; +char cptse; //current packet tse bit } tx; struct { @@ -306,7 +307,7 @@ xmit_seg(E1000State *s) unsigned int frames = s->tx.tso_frames, css, sofar, n; struct e1000_tx *tp = &s->tx; -if (tp->tse) { +if (tp->tse && tp->cptse) { css = tp->ipcss; DBGOUT(TXSUM, "frames %d size %d ipcss %d\n", frames, tp->size, css); @@ -380,37 +381,49 @@ process_tx_desc(E1000State *s, struct e1 tp->tucso = tp->tucss + (tp->tcp ? 16 : 6); } return; -} else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) +} else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)){ +// data descriptor tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8; +tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0; +} else +// legacy descriptor +tp->cptse = 0; addr = le64_to_cpu(dp->buffer_addr); -if (tp->tse) { +if (tp->tse && tp->cptse) { hdr = tp->hdr_len; msh = hdr + tp->mss; +do { +bytes = split_size; +if (tp->size + bytes > msh) +bytes = msh - tp->size; +cpu_physical_memory_read(addr, tp->data + tp->size, bytes); +if ((sz = tp->size + bytes) >= hdr && tp->size < hdr) +memmove(tp->header, tp->data, hdr); +tp->size = sz; +addr += bytes; +if (sz == msh) { +xmit_seg(s); +memmove(tp->data, tp->header, hdr); +tp->size = hdr; +} +} while (split_size -= bytes); +} else if (!tp->tse && tp->cptse) { +// context descriptor TSE is not set, while data descriptor TSE is set +DBGOUT(TXERR, "TCP segmentaion Error\n"); +} else { +cpu_physical_memory_read(addr, tp->data + tp->size, split_size); +tp->size += split_size; } -do { -bytes = split_size; -if (tp->size + bytes > msh) -bytes = msh - tp->size; -cpu_physical_memory_read(addr, tp->data + tp->size, bytes); -if ((sz = tp->size + bytes) >= hdr && tp->size < hdr) -memmove(tp->header, tp->data, hdr); -tp->size = sz; -addr += bytes; -if (sz == msh) { -xmit_seg(s); -memmove(tp->data, tp->header, hdr); -tp->size = hdr; -} -} while (split_size -= bytes); if (!(txd_lower & E1000_TXD_CMD_EOP)) return; -if (tp->size > hdr) +if (!(tp->tse && tp->cptse && tp->size < hdr)) xmit_seg(s); tp->tso_frames = 0; tp->sum_needed = 0; tp->size = 0; +tp->cptse = 0; } static uint32_t qemu-e1000.patch Description: qemu-e1000.patch
RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel
Hi Jes, You can use this one, I'm considering how to make it more generic per AVI comment. Anthony Jes Sorensen wrote: > Hi Anthony, > > Looking back through this thread - do you happen to have an updated > patch for the irq problem? If not, could you let me know which is your > latest version and I will try and look at it. > > Cheers, > Jes 0002-Irq-assignment.patch Description: 0002-Irq-assignment.patch
RE: [PATCH] make qemu compile for kvm/ia64
Thanks for your comments It is the revised one >From 1a30adfc5ded3608ac2f09499b42234cf7d54a19 Mon Sep 17 00:00:00 2001 From: Anthony Xu <[EMAIL PROTECTED]> Date: Fri, 20 Jun 2008 10:45:13 -0400 Subject: [PATCH] Make qemu compile for kvm-ia64 Since merging with Qemu upsteram, it can't be compiled for kvm-ia64 Signed-off-by: Anthony Xu <[EMAIL PROTECTED]> --- qemu/Makefile.target |8 +++- qemu/cpu-exec.c |2 ++ qemu/exec.c |2 ++ qemu/target-ia64/cpu.h |3 --- qemu/target-ia64/exec.h |5 + qemu/target-ia64/fake-exec.c |6 ++ 6 files changed, 22 insertions(+), 4 deletions(-) diff --git a/qemu/Makefile.target b/qemu/Makefile.target index ac5eda1..a1491d0 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -201,8 +201,10 @@ ifdef CONFIG_DYNGEN_OP LIBOBJS+=op.o endif # TCG code generator +ifneq ($(TARGET_ARCH), ia64) LIBOBJS+= tcg/tcg.o tcg/tcg-dyngen.o tcg/tcg-runtime.o CPPFLAGS+=-I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/$(ARCH) +endif ifeq ($(ARCH),sparc64) CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc endif @@ -239,7 +241,11 @@ LIBOBJS+= qemu-kvm-powerpc.o endif endif -LIBOBJS+= op_helper.o helper.o +LIBOBJS+= op_helper.o + +ifneq ($(TARGET_ARCH), ia64) +LIBOBJS+= helper.o +endif ifeq ($(TARGET_BASE_ARCH), arm) LIBOBJS+= neon_helper.o iwmmxt_helper.o diff --git a/qemu/cpu-exec.c b/qemu/cpu-exec.c index 252927f..e8d1cce 100644 --- a/qemu/cpu-exec.c +++ b/qemu/cpu-exec.c @@ -21,7 +21,9 @@ #define CPU_NO_GLOBAL_REGS #include "exec.h" #include "disas.h" +#if !defined(TARGET_IA64) #include "tcg.h" +#endif #if !defined(CONFIG_SOFTMMU) #undef EAX diff --git a/qemu/exec.c b/qemu/exec.c index 37a956b..544b963 100644 --- a/qemu/exec.c +++ b/qemu/exec.c @@ -37,7 +37,9 @@ #include "exec-all.h" #include "qemu-common.h" +#if !defined(TARGET_IA64) #include "tcg.h" +#endif #include "qemu-kvm.h" #if defined(CONFIG_USER_ONLY) diff --git a/qemu/target-ia64/cpu.h b/qemu/target-ia64/cpu.h index f8e5e8a..12718f5 100644 --- a/qemu/target-ia64/cpu.h +++ b/qemu/target-ia64/cpu.h @@ -43,9 +43,6 @@ #include "softfloat.h" typedef struct CPUIA64State { CPU_COMMON; -/* exception/interrupt handling */ - jmp_buf jmp_env; - int exception_index; int interrupt_request; int user_mode_only; diff --git a/qemu/target-ia64/exec.h b/qemu/target-ia64/exec.h index 155cfa6..db1149a 100644 --- a/qemu/target-ia64/exec.h +++ b/qemu/target-ia64/exec.h @@ -26,6 +26,8 @@ uint32_t T0; uint32_t T1; uint32_t T2; +#define tcg_qemu_tb_exec(tb_ptr) 0 + static inline void env_to_regs(void) { } @@ -34,6 +36,9 @@ static inline void regs_to_env(void) { } +void tcg_dump_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); + void cpu_lock(void); void cpu_unlock(void); void cpu_loop_exit(void); diff --git a/qemu/target-ia64/fake-exec.c b/qemu/target-ia64/fake-exec.c index 0be4ffd..b6bd2cf 100644 --- a/qemu/target-ia64/fake-exec.c +++ b/qemu/target-ia64/fake-exec.c @@ -33,6 +33,12 @@ int cpu_ia64_gen_code(CPUState *env, TranslationBlock *tb, int *gen_code_size_pt return 0; } +void tcg_dump_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +{ +return; +} + void flush_icache_range(unsigned long start, unsigned long stop) { while (start < stop) { -- 1.5.5 Avi Kivity wrote: > Xu, Anthony wrote: >> From 7b2c2612b21b895cd14e632fea845c03b6e1dedc Mon Sep 17 00:00:00 >> 2001 From: Anthony Xu <[EMAIL PROTECTED]> >> Date: Thu, 29 May 2008 13:14:56 -0400 >> Subject: [PATCH] Make qemu compile for kvm-ia64 >> >> Since merging with Qemu upsteram, it can't be compiled >> for kvm-ia64 >> >> @@ -620,7 +622,10 @@ int cpu_exec(CPUState *env1) >> env = cpu_single_env; >> #define env cpu_single_env >> #endif >> + >> +#if !defined(TARGET_IA64) >> next_tb = tcg_qemu_tb_exec(tc_ptr); +#endif >> > > Instead of sprinkling all those ifdefs, how about proving stubs > instead? It will reduce the merge overhead, and will be easier to fix > if/when tcg can target ia64. 0001-Make-qemu-compile-for-kvm-ia64.patch Description: 0001-Make-qemu-compile-for-kvm-ia64.patch
[PATCH] irq assignment
>From 67806cc642cb666fb59fec60115fee37b80ecb3a Mon Sep 17 00:00:00 2001 From: Anthony Xu <[EMAIL PROTECTED]> Date: Wed, 18 Jun 2008 14:32:46 -0400 Subject: [PATCH] Irq assignment 1. use bimodal _PRT 2. pci device can use irq > 15, reduce interrupt sharing 3. test by running linux guest in kvm-ia64, kvm-i32(w/ wo/ -no-kvm) signed-off-by: Anthony Xu <[EMAIL PROTECTED]> --- bios/acpi-dsdt.dsl | 59 +++- qemu/hw/apic.c | 22 ++- qemu/hw/ipf.c | 24 + qemu/hw/pc.c |2 +- qemu/hw/pc.h |4 +++ qemu/hw/pci.c |8 +++ 6 files changed, 116 insertions(+), 3 deletions(-) diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl index d1bfa2c..62dd612 100755 --- a/bios/acpi-dsdt.dsl +++ b/bios/acpi-dsdt.dsl @@ -89,6 +89,12 @@ DefinitionBlock ( } } +Name (PICD, 0) + +Method(_PIC, 1) +{ +Store(Arg0, PICD) +} /* PCI Bus definition */ Scope(\_SB) { @@ -96,7 +102,15 @@ DefinitionBlock ( Name (_HID, EisaId ("PNP0A03")) Name (_ADR, 0x00) Name (_UID, 1) -Name(_PRT, Package() { + +Method (_PRT,0) { +If (PICD) { +Return (PRTA) +} +Return (PRTP) +} + +Name (PRTP, Package() { /* PCI IRQ routing table, example from ACPI 2.0a specification, section 6.2.8.1 */ /* Note: we provide the same info as the PCI routing @@ -147,6 +161,49 @@ DefinitionBlock ( prt_slot3(0x001f), }) +Name (PRTA, Package() { +/* IOAPIC use fixed connection */ + +#define aprt_slot(nr, irq) \ + Package() { nr##, 0, 0, irq }, \ + Package() { nr##, 1, 0, irq }, \ + Package() { nr##, 2, 0, irq }, \ + Package() { nr##, 3, 0, irq } + + aprt_slot(0x, 16), + aprt_slot(0x0001, 17), + aprt_slot(0x0002, 18), + aprt_slot(0x0003, 19), + aprt_slot(0x0004, 20), + aprt_slot(0x0005, 21), + aprt_slot(0x0006, 22), + aprt_slot(0x0007, 23), + aprt_slot(0x0008, 16), + aprt_slot(0x0009, 17), + aprt_slot(0x000a, 18), + aprt_slot(0x000b, 19), + aprt_slot(0x000c, 20), + aprt_slot(0x000d, 21), + aprt_slot(0x000e, 22), + aprt_slot(0x000f, 23), + aprt_slot(0x0010, 16), + aprt_slot(0x0011, 17), + aprt_slot(0x0012, 18), + aprt_slot(0x0013, 19), + aprt_slot(0x0014, 20), + aprt_slot(0x0015, 21), + aprt_slot(0x0016, 22), + aprt_slot(0x0017, 23), + aprt_slot(0x0018, 16), + aprt_slot(0x0019, 17), + aprt_slot(0x001a, 18), + aprt_slot(0x001b, 19), + aprt_slot(0x001c, 20), + aprt_slot(0x001d, 21), + aprt_slot(0x001e, 22), + aprt_slot(0x001f, 23), +}) + OperationRegion(PCST, SystemIO, 0xae00, 0x08) Field (PCST, DWordAcc, NoLock, WriteAsZeros) { diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c index 4ebf1ff..98bf3ad 100644 --- a/qemu/hw/apic.c +++ b/qemu/hw/apic.c @@ -1053,10 +1053,30 @@ static void ioapic_service(IOAPICState *s) } } +int ioapic_map_irq(int devfn, int irq_num) +{ +int irq; +irq = ((devfn >> 3) & 7) + 16; +return irq; +} + +static int ioapic_irq_count[IOAPIC_NUM_PINS]; + void ioapic_set_irq(void *opaque, int vector, int level) { IOAPICState *s = opaque; - +if( vector >= 16 ){ + if( level ) + ioapic_irq_count[vector] += 1; + else + ioapic_irq_count[vector] -= 1; + level = (ioapic_irq_count[vector] != 0); +} +#ifdef KVM_CAP_IRQCHIP +if (kvm_enabled()) + if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0)) + return; +#endif if (vector >= 0 && vector < IOAPIC_NUM_PINS) { uint32_t mask = 1 << vector; uint64_t entry = s->ioredtbl[vector]; diff --git a/qemu/hw/ipf.c b/qemu/hw/ipf.c index b11e328..dabd7cc 100644 --- a/qemu/hw/ipf.c +++ b/qemu/hw/ipf.c @@ -672,3 +672,27 @@ QEMUMachine ipf_machine = { ipf_init_pci, VGA_RAM_SIZE + VGA_RAM_SIZE, }; + +#define IOAPIC_NUM_PINS 48 +static int ioapic_irq_count[IOAPIC_NUM_PINS]; + +int ioapic_map_irq(int devfn, int irq_num) +{ +int irq, dev; +dev = devfn >> 3; +irq = dev << 2) + (dev >> 3) + irq_num) & 31) + 16); +return irq; +} + +void ioapic_set_irq(void *opaque, int vector, int level) +{ +if( level ) +ioapic_irq_count[vector] += 1; +else +ioapic_irq_count[vector] -= 1; + +if (kvm_enabled()) +
[PATCH] make qemu compile for kvm/ia64
>From 7b2c2612b21b895cd14e632fea845c03b6e1dedc Mon Sep 17 00:00:00 2001 From: Anthony Xu <[EMAIL PROTECTED]> Date: Thu, 29 May 2008 13:14:56 -0400 Subject: [PATCH] Make qemu compile for kvm-ia64 Since merging with Qemu upsteram, it can't be compiled for kvm-ia64 Signed-off-by: Anthony Xu < [EMAIL PROTECTED] > --- qemu/Makefile.target |8 +++- qemu/cpu-exec.c|5 + qemu/exec.c|4 qemu/target-ia64/cpu.h |3 --- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/qemu/Makefile.target b/qemu/Makefile.target index ac5eda1..a1491d0 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -201,8 +201,10 @@ ifdef CONFIG_DYNGEN_OP LIBOBJS+=op.o endif # TCG code generator +ifneq ($(TARGET_ARCH), ia64) LIBOBJS+= tcg/tcg.o tcg/tcg-dyngen.o tcg/tcg-runtime.o CPPFLAGS+=-I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/$(ARCH) +endif ifeq ($(ARCH),sparc64) CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc endif @@ -239,7 +241,11 @@ LIBOBJS+= qemu-kvm-powerpc.o endif endif -LIBOBJS+= op_helper.o helper.o +LIBOBJS+= op_helper.o + +ifneq ($(TARGET_ARCH), ia64) +LIBOBJS+= helper.o +endif ifeq ($(TARGET_BASE_ARCH), arm) LIBOBJS+= neon_helper.o iwmmxt_helper.o diff --git a/qemu/cpu-exec.c b/qemu/cpu-exec.c index 252927f..b61e9aa 100644 --- a/qemu/cpu-exec.c +++ b/qemu/cpu-exec.c @@ -21,7 +21,9 @@ #define CPU_NO_GLOBAL_REGS #include "exec.h" #include "disas.h" +#if !defined(TARGET_IA64) #include "tcg.h" +#endif #if !defined(CONFIG_SOFTMMU) #undef EAX @@ -620,7 +622,10 @@ int cpu_exec(CPUState *env1) env = cpu_single_env; #define env cpu_single_env #endif + +#if !defined(TARGET_IA64) next_tb = tcg_qemu_tb_exec(tc_ptr); +#endif env->current_tb = NULL; /* reset soft MMU for next block (it can currently only be set by a memory fault) */ diff --git a/qemu/exec.c b/qemu/exec.c index 37a956b..3dd49b5 100644 --- a/qemu/exec.c +++ b/qemu/exec.c @@ -37,7 +37,9 @@ #include "exec-all.h" #include "qemu-common.h" +#if !defined(TARGET_IA64) #include "tcg.h" +#endif #include "qemu-kvm.h" #if defined(CONFIG_USER_ONLY) @@ -3197,7 +3199,9 @@ void dump_exec_info(FILE *f, cpu_fprintf(f, "TB flush count %d\n", tb_flush_count); cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count); cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count); +#if !defined(TARGET_IA64) tcg_dump_info(f, cpu_fprintf); +#endif } #if !defined(CONFIG_USER_ONLY) diff --git a/qemu/target-ia64/cpu.h b/qemu/target-ia64/cpu.h index f8e5e8a..12718f5 100644 --- a/qemu/target-ia64/cpu.h +++ b/qemu/target-ia64/cpu.h @@ -43,9 +43,6 @@ #include "softfloat.h" typedef struct CPUIA64State { CPU_COMMON; -/* exception/interrupt handling */ - jmp_buf jmp_env; - int exception_index; int interrupt_request; int user_mode_only; -- 1.5.5 0001-Make-qemu-compile-for-kvm-ia64.patch Description: 0001-Make-qemu-compile-for-kvm-ia64.patch
RE: [RFC] kvm irq assignment
> This does look like common code to me, so it might be a good idea to > share it with ia32. AFAIK apics are the same on ia32 and ia64. > [Anthony] I wanted to do the same thing as what you said, while > currently KVM/ia64 doesn't include apic.c file, I tried to add > apic.c, but failed to complie > I'll try to make it work. Further investigation show there are many ia32 specific codes in apic.c file. It may incur a lot of modifications if we want to share apic.c with IA64. For example, s->apicbase = (val & 0xf000) | (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE)); /* if disabled, cannot be enabled again */ if (!(val & MSR_IA32_APICBASE_ENABLE)) { s->apicbase &= ~MSR_IA32_APICBASE_ENABLE; env->cpuid_features &= ~CPUID_APIC; s->spurious_vec &= ~APIC_SV_ENABLE; BTW IA64 is using SAPIC not APIC, there are some difference. For KVM/IA64, only two functions ioapic_map_irq and ioapic_set_irq are needed, and Native qemu doesn't support guest IA64 platform yet. So, the simple way is to put these two functions in ia64 specific file, ipf.c Thanks, Anthony Xu, Anthony wrote: > > > From: Alexander Graf [mailto:[EMAIL PROTECTED] > Sent: 2008年6月13日 22:31 > To: Xu, Anthony > Cc: Avi Kivity; Marcelo Tosatti; Jes Sorensen; kvm@vger.kernel.org; > [EMAIL PROTECTED] > Subject: Re: [RFC] kvm irq assignment > > > > On Jun 12, 2008, at 11:38 PM, Xu, Anthony wrote: > > > Hi Avi and all > > This is the revised one, > > All PCI devices send interrupt to both PIC and IOAPIC, > a). When PIC is enabled and IOAPIC is disabled, all redirect > entries in IOAPIC are masked. > B) When PIC is disabled and IPAPIC is enabled, link entry bit7 is > set, means this link entry is disable. > Guest OS need to guarantee PIC and IOAPIC are not enabled in the same > time. Otherwise cause many suspicious interrupt to guest. > > > At boottime the IOAPIC is completely masked, while the PIC is > completely unmasked. Somewhere during bootup ACPI compliant OSs can > call _PIC to switch to IOAPIC mode and somehow deactivate the PIC > lines and activate IOAPIC lines for the devices they are interested > in. > > I sent a tool called "apicdump" to this list some time ago that > allows you to read the PIC and IOAPIC from within Linux. > > > [Anthony] nice tool, I'll try that. > > > > Test by running guest linux in kvm/ia32 and kvm/ia64. > > > Thanks, > Anthony > > > > diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl > index 21fc76a..e12fd66 100755 > --- a/bios/acpi-dsdt.dsl > +++ b/bios/acpi-dsdt.dsl > > > ACPI changes look fine to me. > > > > > > > > [snip] > > > diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c > index a14cab2..c3014fa 100644 > --- a/qemu/hw/apic.c > +++ b/qemu/hw/apic.c > @@ -1053,9 +1053,25 @@ static void ioapic_service(IOAPICState *s) > } > } > > +int ioapic_map_irq(int devfn, int irq_num) > +{ > +int irq; > +irq = ((devfn >> 3) & 7) + 16; > +return irq; > +} > +#ifdef KVM_CAP_IRQCHIP > +static int ioapic_irq_count[IOAPIC_NUM_PINS]; > +#endif > + > void ioapic_set_irq(void *opaque, int vector, int level) > { > IOAPICState *s = opaque; > +#ifdef KVM_CAP_IRQCHIP > +ioapic_irq_count[vector] += level; > +if (kvm_enabled()) > + if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0)) > +return; > +#endif > > if (vector >= 0 && vector < IOAPIC_NUM_PINS) { > uint32_t mask = 1 << vector; > diff --git a/qemu/hw/ipf.c b/qemu/hw/ipf.c > index b11e328..4761463 100644 > --- a/qemu/hw/ipf.c > +++ b/qemu/hw/ipf.c > @@ -672,3 +672,23 @@ QEMUMachine ipf_machine = { > ipf_init_pci, > VGA_RAM_SIZE + VGA_RAM_SIZE, > }; > + > +#define IOAPIC_NUM_PINS 48 > +static int ioapic_irq_count[IOAPIC_NUM_PINS]; > + > +int ioapic_map_irq(int devfn, int irq_num) > +{ > +int irq, dev; > +dev = devfn >> 3; > +irq = dev << 2) + (dev >> 3) + irq_num) & 31) + 16); > +return irq; > +} > > > Why does this look different from the one in apic.c? > [Anthony] it is used for ia64, I would like to
RE: [RFC] kvm irq assignment
>> I think you should avoid any changes to pci.c. Perhaps create a new >> ioapic_and_pic_map / ioapic_and_pic_set pair of functions and change >> pc.c to use that instead of piix_set_irq. > I'll consider how to do this > But pci.c includes below code section, which implements irq_num mapping through interrupt link, While ioapic_set_irq doesn't need this modification. Seems, it's unavoidable to modify pci.c Thanks, Anthony pci_dev->irq_state[irq_num] = level; for (;;) { bus = pci_dev->bus; irq_num = bus->map_irq(pci_dev, irq_num); if (bus->set_irq) break; pci_dev = bus->parent_dev; Xu, Anthony wrote: > Marcelo Tosatti wrote: >> Hi Anthony, >> >> On Fri, Jun 13, 2008 at 02:38:08PM +0800, Xu, Anthony wrote: >>> Hi Avi and all >>> >>> This is the revised one, >>> >>> All PCI devices send interrupt to both PIC and IOAPIC, >>> a). When PIC is enabled and IOAPIC is disabled, all redirect >>> entries in IOAPIC are masked. B) When PIC is disabled and IPAPIC is >>> enabled, link entry bit7 is set, means this link entry is disable. >>> Guest OS need to guarantee PIC and IOAPIC are not enabled in the >>> same time. Otherwise cause many suspicious interrupt to guest. >>> >>> Test by running guest linux in kvm/ia32 and kvm/ia64. >> >> Interrupt sharing is stable under Linux, PCI hotplug is happy, and >> Windows is happy. Ship it! > Thanks, I will > >> >> I had to apply your patch by hand, your mailer eats newlines and >> other nasty things, please fix that (or send attached patches). >> Sorry, I'll attach patch. > >> >>> >>> +Name (PICD, 0) >>> >>> -/* PCI Bus definition */ >>> +Method(_PIC, 1) >>> +{ >>> +Store(Arg0, PICD) >>> +} >>> + >>> +/*PCI Bus definition */ >> >> Why did you take off the space before the "P" of PCI? Before you ask >> me, no, I don't have anything better to do :) typo > >> >>> Scope(\_SB) { >>> Device(PCI0) { >>> Name (_HID, EisaId ("PNP0A03")) >>> Name (_ADR, 0x00) >>> Name (_UID, 1) >>> -Name(_PRT, Package() { >>> + >>> +Method(_PRT,0){ >>> +If(PICD){ >> >> Put some spaces there too. > Okay > >> >>> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c >>> index a23a466..f96fbb5 100644 >>> --- a/qemu/hw/pci.c >>> +++ b/qemu/hw/pci.c >>> @@ -27,6 +27,8 @@ >>> #include "net.h" >>> #include "pc.h" >>> >>> +#include "qemu-kvm.h" >>> + >>> //#define DEBUG_PCI >>> >>> struct PCIBus { >>> @@ -534,12 +536,18 @@ static void pci_set_irq(void *opaque, int >>> irq_num, int level) PCIDevice *pci_dev = (PCIDevice *)opaque; >>> PCIBus *bus; int change; >>> - >>> +#ifdef KVM_CAP_IRQCHIP >>> +int irq; >>> +#endif >>> change = level - pci_dev->irq_state[irq_num]; >>> if (!change) >>> return; >>> >>> pci_dev->irq_state[irq_num] = level; >>> +#ifdef KVM_CAP_IRQCHIP >>> +irq = ioapic_map_irq(pci_dev->devfn, irq_num); >>> +ioapic_set_irq(opaque, irq, change); >>> +#endif >> >> I think you should avoid any changes to pci.c. Perhaps create a new >> ioapic_and_pic_map / ioapic_and_pic_set pair of functions and change >> pc.c to use that instead of piix_set_irq. > I'll consider how to do this > >> >> Other than that (and KVM_CAP_IRQCHIP mentioned by Avi, along with >> making sure this works with "-no-kvm") looks great. Agree > >> >> Regarding the non-PIIX link devices I mentioned, that can be done >> later if necessary. > Agree, if we need to support dynamic irq, or support multiple IOAPIC. > > > > > Anthony -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [RFC] kvm irq assignment
Avi Kivity wrote: > Xu, Anthony wrote: >> Hi Avi and all >> >> This is the revised one, >> >> All PCI devices send interrupt to both PIC and IOAPIC, >> a). When PIC is enabled and IOAPIC is disabled, all redirect >> entries in IOAPIC are masked. B) When PIC is disabled and IPAPIC is >> enabled, link entry bit7 is set, means this link entry is disable. >> Guest OS need to guarantee PIC and IOAPIC are not enabled in the same >> time. Otherwise cause many suspicious interrupt to guest. >> >> Test by running guest linux in kvm/ia32 and kvm/ia64. >> >> >> > > Looks good. > >> diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c >> index a14cab2..c3014fa 100644 >> --- a/qemu/hw/apic.c >> +++ b/qemu/hw/apic.c >> @@ -1053,9 +1053,25 @@ static void ioapic_service(IOAPICState *s) >> } } >> >> +int ioapic_map_irq(int devfn, int irq_num) >> +{ >> +int irq; >> +irq = ((devfn >> 3) & 7) + 16; >> +return irq; >> +} >> +#ifdef KVM_CAP_IRQCHIP >> +static int ioapic_irq_count[IOAPIC_NUM_PINS]; >> +#endif >> + >> void ioapic_set_irq(void *opaque, int vector, int level) { >> IOAPICState *s = opaque; >> +#ifdef KVM_CAP_IRQCHIP >> +ioapic_irq_count[vector] += level; >> +if (kvm_enabled()) >> +if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0)) + >> return; +#endif >> > > I think this can be done more cleanly using the qemu_irq > infrastructure. qem_irq_invert can do the inversion, and to get the > "irq fork", you can have a qemu_irq instance that forwards its > argument to two other qemu_irq instances. I'm thinking how to make it clean and generic > > There shouldn't be any #ifdef KVM_CAP_IRQCHIPs in there - it should > work for plain qemu as well (well, if we fix qemu ioapic polarity > code). Agree > >> diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c >> index 90cb3a6..96316ca 100644 >> --- a/qemu/hw/piix_pci.c >> +++ b/qemu/hw/piix_pci.c >> @@ -225,6 +226,9 @@ static void piix3_set_irq(qemu_irq *pic, int >> irq_num, int level) /* now we change the pic irq level >> according to the piix irq mappings */ /* XXX: optimize */ >> pic_irq = piix3_dev->config[0x60 + irq_num]; >> +/* if bit7 set 1, this link is disabled */ >> +if (pic_irq & 0x80) >> +return; >> > > Already caught by the test below... hacky. Didn't see that, thanks > >> if (pic_irq < 16) { >> /* The pic level is the logical OR of all the PCI irqs >> mapped to it */ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [RFC] kvm irq assignment
Marcelo Tosatti wrote: > Hi Anthony, > > On Fri, Jun 13, 2008 at 02:38:08PM +0800, Xu, Anthony wrote: >> Hi Avi and all >> >> This is the revised one, >> >> All PCI devices send interrupt to both PIC and IOAPIC, >> a). When PIC is enabled and IOAPIC is disabled, all redirect >> entries in IOAPIC are masked. B) When PIC is disabled and IPAPIC is >> enabled, link entry bit7 is set, means this link entry is disable. >> Guest OS need to guarantee PIC and IOAPIC are not enabled in the same >> time. Otherwise cause many suspicious interrupt to guest. >> >> Test by running guest linux in kvm/ia32 and kvm/ia64. > > Interrupt sharing is stable under Linux, PCI hotplug is happy, and > Windows is happy. Ship it! Thanks, I will > > I had to apply your patch by hand, your mailer eats newlines and other > nasty things, please fix that (or send attached patches). Sorry, I'll attach patch. > >> >> +Name (PICD, 0) >> >> -/* PCI Bus definition */ >> +Method(_PIC, 1) >> +{ >> +Store(Arg0, PICD) >> +} >> + >> +/*PCI Bus definition */ > > Why did you take off the space before the "P" of PCI? Before you ask > me, no, I don't have anything better to do :) typo > >> Scope(\_SB) { >> Device(PCI0) { >> Name (_HID, EisaId ("PNP0A03")) >> Name (_ADR, 0x00) >> Name (_UID, 1) >> -Name(_PRT, Package() { >> + >> +Method(_PRT,0){ >> +If(PICD){ > > Put some spaces there too. Okay > >> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c >> index a23a466..f96fbb5 100644 >> --- a/qemu/hw/pci.c >> +++ b/qemu/hw/pci.c >> @@ -27,6 +27,8 @@ >> #include "net.h" >> #include "pc.h" >> >> +#include "qemu-kvm.h" >> + >> //#define DEBUG_PCI >> >> struct PCIBus { >> @@ -534,12 +536,18 @@ static void pci_set_irq(void *opaque, int >> irq_num, int level) PCIDevice *pci_dev = (PCIDevice *)opaque; >> PCIBus *bus; >> int change; >> - >> +#ifdef KVM_CAP_IRQCHIP >> +int irq; >> +#endif >> change = level - pci_dev->irq_state[irq_num]; >> if (!change) >> return; >> >> pci_dev->irq_state[irq_num] = level; >> +#ifdef KVM_CAP_IRQCHIP >> +irq = ioapic_map_irq(pci_dev->devfn, irq_num); >> +ioapic_set_irq(opaque, irq, change); >> +#endif > > I think you should avoid any changes to pci.c. Perhaps create a new > ioapic_and_pic_map / ioapic_and_pic_set pair of functions and change > pc.c to use that instead of piix_set_irq. I'll consider how to do this > > Other than that (and KVM_CAP_IRQCHIP mentioned by Avi, along with > making sure this works with "-no-kvm") looks great. Agree > > Regarding the non-PIIX link devices I mentioned, that can be done > later if necessary. Agree, if we need to support dynamic irq, or support multiple IOAPIC. Anthony -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [RFC] kvm irq assignment
From: Alexander Graf [mailto:[EMAIL PROTECTED] Sent: 2008年6月13日 22:31 To: Xu, Anthony Cc: Avi Kivity; Marcelo Tosatti; Jes Sorensen; kvm@vger.kernel.org; [EMAIL PROTECTED] Subject: Re: [RFC] kvm irq assignment On Jun 12, 2008, at 11:38 PM, Xu, Anthony wrote: Hi Avi and all This is the revised one, All PCI devices send interrupt to both PIC and IOAPIC, a). When PIC is enabled and IOAPIC is disabled, all redirect entries in IOAPIC are masked. B) When PIC is disabled and IPAPIC is enabled, link entry bit7 is set, means this link entry is disable. Guest OS need to guarantee PIC and IOAPIC are not enabled in the same time. Otherwise cause many suspicious interrupt to guest. At boottime the IOAPIC is completely masked, while the PIC is completely unmasked. Somewhere during bootup ACPI compliant OSs can call _PIC to switch to IOAPIC mode and somehow deactivate the PIC lines and activate IOAPIC lines for the devices they are interested in. I sent a tool called "apicdump" to this list some time ago that allows you to read the PIC and IOAPIC from within Linux. [Anthony] nice tool, I'll try that. Test by running guest linux in kvm/ia32 and kvm/ia64. Thanks, Anthony diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl index 21fc76a..e12fd66 100755 --- a/bios/acpi-dsdt.dsl +++ b/bios/acpi-dsdt.dsl ACPI changes look fine to me. [snip] diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c index a14cab2..c3014fa 100644 --- a/qemu/hw/apic.c +++ b/qemu/hw/apic.c @@ -1053,9 +1053,25 @@ static void ioapic_service(IOAPICState *s) } } +int ioapic_map_irq(int devfn, int irq_num) +{ +int irq; +irq = ((devfn >> 3) & 7) + 16; +return irq; +} +#ifdef KVM_CAP_IRQCHIP +static int ioapic_irq_count[IOAPIC_NUM_PINS]; +#endif + void ioapic_set_irq(void *opaque, int vector, int level) { IOAPICState *s = opaque; +#ifdef KVM_CAP_IRQCHIP +ioapic_irq_count[vector] += level; +if (kvm_enabled()) + if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0)) +return; +#endif if (vector >= 0 && vector < IOAPIC_NUM_PINS) { uint32_t mask = 1 << vector; diff --git a/qemu/hw/ipf.c b/qemu/hw/ipf.c index b11e328..4761463 100644 --- a/qemu/hw/ipf.c +++ b/qemu/hw/ipf.c @@ -672,3 +672,23 @@ QEMUMachine ipf_machine = { ipf_init_pci, VGA_RAM_SIZE + VGA_RAM_SIZE, }; + +#define IOAPIC_NUM_PINS 48 +static int ioapic_irq_count[IOAPIC_NUM_PINS]; + +int ioapic_map_irq(int devfn, int irq_num) +{ +int irq, dev; +dev = devfn >> 3; +irq = dev << 2) + (dev >> 3) + irq_num) & 31) + 16); +return irq; +} Why does this look different from the one in apic.c? [Anthony] it is used for ia64, I would like to use same guest Firmware of XEN/ia64 in KVM/IA64, I "copy" the mapping from XEN/IA64. + +void ioapic_set_irq(void *opaque, int vector, int level) +{ +ioapic_irq_count[vector] += level; +if (kvm_enabled()) + if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0)) +return; +} + This does look like common code to me, so it might be a good idea to share it with ia32. AFAIK apics are the same on ia32 and ia64. [Anthony] I wanted to do the same thing as what you said, while currently KVM/ia64 doesn't include apic.c file, I tried to add apic.c, but failed to complie I'll try to make it work. diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h index c284bf1..ef09a78 100644 --- a/qemu/hw/pc.h +++ b/qemu/hw/pc.h @@ -47,6 +47,7 @@ int apic_accept_pic_intr(CPUState *env); void apic_local_deliver(CPUState *env, int vector); int apic_get_interrupt(CPUState *env); IOAPICState *ioapic_init(void); +int ioapic_map_irq(int devfn, int irq_num); void ioapic_set_irq(void *opaque, int vector, int level); /* i8254.c */ diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index a23a466..f96fbb5 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -27,6 +27,8 @@ #include "net.h" #include "pc.h" +#include "qemu-kvm.h" + //#define DEBUG_PCI struct PCIBus { @@ -534,12 +536,18 @@ stati
RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel
Marcelo Tosatti wrote: > On Fri, Jun 13, 2008 at 12:15:23AM +0800, Xu, Anthony wrote: >>> I think it would be better to avoid static PCI pin -> IOAPIC pin >>> assignments, if PCI link devices can be used (allowing the OS to >>> route IRQ's as it wishes to). >> Seems PCI link device only support irq-pin < 16, >> IOAPIC pin 16~23 can not be used. >> >> >> >>> >>> Take a look at http://www.microsoft.com/whdc/archive/acpi-mp.mspx. >>> It seems cleaner to use "bimodal link nodes" (using the parlance >>> from URL above) instead of "bimodal _PRT" as your present GSI patch >>> is using. >> Bimodal _PRT is a great idea, I never thought of it before, thanks. >> >> While in PIIX platform there are only 4 PCI link entries, how can we >> introduce more? Where to put these added entries? >> still in ISA bridge configure space. > > Would have to write an ACPI-IOAPIC "IRQ router" to replace PIIX. It > would be queried via a SystemIO region, so QEMU can know what IRQ > has been assigned to a particular slot/func (OS can then change IRQ > assignment via link device _SRS method). > > That seems to be necessary for dynamic IRQ assignment of > slots/function once you have more than one IOAPIC (note we can also > assign one IRQ to each function inside each slot, currently there's > one IRQ per _slot_). Agree, it is necessary for dynamic IRQ assignment. > >> Another concern is, can this link use irq-pin > 15? >> In the example ASL code in the web page you provided, they use >> irq-pin <=15 > > Sure it can, as long as the OS has notified its not using PIIX's PIC > (via the _PIC method). Good. Anthony -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [RFC] kvm irq assignment
q_count[vector] == 0)) + return; +#endif if (vector >= 0 && vector < IOAPIC_NUM_PINS) { uint32_t mask = 1 << vector; diff --git a/qemu/hw/ipf.c b/qemu/hw/ipf.c index b11e328..4761463 100644 --- a/qemu/hw/ipf.c +++ b/qemu/hw/ipf.c @@ -672,3 +672,23 @@ QEMUMachine ipf_machine = { ipf_init_pci, VGA_RAM_SIZE + VGA_RAM_SIZE, }; + +#define IOAPIC_NUM_PINS 48 +static int ioapic_irq_count[IOAPIC_NUM_PINS]; + +int ioapic_map_irq(int devfn, int irq_num) +{ +int irq, dev; +dev = devfn >> 3; +irq = dev << 2) + (dev >> 3) + irq_num) & 31) + 16); +return irq; +} + +void ioapic_set_irq(void *opaque, int vector, int level) +{ +ioapic_irq_count[vector] += level; +if (kvm_enabled()) + if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0)) + return; +} + diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h index c284bf1..ef09a78 100644 --- a/qemu/hw/pc.h +++ b/qemu/hw/pc.h @@ -47,6 +47,7 @@ int apic_accept_pic_intr(CPUState *env); void apic_local_deliver(CPUState *env, int vector); int apic_get_interrupt(CPUState *env); IOAPICState *ioapic_init(void); +int ioapic_map_irq(int devfn, int irq_num); void ioapic_set_irq(void *opaque, int vector, int level); /* i8254.c */ diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index a23a466..f96fbb5 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -27,6 +27,8 @@ #include "net.h" #include "pc.h" +#include "qemu-kvm.h" + //#define DEBUG_PCI struct PCIBus { @@ -534,12 +536,18 @@ static void pci_set_irq(void *opaque, int irq_num, int level) PCIDevice *pci_dev = (PCIDevice *)opaque; PCIBus *bus; int change; - +#ifdef KVM_CAP_IRQCHIP +int irq; +#endif change = level - pci_dev->irq_state[irq_num]; if (!change) return; pci_dev->irq_state[irq_num] = level; +#ifdef KVM_CAP_IRQCHIP +irq = ioapic_map_irq(pci_dev->devfn, irq_num); +ioapic_set_irq(opaque, irq, change); +#endif for (;;) { bus = pci_dev->bus; irq_num = bus->map_irq(pci_dev, irq_num); diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c index 90cb3a6..96316ca 100644 --- a/qemu/hw/piix_pci.c +++ b/qemu/hw/piix_pci.c @@ -225,6 +226,9 @@ static void piix3_set_irq(qemu_irq *pic, int irq_num, int level) /* now we change the pic irq level according to the piix irq mappings */ /* XXX: optimize */ pic_irq = piix3_dev->config[0x60 + irq_num]; +/* if bit7 set 1, this link is disabled */ +if (pic_irq & 0x80) +return; if (pic_irq < 16) { /* The pic level is the logical OR of all the PCI irqs mapped to it */ Xu, Anthony wrote: > Avi Kivity wrote: >> Xu, Anthony wrote: >>> Hi all, >>> Thanks for your comments. >>> >>> I made this new patch based on your comments >>> >>> 1. use bimodal _PRT, to take advantage of IOAPIC pin 16~23 >>> the mapping is simple, slot -> (slot&7)+16 IOAPIC pin, >>> someone may provide good mapping ? >>> >> >> I think it's fine. If we find a better one later, or if we add >> another ioapic, we can easily change it since the bios and qemu are >> shipped as a unit. >> >>> 2. use ISA-bridge configure space 0x64 byte as a communication >>> mechansim. When guest BIOS invokes _PIC, the value is passed to >>> qemu through byte 0x64. qemu know whether it is PIC >>> mode and APIC mode by checking byte 0x64. >>> 3. pci_slot_get_pirq and piix3_set_irq adopt different operation >>> based on PIC mode/APIC mode >>> >> >> I'm not sure how real hardware works, but I _think_ that it routes >> irqs unconditionally to both the legacy path and directly to the >> ioapic. So for example if slot 5 asserts an interrupt, we map it >> through the pci link mapping and generate an active high interrupt to >> one of {5, 10, 11} (both pic and ioapic), and simultaneously an >> active low interrupt to ioapic pin 21. > I think what you described is correct. > > >> >> The _PIC method should disable the link interrupts if ioapic mode is >> disabled. > Typo! If ioapic mode is enabled. > > From x86 BIOS, OS disable link interrupt through link device _DIS > mothod. > > >> >> This removes the need for communication between the bios and qemu. >> Agree > >> >>> >>> +/* APIC and PIC flag */ >>> +OperationRegion (P40D, PCI_Config, 0x64, 0x01) + >>> >> >> This is actually SERIRQC, serial irq control. >> >>> + >>> +#ifdef KVM_CAP_IRQCHIP >>> >> >> This should be unconditional. >> >
RE: [RFC] kvm irq assignment
Avi Kivity wrote: > Xu, Anthony wrote: >> Hi all, >> Thanks for your comments. >> >> I made this new patch based on your comments >> >> 1. use bimodal _PRT, to take advantage of IOAPIC pin 16~23 >> the mapping is simple, slot -> (slot&7)+16 IOAPIC pin, >> someone may provide good mapping ? >> > > I think it's fine. If we find a better one later, or if we add another > ioapic, we can easily change it since the bios and qemu are shipped > as a unit. > >> 2. use ISA-bridge configure space 0x64 byte as a communication >> mechansim. When guest BIOS invokes _PIC, the value is passed to qemu >> through byte 0x64. >> qemu know whether it is PIC mode and APIC mode by >> checking byte 0x64. >> 3. pci_slot_get_pirq and piix3_set_irq adopt different operation >> based on PIC mode/APIC mode >> > > I'm not sure how real hardware works, but I _think_ that it routes > irqs unconditionally to both the legacy path and directly to the > ioapic. So for example if slot 5 asserts an interrupt, we map it > through the pci link mapping and generate an active high interrupt to > one of {5, 10, 11} (both pic and ioapic), and simultaneously an > active low interrupt to ioapic pin 21. I think what you described is correct. > > The _PIC method should disable the link interrupts if ioapic mode is > disabled. Typo! If ioapic mode is enabled. >From x86 BIOS, OS disable link interrupt through link device _DIS mothod. > > This removes the need for communication between the bios and qemu. Agree > >> >> +/* APIC and PIC flag */ >> +OperationRegion (P40D, PCI_Config, 0x64, 0x01) + >> > > This is actually SERIRQC, serial irq control. > >> + >> +#ifdef KVM_CAP_IRQCHIP >> > > This should be unconditional. > >> +static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) +{ >> +int slot_addend; >> +if( piix3_dev->config[0x64]) // APIC mode >> +return ((pci_dev->devfn >> 3) & 7)+16; >> +else { // PIC mode >> +slot_addend = (pci_dev->devfn >> 3) - 1; >> +return (irq_num + slot_addend) & 3; >> +} >> +} >> > > What I'm suggesting is to "fork" the interrupt into two lines, one > legacy path and the ioapic path. I'll try this way. Anthony -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC] kvm irq assignment
Hi all, Thanks for your comments. I made this new patch based on your comments 1. use bimodal _PRT, to take advantage of IOAPIC pin 16~23 the mapping is simple, slot -> (slot&7)+16 IOAPIC pin, someone may provide good mapping ? 2. use ISA-bridge configure space 0x64 byte as a communication mechansim. When guest BIOS invokes _PIC, the value is passed to qemu through byte 0x64. qemu know whether it is PIC mode and APIC mode by checking byte 0x64. 3. pci_slot_get_pirq and piix3_set_irq adopt different operation based on PIC mode/APIC mode 4. All pci devices are still using active high level-triggered interrupt, while IOAPIC pin 16~23 use active low level-triggered interrupt. piix3_set_irq is responsible to reserve the level if it is in APIC mode. 5. interrupt sharing for PCI devices is still supported Test by runing guest linux, NIC is using IOAPIC pin > 15 0: 980211IO-APIC-edge timer 1:179IO-APIC-edge i8042 8: 0IO-APIC-edge rtc 9: 0 IO-APIC-level acpi 12:289IO-APIC-edge i8042 14: 3266IO-APIC-edge ide0 15: 13489IO-APIC-edge ide1 185:485 IO-APIC-level eth0 Didn't try guest linux only with PIC, I think it works, because the path for PIC is not changed at all. Please comment! Thanks, Anthony diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl index 21fc76a..64219f2 100755 --- a/bios/acpi-dsdt.dsl +++ b/bios/acpi-dsdt.dsl @@ -201,14 +201,24 @@ DefinitionBlock ( } } +Name (PICD, 0) -/* PCI Bus definition */ + +/*PCI Bus definition */ Scope(\_SB) { Device(PCI0) { Name (_HID, EisaId ("PNP0A03")) Name (_ADR, 0x00) Name (_UID, 1) -Name(_PRT, Package() { + +Method(_PRT,0){ +If(PICD){ +Return(PRTA) +} +Return(PRTP) +} + +Name(PRTP, Package() { /* PCI IRQ routing table, example from ACPI 2.0a specification, section 6.2.8.1 */ /* Note: we provide the same info as the PCI routing @@ -407,6 +417,202 @@ DefinitionBlock ( Package() {0x001f, 3, LNKB, 0}, }) +Name(PRTA, Package() { +/* IOAPIC use fixed connection */ + +// PCI Slot 0 +Package() {0x, 0, 0, 16}, +Package() {0x, 1, 0, 16}, +Package() {0x, 2, 0, 16}, +Package() {0x, 3, 0, 16}, + +// PCI Slot 1 +Package() {0x0001, 0, 0, 17}, +Package() {0x0001, 1, 0, 17}, +Package() {0x0001, 2, 0, 17}, +Package() {0x0001, 3, 0, 17}, + +// PCI Slot 2 +Package() {0x0002, 0, 0, 18}, +Package() {0x0002, 1, 0, 18}, +Package() {0x0002, 2, 0, 18}, +Package() {0x0002, 3, 0, 18}, + +// PCI Slot 3 +Package() {0x0003, 0, 0, 19}, +Package() {0x0003, 1, 0, 19}, +Package() {0x0003, 2, 0, 19}, +Package() {0x0003, 3, 0, 19}, + +// PCI Slot 4 +Package() {0x0004, 0, 0, 20}, +Package() {0x0004, 1, 0, 20}, +Package() {0x0004, 2, 0, 20}, +Package() {0x0004, 3, 0, 20}, + +// PCI Slot 5 +Package() {0x0005, 0, 0, 21}, +Package() {0x0005, 1, 0, 21}, +Package() {0x0005, 2, 0, 21}, +Package() {0x0005, 3, 0, 21}, + +// PCI Slot 6 +Package() {0x0006, 0, 0, 22}, +Package() {0x0006, 1, 0, 22}, +Package() {0x0006, 2, 0, 22}, +Package() {0x0006, 3, 0, 22}, + +// PCI Slot 7 +Package() {0x0007, 0, 0, 23}, +Package() {0x0007, 1, 0, 23}, +Package() {0x0007, 2, 0, 23}, +Package() {0x0007, 3, 0, 23}, + +// PCI Slot 8 +Package() {0x0008, 0, 0, 16}, +Package() {0x0008, 1, 0, 16}, +Package() {0x0008, 2, 0, 16}, +Package() {0x0008, 3, 0, 16}, + +// PCI Slot 9 +Package() {0x0009, 0, 0, 17}, +Package() {0x0009, 1, 0, 17}, +Package() {0x0009, 2, 0, 17}, +Package() {0x0009, 3, 0, 17}, + +// PCI Slot 10 +Package() {0x000a, 0, 0, 18}, +Package() {0x000a, 1, 0, 18}, +Package() {0x000a, 2, 0, 18}, +Package() {0x000a, 3,
RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel
Avi Kivity wrote: > Xu, Anthony wrote: >> Thanks for comments >> >> Basically we are on the same page, while I didn't find your patch >> about irq assignment, can you post it in this thread again, thx? >> Below patch makes all PCI devices use level-trigger , active low >> interrupt, it worked well when running linux guest, I didn't try >> windows guest yet. (didn't have windows image in hand) >> >> Please comment! >> >> If this is acceptabled, we can figure out how to use IOAPIC in >> kvm/ia32 based on this. Which will reduce irq sharing dramatically. >> >> >> Thanks, >> Anthony >> >> >> >> diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl >> index 21fc76a..4b5e824 100755 >> --- a/bios/acpi-dsdt.dsl >> +++ b/bios/acpi-dsdt.dsl >> @@ -974,7 +974,7 @@ DefinitionBlock ( >> Name(_HID, EISAID("PNP0C0F")) // PCI interrupt >> link Name(_UID, 1) >> Name(_PRS, ResourceTemplate(){ >> -Interrupt (, Level, ActiveHigh, Shared) >> +Interrupt (, Level, ActiveLow, Shared) >> { 5, 10, 11 } >> > > I think this will fail for guests which use the PIC, since the PIC is > always active high. > > For x86 the interrupts will have to be active high since that's how > piix works. Understand this concern. Anthony -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel
Marcelo Tosatti wrote: > On Tue, Jun 10, 2008 at 03:57:30PM +0800, Xu, Anthony wrote: >> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c >> index a23a466..df0ea33 100644 >> --- a/qemu/hw/pci.c >> +++ b/qemu/hw/pci.c >> @@ -548,7 +548,7 @@ static void pci_set_irq(void *opaque, int >> irq_num, int level) pci_dev = bus->parent_dev; >> } >> bus->irq_count[irq_num] += change; >> -bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] >> != 0); +bus->set_irq(bus->irq_opaque, irq_num, >> !(bus->irq_count[irq_num] != 0)); } > > Ideally this should detect if the PCI interrupts are active low or > high and act accordingly (so that older BIOSes still work and no > assumptions are made). Will probably have to be done anyway for > merging into QEMU. As I mentioned before, all PCI devices in Qemu used active high level-triggerred interrupt. While IOAPIC pin with fixed connection to slot uses active low level-triggerred interrupt by default. Seems we need to find a place to reverse it. > > One way of doing it would be to talk to ACPI via a new SystemIO > region, but there must be an easier way. Good point, qemu needs to know whether it is in PIC mode or in APIC mode. We need define the communication mechanism, SystemIO region is one choice. Thanks, Anthony -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel
Marcelo Tosatti wrote: > On Wed, Jun 11, 2008 at 07:24:09AM -0700, Alexander Graf wrote: >> >> On Jun 10, 2008, at 12:57 AM, Xu, Anthony wrote: >> >>> Thanks for comments >>> >>> Basically we are on the same page, while I didn't find your patch >>> about irq assignment, can you post it in this thread again, thx? >> >> I'll attach it to this mail. >> >>> Below patch makes all PCI devices use level-trigger , active low >>> interrupt, it worked well when running linux guest, I didn't try >>> windows guest yet. (didn't have windows image in hand) >>> >>> Please comment! >>> >>> If this is acceptabled, we can figure out how to use IOAPIC in >>> kvm/ia32 based on this. Which will reduce irq sharing dramatically. >>> >>> >>> Thanks, >>> Anthony >>> >>> >>> >>> diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl >>> index 21fc76a..4b5e824 100755 >>> --- a/bios/acpi-dsdt.dsl >>> +++ b/bios/acpi-dsdt.dsl >>> @@ -974,7 +974,7 @@ DefinitionBlock ( >>> Name(_HID, EISAID("PNP0C0F")) // PCI interrupt >>> link Name(_UID, 1) Name(_PRS, >>> ResourceTemplate(){ -Interrupt (, Level, >>> ActiveHigh, Shared) +Interrupt (, Level, >>> ActiveLow, Shared) >> >> This looks pretty much correct to me ;-). You might also want to add >> the GSI functionality I have in my patch. The only thing we have not >> discussed so far is, how do interrupts get routed when _PIC is not >> set to 1, aka the "boot case"? > > Hi Alexander, Anthony, > > I think it would be better to avoid static PCI pin -> IOAPIC pin > assignments, if PCI link devices can be used (allowing the OS to route > IRQ's as it wishes to). Seems PCI link device only support irq-pin < 16, IOAPIC pin 16~23 can not be used. > > Take a look at http://www.microsoft.com/whdc/archive/acpi-mp.mspx. It > seems cleaner to use "bimodal link nodes" (using the parlance from URL > above) instead of "bimodal _PRT" as your present GSI patch is using. Bimodal _PRT is a great idea, I never thought of it before, thanks. While in PIIX platform there are only 4 PCI link entries, how can we introduce more? Where to put these added entries? still in ISA bridge configure space. Another concern is, can this link use irq-pin > 15? In the example ASL code in the web page you provided, they use irq-pin <=15 - Anthony > > My current inclination is: > > - Move the PCI interrupt link device code to a method in a separate > file (with arguments such as _UID, IRQ list, etc). > - Create separate PCI interrupt link devices for each PCI pin of each > slot (see the example table at end of URL). > - Assign all available IRQ's covered by the single IOAPIC (0-24) in > the interrupt list for these interrupt link devices. > > I'll try Anthony's patch with Windows. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel
Alexander Graf wrote: > On Jun 10, 2008, at 12:57 AM, Xu, Anthony wrote: > >> Thanks for comments >> >> Basically we are on the same page, while I didn't find your patch >> about irq assignment, can you post it in this thread again, thx? > > I'll attach it to this mail. This patch is stilling use legacy LNKA~LNKD for ioapic, As you said irq-pins > 15 are not used. > >> Below patch makes all PCI devices use level-trigger , active low >> interrupt, it worked well when running linux guest, I didn't try >> windows guest yet. >> (didn't have windows image in hand) >> >> Please comment! >> >> If this is acceptabled, we can figure out how to use IOAPIC in kvm/ >> ia32 based on this. Which will reduce irq sharing dramatically. >> >> >> Thanks, >> Anthony >> >> >> >> diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl >> index 21fc76a..4b5e824 100755 >> --- a/bios/acpi-dsdt.dsl >> +++ b/bios/acpi-dsdt.dsl >> @@ -974,7 +974,7 @@ DefinitionBlock ( >> Name(_HID, EISAID("PNP0C0F")) // PCI interrupt >> link Name(_UID, 1) >> Name(_PRS, ResourceTemplate(){ >> -Interrupt (, Level, ActiveHigh, Shared) >> +Interrupt (, Level, ActiveLow, Shared) > > This looks pretty much correct to me ;-). You might also want to add > the GSI functionality I have in my patch. The only thing we have not > discussed so far is, how do interrupts get routed when _PIC is not set > to 1, aka the "boot case"? Here Avi is correct, PIC only support activehigh level-triggered interrupt. From spec, PCI device uses activelow level-triggered interrupt. I guess it is interrupt Link to reverse it. > >> >> { 5, 10, 11 } >> }) >> Method (_STA, 0, NotSerialized) > > [...snip...] > >> >> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c >> index a23a466..df0ea33 100644 >> --- a/qemu/hw/pci.c >> +++ b/qemu/hw/pci.c >> @@ -548,7 +548,7 @@ static void pci_set_irq(void *opaque, int >> irq_num, int level) pci_dev = bus->parent_dev; >> } >> bus->irq_count[irq_num] += change; >> -bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] >> != 0); +bus->set_irq(bus->irq_opaque, irq_num, !(bus- >>> irq_count[irq_num] != >> 0)); >> } > > I don't think this is the right place to do it. Probably it would be a > better idea to have either the APIC emulation know that the levels are > reversed or make every device trigger ActiveLow interrupts. Maybe it not a right place:-) Making every device trigger activelow interrupt introduce a lot of modifications, it's last resort. APIC emulation is inside kernel now, it is not a good idea to introduce qemu-special piece of code into kernel as Avi mentioned. Thanks. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] do not use extra env field.
Avi Kivity wrote: > Glauber Costa wrote: >> There's no need to polute the already poluted CPUState >> with "ready_for_interrupt_injection". We can compute it >> in the few times we use it, and be fine. >> > >> #define CPUState CPUIA64State >> diff --git a/qemu/target-ia64/op_helper.c >> b/qemu/target-ia64/op_helper.c index 4969ef7..f582023 100644 --- >> a/qemu/target-ia64/op_helper.c +++ b/qemu/target-ia64/op_helper.c >> @@ -36,7 +36,6 @@ CPUState *cpu_ia64_init(char *cpu_model){ >> cpu_reset(env); if (kvm_enabled()) { >> kvm_qemu_init_env(env); >> -env->ready_for_interrupt_injection = 1; >> kvm_init_new_ap(env->cpu_index, env); >> } >> return env; >> > > This bit means that this patch is not an identity transformation. > Anthony/Xiantao, can you confirm that this patch is safe for ia64? Give a simple test This patch is safe for ia64. Anthony -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC]RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel
Thanks for comments Basically we are on the same page, while I didn't find your patch about irq assignment, can you post it in this thread again, thx? Below patch makes all PCI devices use level-trigger , active low interrupt, it worked well when running linux guest, I didn't try windows guest yet. (didn't have windows image in hand) Please comment! If this is acceptabled, we can figure out how to use IOAPIC in kvm/ia32 based on this. Which will reduce irq sharing dramatically. Thanks, Anthony diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl index 21fc76a..4b5e824 100755 --- a/bios/acpi-dsdt.dsl +++ b/bios/acpi-dsdt.dsl @@ -974,7 +974,7 @@ DefinitionBlock ( Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link Name(_UID, 1) Name(_PRS, ResourceTemplate(){ -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) { 5, 10, 11 } }) Method (_STA, 0, NotSerialized) @@ -1019,7 +1019,7 @@ DefinitionBlock ( Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link Name(_UID, 2) Name(_PRS, ResourceTemplate(){ -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) { 5, 10, 11 } }) Method (_STA, 0, NotSerialized) @@ -1064,7 +1064,7 @@ DefinitionBlock ( Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link Name(_UID, 3) Name(_PRS, ResourceTemplate(){ -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) { 5, 10, 11 } }) Method (_STA, 0, NotSerialized) @@ -1109,7 +1109,7 @@ DefinitionBlock ( Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link Name(_UID, 4) Name(_PRS, ResourceTemplate(){ -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) { 5, 10, 11 } }) Method (_STA, 0, NotSerialized) diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index a23a466..df0ea33 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -548,7 +548,7 @@ static void pci_set_irq(void *opaque, int irq_num, int level) pci_dev = bus->parent_dev; } bus->irq_count[irq_num] += change; -bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); +bus->set_irq(bus->irq_opaque, irq_num, !(bus->irq_count[irq_num] != 0)); } /***********/ Alexander Graf wrote: > On Jun 10, 2008, at 8:33 AM, Xu, Anthony wrote: > >> Avi Kivity wrote: >>> >>> I suggest modifying the firmware to report the interrupts as active >>> high. Since Xen does not emulate polarity, the change will not >>> affect it and the firmware can continue to be shared. I'd also >>> recommend fixing Xen to emulate the polarity correctly, if possible. >> >> Thanks for your comments >> I agree modifying common code is not a good method. >> >> While your suggestion seems be infeasible too. >> According to acpi spec, only irq <=15 can be configured, such as >> trigger level, polarity. >> For irq >15 , means connect to IOAPIC directly, it can't be >> configured, it must be level triger, active low. >> >> I can't find any mechanism in firmware to configure irqs (> 15). >> Please enlighten me if you have. > > > You can change the defaults on that IOAPIC-wise. IIRC this was in the > MADT, but I'd have to check. > >> From some experimental, Firmware both in IA64 and IA32 doesn't >> program IOAPIC, It's Guest OS to program. >> Guest OS gets PRT first, >> If the PCI device connected to IOAPIC pin directly, looks like below >>/* Device 1, INTA - INTD */ >>Package(){0x0001, 0, 0, 20}, >>Package(){0x0001, 1, 0, 21}, >>Package(){0x0001, 2, 0, 22}, >>Package(){0x0001, 3, 0, 23}, >> Guest OS configures this IOAPIC pin with level triger, active low >> unconditionally. > > Yes, the Guest OS reads the PRT and configures the IOAPIC and LAPIC > accordingly. > >> If the PCI device connected to IOAPIC pin through interrupt link, the >> irq attribute(level, polarity) is decided by interrupt link >> attribute, Below is the interrupt link attribute in kvm/ia32 >>Device
RE: [PATCH] kvm-ia64 irq assignment 1/2 kernel
Avi Kivity wrote: > > I suggest modifying the firmware to report the interrupts as active > high. Since Xen does not emulate polarity, the change will not affect > it and the firmware can continue to be shared. I'd also recommend > fixing Xen to emulate the polarity correctly, if possible. Thanks for your comments I agree modifying common code is not a good method. While your suggestion seems be infeasible too. According to acpi spec, only irq <=15 can be configured, such as trigger level, polarity. For irq >15 , means connect to IOAPIC directly, it can't be configured, it must be level triger, active low. I can't find any mechanism in firmware to configure irqs (> 15). Please enlighten me if you have. >From some experimental, Firmware both in IA64 and IA32 doesn't program IOAPIC, It's Guest OS to program. Guest OS gets PRT first, If the PCI device connected to IOAPIC pin directly, looks like below /* Device 1, INTA - INTD */ Package(){0x0001, 0, 0, 20}, Package(){0x0001, 1, 0, 21}, Package(){0x0001, 2, 0, 22}, Package(){0x0001, 3, 0, 23}, Guest OS configures this IOAPIC pin with level triger, active low unconditionally. If the PCI device connected to IOAPIC pin through interrupt link, the irq attribute(level, polarity) is decided by interrupt link attribute, Below is the interrupt link attribute in kvm/ia32 Device(LNKA){ Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link Name(_UID, 1) Name(_PRS, ResourceTemplate(){ Interrupt (, Level, ActiveHigh, Shared) { 5, 10, 11 } }) It's defined as level trigger, activehigh, that's the reason why pci device worked well in kvm/ia32. I think below scheme is feasible, 1. all PCI devices in Qemu uses level trigger, active low interrupt. (not include ide, even though it is a PCI device, it uses legacy interrupt mechanism) 2. in Guest Firmware, all PCI devices' interrupts are configured as level trigger, active low for KVM/IA32 Guest firmware, just a little modifications Name(_PRS, ResourceTemplate(){ Interrupt (, Level, ActiveHigh, Shared)--> Interrupt (, Level, ActiveLow, Shared) There are some modifications in Qemu, But I think it's a worthwhile, it's a thoroghly solution both for KVM/IA32 and KVM/IA64. - Thanks Anthony -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] kvm-ia64 irq assignment 1/2 kernel
In kvm-ia64, we use the same guest firmware (GFW)as in Xen, GFW uses PRT to present PCI interrupt routing, all PCI devices' interrupt pins connect to IOAPIC, which doesn't match with kvm-ia64 Qemu PCI interrupt routing. This patch modify Qemu PCI interupt routing code to match with GFW, Then PCI devices in qemu can work in kvm-ia64, for exmaple, NIC Signed-off-by: Anthony Xu < [EMAIL PROTECTED] > diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 99a1736..80c116c 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -272,7 +272,11 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) if (irq >= 0 && irq < IOAPIC_NUM_PINS) { entry = ioapic->redirtbl[irq]; - level ^= entry.fields.polarity; +// polarity of all devices in qemu is active high +// regardless of ioapic setting + +// level ^= entry.fields.polarity; + if (!level) ioapic->irr &= ~mask; else { -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] kvm-ia64 irq assignment 2/2 userspace
In kvm-ia64, PCI devices use 48-pin virtual IOAPIC to deliver interrup. Signed-off-by: Anthony Xu < [EMAIL PROTECTED] > diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c index 90cb3a6..797ece7 100644 --- a/qemu/hw/piix_pci.c +++ b/qemu/hw/piix_pci.c @@ -28,6 +28,7 @@ typedef uint32_t pci_addr_t; #include "pci_host.h" +#include "qemu-kvm.h" typedef PCIHostState I440FXState; @@ -51,6 +52,11 @@ static void piix3_set_irq(qemu_irq *pic, int irq_num, int level); static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) { int slot_addend; +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_IA64) +int dev; +dev = pci_dev->devfn >> 3; +return (dev) << 2) + ((dev) >> 3) + (irq_num)) & 31) + 16); +#endif slot_addend = (pci_dev->devfn >> 3) - 1; return (irq_num + slot_addend) & 3; } @@ -171,12 +177,18 @@ static int i440fx_load(QEMUFile* f, void *opaque, int version_id) PCIBus *i440fx_init(PCIDevice **pi440fx_state, qemu_irq *pic) { +int nirq; PCIBus *b; PCIDevice *d; I440FXState *s; +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_IA64) +nirq = 48; +#else +nirq = 4; +#endif s = qemu_mallocz(sizeof(I440FXState)); -b = pci_register_bus(piix3_set_irq, pci_slot_get_pirq, pic, 0, 4); +b = pci_register_bus(piix3_set_irq, pci_slot_get_pirq, pic, 0, nirq); s->bus = b; register_ioport_write(0xcf8, 4, 4, i440fx_addr_writel, s); @@ -220,6 +232,11 @@ static void piix3_set_irq(qemu_irq *pic, int irq_num, int level) { int i, pic_irq, pic_level; +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_IA64) +if(kvm_enabled()) +kvm_set_irq(irq_num, level); +return; +#endif pci_irq_levels[irq_num] = level; /* now we change the pic irq level according to the piix irq mappings */ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [kvm-ia64-devel] IRQ assignment
Avi Kivity wrote: > > x86 and ia64 have different DSDTs, so I don't see the need for dynamic > generation. The x86 DSDT can return different routing tables > depending > on whether one or two ioapics are present (this can be detected at > runtime). Okay, use fixed DSDT - Anthony -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [kvm-ia64-devel] IRQ assignment
Avi Kivity wrote: > > With 24 free pins, that's fine. With 8 free pins, less so. We'll > need to mix in more high bits. > > I guess we need to increase the number of pins on x86 too. > >> If use this method, we can share same IA64 guest BIOS between >> XEN/IA64 and KVM/IA64. >> > > You can use this method for ia64, and we'll have a different function > for x86 (perhaps two functions, if we later increase the number of > pins to 48 (or even more); the DSDT will need to select the > appropriate routing table according to what's present on the > hardware). It can work if X86 and ia64 implement different "->map" function. Use this kind of "fixed" algorithm may waste IOAPIC interrupt pin, due to not every pci device will use up 4 irq. Is it possible to let qemu dynamically build route table in DSDT? Anthony > > -- > error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html