Re: [PATCH] regression: vmalloc easily fail.
Nick Piggin wrote: Right... that was to add a guard page like the old vmalloc allocator. vmallocs still add their extra page too, so most of them will have a 2 page guard area, but I didn't think this would hurt significantly. I'm not against the patch, but I wonder exactly what is filling it up and how? (can you look at the vmalloc proc function to find out?) Maybe we're allocating two guard pages, but freeing only one? -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Add VMRUN handler v5
On 28.10.2008, at 19:38, Mike Day wrote: On 20/10/08 19:04 +0200, Alexander Graf wrote: +static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + nsvm_printk("VMrun\n"); + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + if (nested_svm_do(svm, svm->vmcb->save.rax, 0, + NULL, nested_svm_vmrun)) + return 1; + + if (nested_svm_do(svm, svm->vmcb->control.msrpm_base_pa, 0, + NULL, nested_svm_vmrun_msrpm)) + return 1; + + return 1; +} A nitpick, but you could remove the last if() statement and one of the last two return statements. Unless you forsee more calls to nested_svm_do() in here. I had the IOPM merger in here and actually like the fall-through aspect of the function :-). But I guess this again is a personal taste thing. Alex -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[01/03] [PATCH] KVM: ia64: Re-organize data structure of guests' data area
>From 77601150901d7bb6b5542c14275709e81212062d Mon Sep 17 00:00:00 2001 From: Xiantao Zhang <[EMAIL PROTECTED]> Date: Thu, 23 Oct 2008 14:56:44 +0800 Subject: [PATCH] KVM: ia64: Re-organize data sturure of guests' data area. 1. Increase the size of data area to 64M 2. Support more vcpus and memory, 128 vcpus and 256G memory are supported for guests. 3. Add the boundary check for memory and vcpu allocation. With this patch, kvm guest's data area looks as follow: * *+--+ --- KVM_VM_DATA_SIZE *| vcpu[n]'s data | | ___KVM_STK_OFFSET *| | |/ | *|..| | /vcpu's struct&stack | *|..| | /-| 0 *| vcpu[5]'s data | | / vpd| *| vcpu[4]'s data | |/---| *| vcpu[3]'s data | / vtlb | *| vcpu[2]'s data | /|| *| vcpu[1]'s data |/ | vhpt | *| vcpu[0]'s data || *+--+ | *|memory dirty log | | *+--+ | *|vm's data struct | | *+--+ | *| | | *| | | *| | | *| | | *| | | *| | | *| | | *| vm's p2m table | | *| | | *| | | *| | | | * vm's data->| | | | *+--+ --- 0 * To support large memory, needs to increase the size of p2m. * To support more vcpus, needs to ensure it has enough space to * hold vcpus' data. 
*/ Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]> --- arch/ia64/include/asm/kvm_host.h | 192 -- arch/ia64/kvm/kvm-ia64.c | 60 ++-- arch/ia64/kvm/kvm_minstate.h |4 +- arch/ia64/kvm/misc.h |3 +- arch/ia64/kvm/vcpu.c |5 +- arch/ia64/kvm/vtlb.c |4 +- 6 files changed, 161 insertions(+), 107 deletions(-) diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index c60d324..678e264 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -23,17 +23,6 @@ #ifndef __ASM_KVM_HOST_H #define __ASM_KVM_HOST_H - -#include -#include -#include -#include -#include - -#include -#include - -#define KVM_MAX_VCPUS 4 #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 @@ -52,68 +41,127 @@ #define EXIT_REASON_PTC_G 8 /*Define vmm address space and vm data space.*/ -#define KVM_VMM_SIZE (16UL<<20) +#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20) #define KVM_VMM_SHIFT 24 -#define KVM_VMM_BASE 0xD000UL -#define VMM_SIZE (8UL<<20) +#define KVM_VMM_BASE 0xD000 +#define VMM_SIZE (__IA64_UL_CONST(8)<<20) /* * Define vm_buffer, used by PAL Services, base address. 
- * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M + * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M */ #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) -#define KVM_VM_BUFFER_SIZE (8UL<<20) - -/*Define Virtual machine data layout.*/ -#define KVM_VM_DATA_SHIFT 24 -#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT) -#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE) - - -#define KVM_P2M_BASEKVM_VM_DATA_BASE -#define KVM_P2M_OFS 0 -#define KVM_P2M_SIZE(8UL << 20) - -#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE) -#define KVM_VHPT_OFSKVM_P2M_SIZE -#define KVM_VHPT_BLOCK_SIZE (2UL << 20) -#define VHPT_SHIFT 18 -#define VHPT_SIZE (1UL << VHPT_SHIFT) -#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5)) - -#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE) -#define KVM_VTLB_OFS(KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE) -#define KVM_VTLB_BLOCK_SIZE (1UL<<20) -#define VTLB_SHIFT 17 -#define VTLB_SIZE (1UL<| | | | + * +--+ --- 0 + * To support large memory, needs to increase the size of p2m. + * To support more vcpus, needs to ensure it has enough space to + * hold vcpus' data. + */ + +#define KVM_VM_DATA_SHIFT 26 +#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT) +#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SI
[03/03][PATCH] KVM: ia64: kvm halt logic doesn't need lock to protect.
>From 4858a5c47c5dce88a62a6edf427d8709f3ebda15 Mon Sep 17 00:00:00 2001 From: Xiantao Zhang <[EMAIL PROTECTED]> Date: Thu, 23 Oct 2008 15:03:38 +0800 Subject: [PATCH] KVM: ia64: kvm halt logic doesn't need lock to protect. Remove the lock protection for kvm halt logic, otherwise, once other vcpus want to acquire the lock, and they have to wait all vcpus are waken up from halt. Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]> --- arch/ia64/kvm/kvm-ia64.c |2 -- 1 files changed, 0 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 6b1e31b..93c7f18 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -439,7 +439,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) expires = div64_u64(itc_diff, cyc_per_usec); kt = ktime_set(0, 1000 * expires); - down_read(&vcpu->kvm->slots_lock); vcpu->arch.ht_active = 1; hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); @@ -452,7 +451,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - up_read(&vcpu->kvm->slots_lock); if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) return -EINTR; -- 1.6.0 0003-KVM-ia64-kvm-halt-logic-doesn-t-need-lock-to-prote.patch Description: 0003-KVM-ia64-kvm-halt-logic-doesn-t-need-lock-to-prote.patch
[02/03][PATCH] KVM: ia64: Ensure SIGINT delivered to main thread (vcpu 0).
>From dd0f4f43e038d33472dbbf6d6b75d4d84d1bc3f9 Mon Sep 17 00:00:00 2001 From: Xiantao Zhang <[EMAIL PROTECTED]> Date: Thu, 23 Oct 2008 15:02:52 +0800 Subject: [PATCH] KVM: ia64: Ensure SIGINT delivered to main thread (vcpu 0). Before the APs go into the blocked state, make sure SIGINT is masked; otherwise an AP may eat the SIGINT sent by the user to kill the guest, which results in Qemu hanging there, because the main thread can't get the signal to free the guest's resources. Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]> --- arch/ia64/kvm/kvm-ia64.c | 12 ++-- 1 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 54a90b8..6b1e31b 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -673,16 +673,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); clear_bit(KVM_REQ_UNHALT, &vcpu->requests); - vcpu_put(vcpu); - return -EAGAIN; + r = -EAGAIN; + goto out; } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - if (vcpu->mmio_needed) { memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); kvm_set_mmio_data(vcpu); @@ -690,7 +690,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->mmio_needed = 0; } r = __vcpu_run(vcpu, kvm_run); - +out: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); -- 1.6.0 0002-KVM-ia64-Ensure-SIGINT-delivered-to-main-thread-v.patch Description: 0002-KVM-ia64-Ensure-SIGINT-delivered-to-main-thread-v.patch
[00/03][PATCH] kvm-ia64 updates for linux-2.6.28-rc2
Hi, Avi Please review and apply the three patches! The last two are key fixes for linux-2.6.28-rc2, and please also push them into upstream. Thanks! Xiantao-- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] Add HPET emulation to qemu (v3)
Am 27.10.2008 um 08:07 schrieb Beth Kon <[EMAIL PROTECTED]>: On Tue, 2008-10-21 at 10:21 -0500, Anthony Liguori wrote: Beth Kon wrote: Thanks for the feedback, Anthony. I'll only respond where I have specific comments. Otherwise, I agree to your suggestions and will make the changes. +if(timer_enabled(timer) && hpet_enabled(timer->state)) { +qemu_irq_pulse(irq); +/* windows wants timer0 on irq2 and linux wants irq0, + * so we pulse both + */ +if (do_ioapic) +qemu_irq_pulse(timer->state->irqs[2]); This seems curious and not quite right. We should be able to detect whether the HPET is being used in IO APIC mode and raise the appropriate interrupt instead of generating a spurious irq0 interrupt. After digging further on this, it turns out that the need for the 2 interrupts was caused by what looks like a problem with the way qemu is generating interrupts for the ioapic. I will send out a separate patch for that issue, and make the necessary changes in this hpet code. +} +} + +static void hpet_save(QEMUFile *f, void *opaque) +{ +HPETState *s = opaque; +int i; +qemu_put_be64s(f, &s->config); +qemu_put_be64s(f, &s->isr); +/* save current counter value */ +s->hpet_counter = hpet_get_ticks(s); +qemu_put_be64s(f, &s->hpet_counter); + +for(i = 0; i < HPET_NUM_TIMERS; i++) { +qemu_put_8s(f, &s->timer[i].tn); +qemu_put_be64s(f, &s->timer[i].config); +qemu_put_be64s(f, &s->timer[i].cmp); +qemu_put_be64s(f, &s->timer[i].fsb); +qemu_put_be64s(f, &s->timer[i].period); +if (s->timer[i].qemu_timer) { +qemu_put_timer(f, s->timer[i].qemu_timer); +} Would qemu_timer ever be NULL? You're right... the answer is no. I'll fix that. + + +diff = hpet_calculate_diff(t, cur_tick); +qemu_mod_timer(t->qemu_timer, qemu_get_clock(vm_clock) ++ (int64_t)ticks_to_ns(diff)); May want to convert ticks_to_ns to take and return an int64_t. The explicit casting could introduce very subtle bugs. It seems better this way to me, since muldiv64 in ticks_to_ns takes uint64_t. 
The likelihood of diff being big enough to create a problem seems small enough. Am I missing something? +case HPET_COUNTER: +if (hpet_enabled(s)) +cur_tick = hpet_get_ticks(s); Any reason for hpet_get_ticks(s) to not have this check integrated into it? When the hpet is being disabled, we need to get the actual count, even though the hpet_enabled check would return false. So if I made this change it would introduce an ordering issue in the disable code (i.e., get the ticks before setting the hpet to disabled) + +/* XXX this is a dirty hack for HPET support w/o LPC + Actually this is a config descriptor for the RCBA */ What's the dirty hack? This comment is left over from Alexander Graf's code. I'm not sure why it is in this location and I'll remove it. But in comments on the first version of hpet code I produced, Alexander said, regarding the fixed assignment of HPET_BASE: "This is a dirty hack that I did to make Mac OS X happy. Actually the HPET base address gets specified in the RCBA on the LPC and is configured by the BIOS to point to a valid address, with 0xfed00000 being the default (IIRC if you write 0 to the fields you end up with that address)." Basically IIRC on the ICH-7 the HPET base address is configured indirectly by writing an address to the RCBA, which is mmio based space configured in the LPC pci device config space. Since we don't have an LPC device, but a PIIX ISA bridge, there was no space to configure this on. That's why I faked and hardcoded some parts here, as the OS should read the acpi tables to get the address anyways. Please double-check that information, as I don't have the specs with me atm. Alex But in other areas of qemu code I see base addresses being hardcoded and am not sure anything different needs to be done here. Comments?
Regards, Anthony Liguori -- Elizabeth Kon (Beth) IBM Linux Technology Center Open Hypervisor Team email: [EMAIL PROTECTED] -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: [PATCH 3/3] Add KVM support to QEMU
Glauber Costa wrote: On Tue, Oct 28, 2008 at 7:51 PM, Anthony Liguori <[EMAIL PROTECTED]> wrote: This is part of the reason for this exercise. I'd rather introduce KVM support first and then look at abstracting things, than vice versa. A number of the hooks in the current QEMUAccel tree are there for the wrong reason (to support the out-of-tree IO thread, for instance). If you just introduce something with various hooks and say, these are hooks we'll need, it's not possible to really evaluate whether the hooks are needed because nothing in the tree makes use of them. We talked extensively on monday about it, and I'm in agreement with it. Something I was thinking about this morning, and I think the first place where we'll definitely need a hook, is how to deal with kvm_load_registers(). I think there's overlap between KVM and the IO thread here. There are two reasons (I can think of) that most of the device model code can't run in conjunction with TCG. The first is that TCG may modify CPUState in a non-atomic way. The device model may need to access CPUState although there are very few places that it does. The other reason is accessing guest memory. TCG does not preserve atomicity when a guest accesses device memory. There are probably only a few places in the device model (like virtio) that depend on atomicity. If we implemented an API that implemented a lock/unlock for CPUState and for portions of memory, then I think this could be used both as a hook for kvm_load_registers and as a way to introduce an IO thread with TCG. The CPUState lock/unlock is pretty straight forward. For the memory implementation to be efficient, I think you would have to acquire the lock when TCG brings a physical address into the TLB (preferrably, at a page granularity), or whenever someone tries to access memory (via cpu_physical_memory_rw). I think in the vast majority of the cases, there wouldn't be any contention and both could TCG could run along side the IO thread. 
Another place for a hook is updating a slot's dirty bitmap. Right now, with my patchset we don't have live migration or the VGA RAM optimization. There's nothing about the VGA RAM optimization that wouldn't work for QEMU. I'm not sure that it really is an optimization in the context of TCG, but I certainly don't think it's any worse. The only thing you really need is to query the KVM dirty bitmap when it comes time to start over querying the VGA dirty bits. The same is needed for live migration, so I think what we really need is to change the memory dirty bit tracking API to have a concept of refresh that we can use to hook for KVM. FWIW, I included a TODO in my patch if people are interested in tackling any of these things. Regards, Anthony Liguori Regards, Anthony Liguori Regards, Anthony Liguori surprised, Gerd -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] regression: vmalloc easily fail.
On Tue, Oct 28, 2008 at 08:55:13PM -0200, Glauber Costa wrote: > Commit db64fe02258f1507e13fe5212a989922323685ce broke > KVM (the symptom) for me. The cause is that vmalloc > allocations fail, despite of the fact that /proc/meminfo > shows plenty of vmalloc space available. > > After some investigation, it seems to me that the current > way to compute the next addr in the rb-tree transversal > leaves a spare page between each allocation. After a few > allocations, regardless of their size, we run out of vmalloc > space. Right... that was to add a guard page like the old vmalloc allocator. vmallocs still add their extra page too, so most of them will have a 2 page guard area, but I didn't think this would hurt significantly. I'm not against the patch, but I wonder exactly what is filling it up and how? (can you look at the vmalloc proc function to find out?) > > Signed-off-by: Glauber Costa <[EMAIL PROTECTED]> > Cc: Jeremy Fitzhardinge <[EMAIL PROTECTED]> > Cc: Krzysztof Helt <[EMAIL PROTECTED]> > --- > mm/vmalloc.c |2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c > index 0365369..a33b0d1 100644 > --- a/mm/vmalloc.c > +++ b/mm/vmalloc.c > @@ -363,7 +363,7 @@ retry: > } > > while (addr + size >= first->va_start && addr + size <= vend) { > - addr = ALIGN(first->va_end + PAGE_SIZE, align); > + addr = ALIGN(first->va_end, align); > > n = rb_next(&first->rb_node); > if (n) > -- > 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: [PATCH 3/3] Add KVM support to QEMU
On Tue, Oct 28, 2008 at 7:51 PM, Anthony Liguori <[EMAIL PROTECTED]> wrote: > Gerd Hoffmann wrote: >> >> Anthony Liguori wrote: >> >>> >>> This patch only implements the bare minimum support to get a guest >>> booting. It >>> has very little impact the rest of QEMU and attempts to integrate nicely >>> with >>> the rest of QEMU. >>> >> >> Huh? That isn't based on the qemu-accel patches ... >> > > This is part of the reason for this exercise. I'd rather introduce KVM > support first and then look at abstracting things, than vice versa. A > number of the hooks in the current QEMUAccel tree are there for the wrong > reason (to support the out-of-tree IO thread, for instance). > > If you just introduce something with various hooks and say, these are hooks > we'll need, it's not possible to really evaluate whether the hooks are > needed because nothing in the tree makes use of them. We talked extensively on monday about it, and I'm in agreement with it. > > Regards, > > Anthony Liguori > >> surprised, >> Gerd >> -- >> To unsubscribe from this list: send the line "unsubscribe kvm" in >> the body of a message to [EMAIL PROTECTED] >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> > > > > -- Glauber Costa. "Free as in Freedom" http://glommer.net "The less confident you are, the more serious you have to act." -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm + kqemu enabled at the same time
Martin Kejík wrote: Hello, I've compiled the KVM enabled QEMU with support for both KVM and KQEMU. Both modules loaded and QEMU running saying "kvm: enabled" and "kqemu: enabled for user code". How does this work?? What does QEMU really do in this situation when we look closer to CPU?? You're using both accelerators at the same time and getting 2x acceleration. It will actually go faster than native now :-) Seriously, it's working based on sheer luck. If you look at the cpu_exec() loop (which is the core execution loop in QEMU, you'll see): #ifdef USE_KQEMU if (kqemu_is_ok(env) && env->interrupt_request == 0) { int ret; env->eflags = env->eflags | cc_table[CC_OP].compute_all() | (DF & DF_MASK); ret = kqemu_cpu_exec(env); /* put eflags in CPU temporary format */ CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); DF = 1 - (2 * ((env->eflags >> 10) & 1)); CC_OP = CC_OP_EFLAGS; env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); if (ret == 1) { /* exception */ longjmp(env->jmp_env, 1); } else if (ret == 2) { /* softmmu execution needed */ } else { if (env->interrupt_request != 0) { /* hardware interrupt will be executed just after */ } else { /* otherwise, we restart */ longjmp(env->jmp_env, 1); } } } #endif if (kvm_enabled()) { kvm_cpu_exec(env); longjmp(env->jmp_env, 1); } What's letting this work is kqemu_is_ok(env). This check looks like this: static inline int kqemu_is_ok(CPUState *env) { return(env->kqemu_enabled && (env->cr[0] & CR0_PE_MASK) && !(env->hflags & HF_INHIBIT_IRQ_MASK) && (env->eflags & IF_MASK) && !(env->eflags & VM_MASK) && (env->kqemu_enabled == 2 || ((env->hflags & HF_CPL_MASK) == 3 && (env->eflags & IOPL_MASK) != IOPL_MASK))); } This is checking whether you're in protected mode, not in an interrupt window, interrupts are enabled, you aren't in vm86 mode, and if not using kernel-kqemu, CPL == 3 and IOPL > CPL. As an optimization, KVM does not synchronize CPUState very often which means that env is very stale. 
This means that it's likely that CPUState is in the initial CPU state (in real mode). As long as kqemu never gets to execute, you should be ok. If you force KVM to sync CPUState, you'll see kqemu actually execute and bad things will happen. But the more important question is, why in the world are you doing this in the first place? Regards, Anthony Liguori thanx -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] regression: vmalloc easily fail.
> I suspect it's a case of off-by-one... ALIGN() might round down, and > the "+ (PAGE_SIZE-1)" was there to make it round up. > Except for that missing -1 ... ALIGN() has always rounded up, at least back to 2.4. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
kvm + kqemu enabled at the same time
Hello, I've compiled the KVM enabled QEMU with support for both KVM and KQEMU. Both modules loaded and QEMU running saying "kvm: enabled" and "kqemu: enabled for user code". How does this work?? What does QEMU really do in this situation when we look closer to CPU?? thanx -- Martin (Kejda) Kejík kejda(at)centrum(dot)cz signature.asc Description: This is a digitally signed message part.
Re: [PATCH 3/3] Add KVM support to QEMU
Gerd Hoffmann wrote: Anthony Liguori wrote: This patch only implements the bare minimum support to get a guest booting. It has very little impact the rest of QEMU and attempts to integrate nicely with the rest of QEMU. Huh? That isn't based on the qemu-accel patches ... This is part of the reason for this exercise. I'd rather introduce KVM support first and then look at abstracting things, than vice versa. A number of the hooks in the current QEMUAccel tree are there for the wrong reason (to support the out-of-tree IO thread, for instance). If you just introduce something with various hooks and say, these are hooks we'll need, it's not possible to really evaluate whether the hooks are needed because nothing in the tree makes use of them. Regards, Anthony Liguori surprised, Gerd -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] regression: vmalloc easily fail.
On Tue, 28 Oct 2008 14:22:16 -0700 Roland Dreier <[EMAIL PROTECTED]> wrote: > > I'm guessing that the missing comment explains that this is > > intentional, to trap buffer overflows? > > Actually, speaking of comments, it's interesting that > __get_vm_area_node() -- which is called from vmalloc() -- does: > > /* >* We always allocate a guard page. >*/ > size += PAGE_SIZE; > > va = alloc_vmap_area(size, align, start, end, node, gfp_mask); > > and alloc_vmap_area() adds another PAGE_SIZE, as the original email > pointed out: > > while (addr + size >= first->va_start && addr + size > <= vend) { addr = ALIGN(first->va_end + PAGE_SIZE, align); > > I wonder if the double padding is causing a problem when things get > too fragmented? I suspect it's a case of off-by-one... ALIGN() might round down, and the "+ (PAGE_SIZE-1)" was there to make it round up. Except for that missing -1 ... -- Arjan van de Ven, Intel Open Source Technology Centre For development, discussion and tips for power savings, visit http://www.lesswatts.org -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] Add KVM support to QEMU
Anthony Liguori wrote: > This patch only implements the bare minimum support to get a guest booting. > It > has very little impact the rest of QEMU and attempts to integrate nicely with > the rest of QEMU. Huh? That isn't based on the qemu-accel patches ... surprised, Gerd -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] regression: vmalloc easily fail.
hello, >> I'm guessing that the missing comment explains that this is intentional, >> to trap buffer overflows? yes, IIRC the pages between vmalloc areas are there for safety reasons. (like the interval inserted before the first area, defined by VMALLOC_OFFSET) regards Matias -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] regression: vmalloc easily fail.
> I'm guessing that the missing comment explains that this is > intentional, to trap buffer overflows? Actually, speaking of comments, it's interesting that __get_vm_area_node() -- which is called from vmalloc() -- does: /* * We always allocate a guard page. */ size += PAGE_SIZE; va = alloc_vmap_area(size, align, start, end, node, gfp_mask); and alloc_vmap_area() adds another PAGE_SIZE, as the original email pointed out: while (addr + size >= first->va_start && addr + size <= vend) { addr = ALIGN(first->va_end + PAGE_SIZE, align); I wonder if the double padding is causing a problem when things get too fragmented? - R. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH 3/3] Add KVM support to QEMU
Hollis Blanchard wrote: Just a quick skim... On Tue, Oct 28, 2008 at 3:13 PM, Anthony Liguori <[EMAIL PROTECTED]> wrote: +int kvm_cpu_exec(CPUState *env) +{ +struct kvm_run *run = env->kvm_run; +int ret; + +dprintf("kvm_cpu_exec()\n"); + +do { +kvm_arch_pre_run(env, run); + +if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) { +dprintf("interrupt exit requested\n"); +ret = 0; +break; +} + +dprintf("setting tpr\n"); +run->cr8 = cpu_get_apic_tpr(env); This belongs in the arch_pre_run hook above. Good catch, I've updated the patch. How did you decide which exit handlers should go into architecture-specific code? Looking at just the KVM architecture set: Based on whether the implementation required target-specific code. IO: x86 and ia64, not PowerPC or s390 cpu_{in,out}[bwl] are defined in vl.c and are available for all architectures. They are no-ops on most architectures because they are never used. MMIO: everybody except s390 cpu_physical_memory_rw() is defined by everyone. DCRs: PowerPC only This will have to be an architecture specific handler. IRQ window: not sure It's a no-op implementation. I would think that this would be needed on PPC. If you want to inject an interrupt, but the guest is unable to handle an interrupt, you need to exit to userspace when the guest re-enables interrupts. Otherwise, you may never return to userspace for the interrupt to be injected. How do you handle that now? Does PPC have something that makes this unnecessary? Regards, Anthony Liguori -Hollis -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] regression: vmalloc easily fail.
On Tue, Oct 28, 2008 at 11:03:22PM +0200, Avi Kivity wrote: > Glauber Costa wrote: >> Commit db64fe02258f1507e13fe5212a989922323685ce broke >> KVM (the symptom) for me. The cause is that vmalloc >> allocations fail, despite of the fact that /proc/meminfo >> shows plenty of vmalloc space available. >> >> After some investigation, it seems to me that the current >> way to compute the next addr in the rb-tree transversal >> leaves a spare page between each allocation. After a few >> allocations, regardless of their size, we run out of vmalloc >> space. >> >> while (addr + size >= first->va_start && addr + size <= vend) { >> -addr = ALIGN(first->va_end + PAGE_SIZE, align); >> +addr = ALIGN(first->va_end, align); >> n = rb_next(&first->rb_node); >> if (n) >> > > I'm guessing that the missing comment explains that this is intentional, > to trap buffer overflows? > > (okay that was a cheap shot. I don't comment nearly enough either) > > Even if you leave a page between allocations, I don't see how you can > fail a one page allocation, unless you've allocated at least N/2 pages > (where N is the size of the vmalloc space in pages). I'm hoping Nick will comment on it. I might well be wrong, but it nicely fixes the problem for me, and actually, you don't need "at least N/2 pages". The size of the allocations hardly matters, just the number of allocations we did. Since kvm does some small vmalloc allocations, that may be the reason we are triggering it. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] regression: vmalloc easily fail.
Glauber Costa wrote: Commit db64fe02258f1507e13fe5212a989922323685ce broke KVM (the symptom) for me. The cause is that vmalloc allocations fail, despite of the fact that /proc/meminfo shows plenty of vmalloc space available. After some investigation, it seems to me that the current way to compute the next addr in the rb-tree transversal leaves a spare page between each allocation. After a few allocations, regardless of their size, we run out of vmalloc space. while (addr + size >= first->va_start && addr + size <= vend) { - addr = ALIGN(first->va_end + PAGE_SIZE, align); + addr = ALIGN(first->va_end, align); n = rb_next(&first->rb_node); if (n) I'm guessing that the missing comment explains that this is intentional, to trap buffer overflows? (okay that was a cheap shot. I don't comment nearly enough either) Even if you leave a page between allocations, I don't see how you can fail a one page allocation, unless you've allocated at least N/2 pages (where N is the size of the vmalloc space in pages). -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] regression: vmalloc easily fail.
Commit db64fe02258f1507e13fe5212a989922323685ce broke KVM (the symptom) for me. The cause is that vmalloc allocations fail, despite the fact that /proc/meminfo shows plenty of vmalloc space available. After some investigation, it seems to me that the current way to compute the next addr in the rb-tree traversal leaves a spare page between each allocation. After a few allocations, regardless of their size, we run out of vmalloc space. Signed-off-by: Glauber Costa <[EMAIL PROTECTED]> Cc: Jeremy Fitzhardinge <[EMAIL PROTECTED]> Cc: Krzysztof Helt <[EMAIL PROTECTED]> --- mm/vmalloc.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0365369..a33b0d1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -363,7 +363,7 @@ retry: } while (addr + size >= first->va_start && addr + size <= vend) { - addr = ALIGN(first->va_end + PAGE_SIZE, align); + addr = ALIGN(first->va_end, align); n = rb_next(&first->rb_node); if (n) -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH 3/3] Add KVM support to QEMU
Just a quick skim... On Tue, Oct 28, 2008 at 3:13 PM, Anthony Liguori <[EMAIL PROTECTED]> wrote: > +int kvm_cpu_exec(CPUState *env) > +{ > +struct kvm_run *run = env->kvm_run; > +int ret; > + > +dprintf("kvm_cpu_exec()\n"); > + > +do { > +kvm_arch_pre_run(env, run); > + > +if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) { > +dprintf("interrupt exit requested\n"); > +ret = 0; > +break; > +} > + > +dprintf("setting tpr\n"); > +run->cr8 = cpu_get_apic_tpr(env); This belongs in the arch_pre_run hook above. > +ret = kvm_vcpu_ioctl(env, KVM_RUN, 0); > +kvm_arch_post_run(env, run); > + > +if (ret == -EINTR || ret == -EAGAIN) { > +dprintf("io window exit\n"); > +ret = 0; > +break; > +} > + > +if (ret < 0) { > +dprintf("kvm run failed %s\n", strerror(-ret)); > +abort(); > +} > + > +ret = 0; /* exit loop */ > +switch (run->exit_reason) { > +case KVM_EXIT_IO: > +dprintf("handle_io\n"); > +ret = kvm_handle_io(env, run->io.port, > +(uint8_t *)run + run->io.data_offset, > +run->io.direction, > +run->io.size, > +run->io.count); > +break; > +case KVM_EXIT_MMIO: > +dprintf("handle_mmio\n"); > +cpu_physical_memory_rw(run->mmio.phys_addr, > + run->mmio.data, > + run->mmio.len, > + run->mmio.is_write); > +ret = 1; > +break; > +case KVM_EXIT_IRQ_WINDOW_OPEN: > +dprintf("irq_window_open\n"); > +break; > +case KVM_EXIT_SHUTDOWN: > +dprintf("shutdown\n"); > +qemu_system_reset_request(); > +ret = 1; > +break; > +case KVM_EXIT_UNKNOWN: > +dprintf("kvm_exit_unknown\n"); > +break; > +case KVM_EXIT_FAIL_ENTRY: > +dprintf("kvm_exit_fail_entry\n"); > +break; > +case KVM_EXIT_EXCEPTION: > +dprintf("kvm_exit_exception\n"); > +break; > +case KVM_EXIT_DEBUG: > +dprintf("kvm_exit_debug\n"); > +break; > +default: > +dprintf("kvm_arch_handle_exit\n"); > +ret = kvm_arch_handle_exit(env, run); > +break; > +} > +} while (ret > 0); > + > +return ret; > +} How did you decide which exit handlers should go into architecture-specific code? 
Looking at just the KVM architecture set: IO: x86 and ia64, not PowerPC or s390 MMIO: everybody except s390 DCRs: PowerPC only IRQ window: not sure -Hollis -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
KVM: MMU: increase per-vcpu rmap cache alloc size
The page fault path can use two rmap_desc structures, if: - walk_addr's dirty pte update allocates one rmap_desc. - mmu_lock is dropped, sptes are zapped resulting in rmap_desc being freed. - fetch->mmu_set_spte allocates another rmap_desc. Increase to 4 for safety. Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 79cb4a9..2477a24 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -316,7 +316,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) if (r) goto out; r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, - rmap_desc_cache, 1); + rmap_desc_cache, 4); if (r) goto out; r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] kvm: disable virtualization on kdump
Eduardo Habkost <[EMAIL PROTECTED]> writes: > I am still wondering if a simple function pointer (instead of a full > notifier interface) would be good enough. It looks like a reasonable > tradeoff. Oh sorry. As long as we do the whole rcu protected thing so it is safe to call the function without taking locks it should work. I'm not thrilled about a function pointer but it should work. > I think I will get flamed if I try to pull to the core a bunch of code > that always lived in the KVM module. 8) Why is KVM modular anyway? That seems like some pretty core cpu functionality... > And even if we pull those functions to the core, we will still have > a function pointer on the new code anyway, because we would need to > support vmx and svm. Depending. It doesn't sound like svm has the problem where init doesn't work so svm really doesn't need to do this. Eric -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] Add KVM support to QEMU
This patch adds very basic KVM support. KVM is a kernel module for Linux that allows userspace programs to make use of hardware virtualization support. It current supports x86 hardware virtualization using Intel VT-x or AMD-V. It also supports IA64 VT-i, PPC 440, and S390. This patch only implements the bare minimum support to get a guest booting. It has very little impact the rest of QEMU and attempts to integrate nicely with the rest of QEMU. Even though this implementation is basic, it is significantly faster than TCG. Booting and shutting down a Linux guest: w/TCG: 1:32.36 elapsed 84% CPU w/KVM: 0:31.14 elapsed 59% CPU Right now, KVM is disabled by default and must be explicitly enabled with -enable-kvm. We can enable it by default later when we have had better testing. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> diff --git a/KVM_TODO b/KVM_TODO new file mode 100644 index 000..9529049 --- /dev/null +++ b/KVM_TODO @@ -0,0 +1,9 @@ +1) Add hooks for load/save of register state + o Fixes gdbstub, save/restore, and vmport +2) Add VGA optimization +3) Add IO thread +4) Add guest SMP support +5) Add TPR optimization +6) Add support for in-kernel APIC +7) Add support for in-kernel PIT +8) Merge in additional changes in kvm-userspace tree diff --git a/Makefile.target b/Makefile.target index e2edf9d..903d66d 100644 --- a/Makefile.target +++ b/Makefile.target @@ -183,6 +183,9 @@ CFLAGS+=-I/opt/SUNWspro/prod/include/cc endif endif +kvm.o: CFLAGS+=$(KVM_CFLAGS) +kvm-all.o: CFLAGS+=$(KVM_CFLAGS) + all: $(PROGS) # @@ -475,6 +478,9 @@ ifndef CONFIG_USER_ONLY OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o OBJS+=fw_cfg.o aio.o buffered_file.o migration.o migration-tcp.o +ifdef CONFIG_KVM +OBJS+=kvm.o kvm-all.o +endif ifdef CONFIG_WIN32 OBJS+=block-raw-win32.o else diff --git a/configure b/configure index aefa69b..7aed99d 100755 --- a/configure +++ b/configure @@ -113,6 +113,7 @@ aio="yes" nptl="yes" mixemu="no" bluez="yes" +kvm="yes" # OS 
specific targetos=`uname -s` @@ -300,6 +301,8 @@ for opt do ;; --disable-bluez) bluez="no" ;; + --disable-kvm) kvm="no" + ;; --enable-profiler) profiler="yes" ;; --enable-cocoa) @@ -439,6 +442,7 @@ echo " --disable-brlapi disable BrlAPI" echo " --disable-vnc-tlsdisable TLS encryption for VNC server" echo " --disable-curses disable curses output" echo " --disable-bluez disable bluez stack connectivity" +echo " --disable-kvmdisable KVM acceleration support" echo " --disable-nptl disable usermode NPTL support" echo " --enable-system enable all system emulation targets" echo " --disable-system disable all system emulation targets" @@ -933,6 +937,30 @@ EOF fi ## +# kvm probe +if test "$kvm" = "yes" ; then +cat > $TMPC < +#if !defined(KVM_API_VERSION) || \ +KVM_API_VERSION < 12 || \ +KVM_API_VERSION > 12 || \ +!defined(KVM_CAP_USER_MEMORY) || \ +!defined(KVM_CAP_SET_TSS_ADDR) +#error Invalid KVM version +#endif +int main(void) { return 0; } +EOF + # FIXME make this configurable + kvm_cflags=-I/lib/modules/`uname -r`/build/include + if $cc $ARCH_CFLAGS -o $TMPE ${OS_CFLAGS} $kvm_cflags $TMPC \ + 2>/dev/null ; then +: + else +kvm="no" + fi +fi + +## # AIO probe if test "$aio" = "yes" ; then aio=no @@ -1018,6 +1046,7 @@ echo "uname -r $uname_release" echo "NPTL support $nptl" echo "vde support $vde" echo "AIO support $aio" +echo "KVM support $kvm" if test $sdl_too_old = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -1388,6 +1417,15 @@ interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"` echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h gdb_xml_files="" +# FIXME allow i386 to build on x86_64 and vice versa +if test "$kvm" = "yes" -a "$target_cpu" != "$cpu" ; then + kvm="no" +fi +# Disable KVM for linux-user +if test "$kvm" = "yes" -a "$target_softmmu" = "no" ; then + kvm="no" +fi + case "$target_cpu" in i386) echo "TARGET_ARCH=i386" >> $config_mak @@ -1397,6 +1435,11 @@ case "$target_cpu" in then echo 
"#define USE_KQEMU 1" >> $config_h fi +if test "$kvm" = "yes" ; then + echo "CONFIG_KVM=yes" >> $config_mak + echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak + echo "#define CONFIG_KVM" >> $config_h +fi gcc3minver=`$cc --version 2> /dev/null| fgrep "(GCC) 3." | awk '{ print $3 }' | cut -f2 -d.` if test -n "$gcc3minver" && test $gcc3minver -gt 3 then @@ -1414,6 +1457,11 @@ case "$target_cpu" in then echo "#define USE_KQEMU 1" >> $config_h fi +if test "$kvm" = "yes" ; then + echo "CONFIG_KVM=yes" >> $config_mak + echo "KVM_CFL
[PATCH 2/3] Split CPUID from op_helper
KVM needs to call CPUID from outside of the TCG code. This patch splits out the CPUID logic into a separate helper that both the op helper and KVM can call. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> diff --git a/target-i386/cpu.h b/target-i386/cpu.h index b1678ef..263a477 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -730,6 +730,10 @@ void cpu_smm_update(CPUX86State *env); /* will be suppressed */ void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0); +void cpu_x86_cpuid(CPUX86State *env, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx); + /* used to debug */ #define X86_DUMP_FPU 0x0001 /* dump FPU state too */ #define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */ diff --git a/target-i386/helper.c b/target-i386/helper.c index c2e1a88..905ae9b 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1287,3 +1287,169 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr) return paddr; } #endif /* !CONFIG_USER_ONLY */ + +void cpu_x86_cpuid(CPUX86State *env, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ +/* test if maximum index reached */ +if (index & 0x8000) { +if (index > env->cpuid_xlevel) +index = env->cpuid_level; +} else { +if (index > env->cpuid_level) +index = env->cpuid_level; +} + +switch(index) { +case 0: +*eax = env->cpuid_level; +*ebx = env->cpuid_vendor1; +*edx = env->cpuid_vendor2; +*ecx = env->cpuid_vendor3; +break; +case 1: +*eax = env->cpuid_version; +*ebx = (env->cpuid_apic_id << 24) | 8 << 8; /* CLFLUSH size in quad words, Linux wants it. 
*/ +*ecx = env->cpuid_ext_features; +*edx = env->cpuid_features; +break; +case 2: +/* cache info: needed for Pentium Pro compatibility */ +*eax = 1; +*ebx = 0; +*ecx = 0; +*edx = 0x2c307d; +break; +case 4: +/* cache info: needed for Core compatibility */ +switch (*ecx) { +case 0: /* L1 dcache info */ +*eax = 0x121; +*ebx = 0x1c0003f; +*ecx = 0x03f; +*edx = 0x001; +break; +case 1: /* L1 icache info */ +*eax = 0x122; +*ebx = 0x1c0003f; +*ecx = 0x03f; +*edx = 0x001; +break; +case 2: /* L2 cache info */ +*eax = 0x143; +*ebx = 0x3c0003f; +*ecx = 0xfff; +*edx = 0x001; +break; +default: /* end of info */ +*eax = 0; +*ebx = 0; +*ecx = 0; +*edx = 0; +break; +} + +break; +case 5: +/* mwait info: needed for Core compatibility */ +*eax = 0; /* Smallest monitor-line size in bytes */ +*ebx = 0; /* Largest monitor-line size in bytes */ +*ecx = CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; +*edx = 0; +break; +case 6: +/* Thermal and Power Leaf */ +*eax = 0; +*ebx = 0; +*ecx = 0; +*edx = 0; +break; +case 9: +/* Direct Cache Access Information Leaf */ +*eax = 0; /* Bits 0-31 in DCA_CAP MSR */ +*ebx = 0; +*ecx = 0; +*edx = 0; +break; +case 0xA: +/* Architectural Performance Monitoring Leaf */ +*eax = 0; +*ebx = 0; +*ecx = 0; +*edx = 0; +break; +case 0x8000: +*eax = env->cpuid_xlevel; +*ebx = env->cpuid_vendor1; +*edx = env->cpuid_vendor2; +*ecx = env->cpuid_vendor3; +break; +case 0x8001: +*eax = env->cpuid_features; +*ebx = 0; +*ecx = env->cpuid_ext3_features; +*edx = env->cpuid_ext2_features; +break; +case 0x8002: +case 0x8003: +case 0x8004: +*eax = env->cpuid_model[(index - 0x8002) * 4 + 0]; +*ebx = env->cpuid_model[(index - 0x8002) * 4 + 1]; +*ecx = env->cpuid_model[(index - 0x8002) * 4 + 2]; +*edx = env->cpuid_model[(index - 0x8002) * 4 + 3]; +break; +case 0x8005: +/* cache info (L1 cache) */ +*eax = 0x01ff01ff; +*ebx = 0x01ff01ff; +*ecx = 0x40020140; +*edx = 0x40020140; +break; +case 0x8006: +/* cache info (L2 cache) */ +*eax = 0; +*ebx = 0x42004200; +*ecx = 0x02008140; +*edx = 0; +break; 
+case 0x8008: +/* virtual & phys address size in low 2 bytes. */ +/* XXX: This value must match the one used in the MMU code. */ +if
[PATCH 1/3] Add additional CPU flag definitions
Some x86 CPU definitions that KVM needs Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 3c11e0f..b1678ef 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -159,9 +159,11 @@ #define HF_MP_MASK (1 << HF_MP_SHIFT) #define HF_EM_MASK (1 << HF_EM_SHIFT) #define HF_TS_MASK (1 << HF_TS_SHIFT) +#define HF_IOPL_MASK (3 << HF_IOPL_SHIFT) #define HF_LMA_MASK (1 << HF_LMA_SHIFT) #define HF_CS64_MASK (1 << HF_CS64_SHIFT) #define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT) +#define HF_VM_MASK (1 << HF_VM_SHIFT) #define HF_SMM_MASK (1 << HF_SMM_SHIFT) #define HF_SVME_MASK (1 << HF_SVME_SHIFT) #define HF_SVMI_MASK (1 << HF_SVMI_SHIFT) @@ -178,6 +180,9 @@ #define HF2_NMI_MASK (1 << HF2_NMI_SHIFT) #define HF2_VINTR_MASK(1 << HF2_VINTR_SHIFT) +#define CR0_PE_SHIFT 0 +#define CR0_MP_SHIFT 1 + #define CR0_PE_MASK (1 << 0) #define CR0_MP_MASK (1 << 1) #define CR0_EM_MASK (1 << 2) @@ -196,7 +201,8 @@ #define CR4_PAE_MASK (1 << 5) #define CR4_PGE_MASK (1 << 7) #define CR4_PCE_MASK (1 << 8) -#define CR4_OSFXSR_MASK (1 << 9) +#define CR4_OSFXSR_SHIFT 9 +#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT) #define CR4_OSXMMEXCPT_MASK (1 << 10) #define PG_PRESENT_BIT 0 @@ -229,6 +235,7 @@ #define PG_ERROR_RSVD_MASK 0x08 #define PG_ERROR_I_D_MASK 0x10 +#define MSR_IA32_TSC0x10 #define MSR_IA32_APICBASE 0x1b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE(1<<11) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] kvm: disable virtualization on kdump
On Mon, Oct 27, 2008 at 10:32:43AM -0700, Eric W. Biederman wrote: > Avi Kivity <[EMAIL PROTECTED]> writes: > > > > I wouldn't mind notifiers (with a nice comment explaining that you must know > > what you're doing, though that's the case with most kernel APIs). I'm fine > > with > > either approach. > > This is the 3rd request I have seen for a notifier. This is the first > request I have seen for code that must be executed in the kexec on > panic path. So history suggest to me that notifiers make it > unreasonably easy to get code onto the kexec on panic code path. > > Occasionally the kexec on panic code path is tested to see how > well it works in strange situations like being called from > a stack overflow etc. > > The rest of the history is that previous attempts like lkcd > had very programmer friendly interfaces, that worked fine > in test environments giving beautiful core dumps, but when things > broke in the field they were essentially useless. The kdump > approach is still not completely reliable but it does work > well enough that people get useful crash dumps sometimes. > > I feel anything that makes the kexec on panic code path harder > to verify it will work when things are crazy broken, like > a notifier is something we should avoid. I am still wondering if a simple function pointer (instead of a full notifier interface) would be good enough. It looks like a reasonable tradeoff. I think I will get flamed if I try to pull to the core a bunch of code that always lived in the KVM module. 8) And even if we pull those functions to the core, we will still have a function pointer on the new code anyway, because we would need to support vmx and svm. -- Eduardo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: RFC: VMX: initialize TSC offset relative to vm creation time
Marcelo Tosatti wrote: > On Sat, Sep 13, 2008 at 07:55:02AM +0300, Avi Kivity wrote: >> Marcelo Tosatti wrote: >>> VMX initializes the TSC offset for each vcpu at different times, and >>> also reinitializes it for vcpus other than 0 on APIC SIPI message. >>> >>> This bug causes the TSC's to appear unsynchronized in the guest, even if >>> the host is good. >>> >>> Older Linux kernels don't handle the situation very well, so >>> gettimeofday is likely to go backwards in time: >>> >>> http://www.mail-archive.com/kvm@vger.kernel.org/msg02955.html >>> http://sourceforge.net/tracker/index.php?func=detail&aid=2025534&group_id=180599&atid=893831 >>> >>> Fix it by initializating the offset of each vcpu relative to vm creation >>> time, and moving it from vmx_vcpu_reset to vmx_vcpu_setup, out of the >>> APIC MP init path. >>> >>> >>> >> This is good in principle, but we need to detect if we're on a multiple >> board host (or a host with unsynced tscs) and do something else in that >> case. > > I think this is a separate, and difficult, problem. For instance older > Linux guests that correct the TSC across CPU's are broken at the moment > in the unsynced TSC case. > > That is, the fact that KVM does not handle unsynced TSC's on the host is > not an argument against this patch which clearly fixes a bug. > > Take commit 019960ae9933161c2809fa4ee608ba30d9639fd2 for example. > Has anything changed "recently" with the TSC code? Recently here being the past 2 months since you first crafted the patch. I ask because in the past few runs based on kvm.git trees (e.g., as recently as a pull on 10/26), this tsc offset patch no longer fixes the problem. 
The following one does fix the problem with kvm.git pulled on 10/26/08: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 64e2439..d5da717 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -860,7 +860,7 @@ static void guest_write_tsc(u64 guest_tsc) u64 host_tsc; rdtscll(host_tsc); - vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); + vmcs_write64(TSC_OFFSET, 0); } /* This is the vmx counterpart (or at least to my understanding) to a suggestion Ben had for the svm code. david -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Add VMRUN handler v5
On 20/10/08 19:04 +0200, Alexander Graf wrote: > +static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) > +{ > + nsvm_printk("VMrun\n"); > + if (nested_svm_check_permissions(svm)) > + return 1; > + > + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; > + skip_emulated_instruction(&svm->vcpu); > + > + if (nested_svm_do(svm, svm->vmcb->save.rax, 0, > + NULL, nested_svm_vmrun)) > + return 1; > + > + if (nested_svm_do(svm, svm->vmcb->control.msrpm_base_pa, 0, > + NULL, nested_svm_vmrun_msrpm)) > + return 1; > + > + return 1; > +} A nitpick, but you could remove the last if() statement and one of the last two return statements. Unless you foresee more calls to nested_svm_do() in here. Mike -- Mike Day http://www.ncultra.org AIM: ncmikeday | Yahoo IM: ultra.runner PGP key: http://www.ncultra.org/ncmike/pubkey.asc -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests
On Tue, 2008-10-28 at 12:06 +0200, [EMAIL PROTECTED] wrote: ... > +static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus, > + uint8_t r_dev, uint8_t r_func) > +{ > +char dir[128], name[128]; > +int fd, r = 0; > +FILE *f; > +unsigned long long start, end, size, flags; > +PCIRegion *rp; > +PCIDevRegions *dev = &pci_dev->real_device; > + > +dev->region_number = 0; > + > +snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/:%02x:%02x.%x/", > + r_bus, r_dev, r_func); > + > +snprintf(name, sizeof(name), "%sconfig", dir); > + > +fd = open(name, O_RDWR); > +if (fd == -1) { > +fprintf(stderr, "%s: %s: %m\n", __func__, name); > +return 1; > +} > +dev->config_fd = fd; > +again: > +r = read(fd, pci_dev->dev.config, sizeof(pci_dev->dev.config)); > +if (r < 0) { > +if (errno == EINTR || errno == EAGAIN) > +goto again; > +fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno); > +} > + > +snprintf(name, sizeof(name), "%sresource", dir); > + > +f = fopen(name, "r"); > +if (f == NULL) { > +fprintf(stderr, "%s: %s: %m\n", __func__, name); > +return 1; > +} > +r = -1; > +while (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) == 3) { > +r++; > +rp = dev->regions + r; Could, in theory, overflow dev->regions here. 
Suggest: +for (r = 0; r < MAX_IO_REGIONS; r++) { +if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3) +break; > +rp->valid = 0; > +size = end - start + 1; > +flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; > +if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) > +continue; > +if (flags & IORESOURCE_MEM) { > +flags &= ~IORESOURCE_IO; > + snprintf(name, sizeof(name), "%sresource%d", dir, r); > +fd = open(name, O_RDWR); > +if (fd == -1) > +continue; /* probably ROM */ > +rp->resource_fd = fd; > +} else > +flags &= ~IORESOURCE_PREFETCH; > + > +rp->type = flags; > +rp->valid = 1; > +rp->base_addr = start; > +rp->size = size; > +DEBUG("region %d size %d start 0x%x type %d resource_fd %d\n", > + r, rp->size, start, rp->type, rp->resource_fd); > +} > +fclose(f); > + > +dev->region_number = r; > +return 0; > +} > + > +static int disable_iommu; Why is this global? The flag is set per-device on the command-line and only affects whether we pass KVM_DEV_ASSIGN_ENABLE_IOMMU to kvm_assign_pci_device() > +int nr_assigned_devices; > +static LIST_HEAD(, AssignedDevInfo) adev_head; > + > +static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn) > +{ > +return (uint32_t)bus << 8 | (uint32_t)devfn; > +} > + > +static AssignedDevice *register_real_device(PCIBus *e_bus, > +const char *e_dev_name, > +int e_devfn, uint8_t r_bus, > +uint8_t r_dev, uint8_t r_func) > +{ > +int r; > +AssignedDevice *pci_dev; > +uint8_t e_device, e_intx; > + > +DEBUG("Registering real physical device %s (devfn=0x%x)\n", > + e_dev_name, e_devfn); > + > +pci_dev = (AssignedDevice *) > +pci_register_device(e_bus, e_dev_name, sizeof(AssignedDevice), > +e_devfn, assigned_dev_pci_read_config, > +assigned_dev_pci_write_config); > +if (NULL == pci_dev) { > +fprintf(stderr, "%s: Error: Couldn't register real device %s\n", > +__func__, e_dev_name); > +return NULL; > +} > +if (get_real_device(pci_dev, r_bus, r_dev, r_func)) { > +fprintf(stderr, "%s: Error: Couldn't get real device 
(%s)!\n", > +__func__, e_dev_name); > +goto out; > +} > + > +/* handle real device's MMIO/PIO BARs */ > +if (assigned_dev_register_regions(pci_dev->real_device.regions, > + pci_dev->real_device.region_number, > + pci_dev)) > +goto out; > + > +/* handle interrupt routing */ > +e_device = (pci_dev->dev.devfn >> 3) & 0x1f; > +e_intx = pci_dev->dev.config[0x3d] - 1; > +pci_dev->intpin = e_intx; > +pci_dev->run = 0; > +pci_dev->girq = 0; > +pci_dev->h_busnr = r_bus; > +pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func); > + > +#ifdef KVM_CAP_DEVICE_ASSIGNMENT > +if (kvm_enabled()) { > +struct kvm_assigned_pci_dev assigned_dev_data; > + > +memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); > +assigned_dev_data.assigned_dev_id = > +calc_assigned_dev_id(pci_dev->h_busnr, > + (uint32_t)pci_dev->h_devfn); > +
Re: [PATCH 3/6] qemu: piix: Introduce functions to get pin number from irq and vice versa
On Tue, Oct 28, 2008 at 06:21:35PM +0200, Avi Kivity wrote: > Muli Ben-Yehuda wrote: > > >>> Well, what is this needed for in the first place? >>> >> >> This specific function is not used. I assume Amit added it for >> completeness with piix_get_irq. piix_get_irq, as far as I can tell, is >> used in only one place (when the guest updates a device's >> configuration space interrupt register) to go from interrupt pin >> (intx) to guest IRQ line. >> > > In that case, a solution suggests itself... Yes, of course! I don't know how I missed it! Err... What is it? Seriously, I removed piix3_get_pin as soon as I noticed it wasn't actually used, but I am not convinced that there are no aliasing issues remaining with piix_get_irq---most likely because I do not understand PCI interrupt routing to any sufficient degree. Do you see problems remaining with piix_get_irq? Cheers, Muli -- The First Workshop on I/O Virtualization (WIOV '08) Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/ <-> SYSTOR 2009---The Israeli Experimental Systems Conference http://www.haifa.il.ibm.com/conferences/systor2009/ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch] v4 - fold struct vcpu_info into CPUState
Hi, Here's an updated version of the patch. It should fix the problems Hollis ran into, and also compile on x86_64 again :-) I managed to get rid of all the runtime use of qemu_kvm_cpu_env(), except for the hotplug code. But I think it's reasonable to do the walk of the linked list in that case. However, the more I have looked at this, the more obvious to me it becomes that it is right to expose struct CPUState to libkvm, and avoid passing around int vcpu. Comments and test reports very welcome! Cheers, Jes Merge vcpu_info into CPUState. Moves definition of vcpu related structs to new header qemu-kvm-vcpu.h and declares this struct in i386/ia64/ppc CPUState structs if USE_KVM is defined. In addition conver qemu-kvm.c to pull vcpu_info out of CPUState. This eliminates ugly static sized array of struct vcpu_info. Signed-off-by: Jes Sorensen <[EMAIL PROTECTED]> --- libkvm/kvm-common.h |8 +- libkvm/libkvm.c | 28 libkvm/libkvm.h | 10 +-- qemu/hw/acpi.c | 18 + qemu/qemu-kvm-ia64.c|4 - qemu/qemu-kvm-powerpc.c |5 - qemu/qemu-kvm-vcpu.h| 34 ++ qemu/qemu-kvm-x86.c | 11 +-- qemu/qemu-kvm.c | 151 ++-- qemu/qemu-kvm.h |6 - qemu/target-i386/cpu.h |4 + qemu/target-ia64/cpu.h |5 + qemu/target-ppc/cpu.h |5 + 13 files changed, 172 insertions(+), 117 deletions(-) Index: kvm-userspace.git/libkvm/kvm-common.h === --- kvm-userspace.git.orig/libkvm/kvm-common.h +++ kvm-userspace.git/libkvm/kvm-common.h @@ -84,11 +84,11 @@ void kvm_show_code(kvm_context_t kvm, int vcpu); int handle_halt(kvm_context_t kvm, int vcpu); -int handle_shutdown(kvm_context_t kvm, int vcpu); -void post_kvm_run(kvm_context_t kvm, int vcpu); -int pre_kvm_run(kvm_context_t kvm, int vcpu); +int handle_shutdown(kvm_context_t kvm, void *env); +void post_kvm_run(kvm_context_t kvm, void *env); +int pre_kvm_run(kvm_context_t kvm, void *env); int handle_io_window(kvm_context_t kvm); -int handle_debug(kvm_context_t kvm, int vcpu); +int handle_debug(kvm_context_t kvm, void *env); int try_push_interrupts(kvm_context_t kvm); 
#endif Index: kvm-userspace.git/libkvm/libkvm.c === --- kvm-userspace.git.orig/libkvm/libkvm.c +++ kvm-userspace.git/libkvm/libkvm.c @@ -738,9 +738,9 @@ return 0; } -int handle_debug(kvm_context_t kvm, int vcpu) +int handle_debug(kvm_context_t kvm, void *env) { - return kvm->callbacks->debug(kvm->opaque, vcpu); + return kvm->callbacks->debug(kvm->opaque, env); } int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs) @@ -822,9 +822,9 @@ return kvm->callbacks->halt(kvm->opaque, vcpu); } -int handle_shutdown(kvm_context_t kvm, int vcpu) +int handle_shutdown(kvm_context_t kvm, void *env) { - return kvm->callbacks->shutdown(kvm->opaque, vcpu); + return kvm->callbacks->shutdown(kvm->opaque, env); } int try_push_interrupts(kvm_context_t kvm) @@ -837,14 +837,14 @@ return kvm->callbacks->try_push_nmi(kvm->opaque); } -void post_kvm_run(kvm_context_t kvm, int vcpu) +void post_kvm_run(kvm_context_t kvm, void *env) { - kvm->callbacks->post_kvm_run(kvm->opaque, vcpu); + kvm->callbacks->post_kvm_run(kvm->opaque, env); } -int pre_kvm_run(kvm_context_t kvm, int vcpu) +int pre_kvm_run(kvm_context_t kvm, void *env) { - return kvm->callbacks->pre_kvm_run(kvm->opaque, vcpu); + return kvm->callbacks->pre_kvm_run(kvm->opaque, env); } int kvm_get_interrupt_flag(kvm_context_t kvm, int vcpu) @@ -872,7 +872,7 @@ #endif } -int kvm_run(kvm_context_t kvm, int vcpu) +int kvm_run(kvm_context_t kvm, int vcpu, void *env) { int r; int fd = kvm->vcpu_fd[vcpu]; @@ -886,19 +886,19 @@ if (!kvm->irqchip_in_kernel) run->request_interrupt_window = try_push_interrupts(kvm); #endif - r = pre_kvm_run(kvm, vcpu); + r = pre_kvm_run(kvm, env); if (r) return r; r = ioctl(fd, KVM_RUN, 0); if (r == -1 && errno != EINTR && errno != EAGAIN) { r = -errno; - post_kvm_run(kvm, vcpu); + post_kvm_run(kvm, env); fprintf(stderr, "kvm_run: %s\n", strerror(-r)); return r; } - post_kvm_run(kvm, vcpu); + post_kvm_run(kvm, env); #if defined(KVM_CAP_COALESCED_MMIO) if (kvm->coalesced_mmio) { @@ -948,7 +948,7 @@ r = 
handle_io(kvm, run, vcpu); break; case KVM_EXIT_DEBUG: - r = handle_debug(kvm, vcpu); + r = handle_debug(kvm, env); break; case KVM_EXIT_MMIO: r = handle_mmio(kvm, run); @@ -962,7 +962,7 @@ #endif
Re: [PATCH 3/6] qemu: piix: Introduce functions to get pin number from irq and vice versa
Muli Ben-Yehuda wrote: Well, what is this needed for in the first place? This specific function is not used. I assume Amit added it for completeness with piix_get_irq. piix_get_irq, as far as I can tell, is used in only one place (when the guest updates a device's configuration space interrupt register) to go from interrupt pin (intx) to guest IRQ line. In that case, a solution suggests itself... -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [PATCH] qemu: ppc: xer access prototypes no more used & implemented
From: Christian Ehrhardt <[EMAIL PROTECTED]> Revision 5500 of the qemu repository removed all code using ppc_load_xer & ppc_store_xer as well as their implementation. Another patch fixes their usage in kvm-userspace for powerpc, but I think that header can now be cleaned up, therefore this patch to qemu-devel. Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] cpu.h |2 -- 1 file changed, 2 deletions(-) [diff] diff --git a/qemu/target-ppc/cpu.h b/qemu/target-ppc/cpu.h --- a/qemu/target-ppc/cpu.h +++ b/qemu/target-ppc/cpu.h @@ -725,8 +725,6 @@ #endif void do_store_sr (CPUPPCState *env, int srnum, target_ulong value); #endif /* !defined(CONFIG_USER_ONLY) */ -target_ulong ppc_load_xer (CPUPPCState *env); -void ppc_store_xer (CPUPPCState *env, target_ulong value); void ppc_store_msr (CPUPPCState *env, target_ulong value); void cpu_ppc_reset (void *opaque); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05 of 10] [PATCH] libcflat: ppc: add timebase accessor
Provide a timebase accessor for ppc testcases. Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> [diffstat] config-powerpc-44x.mak |3 ++- test/lib/powerpc/44x/timebase.S | 28 test/lib/powerpc/44x/timebase.h | 25 + 3 files changed, 55 insertions(+), 1 deletion(-) [diff] diff --git a/user/config-powerpc-44x.mak b/user/config-powerpc-44x.mak --- a/user/config-powerpc-44x.mak +++ b/user/config-powerpc-44x.mak @@ -5,7 +5,8 @@ cflatobjs += \ test/lib/powerpc/44x/map.o \ - test/lib/powerpc/44x/tlbwe.o + test/lib/powerpc/44x/tlbwe.o \ + test/lib/powerpc/44x/timebase.o simpletests += \ test/powerpc/44x/tlbsx.bin \ diff --git a/user/test/lib/powerpc/44x/timebase.S b/user/test/lib/powerpc/44x/timebase.S new file mode 100644 --- /dev/null +++ b/user/test/lib/powerpc/44x/timebase.S @@ -0,0 +1,28 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 
2008 + * + * Authors: Hollis Blanchard <[EMAIL PROTECTED]> + */ + +/* unsigned long long mftb(void); */ +.global mftb +mftb: + mftbu r5 + mftbl r4 + mftbu r3 + cmpwr3, r5 + bne mftb + blr diff --git a/user/test/lib/powerpc/44x/timebase.h b/user/test/lib/powerpc/44x/timebase.h new file mode 100644 --- /dev/null +++ b/user/test/lib/powerpc/44x/timebase.h @@ -0,0 +1,25 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <[EMAIL PROTECTED]> + */ + +#ifndef __TIMEBASE_H__ +#define __TIMEBASE_H__ + +unsigned long long mftb(void); + +#endif /* __TIMEBASE_H__ */ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04 of 10] [PATCH] user: ppc: implement PowerPC 44x libcflat
From: Hollis Blanchard <[EMAIL PROTECTED]> - Create a 44x-specific makefile. - Reorganize PowerPC makefiles to separate "simple" tests from those which link with libcflat. - Create a minimal libcflat testcase (which just exits). Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] config-powerpc-44x.mak | 14 +++ config-powerpc.mak | 46 ++ test/lib/powerpc/44x/map.c | 51 +++ test/lib/powerpc/44x/tlbwe.S | 29 test/lib/powerpc/io.c| 35 + test/powerpc/cstart.S| 38 test/powerpc/exit.c | 23 +++ 7 files changed, 221 insertions(+), 15 deletions(-) [diff] diff --git a/user/config-powerpc-44x.mak b/user/config-powerpc-44x.mak new file mode 100644 --- /dev/null +++ b/user/config-powerpc-44x.mak @@ -0,0 +1,14 @@ + + +# for some reason binutils hates tlbsx unless we say we're 405 :( +CFLAGS += -Wa,-m405 -I test/lib/powerpc/44x + +cflatobjs += \ + test/lib/powerpc/44x/map.o \ + test/lib/powerpc/44x/tlbwe.o + +simpletests += \ + test/powerpc/44x/tlbsx.bin \ + test/powerpc/44x/tlbwe_16KB.bin \ + test/powerpc/44x/tlbwe_hole.bin \ + test/powerpc/44x/tlbwe.bin diff --git a/user/config-powerpc.mak b/user/config-powerpc.mak --- a/user/config-powerpc.mak +++ b/user/config-powerpc.mak @@ -1,26 +1,42 @@ +platform := 44x + CFLAGS += -m32 CFLAGS += -D__powerpc__ CFLAGS += -I $(KERNELDIR)/include -# for some reaons binutils hates tlbsx unless we say we're 405 :( -CFLAGS += -Wa,-mregnames,-m405 +CFLAGS += -Wa,-mregnames -I test/lib -%.bin: %.o - $(OBJCOPY) -O binary $^ $@ +cstart := test/powerpc/cstart.o -testobjs := \ - io.bin \ - spin.bin \ - sprg.bin \ - 44x/tlbsx.bin \ - 44x/tlbwe_16KB.bin \ - 44x/tlbwe_hole.bin \ - 44x/tlbwe.bin +cflatobjs += \ + test/lib/powerpc/io.o -tests := $(addprefix test/powerpc/, $(testobjs)) +$(libcflat): LDFLAGS += -nostdlib +$(libcflat): CFLAGS += -ffreestanding -all: kvmtrace kvmctl $(tests) +# these tests do not use libcflat +simpletests := \ + test/powerpc/spin.bin \ + test/powerpc/io.bin \ 
+ test/powerpc/sprg.bin + +# theses tests use cstart.o, libcflat, and libgcc +tests := \ + test/powerpc/exit.bin + +include config-powerpc-$(platform).mak + + +all: kvmtrace kvmctl $(libcflat) $(simpletests) $(tests) + +$(simpletests): %.bin: %.o + $(CC) -nostdlib $^ -Wl,-T,flat.lds -o $@ + +$(tests): %.bin: $(cstart) %.o $(libcflat) + $(CC) -nostdlib $^ $(libgcc) -Wl,-T,flat.lds -o $@ kvmctl_objs = main-ppc.o iotable.o ../libkvm/libkvm.a arch_clean: - rm -f $(tests) + $(RM) $(simpletests) $(tests) $(cstart) + $(RM) $(patsubst %.bin, %.elf, $(simpletests) $(tests)) + $(RM) $(patsubst %.bin, %.o, $(simpletests) $(tests)) diff --git a/user/test/lib/powerpc/44x/map.c b/user/test/lib/powerpc/44x/map.c new file mode 100644 --- /dev/null +++ b/user/test/lib/powerpc/44x/map.c @@ -0,0 +1,51 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 
2008 + * + * Authors: Hollis Blanchard <[EMAIL PROTECTED]> + */ + +#include "libcflat.h" + +#define TLB_SIZE 64 + +extern void tlbwe(unsigned int index, + unsigned char tid, + unsigned int word0, + unsigned int word1, + unsigned int word2); + +unsigned int next_free_index; + +#define PAGE_SHIFT 12 +#define PAGE_MASK (~((1<= TLB_SIZE) + panic("TLB overflow"); + + w0 = (vaddr & PAGE_MASK) | V; + w1 = paddr & PAGE_MASK; + w2 = 0x3; + + tlbwe(next_free_index, 0, w0, w1, w2); +} diff --git a/user/test/lib/powerpc/44x/tlbwe.S b/user/test/lib/powerpc/44x/tlbwe.S new file mode 100644 --- /dev/null +++ b/user/test/lib/powerpc/44x/tlbwe.S @@ -0,0 +1,29 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied w
[PATCH 10 of 10] [PATCH] kvm-userspace: ppc: fix initial ppc memory setup
From: Christian Ehrhardt <[EMAIL PROTECTED]> The old memory initialization code was broken for all cases not fitting in one ram stick. This patch fixes the ram_stick calculation, now sets the proper base adresses per stick and removes the old workaround. Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] ppc440.c| 12 +--- ppc440.h|8 ++-- ppc440_bamboo.c | 30 -- 3 files changed, 31 insertions(+), 19 deletions(-) [diff] diff --git a/qemu/hw/ppc440.c b/qemu/hw/ppc440.c --- a/qemu/hw/ppc440.c +++ b/qemu/hw/ppc440.c @@ -3,6 +3,7 @@ * * Copyright 2007 IBM Corporation. * Authors: Jerone Young <[EMAIL PROTECTED]> + * Christian Ehrhardt <[EMAIL PROTECTED]> * * This work is licensed under the GNU GPL license version 2 or later. * @@ -24,15 +25,15 @@ void ppc440ep_init(CPUState *env, - target_phys_addr_t ram_bases[2], - target_phys_addr_t ram_sizes[2], + target_phys_addr_t ram_bases[PPC440_MAX_RAM_SLOTS], + target_phys_addr_t ram_sizes[PPC440_MAX_RAM_SLOTS], + int nbanks, qemu_irq **picp, ppc4xx_pci_t **pcip, int do_init) { ppc4xx_mmio_t *mmio; qemu_irq *pic, *irqs; - ram_addr_t offset; ppc4xx_pci_t *pci; int i; @@ -55,10 +56,7 @@ /* SDRAM controller */ printf("trying to setup sdram controller\n"); /* XXX 440EP's ECC interrupts are on UIC1 */ - ppc405_sdram_init(env, pic[14], 2, ram_bases, ram_sizes, do_init); - offset = 0; - for (i = 0; i < 2; i++) - offset += ram_sizes[i]; + ppc405_sdram_init(env, pic[14], nbanks, ram_bases, ram_sizes, do_init); /* PCI */ pci = ppc4xx_pci_init(env, pic, diff --git a/qemu/hw/ppc440.h b/qemu/hw/ppc440.h --- a/qemu/hw/ppc440.h +++ b/qemu/hw/ppc440.h @@ -3,6 +3,7 @@ * * Copyright 2007 IBM Corporation. 
* Authors: Jerone Young <[EMAIL PROTECTED]> + * Christian Ehrhardt <[EMAIL PROTECTED]> * * This work is licensed under the GNU GPL licence version 2 or later * @@ -20,9 +21,12 @@ #include "exec-all.h" #include "boards.h" +#define PPC440_MAX_RAM_SLOTS 4 + void ppc440ep_init(CPUState *env, - target_phys_addr_t ram_bases[2], - target_phys_addr_t ram_sizes[2], + target_phys_addr_t ram_bases[PPC440_MAX_RAM_SLOTS], + target_phys_addr_t ram_sizes[PPC440_MAX_RAM_SLOTS], + int nbanks, qemu_irq **picp, ppc4xx_pci_t **pcip, int do_init); diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c --- a/qemu/hw/ppc440_bamboo.c +++ b/qemu/hw/ppc440_bamboo.c @@ -2,7 +2,9 @@ * Qemu PowerPC 440 board emualtion * * Copyright 2007 IBM Corporation. - * Authors: Jerone Young <[EMAIL PROTECTED]> + * Authors: + * Jerone Young <[EMAIL PROTECTED]> + * Christian Ehrhardt <[EMAIL PROTECTED]> * * This work is licensed under the GNU GPL license version 2 or later. * @@ -30,7 +32,8 @@ const char *cpu_model) { char *buf=NULL; - target_phys_addr_t ram_bases[4], ram_sizes[4]; + target_phys_addr_t ram_bases[PPC440_MAX_RAM_SLOTS]; + target_phys_addr_t ram_sizes[PPC440_MAX_RAM_SLOTS]; NICInfo *nd; qemu_irq *pic; ppc4xx_pci_t *pci; @@ -46,6 +49,8 @@ int ret; int ram_stick_sizes[] = {256<<20, 128<<20, 64<<20, 32<<20, 16<<20, 8<<20 }; /* in bytes */ + int nbanks = 0; /* number of used memory banks */ + int next_bank_offset = 0; ram_addr_t tmp_ram_size; int i=0, k=0; uint32_t cpu_freq; @@ -55,15 +60,22 @@ printf("%s: START\n", __func__); /* Setup Memory */ - printf("Ram size passed is: %i MB\n", - bytes_to_mb((int)ram_size)); + if (ram_size < 8<<20) { + printf("ERROR: ram size too small (min 8mb)\n"); + exit(1); + } else + printf("Ram size passed is: %i MB\n", + bytes_to_mb((int)ram_size)); tmp_ram_size = ram_size; - for (i=0; i < (sizeof(ram_sizes)/sizeof(ram_sizes[0])); i++) { - for (k=0; k < (sizeof(ram_stick_sizes)/sizeof(ram_stick_sizes[0])); k++) { + for (i = 0; i < PPC440_MAX_RAM_SLOTS; i++) { 
+ for (k = 0; k < (sizeof(ram_stick_sizes)/sizeof(int)); k++) { if ((tmp_ram_size/ram_stick_sizes[k]) > 0) { ram_sizes[i] = ram_stick_sizes[k]; + ram_bases[i] = next_bank_offset; + next_bank_offset += ram_stick_sizes[k]; + nbanks++; tmp_ram_size -= ram_stick_sizes[k]; break;
[PATCH 08 of 10] [PATCH] qemu: ppc: if not a uImage, try to load kernel as ELF
From: Hollis Blanchard <[EMAIL PROTECTED]> This allows qemu to load "bare metal" ELF kernels, useful for standalone benchmarks and testcases. We could/should also load the specified file as a flat binary, if both uImage and ELF loaders fail. (See hw/arm_boot.c.) Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] ppc440_bamboo.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) [diff] diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c --- a/qemu/hw/ppc440_bamboo.c +++ b/qemu/hw/ppc440_bamboo.c @@ -35,8 +35,8 @@ qemu_irq *pic; ppc4xx_pci_t *pci; CPUState *env; - target_ulong ep=0; - target_ulong la=0; + uint64_t ep=0; + uint64_t la=0; int is_linux=1; /* Will assume allways is Linux for now */ target_long kernel_size=0; target_ulong initrd_base=0; @@ -97,6 +97,9 @@ /* load kernel with uboot loader */ printf("%s: load kernel\n", __func__); ret = load_uimage(kernel_filename, &ep, &la, &kernel_size, &is_linux); + if (ret < 0) + ret = load_elf(kernel_filename, 0, &ep, &la, NULL); + if (ret < 0) { fprintf(stderr, "qemu: could not load kernel '%s'\n", kernel_filename); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02 of 10] [PATCH] user: ppc: fix threading bugs in main-ppc.c
From: Hollis Blanchard <[EMAIL PROTECTED]> - call io_table_register() before any vcpus have started - wait for all vcpus to exit before exiting the parent thread Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] main-ppc.c | 32 1 file changed, 12 insertions(+), 20 deletions(-) [diff] diff --git a/user/main-ppc.c b/user/main-ppc.c --- a/user/main-ppc.c +++ b/user/main-ppc.c @@ -51,7 +51,7 @@ struct io_table mmio_table; static int ncpus = 1; -static sem_t init_sem; +static sem_t exited_sem; static __thread int vcpu; static sigset_t kernel_sigmask; static sigset_t ipi_sigmask; @@ -220,16 +220,8 @@ asm volatile ("sync; isync"); } -static void init_vcpu(int n, unsigned long entry) +static void init_vcpu(int n) { - /* XXX must set initial TLB state and stack - struct kvm_regs regs = { - .pc = entry, - }; - - kvm_set_regs(kvm, 0, &regs); - */ - sigemptyset(&ipi_sigmask); sigaddset(&ipi_sigmask, IPI_SIGNAL); sigprocmask(SIG_UNBLOCK, &ipi_sigmask, NULL); @@ -237,7 +229,6 @@ vcpus[n].tid = gettid(); vcpu = n; kvm_set_signal_mask(kvm, n, &kernel_sigmask); - sem_post(&init_sem); } static void *do_create_vcpu(void *_n) @@ -245,8 +236,9 @@ int n = (long)_n; kvm_create_vcpu(kvm, n); - init_vcpu(n, 0x0); + init_vcpu(n); kvm_run(kvm, n); + sem_post(&exited_sem); return NULL; } @@ -368,14 +360,14 @@ len = load_file(vm_mem, argv[optind], 1); sync_caches(vm_mem, len); - sem_init(&init_sem, 0, 0); - init_vcpu(0, 0x0); - for (i = 1; i < ncpus; ++i) - start_vcpu(i); - for (i = 0; i < ncpus; ++i) - sem_wait(&init_sem); - io_table_register(&mmio_table, 0xf000, 64, mmio_handler, NULL); - return kvm_run(kvm, 0); + sem_init(&exited_sem, 0, 0); + for (i = 0; i < ncpus; ++i) + start_vcpu(i); + /* Wait for all vcpus to exit.
*/ + for (i = 0; i < ncpus; ++i) + sem_wait(&exited_sem); + + return 0; } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests
On Tue, Oct 28, 2008 at 10:45:57AM -0500, Anthony Liguori wrote: >> +ifeq ($(USE_KVM), 1) >> +OBJS+= device-assignment.o >> +endif > > I don't think you want to build this on PPC so I think you need a > stronger check. Good point. How about checking TARGET_BASE_ARCH = i386? >> +static void assigned_dev_ioport_writel(void *opaque, uint32_t addr, >> + uint32_t value) >> +{ >> +AssignedDevRegion *r_access = opaque; >> +uint32_t r_pio = guest_to_host_ioport(r_access, addr); >> + >> +DEBUG("%s: r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", >> + r_pio, (int)r_access->e_physbase, >> + (unsigned long)r_access->r_virtbase, value); >> > > The format doesn't match the parameter count. Yep, already fixed. >> +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num, >> +uint32_t addr, uint32_t size, int >> type) >> +{ >> +AssignedDevice *r_dev = (AssignedDevice *) pci_dev; >> +AssignedDevRegion *region = &r_dev->v_addrs[region_num]; >> +uint32_t old_port = region->u.r_baseport; >> +uint32_t old_num = region->e_size; >> +int first_map = (old_num == 0); >> +struct ioperm_data data; >> +int i; >> + >> +region->e_physbase = addr; >> +region->e_size = size; >> + >> +DEBUG("e_phys=0x%x r_baseport=%x type=0x%x len=%d region_num=%d \n", >> + addr, region->u.r_baseport, type, size, region_num); >> + >> +memset(&data, 0, sizeof(data)); >> + >> +if (!first_map) { >> +data.start_port = old_port; >> +data.num = old_num; + data.turn_on = 0; >> + >> +for (i = 0; i < smp_cpus; ++i) >> +kvm_ioperm(qemu_kvm_cpu_env(i), &data); >> > > How does this interact with VCPU hot-plug? I have no idea. Weidong? 
>> +#ifdef KVM_CAP_IOMMU >> +/* We always enable the IOMMU if present >> + * (or when not disabled on the command line) >> + */ >> +r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); >> +if (r && !disable_iommu) >> +assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; >> +#endif >> +r = kvm_assign_pci_device(kvm_context, &assigned_dev_data); >> +if (r < 0) { >> +fprintf(stderr, "Could not notify kernel about " >> +"assigned device \"%s\"\n", e_dev_name); >> +perror("register_real_device"); >> +goto out; >> +} >> +} >> > > You still succeed if KVM_CAP_DEVICE_ASSIGNMENT isn't defined? That > means a newer userspace compiled on an older kernel will silently > fail if they try to do device assignment. There's probably no > reason to build this file if KVM_CAP_DEVICE_ASSIGNMENT isn't defined > (see how the in-kernel PIT gets conditionally build depending on > whether that cap is available). Ok, I'll take a look at this. >> +#endif >> +term_printf("Registered host PCI device %02x:%02x.%1x " >> +"(\"%s\") as guest device %02x:%02x.%1x\n", >> +r_bus, r_dev, r_func, e_dev_name, >> +pci_bus_num(e_bus), e_device, r_func); >> >> > > If I read the code correctly, this term_printf() happens regardless > of whether this is being done for PCI hotplug or for command-line > assignment? That's a problem as it'll print garbage on the monitor > when you start QEMU which could break management applications. Is there a more suitable alternative or shall I just nuke it? >> diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c >> index d559f0c..5fdb726 100644 >> --- a/qemu/hw/pc.c >> +++ b/qemu/hw/pc.c >> @@ -33,6 +33,7 @@ >> #include "boards.h" >> #include "console.h" >> #include "fw_cfg.h" >> +#include "device-assignment.h" >> #include "qemu-kvm.h" >> @@ -1157,6 +1158,21 @@ static void pc_init1(ram_addr_t ram_size, int >> vga_ram_size, >> if (pci_enabled) >> virtio_balloon_init(pci_bus); >> + >> +if (kvm_enabled() && device_assignment_enabled) { >> +int i; >> > > Stray tab. Grrr. Silly emacs. 
> >> +for (i = 0; i < assigned_devices_index; i++) { >> +if (add_assigned_device(assigned_devices[i]) < 0) { >> +fprintf(stderr, "Warning: could not add assigned device >> %s\n", >> +assigned_devices[i]); >> +} >> +} >> + >> +if (init_all_assigned_devices(pci_bus)) { >> +fprintf(stderr, "Failed to initialize assigned devices\n"); >> +exit (1); >> +} >> +} >> } >> +#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__) >> +case QEMU_OPTION_pcidevice: >> +device_assignment_enabled = 1; >> +if (assigned_devices_index >= MAX_DEV_ASSIGN_CMDLINE) { >> +fprintf(stderr, "Too many assigned devices\n"); >> +exit(1); >> +} >> +assigned_devices[assigned_devices_index] = optarg; >> +assigned_devices_index++; >> +break; >> >
[PATCH 07 of 10] [PATCH] qemu: ppc: define maximum SMP limit as 1 for Bamboo
From: Christian Ehrhardt <[EMAIL PROTECTED]> Fix for qemu runtime error. Full error message: Number of SMP cpus requested (1), exceeds max cpus supported by machine `bamboo' (0) Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> --- [diffstat] ppc440_bamboo.c |7 --- 1 file changed, 4 insertions(+), 3 deletions(-) [diff] diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c --- a/qemu/hw/ppc440_bamboo.c +++ b/qemu/hw/ppc440_bamboo.c @@ -202,7 +202,8 @@ } QEMUMachine bamboo_machine = { - "bamboo", - "bamboo", - bamboo_init, + .name = "bamboo", + .desc = "bamboo", + .init = bamboo_init, + .max_cpus = 1, }; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 06 of 10] [PATCH] user: ppc: add stub nmi handler
From: Hollis Blanchard <[EMAIL PROTECTED]> Add a stub NMI handler to user/main-ppc.c. Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] main-ppc.c |6 ++ 1 file changed, 6 insertions(+) [diff] diff --git a/user/main-ppc.c b/user/main-ppc.c --- a/user/main-ppc.c +++ b/user/main-ppc.c @@ -83,6 +83,11 @@ } static int test_try_push_interrupts(void *opaque) +{ + return 0; +} + +static int test_try_push_nmi(void *opaque) { return 0; } @@ -175,6 +180,7 @@ .halt= test_halt, .io_window = test_io_window, .try_push_interrupts = test_try_push_interrupts, + .try_push_nmi = test_try_push_nmi, .post_kvm_run = test_post_kvm_run, .pre_kvm_run = test_pre_kvm_run, .powerpc_dcr_read = test_dcr_read, -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03 of 10] [PATCH] user: ppc: better error reporting in load_file
From: Hollis Blanchard <[EMAIL PROTECTED]> Declare r in load_file() as ssize_t and, on a read error, also print the number of bytes read so far. Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] main-ppc.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) [diff] diff --git a/user/main-ppc.c b/user/main-ppc.c --- a/user/main-ppc.c +++ b/user/main-ppc.c @@ -183,7 +183,7 @@ static unsigned long load_file(void *mem, const char *fname, int inval_icache) { - int r; + ssize_t r; int fd; unsigned long bytes = 0; @@ -200,6 +200,7 @@ if (r == -1) { perror("read"); + printf("read %d bytes\n", bytes); exit(1); } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09 of 10] [PATCH] kvm: external module: Treat NONARCH_CONFIG as a list
From: Hollis Blanchard <[EMAIL PROTECTED]> As discussed on the list, the unifdef changes break powerpc (and possibly more). A fix is to treat NONARCH_CONFIG as a list instead of a single item. Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] Makefile |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) [diff] diff --git a/kernel/Makefile b/kernel/Makefile --- a/kernel/Makefile +++ b/kernel/Makefile @@ -25,8 +25,9 @@ gawk -v version=$(version) -f $(ARCH_DIR)/hack-module.awk $1.orig \ | sed '/\#include/! s/\blapic\b/l_apic/g' > $1 && rm $1.orig +unifdef_uflags = $(foreach arch, $(NONARCH_CONFIG), -UCONFIG_$(arch)) unifdef = mv $1 $1.orig && \ - unifdef -DCONFIG_$(ARCH_CONFIG) -UCONFIG_$(NONARCH_CONFIG) $1.orig > $1; \ + unifdef -DCONFIG_$(ARCH_CONFIG) $(unifdef_uflags) $1.orig > $1; \ [ $$? -le 2 ] && rm $1.orig hack = $(call _hack,$T/$(strip $1)) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 00 of 10] kvm-userspace: ppc: userspace fixes for powerpc
From: Christian Ehrhardt <[EMAIL PROTECTED]> This is a set of various fixes in kvm-userspace for powerpc. This time without the split between user/* and the rest and without the qemu patch (sent separately to qemu-devel now). Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> b/kernel/Makefile |3 + b/qemu/hw/ppc440.c | 12 +++ b/qemu/hw/ppc440.h |8 +++-- b/qemu/hw/ppc440_bamboo.c |7 ++-- b/qemu/qemu-kvm-powerpc.c |4 +- b/user/config-powerpc-44x.mak | 14 + b/user/config-powerpc.mak | 46 - b/user/main-ppc.c | 32 +++- b/user/test/lib/powerpc/44x/map.c | 51 + b/user/test/lib/powerpc/44x/timebase.S | 28 ++ b/user/test/lib/powerpc/44x/timebase.h | 25 b/user/test/lib/powerpc/44x/tlbwe.S| 29 ++ b/user/test/lib/powerpc/io.c | 35 ++ b/user/test/powerpc/cstart.S | 38 b/user/test/powerpc/exit.c | 23 ++ qemu/hw/ppc440_bamboo.c| 36 +++ user/config-powerpc-44x.mak|3 + user/main-ppc.c|9 + 18 files changed, 339 insertions(+), 64 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01 of 10] [PATCH] kvm-userspace: powerpc: fix env->xer access
From: Christian Ehrhardt <[EMAIL PROTECTED]> Since qemu revision 5500 which was merged with the last qemu merge env->xer is accessed directly. Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] qemu-kvm-powerpc.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) [diff] diff --git a/qemu/qemu-kvm-powerpc.c b/qemu/qemu-kvm-powerpc.c --- a/qemu/qemu-kvm-powerpc.c +++ b/qemu/qemu-kvm-powerpc.c @@ -57,7 +57,7 @@ regs.ctr = env->ctr; regs.lr = env->lr; -regs.xer = ppc_load_xer(env); +regs.xer = env->xer; regs.msr = env->msr; regs.srr0 = env->spr[SPR_SRR0]; @@ -93,7 +93,7 @@ env->ctr =regs.ctr; env->lr = regs.lr; -ppc_store_xer(env,regs.xer); +env->xer = regs.xer; env->msr = regs.msr; /* calculate hflags based on the current msr using the ppc qemu helper */ hreg_compute_hflags(env); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests
On Tue, Oct 28, 2008 at 11:36:10PM +0800, Han, Weidong wrote: > > diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c > > index c5f3f29..5e66832 100644 > > --- a/qemu/qemu-kvm.c > > +++ b/qemu/qemu-kvm.c > > @@ -20,6 +20,7 @@ int kvm_pit = 1; > > #include "console.h" > > #include "block.h" > > #include "compatfd.h" > > +#include "hw/device-assignment.h" > > It's not necessary. Indeed, leftovers from my ioperm bits. Removed. > > #include "qemu-kvm.h" > > #include > > @@ -27,6 +28,7 @@ int kvm_pit = 1; > > #include > > #include > > #include > > +#include > > It's not necessary. This one is needed on my compile system for the ioperm() declaration. Cheers, Muli -- The First Workshop on I/O Virtualization (WIOV '08) Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/ <-> SYSTOR 2009---The Israeli Experimental Systems Conference http://www.haifa.il.ibm.com/conferences/systor2009/ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests
[EMAIL PROTECTED] wrote: Signed-off-by: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> --- qemu/Makefile.target|3 + qemu/hw/device-assignment.c | 641 +++ qemu/hw/device-assignment.h | 117 qemu/hw/pc.c| 16 + qemu/hw/pci.c |7 + qemu/qemu-kvm.c | 14 + qemu/qemu-kvm.h |8 + qemu/vl.c | 28 ++ 8 files changed, 834 insertions(+), 0 deletions(-) create mode 100644 qemu/hw/device-assignment.c create mode 100644 qemu/hw/device-assignment.h diff --git a/qemu/Makefile.target b/qemu/Makefile.target index d9bdeca..5d44e08 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -621,6 +621,9 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o +ifeq ($(USE_KVM), 1) +OBJS+= device-assignment.o +endif I don't think you want to build this on PPC so I think you need a stronger check. +static void assigned_dev_ioport_writel(void *opaque, uint32_t addr, + uint32_t value) +{ +AssignedDevRegion *r_access = opaque; +uint32_t r_pio = guest_to_host_ioport(r_access, addr); + +DEBUG("%s: r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", + r_pio, (int)r_access->e_physbase, + (unsigned long)r_access->r_virtbase, value); The format doesn't match the parameter count. 
+static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, + uint32_t e_phys, uint32_t e_size, int type) +{ +AssignedDevice *r_dev = (AssignedDevice *) pci_dev; +AssignedDevRegion *region = &r_dev->v_addrs[region_num]; +uint32_t old_ephys = region->e_physbase; +uint32_t old_esize = region->e_size; +int first_map = (region->e_size == 0); +int ret = 0; + +DEBUG("e_phys=%08x r_virt=%x type=%d len=%08x region_num=%d \n", + e_phys, (uint32_t)region->r_virtbase, type, e_size, region_num); + +region->e_physbase = e_phys; +region->e_size = e_size; + +if (!first_map) + kvm_destroy_phys_mem(kvm_context, old_ephys, old_esize); + +if (e_size > 0) + ret = kvm_register_phys_mem(kvm_context, e_phys, +region->u.r_virtbase, e_size, 0); +if (ret != 0) { + fprintf(stderr, "%s: Error: create new mapping failed\n", __func__); + exit(1); +} +} + +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num, +uint32_t addr, uint32_t size, int type) +{ +AssignedDevice *r_dev = (AssignedDevice *) pci_dev; +AssignedDevRegion *region = &r_dev->v_addrs[region_num]; +uint32_t old_port = region->u.r_baseport; +uint32_t old_num = region->e_size; +int first_map = (old_num == 0); +struct ioperm_data data; +int i; + +region->e_physbase = addr; +region->e_size = size; + +DEBUG("e_phys=0x%x r_baseport=%x type=0x%x len=%d region_num=%d \n", + addr, region->u.r_baseport, type, size, region_num); + +memset(&data, 0, sizeof(data)); + +if (!first_map) { + data.start_port = old_port; + data.num = old_num; + data.turn_on = 0; + + for (i = 0; i < smp_cpus; ++i) + kvm_ioperm(qemu_kvm_cpu_env(i), &data); How does this interact with VCPU hot-plug? 
+} + +data.start_port = region->u.r_baseport; +data.num = size; +data.turn_on = 1; + +for (i = 0; i < smp_cpus; ++i) + kvm_ioperm(qemu_kvm_cpu_env(i), &data); + +register_ioport_read(addr, size, 1, assigned_dev_ioport_readb, + (r_dev->v_addrs + region_num)); +register_ioport_read(addr, size, 2, assigned_dev_ioport_readw, + (r_dev->v_addrs + region_num)); +register_ioport_read(addr, size, 4, assigned_dev_ioport_readl, + (r_dev->v_addrs + region_num)); +register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb, + (r_dev->v_addrs + region_num)); +register_ioport_write(addr, size, 2, assigned_dev_ioport_writew, + (r_dev->v_addrs + region_num)); +register_ioport_write(addr, size, 4, assigned_dev_ioport_writel, + (r_dev->v_addrs + region_num)); +} + +static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address, + uint32_t val, int len) +{ +int fd; +ssize_t ret; + +DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", + ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), + (uint16_t) address, val, len); + +if (address == 0x4) { +pci_default_write_config(d, address, val, len); +/* Continue to program the card */ +} + +if ((addre
Re: [PATCH 3/6] qemu: piix: Introduce functions to get pin number from irq and vice versa
On Tue, Oct 28, 2008 at 12:46:39PM +0200, Avi Kivity wrote: > Muli Ben-Yehuda wrote: >> On Sun, Oct 26, 2008 at 03:31:24PM +0200, Avi Kivity wrote: >> >>> Amit Shah wrote: >>> +int piix3_get_pin(int pic_irq) +{ +int i; +for (i = 0; i < 4; i++) +if (piix3_dev->config[0x60+i] == pic_irq) +return i; +return -1; +} >>> What happens if two pci interrupts are routed to one irq line? >>> >> >> This one I'm still thinking about. >> > > Well, what is this needed for in the first place? This specific function is not used. I assume Amit added it for completeness with piix_get_irq. piix_get_irq, as far as I can tell, is used in only one place (when the guest updates a device's configuration space interrupt register) to go from interrupt pin (intx) to guest IRQ line. Cheers, Muli -- The First Workshop on I/O Virtualization (WIOV '08) Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/ <-> SYSTOR 2009---The Israeli Experimental Systems Conference http://www.haifa.il.ibm.com/conferences/systor2009/ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/6] device assignment: support for assigning PCI devices to guests
On Tue, Oct 28, 2008 at 10:10:07PM +0800, Han, Weidong wrote: > > +DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", > > + r_pio, (int)r_access->e_physbase, > > + (unsigned long)r_access->r_virtbase, value); > > should be (unsigned long)r_access->u.r_virtbase Thanks, actually it should be u.r_baseport for IO ports and there were a number of other bogosities there too. Here's a quick incremental patch compiled with DEBUG() enabled. >From 9b917528647b55a1046a5a19d9e2427bb2d86db7 Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda <[EMAIL PROTECTED]> Date: Tue, 28 Oct 2008 17:30:30 +0200 Subject: [PATCH 1/1] fix DEBUG statements (thanks to Weidong Han for spotting) Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> --- qemu/hw/device-assignment.c | 32 1 files changed, 16 insertions(+), 16 deletions(-) diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c index 89b05f9..8b56599 100644 --- a/qemu/hw/device-assignment.c +++ b/qemu/hw/device-assignment.c @@ -63,9 +63,9 @@ static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, AssignedDevRegion *r_access = opaque; uint32_t r_pio = guest_to_host_ioport(r_access, addr); -DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", +DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->r_virtbase, value); + (unsigned long)r_access->u.r_baseport, value); outb(value, r_pio); } @@ -76,9 +76,9 @@ static void assigned_dev_ioport_writew(void *opaque, uint32_t addr, AssignedDevRegion *r_access = opaque; uint32_t r_pio = guest_to_host_ioport(r_access, addr); -DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", - __func__, r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->r_virtbase, value); +DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", + r_pio, (int)r_access->e_physbase, + (unsigned long)r_access->u.r_baseport, value); outw(value, r_pio); } @@ -89,9 +89,9 @@ static void 
assigned_dev_ioport_writel(void *opaque, uint32_t addr, AssignedDevRegion *r_access = opaque; uint32_t r_pio = guest_to_host_ioport(r_access, addr); -DEBUG("%s: r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", +DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->r_virtbase, value); + (unsigned long)r_access->u.r_baseport, value); outl(value, r_pio); } @@ -104,9 +104,9 @@ static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr) value = inb(r_pio); -DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", +DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n", r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->r_virtbase, value); + (unsigned long)r_access->u.r_baseport, value); return value; } @@ -119,9 +119,9 @@ static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr) value = inw(r_pio); -DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", +DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->r_virtbase, value); + (unsigned long)r_access->u.r_baseport, value); return value; } @@ -134,9 +134,9 @@ static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr) value = inl(r_pio); -DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", +DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->r_virtbase, value); + (unsigned long)r_access->u.r_baseport, value); return value; } @@ -151,8 +151,8 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, int first_map = (region->e_size == 0); int ret = 0; -DEBUG("e_phys=%08x r_virt=%x type=%d len=%08x region_num=%d \n", - e_phys, (uint32_t)region->r_virtbase, type, e_size, region_num); +DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n", + e_phys, region->u.r_virtbase, type, e_size, 
region_num); region->e_physbase = e_phys; region->e_size = e_size; @@ -425,7 +425,7 @@ again: rp->valid = 1; rp->base_addr = start; rp->size = size; -DEBUG("region %d size %d start 0x%x type %d resource_fd %d\n", +DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n", r, rp->size, start, rp->type, rp->resource_fd); } fclose(f); -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http:/
RE: [PATCH 5/6] device assignment: support for assigning PCI devices to guests
[EMAIL PROTECTED] wrote: > From: Muli Ben-Yehuda <[EMAIL PROTECTED]> > > This patch has been contributed to by the following people: > > Or Sagi <[EMAIL PROTECTED]> > Nir Peleg <[EMAIL PROTECTED]> > Amit Shah <[EMAIL PROTECTED]> > Ben-Ami Yassour <[EMAIL PROTECTED]> > Weidong Han <[EMAIL PROTECTED]> > Glauber de Oliveira Costa <[EMAIL PROTECTED]> > Muli Ben-Yehuda <[EMAIL PROTECTED]> > > With this patch, we can assign a device on the host machine to a > guest. > > A new command-line option, -pcidevice is added. > To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use this: > > -pcidevice host=04:08.0 > > * The host driver for the device, if any, is to be removed before > assigning the device (else device assignment will fail). > > * A device that shares IRQ with another host device cannot currently > be assigned. > > * The RAW_IO capability is needed for this to work > > This works only with the in-kernel irqchip method; to use the > userspace irqchip, a kernel module (irqhook) and some extra changes > are needed. > > [muli: lots of small fixes from Muli and Weidong Han addressing all v7 > review comments] > > Signed-off-by: Amit Shah <[EMAIL PROTECTED]> > Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> > --- > qemu/Makefile.target|3 + > qemu/hw/device-assignment.c | 641 > +++ > qemu/hw/device-assignment.h | 117 qemu/hw/pc.c > | 16 + qemu/hw/pci.c |7 + > qemu/qemu-kvm.c | 14 + > qemu/qemu-kvm.h |8 + > qemu/vl.c | 28 ++ > 8 files changed, 834 insertions(+), 0 deletions(-) > create mode 100644 qemu/hw/device-assignment.c > create mode 100644 qemu/hw/device-assignment.h > > diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c > index c5f3f29..5e66832 100644 > --- a/qemu/qemu-kvm.c > +++ b/qemu/qemu-kvm.c > @@ -20,6 +20,7 @@ int kvm_pit = 1; > #include "console.h" > #include "block.h" > #include "compatfd.h" > +#include "hw/device-assignment.h" It's not necessary. 
> > #include "qemu-kvm.h" > #include > @@ -27,6 +28,7 @@ int kvm_pit = 1; > #include > #include > #include > +#include It's not necessary. Regards, Weidong -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 5/6] device assignment: support for assigning PCI devices to guests
Han, Weidong wrote: > [EMAIL PROTECTED] wrote: >> From: Muli Ben-Yehuda <[EMAIL PROTECTED]> >> >> This patch has been contributed to by the following people: >> >> Or Sagi <[EMAIL PROTECTED]> >> Nir Peleg <[EMAIL PROTECTED]> >> Amit Shah <[EMAIL PROTECTED]> >> Ben-Ami Yassour <[EMAIL PROTECTED]> >> Weidong Han <[EMAIL PROTECTED]> >> Glauber de Oliveira Costa <[EMAIL PROTECTED]> >> Muli Ben-Yehuda <[EMAIL PROTECTED]> >> >> With this patch, we can assign a device on the host machine to a >> guest. >> >> A new command-line option, -pcidevice is added. >> To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use >> this: >> >> -pcidevice host=04:08.0 >> >> * The host driver for the device, if any, is to be removed before >> assigning the device (else device assignment will fail). >> >> * A device that shares IRQ with another host device cannot currently >> be assigned. >> >> * The RAW_IO capability is needed for this to work >> >> This works only with the in-kernel irqchip method; to use the >> userspace irqchip, a kernel module (irqhook) and some extra changes >> are needed. 
>> >> [muli: lots of small fixes from Muli and Weidong Han addressing all >> v7 review comments] >> >> Signed-off-by: Amit Shah <[EMAIL PROTECTED]> >> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> >> --- >> qemu/Makefile.target|3 + >> qemu/hw/device-assignment.c | 641 >> +++ >> qemu/hw/device-assignment.h | 117 qemu/hw/pc.c >> | 16 + qemu/hw/pci.c |7 + >> qemu/qemu-kvm.c | 14 + >> qemu/qemu-kvm.h |8 + >> qemu/vl.c | 28 ++ >> 8 files changed, 834 insertions(+), 0 deletions(-) >> create mode 100644 qemu/hw/device-assignment.c >> create mode 100644 qemu/hw/device-assignment.h >> >> diff --git a/qemu/Makefile.target b/qemu/Makefile.target >> index d9bdeca..5d44e08 100644 >> --- a/qemu/Makefile.target >> +++ b/qemu/Makefile.target >> @@ -621,6 +621,9 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) >> dma.o OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o >> pc.o OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o >> OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o +ifeq >> ($(USE_KVM), 1) +OBJS+= device-assignment.o >> +endif >> ifeq ($(USE_KVM_PIT), 1) >> OBJS+= i8254-kvm.o >> endif >> diff --git a/qemu/hw/device-assignment.c >> b/qemu/hw/device-assignment.c new file mode 100644 index >> 000..89b05f9 --- /dev/null >> +++ b/qemu/hw/device-assignment.c >> @@ -0,0 +1,641 @@ >> +/* >> + * Copyright (c) 2007, Neocleus Corporation. >> + * >> + * This program is free software; you can redistribute it and/or >> modify it + * under the terms and conditions of the GNU General >> Public License, + * version 2, as published by the Free Software >> Foundation. + * + * This program is distributed in the hope it will >> be useful, but WITHOUT + * ANY WARRANTY; without even the implied >> warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. >> See the GNU General Public License for + * more details. 
>> + * >> + * You should have received a copy of the GNU General Public License >> along with + * this program; if not, write to the Free Software >> Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA >> 02111-1307 USA. + * + * >> + * Assign a PCI device from the host to a guest VM. + * >> + * Adapted for KVM by Qumranet. >> + * >> + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED]) >> + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED]) >> + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED]) >> + * Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED]) >> + * Copyright (C) 2008, IBM, Muli Ben-Yehuda ([EMAIL PROTECTED]) + */ >> +#include >> +#include >> +#include "qemu-kvm.h" >> +#include "hw.h" >> +#include "pc.h" >> +#include "sysemu.h" >> +#include "console.h" >> +#include "device-assignment.h" >> + >> +/* From linux/ioport.h */ >> +#define IORESOURCE_IO 0x0100 /* Resource type */ >> +#define IORESOURCE_MEM 0x0200 >> +#define IORESOURCE_IRQ 0x0400 >> +#define IORESOURCE_DMA 0x0800 >> +#define IORESOURCE_PREFETCH 0x1000 /* No side effects */ + >> +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */ >> + >> +#ifdef DEVICE_ASSIGNMENT_DEBUG >> +#define DEBUG(fmt, ...) \ >> +do { \ >> + fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);\ + >> } while (0) +#else >> +#define DEBUG(fmt, ...) do { } while(0) >> +#endif >> + >> +static uint32_t guest_to_host_ioport(AssignedDevRegion *region, >> uint32_t addr) +{ +return region->u.r_baseport + (addr - >> region->e_physbase); +} + >> +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, >> + uint32_t value) +{ >> +AssignedDevRegion *r_acces
[GIT PULL] KVM fixes for 2.6.28-rc2
Linus, please pull from the repo and branch @ git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git kvm-updates/2.6.28 To receive a bunch of kvm fixes. Most notably, fix the paravirt mmu bug that prevents booting recent Linux kernels on kvm. Shortlog, diffstat: Avi Kivity (1): KVM: Future-proof device assignment ABI Marcelo Tosatti (1): KVM: MMU: sync root on paravirt TLB flush Sheng Yang (1): KVM: Fix guest shared interrupt with in-kernel irqchip Xiantao Zhang (2): KVM: ia64: Fix halt emulation logic KVM: ia64: Makefile fix for forcing to re-generate asm-offsets.h arch/ia64/include/asm/kvm_host.h |6 ++- arch/ia64/kvm/Makefile |8 +++- arch/ia64/kvm/kvm-ia64.c | 80 +++-- arch/ia64/kvm/kvm_fw.c |9 +++- arch/ia64/kvm/process.c |2 +- arch/x86/include/asm/kvm_host.h |3 + arch/x86/kvm/i8254.c | 11 - arch/x86/kvm/i8254.h |1 + arch/x86/kvm/mmu.c |1 + arch/x86/kvm/x86.c |6 ++- include/linux/kvm.h |6 +++ include/linux/kvm_host.h |7 +++- virt/kvm/irq_comm.c | 42 ++- virt/kvm/kvm_main.c | 12 -- 14 files changed, 138 insertions(+), 56 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/6] qemu: piix: Introduce functions to get pin number from irq and vice versa
Muli Ben-Yehuda wrote: On Sun, Oct 26, 2008 at 03:31:24PM +0200, Avi Kivity wrote: Amit Shah wrote: +int piix3_get_pin(int pic_irq) +{ +int i; +for (i = 0; i < 4; i++) +if (piix3_dev->config[0x60+i] == pic_irq) +return i; +return -1; +} What happens if two pci interrupts are routed to one irq line? This one I'm still thinking about. Well, what is this needed for in the first place? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 5/6] device assignment: support for assigning PCI devices to guests
[EMAIL PROTECTED] wrote: > From: Muli Ben-Yehuda <[EMAIL PROTECTED]> > > This patch has been contributed to by the following people: > > Or Sagi <[EMAIL PROTECTED]> > Nir Peleg <[EMAIL PROTECTED]> > Amit Shah <[EMAIL PROTECTED]> > Ben-Ami Yassour <[EMAIL PROTECTED]> > Weidong Han <[EMAIL PROTECTED]> > Glauber de Oliveira Costa <[EMAIL PROTECTED]> > Muli Ben-Yehuda <[EMAIL PROTECTED]> > > With this patch, we can assign a device on the host machine to a > guest. > > A new command-line option, -pcidevice is added. > To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use this: > > -pcidevice host=04:08.0 > > * The host driver for the device, if any, is to be removed before > assigning the device (else device assignment will fail). > > * A device that shares IRQ with another host device cannot currently > be assigned. > > * The RAW_IO capability is needed for this to work > > This works only with the in-kernel irqchip method; to use the > userspace irqchip, a kernel module (irqhook) and some extra changes > are needed. 
> > [muli: lots of small fixes from Muli and Weidong Han addressing all v7 > review comments] > > Signed-off-by: Amit Shah <[EMAIL PROTECTED]> > Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> > --- > qemu/Makefile.target|3 + > qemu/hw/device-assignment.c | 641 > +++ > qemu/hw/device-assignment.h | 117 qemu/hw/pc.c > | 16 + qemu/hw/pci.c |7 + > qemu/qemu-kvm.c | 14 + > qemu/qemu-kvm.h |8 + > qemu/vl.c | 28 ++ > 8 files changed, 834 insertions(+), 0 deletions(-) > create mode 100644 qemu/hw/device-assignment.c > create mode 100644 qemu/hw/device-assignment.h > > diff --git a/qemu/Makefile.target b/qemu/Makefile.target > index d9bdeca..5d44e08 100644 > --- a/qemu/Makefile.target > +++ b/qemu/Makefile.target > @@ -621,6 +621,9 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o > OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o > OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o > OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o > +ifeq ($(USE_KVM), 1) > +OBJS+= device-assignment.o > +endif > ifeq ($(USE_KVM_PIT), 1) > OBJS+= i8254-kvm.o > endif > diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c > new file mode 100644 > index 000..89b05f9 > --- /dev/null > +++ b/qemu/hw/device-assignment.c > @@ -0,0 +1,641 @@ > +/* > + * Copyright (c) 2007, Neocleus Corporation. > + * > + * This program is free software; you can redistribute it and/or > modify it + * under the terms and conditions of the GNU General > Public License, + * version 2, as published by the Free Software > Foundation. + * > + * This program is distributed in the hope it will be useful, but > WITHOUT + * ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU > General Public License for + * more details. 
> + * > + * You should have received a copy of the GNU General Public License > along with + * this program; if not, write to the Free Software > Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA > 02111-1307 USA. + * > + * > + * Assign a PCI device from the host to a guest VM. > + * > + * Adapted for KVM by Qumranet. > + * > + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED]) > + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED]) > + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED]) > + * Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED]) > + * Copyright (C) 2008, IBM, Muli Ben-Yehuda ([EMAIL PROTECTED]) > + */ > +#include > +#include > +#include "qemu-kvm.h" > +#include "hw.h" > +#include "pc.h" > +#include "sysemu.h" > +#include "console.h" > +#include "device-assignment.h" > + > +/* From linux/ioport.h */ > +#define IORESOURCE_IO 0x0100 /* Resource type */ > +#define IORESOURCE_MEM 0x0200 > +#define IORESOURCE_IRQ 0x0400 > +#define IORESOURCE_DMA 0x0800 > +#define IORESOURCE_PREFETCH 0x1000 /* No side effects */ > + > +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */ > + > +#ifdef DEVICE_ASSIGNMENT_DEBUG > +#define DEBUG(fmt, ...) \ > +do { \ > + fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);\ > +} while (0) > +#else > +#define DEBUG(fmt, ...) do { } while(0) > +#endif > + > +static uint32_t guest_to_host_ioport(AssignedDevRegion *region, > uint32_t addr) +{ > +return region->u.r_baseport + (addr - region->e_physbase); > +} > + > +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, > + uint32_t value) > +{ > +AssignedDevRegion *r_access = opaque; > +uint32_t r_pio = guest_to_host_ioport(r_access, addr); > + > +DEBUG("r_pio=%08x e_physbase=%0
Re: [PATCH] x86 emulator: move skip_emulated_instruction()
Guillaume Thouvenin wrote: If we call the emulator we shouldn't call skip_emulated_instruction() in the first place, since the emulator already computes the next rip for us. Thus we move ->skip_emulated_instruction() out of kvm_emulate_pio() and into handle_io() (and the svm equivalent). We also replaced "return 0" by "break" in the "do_io:" case because now the shadow register state needs to be committed. Otherwise eip will never be updated. Applied, thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/6] KVM/userspace: Device Assignment: Add ioctl wrappers needed for assigning devices
On Sun, Oct 26, 2008 at 03:29:19PM +0200, Avi Kivity wrote: > Amit Shah wrote: >> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT >> +int kvm_assign_pci_device(kvm_context_t kvm, >> + struct kvm_assigned_pci_dev *assigned_dev) >> +{ >> +return ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev); >> > > Convert -1s to -errno, to avoid problems with errno being > overwritten later. Done. Cheers, Muli -- The First Workshop on I/O Virtualization (WIOV '08) Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/ <-> SYSTOR 2009---The Israeli Experimental Systems Conference http://www.haifa.il.ibm.com/conferences/systor2009/ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [v7] Userspace patches for PCI device assignment
On Fri, Oct 24, 2008 at 10:59:58AM -0500, Anthony Liguori wrote: > Amit Shah wrote: >> This patchset enables device assignment for KVM hosts for PCI devices. It >> uses the Intel IOMMU by default if available. >> >> Major changes since the last send in no particular order: >> - formatting changes: adhere to qemu style >> - use strncmp, strncpy etc. instead of the insecure ones >> > > FWIW, strncpy almost never does what you expect it to. snprintf() > is much nicer. Fixed all over. If you find a stray strncpy, shoot it. Cheers, Muli -- The First Workshop on I/O Virtualization (WIOV '08) Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/ <-> SYSTOR 2009---The Israeli Experimental Systems Conference http://www.haifa.il.ibm.com/conferences/systor2009/ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/6] KVM/userspace: Device Assignment: Support for assigning PCI devices to guests
On Mon, Oct 27, 2008 at 02:32:48PM +0800, Han, Weidong wrote: > Yes, it's buggy. It should look like: > > uint32_t old_ephys = region->e_physbase; > uint32_t old_esize = region->e_size; > > ... > > kvm_destroy_phys_mem(kvm_context, old_ephys, old_esize); Fixed in v8. Thanks! Cheers, Muli -- The First Workshop on I/O Virtualization (WIOV '08) Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/ <-> SYSTOR 2009---The Israeli Experimental Systems Conference http://www.haifa.il.ibm.com/conferences/systor2009/ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/6] qemu: piix: Introduce functions to get pin number from irq and vice versa
On Sun, Oct 26, 2008 at 03:31:24PM +0200, Avi Kivity wrote: > Amit Shah wrote: >> +int piix3_get_pin(int pic_irq) >> +{ >> +int i; >> +for (i = 0; i < 4; i++) >> +if (piix3_dev->config[0x60+i] == pic_irq) >> +return i; >> +return -1; >> +} >> > > What happens if two pci interrupts are routed to one irq line? This one I'm still thinking about. Cheers, Muli -- The First Workshop on I/O Virtualization (WIOV '08) Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/ <-> SYSTOR 2009---The Israeli Experimental Systems Conference http://www.haifa.il.ibm.com/conferences/systor2009/ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/6] KVM/userspace: Device Assignment: Support for assigning PCI devices to guests
On Fri, Oct 24, 2008 at 11:22:48AM -0500, Anthony Liguori wrote: > Amit Shah wrote: >> +#include >> > > Is this header really necessary? No, removed. > >> +#include "device-assignment.h" >> + >> +/* From linux/ioport.h */ >> +#define IORESOURCE_IO 0x0100 /* Resource type */ >> +#define IORESOURCE_MEM 0x0200 >> +#define IORESOURCE_IRQ 0x0400 >> +#define IORESOURCE_DMA 0x0800 >> +#define IORESOURCE_PREFETCH 0x1000 /* No side effects */ >> + >> +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */ >> + >> +#ifdef DEVICE_ASSIGNMENT_DEBUG >> +#define DEBUG(fmt, args...) \ >> > > Please use C99 style variadics. Done. > >> +do { \ >> + fprintf(stderr, "%s: " fmt, __func__ , ## args);\ >> +} while (0) >> +#else >> +#define DEBUG(fmt, args...) do { } while(0) >> +#endif >> + >> +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, >> + uint32_t value) >> +{ >> +AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; >> > > Cast is unnecessary. Removed. > >> +uint32_t r_pio = (unsigned long)r_access->r_virtbase >> ++ (addr - r_access->e_physbase); >> > > It would be nice to make this a function to make it more obvious that you > are translating from guest to host regions. The cast to unsigned long > should probably be target_ulong too. Done. > >> +DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x" >> + " r_virtbase=%08lx value=%08x\n", >> + __func__, r_pio, (int)r_access->e_physbase, >> + (unsigned long)r_access->r_virtbase, value); >> > > This debug statement looks wrong to me. You're passing stderr. > It's true for all of these functions. Fixed. 
> >> +static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, >> + uint32_t e_phys, uint32_t e_size, int >> type) >> +{ >> +AssignedDevice *r_dev = (AssignedDevice *) pci_dev; >> +AssignedDevRegion *region = &r_dev->v_addrs[region_num]; >> +int first_map = (region->e_size == 0); >> +int ret = 0; >> + >> +DEBUG("%s: e_phys=%08x r_virt=%x type=%d len=%08x region_num=%d \n", >> + __func__, e_phys, (uint32_t)region->r_virtbase, type, e_size, >> + region_num); >> > > You already have __func__ in your debug printf(). Fixed. > >> +region->e_physbase = e_phys; >> +region->e_size = e_size; >> + >> +/* FIXME: Add support for emulated MMIO for non-kvm guests */ >> +if (kvm_enabled()) { >> > > I don't think having a kvm_enabled() check here is very useful. I > think device-assignment.c should be conditional on USE_KVM, and the > only kvm_enabled() check should be when creating the initial device > assignment. Practically speaking, QEMU is never going to support > device assignment outside of the context of KVM because I strongly > doubt anything like irqhook will make it upstream. Reworked along your suggestions, please let me know if you have further comments. >> +if (!first_map) >> +kvm_destroy_phys_mem(kvm_context, e_phys, e_size); >> +if (e_size > 0) >> +ret = kvm_register_phys_mem(kvm_context, e_phys, >> +region->r_virtbase, e_size, 0); >> +if (ret != 0) >> +fprintf(stderr, "%s: Error: create new mapping failed\n", >> __func__); >> > > If we do get an error here, we shouldn't keep going. This error is > probably going to happen in practice if a guest tries to pass > through too many devices and we run out of slots. Fixed, we exit(1) now (is there a more graceful way to bail out?). 
>> +} >> +} >> + >> +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num, >> +uint32_t addr, uint32_t size, int >> type) >> +{ >> +AssignedDevice *r_dev = (AssignedDevice *) pci_dev; >> +AssignedDevRegion *region = &r_dev->v_addrs[region_num]; >> +int r; >> + >> +region->e_physbase = addr; >> +region->e_size = size; >> + >> +DEBUG("%s: e_phys=0x%x r_virt=%x type=0x%x len=%d region_num=%d \n", >> + __func__, addr, (uint32_t)region->r_virtbase, type, size, >> region_num); >> > > Need to fix this DEBUG(). Fixed. > >> +r = ioperm((uint32_t)region->r_virtbase, size, 1); >> > > I don't think this is enough for KVM. This will only do the ioperm > in the thread that triggered the IO. If you have an SMP guest, > ioperm needs to be done on each VCPU's thread. Fixed. >> +if (r < 0) { >> +perror("assigned_dev_ioport_map: ioperm"); >> +return; >> +} >> > > Again, if we fail, we have to exit QEMU gracefully. Fixed. > >> +register_ioport_read(addr, size, 1, assigned_dev_ioport_readb, >> + (void *) (r_dev->v_addrs + region_num)); >> +register_ioport_read(addr, size, 2, assigned_dev_iop
[PATCH 1/6] device assignment: add ioctl wrappers
From: Amit Shah <[EMAIL PROTECTED]> [muli: return -errno instead of ioctl retval] Signed-off-by: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> --- libkvm/libkvm.c | 25 + libkvm/libkvm.h | 27 +++ 2 files changed, 52 insertions(+), 0 deletions(-) diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c index 444b97f..e7dba8a 100644 --- a/libkvm/libkvm.c +++ b/libkvm/libkvm.c @@ -1112,3 +1112,28 @@ int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t siz return -ENOSYS; } +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +int kvm_assign_pci_device(kvm_context_t kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev); + if (ret < 0) + return -errno; + + return ret; +} + +int kvm_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq); + if (ret < 0) + return -errno; + + return ret; +} +#endif diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h index 423ce31..53d67f2 100644 --- a/libkvm/libkvm.h +++ b/libkvm/libkvm.h @@ -686,4 +686,31 @@ int kvm_s390_interrupt(kvm_context_t kvm, int slot, int kvm_s390_set_initial_psw(kvm_context_t kvm, int slot, psw_t psw); int kvm_s390_store_status(kvm_context_t kvm, int slot, unsigned long addr); #endif + +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +/*! + * \brief Notifies host kernel about a PCI device to be assigned to a guest + * + * Used for PCI device assignment, this function notifies the host + * kernel about the assigning of the physical PCI device to a guest. + * + * \param kvm Pointer to the current kvm_context + * \param assigned_dev Parameters, like bus, devfn number, etc + */ +int kvm_assign_pci_device(kvm_context_t kvm, + struct kvm_assigned_pci_dev *assigned_dev); + +/*! 
+ * \brief Notifies host kernel about changes to IRQ for an assigned device + * + * Used for PCI device assignment, this function notifies the host + * kernel about the changes in IRQ number for an assigned physical + * PCI device. + * + * \param kvm Pointer to the current kvm_context + * \param assigned_irq Parameters, like dev id, host irq, guest irq, etc + */ +int kvm_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq); +#endif #endif -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/6] device assignment: build vtd.c for Intel IOMMU support
From: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> --- kernel/x86/Kbuild |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/kernel/x86/Kbuild b/kernel/x86/Kbuild index 2369d00..c4723b1 100644 --- a/kernel/x86/Kbuild +++ b/kernel/x86/Kbuild @@ -9,6 +9,9 @@ kvm-objs := kvm_main.o x86.o mmu.o x86_emulate.o ../anon_inodes.o irq.o i8259.o ifeq ($(EXT_CONFIG_KVM_TRACE),y) kvm-objs += kvm_trace.o endif +ifeq ($(CONFIG_DMAR),y) +kvm-objs += vtd.o +endif kvm-intel-objs := vmx.o vmx-debug.o ../external-module-compat.o kvm-amd-objs := svm.o ../external-module-compat.o -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/6] device assignment: introduce pci_map_irq to get irq nr from pin number
From: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> --- qemu/hw/pci.c |5 + qemu/hw/pci.h |1 + 2 files changed, 6 insertions(+), 0 deletions(-) diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index 512dbea..c82cd20 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -560,6 +560,11 @@ static void pci_set_irq(void *opaque, int irq_num, int level) bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); } +int pci_map_irq(PCIDevice *pci_dev, int pin) +{ +return pci_dev->bus->map_irq(pci_dev, pin); +} + /***/ /* monitor info on PCI */ diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h index 60e4094..e11fbbf 100644 --- a/qemu/hw/pci.h +++ b/qemu/hw/pci.h @@ -81,6 +81,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num, uint32_t size, int type, PCIMapIORegionFunc *map_func); +int pci_map_irq(PCIDevice *pci_dev, int pin); uint32_t pci_default_read_config(PCIDevice *d, uint32_t address, int len); void pci_default_write_config(PCIDevice *d, -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[v8] Userspace patches for PCI device assignment
[v8] Userspace patches for PCI device assignment This patchset enables device assignment for KVM hosts for PCI devices. It uses the Intel IOMMU by default if available. Changes from v7->v8 in no particular order: - various formatting fixes, DEBUG cleanups, cast removals, etc. - s/strncpy/snprintf/ - split initialization in two phases per aliguori's suggestion - bail out on errors when we can't limp on - do ioperm on every cpu and vcpu (Weidong Han) - use pwrite/pread where applicable - split r_virtbase into different fields for memory and IO - fix destruction of MMIO regions (Disheng Su and Weidong Han) Changes from v6->v7 in no particular order: - formatting changes: adhere to qemu style - use strncmp, strncpy etc. instead of the insecure ones - move from array to linked list - change iopl() to ioperm() (Weidong Han) - other small changes as suggested during the review of v6. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/6] device assignment: support for assigning PCI devices to guests
From: Muli Ben-Yehuda <[EMAIL PROTECTED]> This patch has been contributed to by the following people: Or Sagi <[EMAIL PROTECTED]> Nir Peleg <[EMAIL PROTECTED]> Amit Shah <[EMAIL PROTECTED]> Ben-Ami Yassour <[EMAIL PROTECTED]> Weidong Han <[EMAIL PROTECTED]> Glauber de Oliveira Costa <[EMAIL PROTECTED]> Muli Ben-Yehuda <[EMAIL PROTECTED]> With this patch, we can assign a device on the host machine to a guest. A new command-line option, -pcidevice is added. To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use this: -pcidevice host=04:08.0 * The host driver for the device, if any, is to be removed before assigning the device (else device assignment will fail). * A device that shares IRQ with another host device cannot currently be assigned. * The RAW_IO capability is needed for this to work This works only with the in-kernel irqchip method; to use the userspace irqchip, a kernel module (irqhook) and some extra changes are needed. [muli: lots of small fixes from Muli and Weidong Han addressing all v7 review comments] Signed-off-by: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> --- qemu/Makefile.target|3 + qemu/hw/device-assignment.c | 641 +++ qemu/hw/device-assignment.h | 117 qemu/hw/pc.c| 16 + qemu/hw/pci.c |7 + qemu/qemu-kvm.c | 14 + qemu/qemu-kvm.h |8 + qemu/vl.c | 28 ++ 8 files changed, 834 insertions(+), 0 deletions(-) create mode 100644 qemu/hw/device-assignment.c create mode 100644 qemu/hw/device-assignment.h diff --git a/qemu/Makefile.target b/qemu/Makefile.target index d9bdeca..5d44e08 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -621,6 +621,9 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o +ifeq ($(USE_KVM), 1) +OBJS+= device-assignment.o +endif ifeq ($(USE_KVM_PIT), 1) OBJS+= i8254-kvm.o endif diff --git 
a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c new file mode 100644 index 000..89b05f9 --- /dev/null +++ b/qemu/hw/device-assignment.c @@ -0,0 +1,641 @@ +/* + * Copyright (c) 2007, Neocleus Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * + * Assign a PCI device from the host to a guest VM. + * + * Adapted for KVM by Qumranet. + * + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED]) + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED]) + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED]) + * Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED]) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda ([EMAIL PROTECTED]) + */ +#include +#include +#include "qemu-kvm.h" +#include "hw.h" +#include "pc.h" +#include "sysemu.h" +#include "console.h" +#include "device-assignment.h" + +/* From linux/ioport.h */ +#define IORESOURCE_IO 0x0100 /* Resource type */ +#define IORESOURCE_MEM 0x0200 +#define IORESOURCE_IRQ 0x0400 +#define IORESOURCE_DMA 0x0800 +#define IORESOURCE_PREFETCH 0x1000 /* No side effects */ + +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */ + +#ifdef DEVICE_ASSIGNMENT_DEBUG +#define DEBUG(fmt, ...) \ +do { \ + fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);\ +} while (0) +#else +#define DEBUG(fmt, ...) 
do { } while(0) +#endif + +static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr) +{ +return region->u.r_baseport + (addr - region->e_physbase); +} + +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, + uint32_t value) +{ +AssignedDevRegion *r_access = opaque; +uint32_t r_pio = guest_to_host_ioport(r_access, addr); + +DEBUG("r_pio=%08x e_physbase=%08x r_virtbase=%08lx value=%08x\n", + r_pio, (int)r_access->e_physbase, + (unsigned long)r_access->r_virtbase, value); + +outb(value, r_pio); +} + +static void assigned_dev_ioport_writew(void *opaque, uint32_t addr, + uint32_t value) +{
[PATCH 6/6] device assignment: support for hot-plugging PCI devices
From: Amit Shah <[EMAIL PROTECTED]> This patch adds support for hot-plugging host PCI devices into guests Signed-off-by: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> --- qemu/hw/device-hotplug.c | 21 + qemu/monitor.c |2 +- 2 files changed, 22 insertions(+), 1 deletions(-) diff --git a/qemu/hw/device-hotplug.c b/qemu/hw/device-hotplug.c index 8e2bc35..817e708 100644 --- a/qemu/hw/device-hotplug.c +++ b/qemu/hw/device-hotplug.c @@ -6,6 +6,7 @@ #include "pc.h" #include "console.h" #include "block_int.h" +#include "device-assignment.h" #define PCI_BASE_CLASS_STORAGE 0x01 #define PCI_BASE_CLASS_NETWORK 0x02 @@ -27,6 +28,24 @@ static PCIDevice *qemu_system_hot_add_nic(const char *opts, int bus_nr) return pci_nic_init (pci_bus, &nd_table[ret], -1); } +static PCIDevice *qemu_system_hot_assign_device(const char *opts, int bus_nr) +{ +PCIBus *pci_bus; +AssignedDevInfo *adev; + +pci_bus = pci_find_bus(bus_nr); +if (!pci_bus) { +term_printf ("Can't find pci_bus %d\n", bus_nr); +return NULL; +} +adev = add_assigned_device(opts); +if (adev == NULL) { +term_printf ("Error adding device; check syntax\n"); +return NULL; +} +return init_assigned_device(adev, pci_bus); +} + static int add_init_drive(const char *opts) { int drive_opt_idx, drive_idx; @@ -143,6 +162,8 @@ void device_hot_add(int pcibus, const char *type, const char *opts) dev = qemu_system_hot_add_nic(opts, pcibus); else if (strcmp(type, "storage") == 0) dev = qemu_system_hot_add_storage(opts, pcibus); +else if (strcmp(type, "host") == 0) +dev = qemu_system_hot_assign_device(opts, pcibus); else term_printf("invalid type: %s\n", type); diff --git a/qemu/monitor.c b/qemu/monitor.c index 79b6b4c..d1043b1 100644 --- a/qemu/monitor.c +++ b/qemu/monitor.c @@ -1529,7 +1529,7 @@ static const term_cmd_t term_cmds[] = { "[,cyls=c,heads=h,secs=s[,trans=t]]\n" "[snapshot=on|off][,cache=on|off]", "add drive to PCI storage controller" }, -{ "pci_add", "iss", device_hot_add, "bus nic|storage 
[[vlan=n][,macaddr=addr][,model=type]] [file=file][,if=type][,bus=nr]...", "hot-add PCI device" }, +{ "pci_add", "iss", device_hot_add, "bus nic|storage|host [[vlan=n][,macaddr=addr][,model=type]] [file=file][,if=type][,bus=nr]... [host=02:00.0[,name=string][,dma=none]" "hot-add PCI device" }, { "pci_del", "ii", device_hot_remove, "bus slot-number", "hot remove PCI device" }, #endif { "balloon", "i", do_balloon, -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/6] device assignment: introduce functions to correlate pin number and irq
From: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Amit Shah <[EMAIL PROTECTED]> Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]> --- qemu/hw/pc.h |3 +++ qemu/hw/piix_pci.c | 19 +++ 2 files changed, 22 insertions(+), 0 deletions(-) diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h index 1f63678..3edf62f 100644 --- a/qemu/hw/pc.h +++ b/qemu/hw/pc.h @@ -112,6 +112,9 @@ void i440fx_init_memory_mappings(PCIDevice *d); int piix4_init(PCIBus *bus, int devfn); +int piix3_get_pin(int pic_irq); +int piix_get_irq(int pin); + /* vga.c */ enum vga_retrace_method { VGA_RETRACE_DUMB, diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c index 6fbf47b..dc12c8a 100644 --- a/qemu/hw/piix_pci.c +++ b/qemu/hw/piix_pci.c @@ -243,6 +243,25 @@ static void piix3_set_irq(qemu_irq *pic, int irq_num, int level) } } +int piix3_get_pin(int pic_irq) +{ +int i; +for (i = 0; i < 4; i++) +if (piix3_dev->config[0x60+i] == pic_irq) +return i; +return -1; +} + +int piix_get_irq(int pin) +{ +if (piix3_dev) +return piix3_dev->config[0x60+pin]; +if (piix4_dev) +return piix4_dev->config[0x60+pin]; + +return 0; +} + static void piix3_reset(PCIDevice *d) { uint8_t *pci_conf = d->config; -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] KVM: Fix kvm_free_physmem_slot memory leak.
[Sorry, I realized I forgot to check style, here is the fixed patch] Make sure that kvm_free_physmem_slot also frees the VM memory if it was allocated by the kernel. Signed-off-by: François Diakhaté <[EMAIL PROTECTED]> --- arch/x86/kvm/x86.c | 10 +- virt/kvm/kvm_main.c | 19 +++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 883c137..818220b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4179,13 +4179,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (npages && !old.rmap) { unsigned long userspace_addr; - down_write(&current->mm->mmap_sem); + down_write(&kvm->mm->mmap_sem); userspace_addr = do_mmap(NULL, 0, npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0); - up_write(&current->mm->mmap_sem); + up_write(&kvm->mm->mmap_sem); if (IS_ERR((void *)userspace_addr)) return PTR_ERR((void *)userspace_addr); @@ -4198,10 +4198,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (!old.user_alloc && old.rmap) { int ret; - down_write(&current->mm->mmap_sem); - ret = do_munmap(current->mm, old.userspace_addr, + down_write(&kvm->mm->mmap_sem); + ret = do_munmap(kvm->mm, old.userspace_addr, old.npages * PAGE_SIZE); - up_write(&current->mm->mmap_sem); + up_write(&kvm->mm->mmap_sem); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a87f45e..c7d6585 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -617,9 +617,20 @@ out: /* * Free any memory in @free but not in @dont. 
*/ -static void kvm_free_physmem_slot(struct kvm_memory_slot *free, +static void kvm_free_physmem_slot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { + if (!dont || free->userspace_addr != dont->userspace_addr) { + struct kvm_userspace_memory_region mem = { + .slot = memslot_id(kvm, free), + .guest_phys_addr = free->base_gfn << PAGE_SHIFT, + .memory_size = 0, + .flags = 0, + }; + kvm_arch_set_memory_region(kvm, &mem, *free, free->user_alloc); + } + if (!dont || free->rmap != dont->rmap) vfree(free->rmap); @@ -640,7 +651,7 @@ void kvm_free_physmem(struct kvm *kvm) int i; for (i = 0; i < kvm->nmemslots; ++i) - kvm_free_physmem_slot(&kvm->memslots[i], NULL); + kvm_free_physmem_slot(kvm, &kvm->memslots[i], NULL); } static void kvm_destroy_vm(struct kvm *kvm) @@ -821,7 +832,7 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - kvm_free_physmem_slot(&old, &new); + kvm_free_physmem_slot(kvm, &old, &new); #ifdef CONFIG_DMAR /* map the pages in iommu page table */ r = kvm_iommu_map_pages(kvm, base_gfn, npages); @@ -831,7 +842,7 @@ int __kvm_set_memory_region(struct kvm *kvm, return 0; out_free: - kvm_free_physmem_slot(&new, &old); + kvm_free_physmem_slot(kvm, &new, &old); out: return r; -- 1.6.0.3
[PATCH] x86 emulator: move skip_emulated_instruction()
If we call the emulator we shouldn't call skip_emulated_instruction() in the first place, since the emulator already computes the next rip for us. Thus we move ->skip_emulated_instruction() out of kvm_emulate_pio() and into handle_io() (and the svm equivalent). We also replaced "return 0" by "break" in the "do_io:" case because now the shadow register state needs to be committed. Otherwise eip will never be updated. Signed-off-by: Guillaume Thouvenin <[EMAIL PROTECTED]> --- arch/x86/kvm/svm.c |1 + arch/x86/kvm/vmx.c |1 + arch/x86/kvm/x86.c |2 -- arch/x86/kvm/x86_emulate.c |2 +- 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 743aebd..f0ad4d4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1115,6 +1115,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) rep = (io_info & SVM_IOIO_REP_MASK) != 0; down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; + skip_emulated_instruction(&svm->vcpu); return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 64e2439..789f819 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2687,6 +2687,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; + skip_emulated_instruction(vcpu); return kvm_emulate_pio(vcpu, kvm_run, in, size, port); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ceeac88..38f79b6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2478,8 +2478,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, val = kvm_register_read(vcpu, VCPU_REGS_RAX); memcpy(vcpu->arch.pio_data, &val, 4); - kvm_x86_ops->skip_emulated_instruction(vcpu); - pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c 
index 57d7cc4..8f60ace 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -1772,7 +1772,7 @@ special_insn: c->eip = saved_eip; goto cannot_emulate; } - return 0; + break; case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; break; -- 1.6.0.3.514.g2f91b -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/3] kvm-userspace: ppc: userspace fixes for powerpc
Ok I should have sent these two series with some minutes in between to not intermix them :-/ Additionally I have the wrong header in this one it should be [0/5] :-/++ Overall it is a patch series of three patches for powerpc kvm-userspace, a five patch series for kvm-userspace/user/ and a single patch I just submitted while cleaning our userspace repo to get the missing things upstream. Avi, let me know if I confused you and I'll send them once again with some time in between to order them easier. [EMAIL PROTECTED] wrote: From: Christian Ehrhardt <[EMAIL PROTECTED]> This is a set of fixes for the powerpc tests kvm-userspace/user. Patch 1&2 fix main-ppc.c while patch 3 introduces libcflat for powerpc. Further on patch 4 provides a timebase accessor for the ppc testcases (not used yet) and patch 5 finally adds a stub nmi handler to main-ppc.c. [patches in series] [PATCH 1/5] user: ppc: fix threading bugs in main-ppc.c [PATCH 2/5] user: ppc: better error reporting in load_file [PATCH 3/5] user: ppc: implement PowerPC 44x libcflat [PATCH 4/5] libcflat: ppc: add timebase accessor [PATCH 5/5] user: ppc: add stub nmi handler --- [diffstat] b/user/config-powerpc-44x.mak | 14 + b/user/config-powerpc.mak | 46 - b/user/main-ppc.c | 32 +++- b/user/test/lib/powerpc/44x/map.c | 51 + b/user/test/lib/powerpc/44x/timebase.S | 28 ++ b/user/test/lib/powerpc/44x/timebase.h | 25 b/user/test/lib/powerpc/44x/tlbwe.S| 29 ++ b/user/test/lib/powerpc/io.c | 35 ++ b/user/test/powerpc/cstart.S | 38 b/user/test/powerpc/exit.c | 23 ++ user/config-powerpc-44x.mak|3 + user/main-ppc.c|9 + 12 files changed, 296 insertions(+), 37 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- Grüsse / regards, Christian Ehrhardt IBM Linux Technology Center, Open Virtualization -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to 
[EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] kvm-userspace: ppc: fix initial ppc memory setup
From: Christian Ehrhardt <[EMAIL PROTECTED]> The old memory initialization code was broken for all cases not fitting in one ram stick. This patch fixes the ram_stick calculation, now sets the proper base addresses per stick and removes the old workaround. Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] ppc440.c| 12 +--- ppc440.h|8 ++-- ppc440_bamboo.c | 30 -- 3 files changed, 31 insertions(+), 19 deletions(-) [diff] diff --git a/qemu/hw/ppc440.c b/qemu/hw/ppc440.c --- a/qemu/hw/ppc440.c +++ b/qemu/hw/ppc440.c @@ -3,6 +3,7 @@ * * Copyright 2007 IBM Corporation. * Authors: Jerone Young <[EMAIL PROTECTED]> + * Christian Ehrhardt <[EMAIL PROTECTED]> * * This work is licensed under the GNU GPL license version 2 or later. * @@ -24,15 +25,15 @@ void ppc440ep_init(CPUState *env, - target_phys_addr_t ram_bases[2], - target_phys_addr_t ram_sizes[2], + target_phys_addr_t ram_bases[PPC440_MAX_RAM_SLOTS], + target_phys_addr_t ram_sizes[PPC440_MAX_RAM_SLOTS], + int nbanks, qemu_irq **picp, ppc4xx_pci_t **pcip, int do_init) { ppc4xx_mmio_t *mmio; qemu_irq *pic, *irqs; - ram_addr_t offset; ppc4xx_pci_t *pci; int i; @@ -55,10 +56,7 @@ /* SDRAM controller */ printf("trying to setup sdram controller\n"); /* XXX 440EP's ECC interrupts are on UIC1 */ - ppc405_sdram_init(env, pic[14], 2, ram_bases, ram_sizes, do_init); - offset = 0; - for (i = 0; i < 2; i++) - offset += ram_sizes[i]; + ppc405_sdram_init(env, pic[14], nbanks, ram_bases, ram_sizes, do_init); /* PCI */ pci = ppc4xx_pci_init(env, pic, diff --git a/qemu/hw/ppc440.h b/qemu/hw/ppc440.h --- a/qemu/hw/ppc440.h +++ b/qemu/hw/ppc440.h @@ -3,6 +3,7 @@ * * Copyright 2007 IBM Corporation. 
* Authors: Jerone Young <[EMAIL PROTECTED]> + * Christian Ehrhardt <[EMAIL PROTECTED]> * * This work is licensed under the GNU GPL licence version 2 or later * @@ -20,9 +21,12 @@ #include "exec-all.h" #include "boards.h" +#define PPC440_MAX_RAM_SLOTS 4 + void ppc440ep_init(CPUState *env, - target_phys_addr_t ram_bases[2], - target_phys_addr_t ram_sizes[2], + target_phys_addr_t ram_bases[PPC440_MAX_RAM_SLOTS], + target_phys_addr_t ram_sizes[PPC440_MAX_RAM_SLOTS], + int nbanks, qemu_irq **picp, ppc4xx_pci_t **pcip, int do_init); diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c --- a/qemu/hw/ppc440_bamboo.c +++ b/qemu/hw/ppc440_bamboo.c @@ -2,7 +2,9 @@ * Qemu PowerPC 440 board emualtion * * Copyright 2007 IBM Corporation. - * Authors: Jerone Young <[EMAIL PROTECTED]> + * Authors: + * Jerone Young <[EMAIL PROTECTED]> + * Christian Ehrhardt <[EMAIL PROTECTED]> * * This work is licensed under the GNU GPL license version 2 or later. * @@ -30,7 +32,8 @@ const char *cpu_model) { char *buf=NULL; - target_phys_addr_t ram_bases[4], ram_sizes[4]; + target_phys_addr_t ram_bases[PPC440_MAX_RAM_SLOTS]; + target_phys_addr_t ram_sizes[PPC440_MAX_RAM_SLOTS]; NICInfo *nd; qemu_irq *pic; ppc4xx_pci_t *pci; @@ -46,6 +49,8 @@ int ret; int ram_stick_sizes[] = {256<<20, 128<<20, 64<<20, 32<<20, 16<<20, 8<<20 }; /* in bytes */ + int nbanks = 0; /* number of used memory banks */ + int next_bank_offset = 0; ram_addr_t tmp_ram_size; int i=0, k=0; uint32_t cpu_freq; @@ -55,15 +60,22 @@ printf("%s: START\n", __func__); /* Setup Memory */ - printf("Ram size passed is: %i MB\n", - bytes_to_mb((int)ram_size)); + if (ram_size < 8<<20) { + printf("ERROR: ram size too small (min 8mb)\n"); + exit(1); + } else + printf("Ram size passed is: %i MB\n", + bytes_to_mb((int)ram_size)); tmp_ram_size = ram_size; - for (i=0; i < (sizeof(ram_sizes)/sizeof(ram_sizes[0])); i++) { - for (k=0; k < (sizeof(ram_stick_sizes)/sizeof(ram_stick_sizes[0])); k++) { + for (i = 0; i < PPC440_MAX_RAM_SLOTS; i++) { 
+ for (k = 0; k < (sizeof(ram_stick_sizes)/sizeof(int)); k++) { if ((tmp_ram_size/ram_stick_sizes[k]) > 0) { ram_sizes[i] = ram_stick_sizes[k]; + ram_bases[i] = next_bank_offset; + next_bank_offset += ram_stick_sizes[k]; + nbanks++; tmp_ram_size -= ram_stick_sizes[k]; break;
[PATCH 0/3] kvm-userspace: ppc: userspace fixes for powerpc
From: Christian Ehrhardt <[EMAIL PROTECTED]> This is a set of various functional fixes in kvm-userspace for powerpc. Patch 1 fulfills the requirement to provide a max smp cpu in the machine struct, without that value qemu denies to run the guest (cpu 1 > maxcpu 0) Patch 2 is an intermediate fix to allow ppc (and hopefully all others) to build until we change the unifdef to sed'ing files as avi suggested. Until then it would be nice if that patch could fix the build issues for all of us in the unifdef style. Patch3 is a rework of the powerpc 440 guest memory initialization. I looked at it because the -m option did not work sometimes but it came up that the memory setup is broken and only running due to a workaround. qemu-devel is on cc for patch 1/3 [patches in series] [PATCH 1/3] qemu: ppc: define maximum SMP limit as 1 for Bamboo [PATCH 2/3] kvm: external module: Treat NONARCH_CONFIG as a list [PATCH 3/3] kvm-userspace: ppc: fix initial ppc memory setup --- [diffstat] b/kernel/Makefile |3 ++- b/qemu/hw/ppc440.c| 12 +--- b/qemu/hw/ppc440.h|8 ++-- b/qemu/hw/ppc440_bamboo.c |7 --- qemu/hw/ppc440_bamboo.c | 30 -- 5 files changed, 37 insertions(+), 23 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] qemu: ppc: if not a uImage, try to load kernel as ELF
From: Hollis Blanchard <[EMAIL PROTECTED]> This allows qemu to load "bare metal" ELF kernels, useful for standalone benchmarks and testcases. We could/should also load the specified file as a flat binary, if both uImage and ELF loaders fail. (See hw/arm_boot.c.) Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] ppc440_bamboo.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) [diff] diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c --- a/qemu/hw/ppc440_bamboo.c +++ b/qemu/hw/ppc440_bamboo.c @@ -35,8 +35,8 @@ void bamboo_init(ram_addr_t ram_size, in qemu_irq *pic; ppc4xx_pci_t *pci; CPUState *env; - target_ulong ep=0; - target_ulong la=0; + uint64_t ep=0; + uint64_t la=0; int is_linux=1; /* Will assume allways is Linux for now */ target_long kernel_size=0; target_ulong initrd_base=0; @@ -98,6 +98,9 @@ void bamboo_init(ram_addr_t ram_size, in /* load kernel with uboot loader */ printf("%s: load kernel\n", __func__); ret = load_uimage(kernel_filename, &ep, &la, &kernel_size, &is_linux); + if (ret < 0) + ret = load_elf(kernel_filename, 0, &ep, &la, NULL); + if (ret < 0) { fprintf(stderr, "qemu: could not load kernel '%s'\n", kernel_filename); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/5] user: ppc: fix threading bugs in main-ppc.c
From: Hollis Blanchard <[EMAIL PROTECTED]> - call io_table_register() before any vcpus have started - wait for all vcpus to exit before exiting the parent thread Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] main-ppc.c | 32 1 file changed, 12 insertions(+), 20 deletions(-) [diff] diff --git a/user/main-ppc.c b/user/main-ppc.c --- a/user/main-ppc.c +++ b/user/main-ppc.c @@ -51,7 +51,7 @@ struct io_table mmio_table; struct io_table mmio_table; static int ncpus = 1; -static sem_t init_sem; +static sem_t exited_sem; static __thread int vcpu; static sigset_t kernel_sigmask; static sigset_t ipi_sigmask; @@ -220,16 +220,8 @@ void sync_caches(void *mem, unsigned lon asm volatile ("sync; isync"); } -static void init_vcpu(int n, unsigned long entry) +static void init_vcpu(int n) { - /* XXX must set initial TLB state and stack - struct kvm_regs regs = { - .pc = entry, - }; - - kvm_set_regs(kvm, 0, &regs); - */ - sigemptyset(&ipi_sigmask); sigaddset(&ipi_sigmask, IPI_SIGNAL); sigprocmask(SIG_UNBLOCK, &ipi_sigmask, NULL); @@ -237,7 +229,6 @@ static void init_vcpu(int n, unsigned lo vcpus[n].tid = gettid(); vcpu = n; kvm_set_signal_mask(kvm, n, &kernel_sigmask); - sem_post(&init_sem); } static void *do_create_vcpu(void *_n) @@ -245,8 +236,9 @@ static void *do_create_vcpu(void *_n) int n = (long)_n; kvm_create_vcpu(kvm, n); - init_vcpu(n, 0x0); + init_vcpu(n); kvm_run(kvm, n); + sem_post(&exited_sem); return NULL; } @@ -368,14 +360,14 @@ int main(int argc, char **argv) len = load_file(vm_mem, argv[optind], 1); sync_caches(vm_mem, len); - sem_init(&init_sem, 0, 0); - init_vcpu(0, 0x0); - for (i = 1; i < ncpus; ++i) - start_vcpu(i); - for (i = 0; i < ncpus; ++i) - sem_wait(&init_sem); - io_table_register(&mmio_table, 0xf000, 64, mmio_handler, NULL); - return kvm_run(kvm, 0); + sem_init(&exited_sem, 0, 0); + for (i = 0; i < ncpus; ++i) + start_vcpu(i); + /* Wait for all vcpus to exit. 
*/ + for (i = 0; i < ncpus; ++i) + sem_wait(&exited_sem); + + return 0; } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3] kvm-userspace: ppc: userspace fixes for powerpc
From: Christian Ehrhardt <[EMAIL PROTECTED]> This is a set of fixes for the powerpc tests kvm-userspace/user. Patch 1&2 fix main-ppc.c while patch 3 introduces libcflat for powerpc. Further on patch 4 provides a timebase accessor for the ppc testcases (not used yet) and patch 5 finally adds a stub nmi handler to main-ppc.c. [patches in series] [PATCH 1/5] user: ppc: fix threading bugs in main-ppc.c [PATCH 2/5] user: ppc: better error reporting in load_file [PATCH 3/5] user: ppc: implement PowerPC 44x libcflat [PATCH 4/5] libcflat: ppc: add timebase accessor [PATCH 5/5] user: ppc: add stub nmi handler --- [diffstat] b/user/config-powerpc-44x.mak | 14 + b/user/config-powerpc.mak | 46 - b/user/main-ppc.c | 32 +++- b/user/test/lib/powerpc/44x/map.c | 51 + b/user/test/lib/powerpc/44x/timebase.S | 28 ++ b/user/test/lib/powerpc/44x/timebase.h | 25 b/user/test/lib/powerpc/44x/tlbwe.S| 29 ++ b/user/test/lib/powerpc/io.c | 35 ++ b/user/test/powerpc/cstart.S | 38 b/user/test/powerpc/exit.c | 23 ++ user/config-powerpc-44x.mak|3 + user/main-ppc.c|9 + 12 files changed, 296 insertions(+), 37 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/5] libcflat: ppc: add timebase accessor
Provide a timebase accessor for ppc testcases. Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> [diffstat] config-powerpc-44x.mak |3 ++- test/lib/powerpc/44x/timebase.S | 28 test/lib/powerpc/44x/timebase.h | 25 + 3 files changed, 55 insertions(+), 1 deletion(-) [diff] diff --git a/user/config-powerpc-44x.mak b/user/config-powerpc-44x.mak --- a/user/config-powerpc-44x.mak +++ b/user/config-powerpc-44x.mak @@ -5,7 +5,8 @@ cflatobjs += \ test/lib/powerpc/44x/map.o \ - test/lib/powerpc/44x/tlbwe.o + test/lib/powerpc/44x/tlbwe.o \ + test/lib/powerpc/44x/timebase.o simpletests += \ test/powerpc/44x/tlbsx.bin \ diff --git a/user/test/lib/powerpc/44x/timebase.S b/user/test/lib/powerpc/44x/timebase.S new file mode 100644 --- /dev/null +++ b/user/test/lib/powerpc/44x/timebase.S @@ -0,0 +1,28 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 
2008 + * + * Authors: Hollis Blanchard <[EMAIL PROTECTED]> + */ + +/* unsigned long long mftb(void); */ +.global mftb +mftb: + mftbu r5 + mftbl r4 + mftbu r3 + cmpw r3, r5 + bne mftb + blr diff --git a/user/test/lib/powerpc/44x/timebase.h b/user/test/lib/powerpc/44x/timebase.h new file mode 100644 --- /dev/null +++ b/user/test/lib/powerpc/44x/timebase.h @@ -0,0 +1,25 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <[EMAIL PROTECTED]> + */ + +#ifndef __TIMEBASE_H__ +#define __TIMEBASE_H__ + +unsigned long long mftb(void); + +#endif /* __TIMEBASE_H__ */ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/5] user: ppc: better error reporting in load_file
From: Hollis Blanchard <[EMAIL PROTECTED]> Fancy description. Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] main-ppc.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) [diff] diff --git a/user/main-ppc.c b/user/main-ppc.c --- a/user/main-ppc.c +++ b/user/main-ppc.c @@ -183,7 +183,7 @@ static struct kvm_callbacks test_callbac static unsigned long load_file(void *mem, const char *fname, int inval_icache) { - int r; + ssize_t r; int fd; unsigned long bytes = 0; @@ -200,6 +200,7 @@ static unsigned long load_file(void *mem if (r == -1) { perror("read"); + printf("read %d bytes\n", bytes); exit(1); } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/5] user: ppc: add stub nmi handler
From: Hollis Blanchard <[EMAIL PROTECTED]> Adding a nmi stub handler for user/main-ppc.c. We already pushed a stub for qemu but not for the test suite in the user dir. Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] main-ppc.c |6 ++ 1 file changed, 6 insertions(+) [diff] diff --git a/user/main-ppc.c b/user/main-ppc.c --- a/user/main-ppc.c +++ b/user/main-ppc.c @@ -83,6 +83,11 @@ static int test_io_window(void *opaque) } static int test_try_push_interrupts(void *opaque) +{ + return 0; +} + +static int test_try_push_nmi(void *opaque) { return 0; } @@ -175,6 +180,7 @@ static struct kvm_callbacks test_callbac .halt= test_halt, .io_window = test_io_window, .try_push_interrupts = test_try_push_interrupts, + .try_push_nmi = test_try_push_nmi, .post_kvm_run = test_post_kvm_run, .pre_kvm_run = test_pre_kvm_run, .powerpc_dcr_read = test_dcr_read, -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/5] user: ppc: implement PowerPC 44x libcflat
From: Hollis Blanchard <[EMAIL PROTECTED]> - Create a 44x-specific makefile. - Reorganize PowerPC makefiles to separate "simple" tests from those which link with libcflat. - Create a minimal libcflat testcase (which just exits). Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] config-powerpc-44x.mak | 14 +++ config-powerpc.mak | 46 ++ test/lib/powerpc/44x/map.c | 51 +++ test/lib/powerpc/44x/tlbwe.S | 29 test/lib/powerpc/io.c| 35 + test/powerpc/cstart.S| 38 test/powerpc/exit.c | 23 +++ 7 files changed, 221 insertions(+), 15 deletions(-) [diff] diff --git a/user/config-powerpc-44x.mak b/user/config-powerpc-44x.mak new file mode 100644 --- /dev/null +++ b/user/config-powerpc-44x.mak @@ -0,0 +1,14 @@ + + +# for some reason binutils hates tlbsx unless we say we're 405 :( +CFLAGS += -Wa,-m405 -I test/lib/powerpc/44x + +cflatobjs += \ + test/lib/powerpc/44x/map.o \ + test/lib/powerpc/44x/tlbwe.o + +simpletests += \ + test/powerpc/44x/tlbsx.bin \ + test/powerpc/44x/tlbwe_16KB.bin \ + test/powerpc/44x/tlbwe_hole.bin \ + test/powerpc/44x/tlbwe.bin diff --git a/user/config-powerpc.mak b/user/config-powerpc.mak --- a/user/config-powerpc.mak +++ b/user/config-powerpc.mak @@ -1,26 +1,42 @@ +platform := 44x + CFLAGS += -m32 CFLAGS += -D__powerpc__ CFLAGS += -I $(KERNELDIR)/include -# for some reaons binutils hates tlbsx unless we say we're 405 :( -CFLAGS += -Wa,-mregnames,-m405 +CFLAGS += -Wa,-mregnames -I test/lib -%.bin: %.o - $(OBJCOPY) -O binary $^ $@ +cstart := test/powerpc/cstart.o -testobjs := \ - io.bin \ - spin.bin \ - sprg.bin \ - 44x/tlbsx.bin \ - 44x/tlbwe_16KB.bin \ - 44x/tlbwe_hole.bin \ - 44x/tlbwe.bin +cflatobjs += \ + test/lib/powerpc/io.o -tests := $(addprefix test/powerpc/, $(testobjs)) +$(libcflat): LDFLAGS += -nostdlib +$(libcflat): CFLAGS += -ffreestanding -all: kvmtrace kvmctl $(tests) +# these tests do not use libcflat +simpletests := \ + test/powerpc/spin.bin \ + test/powerpc/io.bin \ 
+ test/powerpc/sprg.bin + +# theses tests use cstart.o, libcflat, and libgcc +tests := \ + test/powerpc/exit.bin + +include config-powerpc-$(platform).mak + + +all: kvmtrace kvmctl $(libcflat) $(simpletests) $(tests) + +$(simpletests): %.bin: %.o + $(CC) -nostdlib $^ -Wl,-T,flat.lds -o $@ + +$(tests): %.bin: $(cstart) %.o $(libcflat) + $(CC) -nostdlib $^ $(libgcc) -Wl,-T,flat.lds -o $@ kvmctl_objs = main-ppc.o iotable.o ../libkvm/libkvm.a arch_clean: - rm -f $(tests) + $(RM) $(simpletests) $(tests) $(cstart) + $(RM) $(patsubst %.bin, %.elf, $(simpletests) $(tests)) + $(RM) $(patsubst %.bin, %.o, $(simpletests) $(tests)) diff --git a/user/test/lib/powerpc/44x/map.c b/user/test/lib/powerpc/44x/map.c new file mode 100644 --- /dev/null +++ b/user/test/lib/powerpc/44x/map.c @@ -0,0 +1,51 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 
2008 + * + * Authors: Hollis Blanchard <[EMAIL PROTECTED]> + */ + +#include "libcflat.h" + +#define TLB_SIZE 64 + +extern void tlbwe(unsigned int index, + unsigned char tid, + unsigned int word0, + unsigned int word1, + unsigned int word2); + +unsigned int next_free_index; + +#define PAGE_SHIFT 12 +#define PAGE_MASK (~((1<= TLB_SIZE) + panic("TLB overflow"); + + w0 = (vaddr & PAGE_MASK) | V; + w1 = paddr & PAGE_MASK; + w2 = 0x3; + + tlbwe(next_free_index, 0, w0, w1, w2); +} diff --git a/user/test/lib/powerpc/44x/tlbwe.S b/user/test/lib/powerpc/44x/tlbwe.S new file mode 100644 --- /dev/null +++ b/user/test/lib/powerpc/44x/tlbwe.S @@ -0,0 +1,29 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied w
[PATCH 2/3] kvm: external module: Treat NONARCH_CONFIG as a list
From: Hollis Blanchard <[EMAIL PROTECTED]> As discussed on the list the unifdef changes break powerpc (and more ?). A fix is to treat NONARCH_CONFIG as a list instead of a single item. Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> --- [diffstat] Makefile |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) [diff] diff --git a/kernel/Makefile b/kernel/Makefile --- a/kernel/Makefile +++ b/kernel/Makefile @@ -25,8 +25,9 @@ gawk -v version=$(version) -f $(ARCH_DIR)/hack-module.awk $1.orig \ | sed '/\#include/! s/\blapic\b/l_apic/g' > $1 && rm $1.orig +unifdef_uflags = $(foreach arch, $(NONARCH_CONFIG), -UCONFIG_$(arch)) unifdef = mv $1 $1.orig && \ - unifdef -DCONFIG_$(ARCH_CONFIG) -UCONFIG_$(NONARCH_CONFIG) $1.orig > $1; \ + unifdef -DCONFIG_$(ARCH_CONFIG) $(unifdef_uflags) $1.orig > $1; \ [ $$? -le 2 ] && rm $1.orig hack = $(call _hack,$T/$(strip $1)) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] qemu: ppc: define maximum SMP limit as 1 for Bamboo
From: Christian Ehrhardt <[EMAIL PROTECTED]> Fix for qemu runtime error. Full error message: Number of SMP cpus requested (1), exceeds max cpus supported by machine `bamboo' (0) Signed-off-by: Christian Ehrhardt <[EMAIL PROTECTED]> Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> --- [diffstat] ppc440_bamboo.c |7 --- 1 file changed, 4 insertions(+), 3 deletions(-) [diff] diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c --- a/qemu/hw/ppc440_bamboo.c +++ b/qemu/hw/ppc440_bamboo.c @@ -203,7 +203,8 @@ void bamboo_init(ram_addr_t ram_size, in } QEMUMachine bamboo_machine = { - "bamboo", - "bamboo", - bamboo_init, + .name = "bamboo", + .desc = "bamboo", + .init = bamboo_init, + .max_cpus = 1, }; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] KVM: Fix kvm_free_physmem_slot memory leak.
[Updated the patch taking your comments into account] Make sure that kvm_free_physmem_slot also frees the VM memory if it was allocated by the kernel. Signed-off-by: François Diakhaté <[EMAIL PROTECTED]> --- arch/x86/kvm/x86.c | 10 +- virt/kvm/kvm_main.c | 18 ++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 883c137..818220b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4179,13 +4179,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (npages && !old.rmap) { unsigned long userspace_addr; - down_write(&current->mm->mmap_sem); + down_write(&kvm->mm->mmap_sem); userspace_addr = do_mmap(NULL, 0, npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0); - up_write(&current->mm->mmap_sem); + up_write(&kvm->mm->mmap_sem); if (IS_ERR((void *)userspace_addr)) return PTR_ERR((void *)userspace_addr); @@ -4198,10 +4198,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (!old.user_alloc && old.rmap) { int ret; - down_write(&current->mm->mmap_sem); - ret = do_munmap(current->mm, old.userspace_addr, + down_write(&kvm->mm->mmap_sem); + ret = do_munmap(kvm->mm, old.userspace_addr, old.npages * PAGE_SIZE); - up_write(&current->mm->mmap_sem); + up_write(&kvm->mm->mmap_sem); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a87f45e..b420930 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -617,9 +617,19 @@ out: /* * Free any memory in @free but not in @dont. 
*/ -static void kvm_free_physmem_slot(struct kvm_memory_slot *free, +static void kvm_free_physmem_slot(struct kvm * kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { + if(!dont || free->userspace_addr != dont->userspace_addr) { + struct kvm_userspace_memory_region mem = { + .slot = memslot_id(kvm, free), + .guest_phys_addr = free->base_gfn << PAGE_SHIFT, + .memory_size = 0, + .flags = 0, + }; + kvm_arch_set_memory_region(kvm, &mem, *free, free->user_alloc); + } + if (!dont || free->rmap != dont->rmap) vfree(free->rmap); @@ -640,7 +650,7 @@ void kvm_free_physmem(struct kvm *kvm) int i; for (i = 0; i < kvm->nmemslots; ++i) - kvm_free_physmem_slot(&kvm->memslots[i], NULL); + kvm_free_physmem_slot(kvm, &kvm->memslots[i], NULL); } static void kvm_destroy_vm(struct kvm *kvm) @@ -821,7 +831,7 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - kvm_free_physmem_slot(&old, &new); + kvm_free_physmem_slot(kvm, &old, &new); #ifdef CONFIG_DMAR /* map the pages in iommu page table */ r = kvm_iommu_map_pages(kvm, base_gfn, npages); @@ -831,7 +841,7 @@ int __kvm_set_memory_region(struct kvm *kvm, return 0; out_free: - kvm_free_physmem_slot(&new, &old); + kvm_free_physmem_slot(kvm, &new, &old); out: return r; -- 1.6.0.3