On Sun, Aug 25, 2013 at 04:47:59PM +0100, Alexander Graf wrote: > > On 23.08.2013, at 21:10, Christoffer Dall wrote: > > > Save and restore the ARM KVM VGIC state from the kernel. We rely on > > QEMU to marshal the GICState data structure and therefore simply > > synchronize the kernel state with the QEMU emulated state in both > > directions. > > > > We take some care on the restore path to check the VGIC has been > > configured with enough IRQs and CPU interfaces that we can properly > > restore the state, and for separate set/clear registers we first fully > > clear the registers and then set the required bits. > > > > Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org> > > --- > > hw/intc/arm_gic_common.c | 2 + > > hw/intc/arm_gic_kvm.c | 418 > > ++++++++++++++++++++++++++++++++++++++++++- > > hw/intc/gic_internal.h | 1 + > > include/migration/vmstate.h | 6 + > > 4 files changed, 425 insertions(+), 2 deletions(-) > > > > diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c > > index a50ded2..f39bdc1 100644 > > --- a/hw/intc/arm_gic_common.c > > +++ b/hw/intc/arm_gic_common.c > > @@ -77,6 +77,8 @@ static const VMStateDescription vmstate_gic = { > > VMSTATE_UINT16_ARRAY(running_priority, GICState, NCPU), > > VMSTATE_UINT16_ARRAY(current_pending, GICState, NCPU), > > VMSTATE_UINT8_2DARRAY(binary_point, GICState, 2, NCPU), > > + VMSTATE_UINT32_2DARRAY(active_prio, GICState, 4, NCPU), > > + VMSTATE_UINT32(num_irq, GICState), > > VMSTATE_END_OF_LIST() > > } > > }; > > diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c > > index 9f0a852..9785281 100644 > > --- a/hw/intc/arm_gic_kvm.c > > +++ b/hw/intc/arm_gic_kvm.c > > @@ -23,6 +23,15 @@ > > #include "kvm_arm.h" > > #include "gic_internal.h" > > > > +//#define DEBUG_GIC_KVM > > + > > +#ifdef DEBUG_GIC_KVM > > +#define DPRINTF(fmt, ...) \ > > +do { fprintf(stderr, "arm_gic: " fmt , ## __VA_ARGS__); } while (0) > > +#else > > +#define DPRINTF(fmt, ...) do {} while(0) > > You really want to make DPRINTF still be format checked by the compiler. > Check out hw/intc/openpic.c for an example how to get there. >
good point, thanks for the pointer. > > +#endif > > + > > #define TYPE_KVM_ARM_GIC "kvm-arm-gic" > > #define KVM_ARM_GIC(obj) \ > > OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC) > > @@ -73,14 +82,419 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, > > int level) > > kvm_set_irq(kvm_state, kvm_irq, !!level); > > } > > > > +static bool kvm_arm_gic_can_save_restore(GICState *s) > > +{ > > + KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s); > > + return kgc->dev_fd >= 0; > > +} > > + > > +static void kvm_arm_gic_reg_access(GICState *s, int group, int offset, > > + int cpu, uint32_t *val, bool write) > > +{ > > + KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s); > > + struct kvm_device_attr attr; > > + int type; > > + > > + cpu = cpu & 0xff; > > + > > + attr.flags = 0; > > + attr.group = group; > > + attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) & > > + KVM_DEV_ARM_VGIC_CPUID_MASK) | > > + (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) & > > + KVM_DEV_ARM_VGIC_OFFSET_MASK); > > + attr.addr = (uint64_t)(long)val; > > uintptr_t? > yup > > + > > + if (write) { > > + type = KVM_SET_DEVICE_ATTR; > > + } else { > > + type = KVM_GET_DEVICE_ATTR; > > + } > > + > > + if (kvm_device_ioctl(kgc->dev_fd, type, &attr) < 0) { > > + fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n", > > + strerror(errno)); > > + abort(); > > + } > > +} > > + > > +static void kvm_arm_gic_dist_reg_access(GICState *s, int offset, int cpu, > > + uint32_t *val, bool write) > > +{ > > + kvm_arm_gic_reg_access(s, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, > > + offset, cpu, val, write); > > +} > > + > > +static void kvm_arm_gic_cpu_reg_access(GICState *s, int offset, int cpu, > > + uint32_t *val, bool write) > > +{ > > + kvm_arm_gic_reg_access(s, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, > > + offset, cpu, val, write); > > +} > > + > > +#define for_each_irq_reg(_ctr, _max_irq, _field_width) \ > > + for (_ctr = 0; _ctr < ((_max_irq) / (32 / (_field_width))); _ctr++) > > + > > +/* > > + * Translate from the in-kernel field for an IRQ value to/from the qemu > > + * representation. > > + */ > > +typedef void (*vgic_translate_fn)(GICState *s, int irq, int cpu, > > + uint32_t *field, bool to_kernel); > > + > > +/* synthetic translate function used for clear/set registers to completely > > + * clear a setting using a clear-register before setting the remaing bits > > + * using a set-register */ > > +static void translate_clear(GICState *s, int irq, int cpu, > > + uint32_t *field, bool to_kernel) > > +{ > > + if (to_kernel) { > > + *field = ~0; > > + } else { > > + /* does not make sense: qemu model doesn't use set/clear regs */ > > + abort(); > > + } > > I don't understand the to_kernel bits. I thought we're in a device file that > only works with KVM? > yeah, but you need to retrieve the state from the kernel on a suspend "from_kernel" and you need to save the state "to_kernel" on a resume operation. These small functions translate between a qemu-representation and a KVM representation, which allows us to do saving of the in-kernel state by reusing the qemu model of the gic. This specific little function is used for MMIO registers that have a separate register for clearing bits than setting bits, like Paolo describes. The performance of these operations are going to be vastly dominated by saving/restoring your memory and I therefore prefer keeping the interface coherent with the hardware spec, instead of rewriting things to save a few writes to the kernel. Does that answer your question? > > +} > > + > > +static void translate_enabled(GICState *s, int irq, int cpu, > > + uint32_t *field, bool to_kernel) > > +{ > > + int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK; > > + > > + if (to_kernel) { > > + *field = GIC_TEST_ENABLED(irq, cm); > > + } else { > > + if (*field & 1) { > > + GIC_SET_ENABLED(irq, cm); > > + } > > + } > > +} > > + > > +static void translate_pending(GICState *s, int irq, int cpu, > > + uint32_t *field, bool to_kernel) > > +{ > > + int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK; > > + > > + if (to_kernel) { > > + *field = GIC_TEST_PENDING(irq, cm); > > + } else { > > + if (*field & 1) { > > + GIC_SET_PENDING(irq, cm); > > + /* TODO: Capture is level-line is held high in the kernel */ > > + } > > + } > > +} > > + > > +static void translate_active(GICState *s, int irq, int cpu, > > + uint32_t *field, bool to_kernel) > > +{ > > + int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK; > > + > > + if (to_kernel) { > > + *field = GIC_TEST_ACTIVE(irq, cm); > > + } else { > > + if (*field & 1) { > > + GIC_SET_ACTIVE(irq, cm); > > + } > > + } > > +} > > + > > +static void translate_trigger(GICState *s, int irq, int cpu, > > + uint32_t *field, bool to_kernel) > > +{ > > + if (to_kernel) { > > + *field = (GIC_TEST_TRIGGER(irq)) ? 0x2 : 0x0; > > + } else { > > + if (*field & 0x2) { > > + GIC_SET_TRIGGER(irq); > > + } > > + } > > +} > > + > > +static void translate_priority(GICState *s, int irq, int cpu, > > + uint32_t *field, bool to_kernel) > > +{ > > + if (to_kernel) { > > + *field = GIC_GET_PRIORITY(irq, cpu) & 0xff; > > + } else { > > + GIC_SET_PRIORITY(irq, cpu, *field & 0xff); > > + } > > +} > > + > > +static void translate_targets(GICState *s, int irq, int cpu, > > + uint32_t *field, bool to_kernel) > > +{ > > + if (to_kernel) { > > + *field = s->irq_target[irq] & 0xff; > > + } else { > > + s->irq_target[irq] = *field & 0xff; > > + } > > +} > > + > > +static void translate_sgisource(GICState *s, int irq, int cpu, > > + uint32_t *field, bool to_kernel) > > +{ > > + if (to_kernel) { > > + *field = s->sgi_source[irq][cpu] & 0xff; > > + } else { > > + s->sgi_source[irq][cpu] = *field & 0xff; > > + } > > +} > > + > > +/* Read a register group from the kernel VGIC */ > > +static void kvm_arm_gic_dist_readr(GICState *s, uint32_t offset, int width, > > + int maxirq, vgic_translate_fn > > translate_fn) > > +{ > > + uint32_t reg; > > + int i; > > + int j; > > + int irq; > > + int cpu; > > + int regsz = 32 / width; /* irqs per kernel register */ > > + uint32_t field; > > + > > + for_each_irq_reg(i, maxirq, width) { > > + irq = i * regsz; > > + cpu = 0; > > + while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) { > > + kvm_arm_gic_dist_reg_access(s, offset, cpu, ®, false); > > + for (j = 0; j < regsz; j++) { > > + field = reg >> (j * width) & ((1 << width) - 1); > > + translate_fn(s, irq + j, cpu, &field, false); > > + } > > + > > + cpu++; > > + } > > + offset += 4; > > + } > > +} > > + > > +/* Write a register group to the kernel VGIC */ > > +static void kvm_arm_gic_dist_writer(GICState *s, uint32_t offset, int > > width, > > + int maxirq, vgic_translate_fn > > translate_fn) > > +{ > > + uint32_t reg; > > + int i; > > + int j; > > + int irq; > > + int cpu; > > + int regsz = 32 / width; /* irqs per kernel register */ > > + uint32_t field; > > + > > + for_each_irq_reg(i, maxirq, width) { > > + irq = i * regsz; > > + cpu = 0; > > + while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) { > > + reg = 0; > > + for (j = 0; j < regsz; j++) { > > + translate_fn(s, irq + j, cpu, &field, true); > > + reg |= (field & ((1 << width) - 1)) << (j * width); > > + } > > + kvm_arm_gic_dist_reg_access(s, offset, cpu, ®, true); > > + > > + cpu++; > > + } > > + offset += 4; > > + } > > +} > > + > > static void kvm_arm_gic_put(GICState *s) > > { > > - /* TODO: there isn't currently a kernel interface to set the GIC state > > */ > > + uint32_t reg; > > + int i; > > + int cpu; > > + int num_cpu; > > + int num_irq; > > + > > + if (!kvm_arm_gic_can_save_restore(s)) { > > + DPRINTF("Cannot put kernel gic state, no kernel interface"); > > + return; > > + } > > + > > + /* Note: We do the restore in a slightly different order than the save > > + * (where the order doesn't matter and is simply ordered according to > > the > > + * register offset values */ > > + > > + /***************************************************************** > > + * Distributor State > > + */ > > + > > + /* s->enabled -> GICD_CTLR */ > > + reg = s->enabled; > > + kvm_arm_gic_dist_reg_access(s, 0x0, 0, ®, true); > > + > > + /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */ > > + kvm_arm_gic_dist_reg_access(s, 0x4, 0, ®, false); > > + num_irq = ((reg & 0x1f) + 1) * 32; > > + num_cpu = ((reg & 0xe0) >> 5) + 1; > > + > > + if (num_irq < s->num_irq) { > > + fprintf(stderr, "Restoring %u IRQs, but kernel supports max > > %d\n", > > + s->num_irq, num_irq); > > + abort(); > > + } else if (num_cpu != s->num_cpu ) { > > + fprintf(stderr, "Restoring %u CPU interfaces, kernel only has > > %d\n", > > + s->num_cpu, num_cpu); > > + /* Did we not create the VCPUs in the kernel yet? */ > > + abort(); > > + } > > + > > + /* TODO: Consider checking compatibility with the IIDR ? */ > > + > > + /* irq_state[n].enabled -> GICD_ISENABLERn */ > > + kvm_arm_gic_dist_writer(s, 0x180, 1, s->num_irq, translate_clear); > > + kvm_arm_gic_dist_writer(s, 0x100, 1, s->num_irq, translate_enabled); > > Don't these magic numbers have #define'd equivalents in their GIC > implementation header file? Then you don't need the comments above each of > these. in the kernel yes, in qemu no. Unless I'm mistaken, in which case please point me in the right direction. I don't want to taint the git history with a rewrite of every mmio function in the tcg version or have defines for one file and not use them in the other file, which is also a bit weird. If people feel this is necessary, I can have a rewrite of the whole thing, but I prefer not to. Surprisingly. > > I also find the function names quite long, but I guess as long as everything > still fits that doesn't really matter. > I think the names are as descriptive as they should be, but we can drop some of the kvm_arm_gic prefixes if that's what you have in mind. So far, most calls and definitions fit on 80 chars width so I'm not too worried... -Christoffer