Hi,

The patch moves the PIT from QEMU into the kernel, which greatly increases the
accuracy of the KVM guest timer. The code is mostly based on QEMU's and Xen's code.

The patch works well on an IA32e host (passed 2.6.22, 2.6.20, 2.6.18, 2.6.16 with
hpet=disable, 2.6.9 with clock=pit) and is mostly OK on a PAE host (passed 2.6.18,
2.6.9 with clock=pit). However, a Linux 2.6.16 guest's timer on a PAE host is
inaccurate with the patch: it runs about 1/10 faster. This is a regression, since
it is OK with the QEMU PIT. The same kernel on IA32e needs hpet=disable to stay
accurate, but I can't find a parameter that makes it accurate on a PAE host with 2.6.16.

The patch doesn't contain the save/restore part, because that function is currently
broken in KVM (I may spend some effort on this issue later).

Any comments are welcome!

Thanks
Yang, Sheng
From 56a50952929f9a7e78fc3ec812dd4550c623b956 Mon Sep 17 00:00:00 2001
From: Sheng Yang <[EMAIL PROTECTED]>
Date: Mon, 21 Jan 2008 16:42:37 +0800
Subject: [PATCH] KVM: In-kernel PIT model


Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
---
 arch/x86/kvm/Makefile      |    3 +-
 arch/x86/kvm/i8254.c       |  589 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/i8254.h       |   60 +++++
 arch/x86/kvm/irq.c         |    3 +
 arch/x86/kvm/x86.c         |    9 +
 include/asm-x86/kvm_host.h |    1 +
 include/linux/kvm.h        |    2 +
 7 files changed, 666 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/i8254.c
 create mode 100644 arch/x86/kvm/i8254.h

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+	i8254.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..f5b53a5
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,589 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ *   Sheng Yang <[EMAIL PROTECTED]>
+ *   Port from QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "i8254.h"
+
+#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
+/* #define pit_debug(fmt, arg...) */
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+	union {
+		u64 ll;
+		struct {
+#ifdef WORDS_BIGENDIAN
+			u32 high, low;
+#else
+			u32 low, high;
+#endif
+		} l;
+	} u, res;
+	u64 rl, rh;
+
+	u.ll = a;
+	rl = (u64)u.l.low * (u64)b;
+	rh = (u64)u.l.high * (u64)b;
+	rh += (rl >> 32);
+	res.l.high = div64_64(rh, c);
+	res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+	return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+	struct PITChannelState *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+	struct kvm_vcpu *vcpu = kvm->vcpus[0];
+
+	ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	switch (c->mode) {
+	default:
+	case 0:
+	case 4:
+		/* XXX: just disable/enable counting */
+		break;
+	case 1:
+	case 2:
+	case 3:
+	case 5:
+		/* Restart counting on rising edge. */
+		if (c->gate < val)
+			kvm_get_msr(vcpu, MSR_IA32_TIME_STAMP_COUNTER,
+				    &c->count_load_time);
+		break;
+	}
+
+	c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+	ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+	struct PITChannelState *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+	struct kvm_vcpu *vcpu = kvm->vcpus[0];
+	u64 d, t;
+	int counter;
+
+	ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	kvm_get_msr(vcpu, MSR_IA32_TIME_STAMP_COUNTER, &t);
+	d = muldiv64(t - c->count_load_time, PIT_FREQ, cpu_khz * 1000);
+
+	switch (c->mode) {
+	case 0:
+	case 1:
+	case 4:
+	case 5:
+		counter = (c->count - d) & 0xffff;
+		break;
+	case 3:
+		/* XXX: may be incorrect for odd counts */
+		counter = c->count - (mod_64((2 * d), c->count));
+		break;
+	default:
+		counter = c->count - mod_64(d, c->count);
+		break;
+	}
+	return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+	struct PITChannelState *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+	struct kvm_vcpu *vcpu = kvm->vcpus[0];
+	u64 d, t;
+	int out;
+
+	ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	kvm_get_msr(vcpu, MSR_IA32_TIME_STAMP_COUNTER, &t);
+	d = muldiv64(t - c->count_load_time, PIT_FREQ, cpu_khz * 1000);
+
+	switch (c->mode) {
+	default:
+	case 0:
+		out = (d >= c->count);
+		break;
+	case 1:
+		out = (d < c->count);
+		break;
+	case 2:
+		out = ((mod_64(d, c->count) == 0) && (d != 0));
+		break;
+	case 3:
+		out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+		break;
+	case 4:
+	case 5:
+		out = (d == c->count);
+		break;
+	}
+
+	return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+	struct PITChannelState *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+
+	ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	if (!c->count_latched) {
+		c->latched_count = pit_get_count(kvm, channel);
+		c->count_latched = c->rw_mode;
+	}
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+	struct PITChannelState *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+
+	ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	if (!c->status_latched) {
+		/* TODO: Return NULL COUNT (bit 6). */
+		c->status = ((pit_get_out(kvm, channel) << 7) |
+				(c->rw_mode << 4) |
+				(c->mode << 1) |
+				c->bcd);
+		c->status_latched = 1;
+	}
+}
+
+int __pit_timer_fn(struct PITChannelState *pc)
+{
+	struct PITTimer *pt = &pc->pit_timer;
+	struct kvm_vcpu *vcpu0 = pc->pit_state->pit->kvm->vcpus[0];
+
+	atomic_inc(&pt->pending);
+	if (waitqueue_active(&vcpu0->wq)) {
+		vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+		wake_up_interruptible(&vcpu0->wq);
+	}
+
+	pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+	pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+	return (pt->period == 0 ? 0 : 1);
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+	struct PITChannelState *pc;
+	int restart_timer = 0;
+
+	pc = container_of(data, struct PITChannelState, pit_timer.timer);
+
+	restart_timer = __pit_timer_fn(pc);
+
+	if (restart_timer)
+		return HRTIMER_RESTART;
+	else
+		return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct PITChannelState *s)
+{
+	pit_debug("pit: execute del timer!\n");
+	hrtimer_cancel(&s->pit_timer.timer);
+}
+
+/* (Re)arm pc's hrtimer to fire after val PIT ticks; periodic if is_period. */
+static void create_pit_timer(struct PITChannelState *pc, u32 val, int is_period)
+{
+	s64 interval;
+	ktime_t now = pc->pit_timer.timer.base->get_time();
+
+	/* Integer constant: avoid floating-point literals in kernel code. */
+	interval = muldiv64(val, NSEC_PER_SEC, PIT_FREQ);
+	pit_debug("pit: create pit timer, interval is %lld\n", interval);
+
+	/* TODO The new value only affected after the retriggered */
+	hrtimer_cancel(&pc->pit_timer.timer);
+	pc->pit_timer.period = (is_period == 0) ? 0 : interval;
+	pc->pit_timer.timer.function = pit_timer_fn;
+	atomic_set(&pc->pit_timer.pending, 0);
+	hrtimer_start(&pc->pit_timer.timer, ktime_add_ns(now, interval),
+		      HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+	u64 t;
+	struct PITChannelState *s =
+		&kvm->arch.vpit->pit_state.channels[channel];
+	struct kvm_vcpu *vcpu = kvm->vcpus[0];
+
+	ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	pit_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+	if (val == 0)
+		val = 0x10000;
+
+	if (vcpu == NULL) {
+		pit_debug("pit: load host tsc instead of guest\n");
+		rdtscll(t);
+	} else
+		kvm_get_msr(vcpu, MSR_IA32_TIME_STAMP_COUNTER, &t);
+	s->count_load_time = t;
+	s->count = val;
+
+	if (channel != 0)
+		return;
+
+	/* Two types of timer
+	 * mode 1 is one shot, mode 2 is period, otherwise del timer
+	 */
+	switch (s->mode) {
+	case 1:
+		create_pit_timer(s, val, 0);
+		break;
+	case 2:
+		create_pit_timer(s, val, 1);
+		break;
+	default:
+		destroy_pit_timer(s);
+	}
+}
+
+/* PIT write handler (offsets 0-3); only the low data byte is used. */
+static void pit_ioport_write(struct kvm_io_device *this,
+			     gpa_t addr, int len, const void *data)
+{
+	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct PITState *pit_state = &pit->pit_state;
+	struct kvm *kvm = pit->kvm;
+	int channel, access;
+	struct PITChannelState *s;
+	u32 val = *(const u8 *)data;	/* read one byte, keep const */
+
+	addr &= 3;
+
+	mutex_lock(&pit_state->lock);
+
+	if (val != 0)
+		pit_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+			  (unsigned int)addr, len, val);
+
+	if (addr == 3) {
+		channel = val >> 6;
+		if (channel == 3) {
+			/* Read-Back Command. */
+			for (channel = 0; channel < 3; channel++) {
+				s = &pit_state->channels[channel];
+				if (val & (2 << channel)) {
+					if (!(val & 0x20))
+						pit_latch_count(kvm, channel);
+					if (!(val & 0x10))
+						pit_latch_status(kvm, channel);
+				}
+			}
+		} else {
+			/* Select Counter <channel>. */
+			s = &pit_state->channels[channel];
+			access = (val >> 4) & 3;
+			if (access == 0) {
+				pit_latch_count(kvm, channel);
+			} else {
+				s->rw_mode = access;
+				s->read_state = access;
+				s->write_state = access;
+				s->mode = (val >> 1) & 7;
+				if (s->mode > 5)
+					s->mode -= 4;
+				s->bcd = val & 1;
+			}
+		}
+	} else {
+		/* Write Count. */
+		s = &pit_state->channels[addr];
+		switch (s->write_state) {
+		default:
+		case RW_STATE_LSB:
+			pit_load_count(kvm, addr, val);
+			break;
+		case RW_STATE_MSB:
+			pit_load_count(kvm, addr, val << 8);
+			break;
+		case RW_STATE_WORD0:
+			s->write_latch = val;
+			s->write_state = RW_STATE_WORD1;
+			break;
+		case RW_STATE_WORD1:
+			pit_load_count(kvm, addr, s->write_latch | (val << 8));
+			s->write_state = RW_STATE_WORD0;
+			break;
+		}
+	}
+
+	mutex_unlock(&pit_state->lock);
+}
+
+/* PIT read handler for offsets 0-3; offset 3 (control word) reads as 0. */
+static void pit_ioport_read(struct kvm_io_device *this,
+			    gpa_t addr, int len, void *data)
+{
+	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct PITState *pit_state = &pit->pit_state;
+	struct kvm *kvm = pit->kvm;
+	int ret = 0, count;
+	struct PITChannelState *s;
+
+	addr &= 3;
+	mutex_lock(&pit_state->lock);
+
+	/* channels[] has only 3 entries: never index it with offset 3. */
+	if (addr == 3)
+		goto out;
+	s = &pit_state->channels[addr];
+
+	if (s->status_latched) {
+		s->status_latched = 0;
+		ret = s->status;
+	} else if (s->count_latched) {
+		switch (s->count_latched) {
+		default:
+		case RW_STATE_LSB:
+			ret = s->latched_count & 0xff;
+			s->count_latched = 0;
+			break;
+		case RW_STATE_MSB:
+			ret = s->latched_count >> 8;
+			s->count_latched = 0;
+			break;
+		case RW_STATE_WORD0:
+			ret = s->latched_count & 0xff;
+			s->count_latched = RW_STATE_MSB;
+			break;
+		}
+	} else {
+		count = pit_get_count(kvm, addr);
+		switch (s->read_state) {
+		default:
+		case RW_STATE_LSB:
+			ret = count & 0xff;
+			break;
+		case RW_STATE_MSB:
+			ret = (count >> 8) & 0xff;
+			break;
+		case RW_STATE_WORD0:
+			ret = count & 0xff;
+			s->read_state = RW_STATE_WORD1;
+			break;
+		case RW_STATE_WORD1:
+			ret = (count >> 8) & 0xff;
+			s->read_state = RW_STATE_WORD0;
+			break;
+		}
+	}
+out:
+	if (len > sizeof(ret))
+		len = sizeof(ret);
+	memcpy(data, (char *)&ret, len);
+	mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+	return ((addr >= PIT_BASE_ADDRESS) &&
+		(addr < PIT_BASE_ADDRESS + PIT_MEM_LENGTH));
+}
+
+/* Speaker port write: bit 1 = speaker data enable, bit 0 = channel 2 gate. */
+static void speaker_ioport_write(struct kvm_io_device *this,
+				 gpa_t addr, int len, const void *data)
+{
+	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct PITState *pit_state = &pit->pit_state;
+	u32 val = *(const u8 *)data;	/* read one byte, keep const */
+
+	mutex_lock(&pit_state->lock);
+	pit_state->speaker_data_on = (val >> 1) & 1;
+	pit_set_gate(pit->kvm, 2, val & 1);
+	mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+				gpa_t addr, int len, void *data)
+{
+	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct PITState *pit_state = &pit->pit_state;
+	struct kvm *kvm = pit->kvm;
+	struct kvm_vcpu *vcpu = kvm->vcpus[0];
+	unsigned int refresh_clock;
+	int ret;
+	u64 t;
+
+	kvm_get_msr(vcpu, MSR_IA32_TIME_STAMP_COUNTER, &t);
+	refresh_clock = ((unsigned int)t >> 14) & 1;
+
+	mutex_lock(&pit_state->lock);
+	ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+		(pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+	if (len > sizeof(ret))
+		len = sizeof(ret);
+	memcpy(data, (char *)&ret, len);
+	mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+	return (addr == SPEAKER_BASE_ADDRESS);
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+	int i;
+	struct kvm_pit *pit;
+	struct PITState *pit_state;
+	struct PITChannelState *c;
+
+	pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+	if (!pit)
+		return NULL;
+
+	mutex_init(&pit->pit_state.lock);
+	mutex_lock(&pit->pit_state.lock);
+
+	/* Initialize PIO device */
+	pit->dev.read = pit_ioport_read;
+	pit->dev.write = pit_ioport_write;
+	pit->dev.in_range = pit_in_range;
+	pit->dev.private = pit;
+	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+	pit->speaker_dev.read = speaker_ioport_read;
+	pit->speaker_dev.write = speaker_ioport_write;
+	pit->speaker_dev.in_range = speaker_in_range;
+	pit->speaker_dev.private = pit;
+	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+	kvm->arch.vpit = pit;
+	pit->kvm = kvm;
+
+	pit_state = &pit->pit_state;
+	pit_state->pit = pit;
+	hrtimer_init(&pit_state->channels[0].pit_timer.timer,
+		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	atomic_set(&pit_state->channels[0].pit_timer.pending, 0);
+	for (i = 0; i < 3; i++) {
+		c = &pit_state->channels[i];
+		c->pit_state = pit_state;
+		c->mode = 0xff;
+		c->gate = (i != 2);
+		pit_load_count(kvm, i, 0);
+	}
+
+	mutex_unlock(&pit->pit_state.lock);
+
+	return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+	struct hrtimer *timer;
+
+	if (kvm->arch.vpit) {
+		mutex_lock(&kvm->arch.vpit->pit_state.lock);
+		timer = &kvm->arch.vpit->pit_state.channels[0].pit_timer.timer;
+		hrtimer_cancel(timer);
+		mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+		kfree(kvm->arch.vpit);
+	}
+}
+
+int __inject_pit_timer_irq(struct kvm *kvm)
+{
+	mutex_lock(&kvm->lock);
+	kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+	kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+	kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+	kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+	mutex_unlock(&kvm->lock);
+
+	return 1;
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+	struct PITChannelState *pc;
+
+	if (pit) {
+		pc = &pit->pit_state.channels[0];
+		if (atomic_read(&pc->pit_timer.pending))
+			__inject_pit_timer_irq(vcpu->kvm);
+	}
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+	struct PITChannelState *pc;
+
+	if (pit) {
+		pc = &pit->pit_state.channels[0];
+		if (atomic_read(&pc->pit_timer.pending) &&
+		(((vcpu->kvm->arch.vpic->pics[0].imr & 1) == 0 &&
+		  vcpu->kvm->arch.vpic->pics[0].irq_base == vec) ||
+		  vcpu->kvm->arch.vioapic->redirtbl[0].fields.vector == vec)) {
+			atomic_dec(&pc->pit_timer.pending);
+			kvm_get_msr(vcpu, MSR_IA32_TIME_STAMP_COUNTER,
+					&pc->count_load_time);
+		}
+	}
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 0000000..6df973b
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,60 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "irq.h"
+
+extern unsigned int cpu_khz;
+
+struct PITTimer {
+	struct hrtimer timer;
+	int irq;
+	s64 period; /* unit: ns */
+	s64 scheduled;
+	ktime_t last_update;
+	atomic_t pending;
+};
+
+struct PITChannelState {
+	int count; /* can be 65536 */
+	u16 latched_count;
+	u8 count_latched;
+	u8 status_latched;
+	u8 status;
+	u8 read_state;
+	u8 write_state;
+	u8 write_latch;
+	u8 rw_mode;
+	u8 mode;
+	u8 bcd; /* not supported */
+	u8 gate; /* timer start */
+	u64 count_load_time;
+	struct PITTimer pit_timer;
+	struct PITState *pit_state;
+};
+
+struct PITState {
+	struct PITChannelState channels[3];
+	struct mutex lock;
+	struct kvm_pit *pit;
+	u32    speaker_data_on;
+};
+
+struct kvm_pit {
+	unsigned long base_addresss;
+	struct kvm_io_device dev;
+	struct kvm_io_device speaker_dev;
+	struct kvm *kvm;
+	struct PITState pit_state;
+};
+
+#define PIT_BASE_ADDRESS	0x40
+#define SPEAKER_BASE_ADDRESS	0x61
+#define PIT_MEM_LENGTH		4
+#define PIT_FREQ		1193181
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e571475..dbfe21c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,7 @@
 #include <linux/kvm_host.h>
 
 #include "irq.h"
+#include "i8254.h"
 
 /*
  * check if there is pending interrupt without
@@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_apic_timer_irqs(vcpu);
+	kvm_inject_pit_timer_irqs(vcpu);
 	/* TODO: PIT, RTC etc. */
 }
 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 {
 	kvm_apic_timer_intr_post(vcpu, vec);
+	kvm_pit_timer_intr_post(vcpu, vec);
 	/* TODO: PIT, RTC etc. */
 }
 EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f94a0b..0993fad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -18,6 +18,7 @@
 #include "segment_descriptor.h"
 #include "irq.h"
 #include "mmu.h"
+#include "i8254.h"
 
 #include <linux/kvm.h>
 #include <linux/fs.h>
@@ -680,6 +681,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
+	case KVM_CAP_PIT:
 		r = 1;
 		break;
 	case KVM_CAP_VAPIC:
@@ -1432,6 +1434,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		} else
 			goto out;
 		break;
+	case KVM_CREATE_PIT:
+		r = -ENOMEM;
+		kvm->arch.vpit = kvm_create_pit(kvm);
+		if (kvm->arch.vpit)
+			r = 0;
+		break;
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
 
@@ -3207,6 +3215,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
 	kvm_free_vcpus(kvm);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index d6db0de..66dbdd5 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -283,6 +283,7 @@ struct kvm_arch{
 	struct list_head active_mmu_pages;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
+	struct kvm_pit *vpit;
 
 	int round_robin_prev_vcpu;
 	unsigned int tss_addr;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 4de4fd2..2f18cfe 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -232,6 +232,7 @@ struct kvm_vapic_addr {
 #define KVM_CAP_SET_TSS_ADDR 4
 #define KVM_CAP_EXT_CPUID 5
 #define KVM_CAP_VAPIC 6
+#define KVM_CAP_PIT 7
 
 /*
  * ioctls for VM fds
@@ -255,6 +256,7 @@ struct kvm_vapic_addr {
 #define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
 #define KVM_GET_IRQCHIP		  _IOWR(KVMIO, 0x62, struct kvm_irqchip)
 #define KVM_SET_IRQCHIP		  _IOR(KVMIO,  0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT		  _IO(KVMIO,  0x64)
 
 /*
  * ioctls for vcpu fds
-- 
debian.1.5.3.7.1-dirty

From 5f7e9bf8856602cf8ffcb50ff744ee1d0058a850 Mon Sep 17 00:00:00 2001
From: Sheng Yang <[EMAIL PROTECTED]>
Date: Mon, 21 Jan 2008 16:41:47 +0800
Subject: [PATCH] kvm: libkvm: In-kernel PIT model


Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
---
 kernel/Kbuild       |    2 +-
 libkvm/kvm-common.h |    2 ++
 libkvm/libkvm.c     |   20 ++++++++++++++++++++
 qemu/qemu-kvm.c     |    4 ++++
 qemu/qemu-kvm.h     |    1 +
 qemu/vl.c           |    6 ++++++
 6 files changed, 34 insertions(+), 1 deletions(-)

diff --git a/kernel/Kbuild b/kernel/Kbuild
index ed02f5a..014cc17 100644
--- a/kernel/Kbuild
+++ b/kernel/Kbuild
@@ -1,7 +1,7 @@
 EXTRA_CFLAGS := -I$(src)/include -include $(src)/external-module-compat.h
 obj-m := kvm.o kvm-intel.o kvm-amd.o
 kvm-objs := kvm_main.o x86.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o \
-	 lapic.o ioapic.o preempt.o
+	 lapic.o ioapic.o preempt.o i8254.o
 kvm-intel-objs := vmx.o vmx-debug.o
 kvm-amd-objs := svm.o
 
diff --git a/libkvm/kvm-common.h b/libkvm/kvm-common.h
index f4040be..bd9f1de 100644
--- a/libkvm/kvm-common.h
+++ b/libkvm/kvm-common.h
@@ -47,6 +47,8 @@ struct kvm_context {
 	int no_irqchip_creation;
 	/// in-kernel irqchip status
 	int irqchip_in_kernel;
+	/// do not create in-kernel pit if set
+	int no_pit_creation;
 };
 
 void init_slots(void);
diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index 45f58d6..15e7c0d 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -271,6 +271,11 @@ void kvm_disable_irqchip_creation(kvm_context_t kvm)
 	kvm->no_irqchip_creation = 1;
 }
 
+void kvm_disable_pit_creation(kvm_context_t kvm)
+{
+	kvm->no_pit_creation = 1;
+}
+
 int kvm_create_vcpu(kvm_context_t kvm, int slot)
 {
 	long mmap_size;
@@ -368,6 +373,20 @@ void kvm_create_irqchip(kvm_context_t kvm)
 #endif
 }
 
+/* Create the in-kernel PIT unless disabled or the kernel lacks KVM_CAP_PIT. */
+void kvm_create_pit(kvm_context_t kvm)
+{
+	int r;
+
+	if (kvm->no_pit_creation)
+		return;
+	if (ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT) <= 0)
+		return;
+	r = ioctl(kvm->vm_fd, KVM_CREATE_PIT);
+	if (r < 0)
+		printf("Create kernel PIT failed\n");
+}
+
 int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
 {
 	int r;
@@ -383,6 +402,7 @@ int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
 	if (r < 0)
 	        return r;
 	kvm_create_irqchip(kvm);
+	kvm_create_pit(kvm);
 	r = kvm_create_vcpu(kvm, 0);
 	if (r < 0)
 		return r;
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index fddbbd6..a4f4761 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -10,6 +10,7 @@
 
 int kvm_allowed = KVM_ALLOWED_DEFAULT;
 int kvm_irqchip = 1;
+int kvm_pit = 1;
 
 #ifdef USE_KVM
 
@@ -556,6 +557,9 @@ int kvm_qemu_create_context(void)
     if (!kvm_irqchip) {
         kvm_disable_irqchip_creation(kvm_context);
     }
+    if (!kvm_pit) {
+        kvm_disable_pit_creation(kvm_context);
+    }
     if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
 	kvm_qemu_destroy();
 	return -1;
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index c4514bb..883a4da 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -42,6 +42,7 @@ void kvm_arch_update_regs_for_sipi(CPUState *env);
 
 extern int kvm_allowed;
 extern int kvm_irqchip;
+extern int kvm_pit;
 
 void kvm_tpr_opt_setup(CPUState *env);
 void kvm_tpr_access_report(CPUState *env, uint64_t rip, int is_write);
diff --git a/qemu/vl.c b/qemu/vl.c
index 756e13d..5b76c8d 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -8015,6 +8015,7 @@ static void help(int exitcode)
 #ifdef USE_KVM
 	   "-no-kvm         disable KVM hardware virtualization\n"
 	   "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n"
+	   "-no-kvm-pit	    disable KVM kernel mode PIT\n"
 #endif
 #ifdef TARGET_I386
            "-std-vga        simulate a standard VGA card with VESA Bochs Extensions\n"
@@ -8131,6 +8132,7 @@ enum {
     QEMU_OPTION_no_acpi,
     QEMU_OPTION_no_kvm,
     QEMU_OPTION_no_kvm_irqchip,
+    QEMU_OPTION_no_kvm_pit,
     QEMU_OPTION_no_reboot,
     QEMU_OPTION_show_cursor,
     QEMU_OPTION_daemonize,
@@ -8213,6 +8215,7 @@ const QEMUOption qemu_options[] = {
 #ifdef USE_KVM
     { "no-kvm", 0, QEMU_OPTION_no_kvm },
     { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip },
+    { "no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit },
 #endif
 #if defined(TARGET_PPC) || defined(TARGET_SPARC)
     { "g", 1, QEMU_OPTION_g },
@@ -9046,6 +9049,9 @@ int main(int argc, char **argv)
 	    case QEMU_OPTION_no_kvm_irqchip:
 		kvm_irqchip = 0;
 		break;
+	    case QEMU_OPTION_no_kvm_pit:
+		kvm_pit = 0;
+		break;
 #endif
             case QEMU_OPTION_usb:
                 usb_enabled = 1;
-- 
debian.1.5.3.7.1-dirty

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to