#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#include <sys/ioctl.h>
#include <sys/mman.h>

#include "kvm.h"

/* Default size in bytes of the guest test buffer (2 GiB); overridden by -m. */
#define MEM_SIZE	(2ULL * 1024 * 1024 * 1024)
/* Default number of vCPUs; overridden by -c. */
#define VCPU_NUM	6
/* Number of entries in the per-vCPU static arrays (vcpu_info, stack, print_buf). */
#define MAX_VCPU	512

/*
 * Test parameters and VM-wide state shared between main() and the guest code.
 * The fields other than share_mem are filled in by vm_init_memory_layout()
 * and set_vm_state().
 */
struct vcpu_share {
	void *share_mem;	/* not referenced anywhere in this file */
	void *write_mem;	/* base address of the buffer the vCPUs touch */
	u64 vcpu_num;		/* number of vCPUs taking part in the test */
	u64 write_mem_size;	/* size of write_mem in bytes (page aligned) */
	long vm_state;		/* current VM_STATE_* value, polled by the vCPUs */
};

/* Per-vCPU handshake state. */
struct vcpu_info {
	long vcpu_state;	/* current VCPU_STATE_* value, polled by main() */
};

/* Test parameters and VM-wide state shared by main() and the guest code. */
static struct vcpu_share vcpu_share;
/* Handshake state of every vCPU, indexed by vCPU id. */
static struct vcpu_info vcpu_info[MAX_VCPU];

/* Backing storage for the guest stacks: one 5-page slot per vCPU id (see vcpu_init_stack()). */
static struct {
	char p[5 * PAGE_SIZE];
} stack[MAX_VCPU];

/* I/O port the guest writes to when it wants kvm_io() to print its print_buf slot. */
#define PRINT_PORT 0x9

/* One page of message text per vCPU id: filled by vprint()/vprints(), printed by kvm_io(). */
static struct {
	char p[PAGE_SIZE];
} print_buf[MAX_VCPU];

/*
 * Symbols defined outside this file. _end is used by vm_init_memory_layout()
 * as the size of the region mapped into the guest; _start is only mentioned
 * in code that is currently commented out.
 * NOTE(review): presumably the linker-provided image bounds - confirm.
 */
extern char _end[];
extern char _start[];

/*
 * Register the guest memory and publish the test parameters in vcpu_share.
 *
 * @vm:             VM the memory is registered with.
 * @vcpu_num:       number of vCPUs that will take part in the test.
 * @write_mem_size: requested size in bytes of the test buffer; it is rounded
 *                  with PAGE_ALIGN() before use.
 *
 * Dies if madvise() on the test buffer fails; a failing mlock() only produces
 * a warning.
 */
static void vm_init_memory_layout(struct vm *vm, int vcpu_num, u64 write_mem_size)
{
	void *p = NULL;
	u64 programe_size = (long)_end;

	/*
	 * Map the program into guest memory, so that the guest can call the
	 * functions of this program. The region covers addresses 0 up to the
	 * page-aligned value of _end.
	 */
	programe_size = PAGE_ALIGN(programe_size);

	kvm_register_mem(vm, 0, 0, programe_size);

	/* Test buffer the vCPUs read and write. */
	dprintf("write_mem_size:%llx.\n", write_mem_size);
	write_mem_size = PAGE_ALIGN(write_mem_size);
	dprintf("write_mem_size:%llx.\n", write_mem_size);
	p = wmalloc_align(write_mem_size);

	if (madvise(p, write_mem_size, MADV_HUGEPAGE)) {
		perror("madvise");
		die("madvise");
	}

	/*
	 * Pinning is best effort (it can fail, e.g. when RLIMIT_MEMLOCK is too
	 * small), so report the failure but carry on with unpinned memory.
	 */
	if (mlock(p, write_mem_size))
		perror("mlock");

	/*
	 * The buffer is registered at a guest address equal to its host address.
	 * NOTE(review): presumably kvm_register_mem_dirty() also enables dirty
	 * logging for the slot - confirm against its definition.
	 */
	kvm_register_mem_dirty(vm, (unsigned long)p, p, write_mem_size);

	vcpu_share.vcpu_num = vcpu_num;
	vcpu_share.write_mem = p;
	vcpu_share.write_mem_size = write_mem_size;
}

/*
 * vprint(vcpu_id, fmt, ...): formatted debug print from guest code.
 *
 * With DEBUG defined the message is formatted into print_buf[vcpu_id] with
 * wsprintf() and a byte is written to PRINT_PORT, which makes the vCPU exit so
 * that kvm_io() can print the buffer. The "memory" clobber keeps the compiler
 * from moving the buffer stores past the port write. Without DEBUG the macro
 * is an empty statement and its arguments are not evaluated.
 */
#ifdef DEBUG
#define vprint(vcpu_id, fmt, args...)	do {	\
	wsprintf(print_buf[(vcpu_id)].p, fmt, ##args);	\
	asm volatile("outb %%al, %%dx" : : "a"(0x9), "d"(PRINT_PORT) : "memory");	\
} while (0)
#else
#define vprint(vcpu_id, fmt, args...)	do { } while (0)
#endif

/*
 * vprints(vcpu_id, s): print the NUL-terminated string s from guest code.
 *
 * The string is copied into print_buf[vcpu_id] (truncated to PAGE_SIZE - 1
 * characters and always NUL terminated), then a byte is written to PRINT_PORT,
 * which makes the vCPU exit so that kvm_io() can print the buffer. Only the
 * string itself is read, never the bytes that follow it. The "memory" clobber
 * keeps the compiler from moving the buffer stores past the port write.
 */
#define vprints(vcpu_id, s)	do {	\
	const char *vps_src = (s);	\
	char *vps_dst = print_buf[(vcpu_id)].p;	\
	unsigned long vps_i;	\
	for (vps_i = 0; vps_i < PAGE_SIZE - 1 && vps_src[vps_i]; vps_i++)	\
		vps_dst[vps_i] = vps_src[vps_i];	\
	vps_dst[vps_i] = '\0';	\
	asm volatile("outb %%al, %%dx" : : "a"(0x9), "d"(PRINT_PORT) : "memory");	\
} while (0)

#define VCPU_STATE_READY	0
#define VCPU_STATE_DOEN		1
#define VM_STATE_DONE	2
#define VM_STATE_BEGIN		3

/*
 * Publish a new VM-wide state for the guest vCPUs, which poll it in
 * wait_vm_state().
 *
 * @state: one of the VM_STATE_* values.
 */
static void set_vm_state(long state)
{
	/*
	 * Release store: everything written before this call is ordered before
	 * the state change, and the store cannot be merged or dropped by the
	 * compiler even though the flag is a plain long.
	 */
	__atomic_store_n(&vcpu_share.vm_state, state, __ATOMIC_RELEASE);
	/* mb() is defined outside this file; presumably a full memory barrier. */
	mb();
}

/*
 * Busy-wait until the VM-wide state equals @state.
 *
 * @state: the VM_STATE_* value to wait for.
 */
static void wait_vm_state(long state)
{
	/*
	 * The flag is written by another thread, so it has to be re-read on
	 * every iteration; a plain load could legally be hoisted out of the
	 * loop by the compiler, turning this into an endless loop.
	 */
	while (__atomic_load_n(&vcpu_share.vm_state, __ATOMIC_ACQUIRE) != state)
		;
	/* mb() is defined outside this file; presumably a full memory barrier. */
	mb();
}

/*
 * Publish the state of one vCPU for main(), which polls it in
 * wait_vcpu_state().
 *
 * @vcpu_num: index of the vCPU in vcpu_info (despite the name this is an id,
 *            not a count).
 * @state:    one of the VCPU_STATE_* values.
 */
static void set_vcpu_state(int vcpu_num, long state)
{
	/*
	 * Release store: the memory accesses of the pass that just finished
	 * are ordered before the state change, and the store cannot be merged
	 * or dropped by the compiler even though the flag is a plain long.
	 */
	__atomic_store_n(&vcpu_info[vcpu_num].vcpu_state, state, __ATOMIC_RELEASE);
	/* mb() is defined outside this file; presumably a full memory barrier. */
	mb();
}

/*
 * Busy-wait until each of the first @vcpu_num vCPUs has reached @state.
 *
 * @vcpu_num: number of vCPUs to wait for (entries 0 .. vcpu_num - 1 of
 *            vcpu_info).
 * @state:    the VCPU_STATE_* value to wait for.
 */
static void wait_vcpu_state(int vcpu_num, long state)
{
	int i;

	for (i = 0; i < vcpu_num; i++) {
		/*
		 * The flag is written by the vCPU, so it has to be re-read on
		 * every iteration; a plain load could legally be hoisted out
		 * of the loop by the compiler.
		 */
		while (__atomic_load_n(&vcpu_info[i].vcpu_state, __ATOMIC_ACQUIRE) != state)
			;
	}

	/* mb() is defined outside this file; presumably a full memory barrier. */
	mb();
}

/*
 * Touch this vCPU's share of the test buffer, three bytes per page.
 *
 * The vCPU starts at page @vcpu_id and advances by vcpu_share.vcpu_num pages,
 * so the pages are interleaved between the vCPUs. Because the limit is
 * write_mem_size - PAGE_SIZE and the comparison is strict, the last page of
 * the buffer is never touched.
 *
 * @vcpu_id: id of the calling vCPU.
 * @write:   non-zero to store into each page, zero to load from it.
 *
 * NOTE(review): the bytes loaded in the read case are never used; if
 * our_memcpy() can be inlined the compiler may drop the load - confirm.
 */
static void vcpu_write(int vcpu_id, int write)
{
	int stride_pages = vcpu_share.vcpu_num;
	char *limit = vcpu_share.write_mem + vcpu_share.write_mem_size - PAGE_SIZE;
	char *cur;

	for (cur = vcpu_share.write_mem + vcpu_id * PAGE_SIZE;
	     (unsigned long)cur < (unsigned long)limit;
	     cur += stride_pages * PAGE_SIZE) {
		if (!write) {
			char scratch[3];

			our_memcpy(scratch, cur, 3);
			continue;
		}

		our_memcpy(cur, "VCPU", 3);
	}
}

static void vcpu_common_fun(void)
{
	int long vcpu_id;
	int i = 0;

	asm ("mov %%esi, %0" : "=rm"(vcpu_id));

	vprints(vcpu_id, "UP.\n");

	vprint(vcpu_id, "vcpu:%d up.\n", vcpu_id);

	vcpu_write(vcpu_id, 1);

	while (1) {
		set_vcpu_state(vcpu_id, VCPU_STATE_READY);
	
		wait_vm_state(VM_STATE_BEGIN);

		vprint(vcpu_id, "Write...\n");
		vcpu_write(vcpu_id, !(i & 0x1));

		set_vcpu_state(vcpu_id, VCPU_STATE_DOEN);

		wait_vm_state(VM_STATE_DONE);

		i++;
	}
	/* Access a port to casue guest exit. */
	asm volatile("outb %%al, %%dx" : : "a"(0xf), "d"(0xf));
}

/* The VM under test: created in main() and used by every vcpu_thread(). */
static struct vm *vm;

/*
 * Handle an I/O exit of a vCPU.
 *
 * Only a write to PRINT_PORT is understood (direction 1 is KVM_EXIT_IO_OUT in
 * the KVM API); the message the guest left in its print_buf slot is then
 * passed to dprintf().
 *
 * @vcpu:      vCPU that exited.
 * @port:      I/O port that was accessed.
 * @direction: direction of the access as reported by KVM.
 *
 * Returns 0 if the exit was a print request, -1 for any other access.
 */
static int kvm_io(struct vcpu *vcpu, u16 port, int direction)
{
	/* Keeps vcpu referenced in case dprintf() does not use its arguments. */
	(void)vcpu;

	if (!(port == PRINT_PORT && direction == 1))
		return -1;

	dprintf("VCPU[%d]: %s", vcpu->id, print_buf[vcpu->id].p);

	return 0;
}

/*
 * Point vcpu->stack at this vCPU's slot of the static stack array, rounded
 * with PAGE_ALIGN().
 *
 * NOTE(review): the value stored is derived from the start of the slot, not
 * its end; how vcpu->stack is turned into the guest stack pointer is decided
 * outside this file - confirm that the slot is not used as a downward-growing
 * stack starting at this address.
 */
static void vcpu_init_stack(struct vcpu *vcpu)
{
	long slot_addr = (long)stack[vcpu->id].p;

	vcpu->stack = (void *)PAGE_ALIGN(slot_addr);

	dprintf("vcpu:%d stack:%p.\n", vcpu->id, vcpu->stack);
}

/*
 * Thread body driving one vCPU.
 *
 * Creates the vCPU, gives it a stack and vcpu_common_fun() as entry point,
 * then keeps re-entering the guest for as long as every exit is an I/O exit
 * that kvm_io() accepts. Any other exit ends the thread.
 *
 * @arg: the vCPU id, passed as an integer cast to a pointer.
 *
 * Always returns NULL.
 */
static void *vcpu_thread (void *arg)
{
	unsigned long id = (unsigned long)arg;
	struct vcpu *vcpu = create_vcpu(vm, id);

	vcpu_init_stack(vcpu);
	vcpu_register_fun(vcpu, vcpu_common_fun);

	for (;;) {
		vcpu_run(vcpu);

		dprintf("Exit code:%x.\n", vcpu->kvm_run->exit_reason);

		if (vcpu->kvm_run->exit_reason != KVM_EXIT_IO)
			break;

		/* A non-zero result means the access was not a print request. */
		if (kvm_io(vcpu, vcpu->kvm_run->io.port, vcpu->kvm_run->io.direction))
			break;
	}

	return NULL;
}

/*
 * Return the current CLOCK_MONOTONIC time in nanoseconds.
 *
 * Exits the program if the clock cannot be read.
 */
static u64 time_ns(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) {
		perror("clock_gettime");
		exit(-1);
	}

	return ts.tv_sec * (u64)1000000000 + ts.tv_nsec;
}

/*
 * Size in bytes of a dirty bitmap covering s bytes of memory: one bit per
 * 4 KiB page (hence the shift by 12), rounded up to a whole number of 64-bit
 * words because KVM copies the bitmap out in units of unsigned long. Both the
 * argument and the expansion are parenthesized so the macro can be used inside
 * any expression.
 */
#define log_size(s) ((((s) >> 12) + 63) / 64 * 8)

static void get_dirty_log(struct vm *vm, void *p)
{
	struct kvm_dirty_log kdl;

	kdl.slot = vm->slot - 1;
	kdl.dirty_bitmap = p;
	if (ioctl(vm->fd, KVM_GET_DIRTY_LOG, &kdl) < 0) {
		perror("Get Dirty");
		exit(-1);
	};
}

/*
 * Print the command-line options and their defaults to stdout.
 * The default of -t printed here mirrors the initial value of count in main().
 */
static void usage(void)
{
	printf("Usage:\n");
	printf("-c:	the vcpu number (default: %d).\n", VCPU_NUM);
	printf("-m:	the memory size (M) (default: %llu M).\n",
	       (unsigned long long)(MEM_SIZE / 1024 / 1024));
	/* -t is an iteration count, not a duration or a size. */
	printf("-t:	the number of test iterations (default: %d).\n", 10);
}

int main(int argc, char *argv[])
{
	u64 ram_size, total_time = 0;
	long vcpu_num, count, i, opt;
	pthread_t *tinfo;
	char *endptr;
	void *p;

	(void)argc; (void)argv;

	dprintf("Start...\n");

	ram_size = MEM_SIZE;
	vcpu_num = VCPU_NUM;
	count = 10;

	while ((opt = getopt(argc, argv, "c:m:t:")) != -1) {
		switch (opt) {
		case 'c':
			vcpu_num = strtol(optarg, &endptr, 10);
			break;
		case 'm':
			ram_size = strtol(optarg, &endptr, 10);
			ram_size *= 1024 * 1024;
			break;
		case 't':
			count = strtol(optarg, &endptr, 10);
			break;
		default:
			usage();
			fmt_die("Unknown %c.\n", opt);
		}
	}

	ram_size = PAGE_ALIGN(ram_size);

	printf("VCPU:%ld, Mem:%lldM, Time:%ld.\n", vcpu_num, ram_size / 1024 / 1024, count);

	p = wmalloc(log_size(ram_size));

	tinfo = wmalloc(sizeof(pthread_t) * vcpu_num);

	vm = create_vm();

	vm_init_memory_layout(vm, vcpu_num, ram_size);

	for (i = 0; i < vcpu_num; i++)
		pthread_create(&tinfo[i], NULL, vcpu_thread, (void *)(unsigned long)i);

	i = 0;

	for (i = 0; i < count; i++) {
		u64 start, end;
	
		wait_vcpu_state(vcpu_num, VCPU_STATE_READY);
	
		start = time_ns();
//		get_dirty_log(vm, p);
		set_vm_state(VM_STATE_BEGIN);
		wait_vcpu_state(vcpu_num, VCPU_STATE_DOEN);
		end = time_ns();
		if (i & 0x1) {
			printf("The %ld Read time: %lld ns.\n", i, end - start);
			total_time += end - start;
		}
		set_vm_state(VM_STATE_DONE);
		kvm_register_mem_slot(vm, (unsigned long)vcpu_share.write_mem, vcpu_share.write_mem, vcpu_share.write_mem_size,
			0, vm->slot - 1);
	}

	printf("Run %ld times, Avg Read time:%lld ns.\n", count, total_time / (count /2));
#if 0
	for (i = 0; i < vcpu_num; i++) {
		if (pthread_join(tinfo[i], NULL) < 0)
			perror("pthread join");
//		printf("VCPU %d exit.\n", i);
	}
#endif

	return 0;
}
