Hello,
this patch moves the execution of the guest code to ring-3.
(The patch applies on top of my 'reload PDBR' patch.)
The changes in detail:
- The do_nothing task is removed from the kernel module.
Instead, it is built as standalone executable 'virtcode.bin',
which gets loaded by the user app into the guest memory.
- The kernel module, on initial switch to the monitor, starts
executing the guest code in ring-3.
- A nasty problem with the mmap() implementation appeared:
It seems that Linux doesn't like it if we map normal memory
pages using remap_page_range; apparently you are only allowed
to map 'hardware' pages. While it did seem to work, what
actually happened was that remap_page_range recognized the
page as a normal memory page, and decided to map in an anonymous
page instead. Thus, the guest could access the mapped memory
perfectly well --- the only problem is that it accessed completely
different pages, which were mapped in by the standard nopage
handler :-/
This patch circumvents the problem by a very ugly hack: it
simply sets the PG_reserved bit of all those pages used for
guest memory, thereby declaring them as 'hardware range' ;-)
This works fine on my system (2.0.36), but I'm not at all sure
that it will work on other versions ...
Anyway, this needs to be fixed correctly, of course. I'll be
looking into this problem. For now, we need the hack, because
otherwise the user app is unable to load the guest code in ...
Anyway, with the patch in, we're already approaching something like
the real conditions: we have separate host/monitor/guest environments,
the guest code runs inside guest physical memory in ring-3 (*unable*
to access any monitor memory!), hardware interrupts and exceptions
perform a ring transition to ring-0 monitor handlers, which switch
back to the host for interrupt reflection ... :-)
Bye,
Ulrich
diff -urN fmw-uwold/kernel/Makefile.in fmw-uw/kernel/Makefile.in
--- fmw-uwold/kernel/Makefile.in Tue Aug 24 21:04:59 1999
+++ fmw-uw/kernel/Makefile.in Wed Aug 25 04:05:46 1999
@@ -44,7 +44,7 @@
$(CC) -c $(ALL_CFLAGS) $<
-$(KERNEL_TARGET): $(HOST_O) monitor.o virtcode.o
+$(KERNEL_TARGET): $(HOST_O) monitor.o
$(LD) $(KLDFLAGS) $^ -o $@
clean:
diff -urN fmw-uwold/kernel/host-linux.c fmw-uw/kernel/host-linux.c
--- fmw-uwold/kernel/host-linux.c Tue Aug 24 21:04:59 1999
+++ fmw-uw/kernel/host-linux.c Wed Aug 25 04:11:27 1999
@@ -449,8 +449,6 @@
if (redir_cnt[i])
len += sprintf(buf+len, " 0x%2x:%10u\n", i, redir_cnt[i]);
}
- len += sprintf(buf+len, "nothing count: %u\n",
- monitor_info.nothing_count);
return(len);
}
@@ -524,8 +522,10 @@
// give back pages which were allocated for the VM environment
for (p=0; p<vm_pages.guest_n_pages; p++) {
- if (vm_pages.guest[p])
+ if (vm_pages.guest[p]) {
+ clear_bit(PG_reserved, &mem_map[MAP_NR(vm_pages.guest[p])].flags); /* HACK!!! */
free_page(vm_pages.guest[p]);
+ }
}
if (vm_pages.monitor_page_dir)
@@ -564,6 +564,7 @@
unalloc_vm_pages();
return (-ENOMEM);
}
+ set_bit(PG_reserved, &mem_map[MAP_NR(vm_pages.guest[p])].flags); /* HACK!!! */
//printk(KERN_WARNING "freemware: free page laddr was %08x\n",
// vm_pages.guest[p]);
//printk(KERN_WARNING "freemware: free page paddr was %08lx\n",
diff -urN fmw-uwold/kernel/include/monitor.h fmw-uw/kernel/include/monitor.h
--- fmw-uwold/kernel/include/monitor.h Tue Aug 24 21:04:59 1999
+++ fmw-uw/kernel/include/monitor.h Tue Aug 24 21:28:29 1999
@@ -29,7 +29,6 @@
typedef struct {
unsigned ret_because;
unsigned vector;
- unsigned nothing_count;
} monitor_info_t;
extern monitor_info_t monitor_info;
diff -urN fmw-uwold/kernel/monitor.c fmw-uw/kernel/monitor.c
--- fmw-uwold/kernel/monitor.c Tue Aug 24 21:04:59 1999
+++ fmw-uw/kernel/monitor.c Wed Aug 25 04:04:20 1999
@@ -34,9 +34,6 @@
/* Declarations */
/************************************************************************/
-void __do_nothing(void); // Assembly stub
-void do_nothing(void); // Do nothing...
-
void reflect_int(u32 n); // Generic IRQ handler
void __reflect_int(void); // Interrupt reflection handler
@@ -46,7 +43,16 @@
void default_except_handler(void); // Default exception handler
void __monitor2host(void); // Switch from monitor to host
+void __trampoline(void); // Trampoline for initial ring-3 switch
+
+
+/*
+ * For now, we use these hard-coded values as initial
+ * %eip and %esp values of the guest task ...
+ */
+#define GUEST_CODE_OFF 0x10000
+#define GUEST_STACK_OFF 0x09000
/************************************************************************/
@@ -123,6 +129,7 @@
unsigned int seg;
u32 off;
u32 virt_addr;
+ far_jmp_info_t *guest_jmp_info, *guest_stack_info;
// Zero out data structures for good measure
memset(&monitor_info, 0, sizeof(monitor_info));
@@ -206,7 +213,8 @@
// Setup the page table
for (pti=0; pti<1024; pti++) {
// Get the address of the page frame
- pageTable[pti].base = L2P(vm_pages.guest[page_index++]) >> 12;
+ pageTable[pti].base = L2P(vm_pages.guest[page_index]) >> 12;
+ page_index++;
// Fill in the PTE flags
pageTable[pti].avail = 0;
@@ -302,9 +310,9 @@
D_PRESENT, D_DPL0, D_DATA | D_WRITE)
// Setup user segments
- SET_DESCRIPTOR(mon_gdt[3], KERNEL_OFFSET,0xfffff, D_PG, D_D32, D_AVL0,
+ SET_DESCRIPTOR(mon_gdt[3], 0, 0xfffff, D_PG, D_D32, D_AVL0,
D_PRESENT, D_DPL3, D_CODE | D_READ)
- SET_DESCRIPTOR(mon_gdt[4], KERNEL_OFFSET,0xfffff, D_PG, D_D32, D_AVL0,
+ SET_DESCRIPTOR(mon_gdt[4], 0, 0xfffff, D_PG, D_D32, D_AVL0,
D_PRESENT, D_DPL3, D_DATA | D_WRITE)
// Setup TSS used by monitor
@@ -347,11 +355,26 @@
* point to starting values for the monitor
*/
- mon_jmp_info.offset = (u32) __do_nothing;
+ mon_jmp_info.offset = (u32) __trampoline;
mon_jmp_info.selector = Selector(1, 0, RPL0);
- mon_stack_info.offset = ((u32) mon_kstack) + sizeof(mon_kstack);
+ mon_stack_info.offset = (u32) mon_tss.esp0;
mon_stack_info.selector = Selector(2, 0, RPL0);
+ /*
+ * Set up guest cs:eip and ss:esp values so that the monitor
+ * trampoline can use a far return to switch to ring 3
+ */
+
+ mon_stack_info.offset -= 8;
+ guest_stack_info = (far_jmp_info_t *)mon_stack_info.offset;
+ mon_stack_info.offset -= 8;
+ guest_jmp_info = (far_jmp_info_t *)mon_stack_info.offset;
+
+ guest_jmp_info->offset = (u32) GUEST_CODE_OFF;
+ guest_jmp_info->selector = Selector(3, 0, RPL3);
+ guest_stack_info->offset = (u32) GUEST_STACK_OFF;
+ guest_stack_info->selector = Selector(4, 0, RPL3);
+
/*
* Setup the IDT for the monitor/guest environment
@@ -376,8 +399,6 @@
// structures. They are used by the asm code.
ASM_USED(host_jmp_info);
ASM_USED(host_stack_info);
-
- monitor_info.nothing_count = 0;
}
void
@@ -553,6 +574,25 @@
" ret \n" // Go back to host code
);
+
+/*
+ * __trampoline(): Perform the initial switch from the monitor to the guest
+ *
+ * NOTE: This routine must be called in the monitor context with
+ * interrupts off. The stack must contain ring-3 %cs:%eip
+ * and %ss:%esp values suitable for an intersegment return.
+ */
+asm (
+ ".text; .globl __trampoline \n" // Start function
+ "__trampoline: \n" // __trampoline
+ " xorl %eax, %eax \n" // Clear out segment registers
+ " movw %ax, %ds \n"
+ " movw %ax, %es \n"
+ " movw %ax, %fs \n"
+ " movw %ax, %gs \n"
+ " sti \n" // Enable interrupts
+ " lret \n" // Jump to ring-3 guest code
+);
/*
* The interrupt handlers -- C code
diff -urN fmw-uwold/kernel/virtcode.c fmw-uw/kernel/virtcode.c
--- fmw-uwold/kernel/virtcode.c Tue Aug 24 21:04:59 1999
+++ fmw-uw/kernel/virtcode.c Thu Jan 1 01:00:00 1970
@@ -1,59 +0,0 @@
-/*
- * FreeMWare: run multiple x86 operating systems concurrently
- * Copyright (C) 1999 Kevin P. Lawton
- *
- * virtcode.c: This file contains the test code to run in the VM
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "fmw.h"
-#include "host.h"
-#include "monitor.h"
-
-/*
- * For now, we have this CPL0 task which doesn't do anything.
- * Just testing the interrupt redirection handling, and
- * host<-->monitor context switching mechanisms.
- */
-
-asm (
- ".text; .globl __do_nothing \n"
- "__do_nothing: \n"
- " movw %ss, %ax \n" /* Setup the segment registers */
- " movw %ax, %ds \n"
- " movw %ax, %es \n"
- " movw %ax, %fs \n"
- " movw %ax, %gs \n"
- " sti \n" /* Enable interrupts */
- " call do_nothing \n" /* Run the test code */
- "1: hlt \n" /* Idle until the next */
- " jmp 1b \n" /* interrupt */
-);
-
-void
-do_nothing(void)
-{
- /*
- * For now, this ring0 code does nothing except
- * spin it's wheels and increment a counter for something
- * interesting to display in /proc/freemware.
- */
-
- while (1) {
- monitor_info.nothing_count++; // Increment our counter
- asm("hlt"); // wait until next interrupt
- }
-}
diff -urN fmw-uwold/user/Makefile.in fmw-uw/user/Makefile.in
--- fmw-uwold/user/Makefile.in Tue Aug 24 21:04:59 1999
+++ fmw-uw/user/Makefile.in Wed Aug 25 04:07:41 1999
@@ -20,6 +20,8 @@
LDFLAGS = @LDFLAGS@
+ALL: user resetmod virtcode.bin
+
.c.o:
gcc -I../kernel/include/ -c $<
@@ -29,8 +31,14 @@
resetmod: resetmod.o
gcc -o resetmod resetmod.o
+virtcode.elf: virtcode.o
+ gcc -o virtcode.elf -Wl,-Ttext,0x10000 -nostartfiles -nostdlib virtcode.o
+
+virtcode.bin: virtcode.elf
+ objcopy -O binary virtcode.elf virtcode.bin
+
clean:
- /bin/rm -f *.o user resetmod
+ /bin/rm -f *.o user resetmod virtcode.elf virtcode.bin
dist-clean: clean
/bin/rm -f Makefile
diff -urN fmw-uwold/user/user.c fmw-uw/user/user.c
--- fmw-uwold/user/user.c Tue Aug 24 21:04:59 1999
+++ fmw-uw/user/user.c Wed Aug 25 03:54:50 1999
@@ -26,12 +26,16 @@
#include "fmw.h"
+#define GUESTCODE_OFF 0x10000
+#define COUNTER_OFF 0x08000
+
int
main(int argc, char *argv[])
{
- int fileno, ret;
+ int fileno, virtno, ret;
int request, data;
unsigned i, quantums;
+ struct stat statbuf;
char *ptr;
if (argc < 2)
@@ -71,6 +75,22 @@
fprintf(stderr, "Zeroing virtualised physical memory\n");
memset(ptr, 0, 8*1024*1024);
+ fprintf(stderr, "Loading guest code\n");
+ virtno = open("virtcode.bin", O_RDONLY);
+ if (virtno < 0) {
+ perror("open(\"virtcode.bin\")");
+ exit(1);
+ }
+ if (fstat(virtno, &statbuf) != 0) {
+ perror("fstat");
+ exit(1);
+ }
+ if (read(virtno, ptr+GUESTCODE_OFF, statbuf.st_size) != statbuf.st_size) {
+ perror("read");
+ exit(1);
+ }
+ close(virtno);
+
fprintf(stderr, "Running VM for %d timeslices\n", quantums);
request = FMWRUNGUEST;
data = quantums;
@@ -79,6 +99,8 @@
fprintf(stderr, "error: ioctl failed\n");
goto close_fd;
}
+
+ fprintf(stderr, "Guest counter: %d\n", *(int *)(ptr + COUNTER_OFF));
fprintf(stderr, "Tearing down VM\n");
request = FMWTEARDOWN;
diff -urN fmw-uwold/user/virtcode.c fmw-uw/user/virtcode.c
--- fmw-uwold/user/virtcode.c Thu Jan 1 01:00:00 1970
+++ fmw-uw/user/virtcode.c Wed Aug 25 02:19:38 1999
@@ -0,0 +1,54 @@
+/*
+ * FreeMWare: run multiple x86 operating systems concurrently
+ * Copyright (C) 1999 Kevin P. Lawton
+ *
+ * virtcode.c: This file contains the test code to run in the VM
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * For now, we have this CPL3 task which doesn't do anything.
+ * Just testing the interrupt redirection handling, and
+ * host<-->monitor context switching mechanisms.
+ */
+
+asm (
+ ".text; .globl _start \n"
+ "_start: \n"
+ " movw %ss, %ax \n" /* Setup the segment registers */
+ " movw %ax, %ds \n"
+ " movw %ax, %es \n"
+ " movw %ax, %fs \n"
+ " movw %ax, %gs \n"
+ " call do_nothing \n" /* Run the test code */
+ "1: jmp 1b \n" /* Should never get here */
+);
+
+void
+do_nothing(void)
+{
+ /* Counter cell in guest memory; volatile so every increment is stored. */
+ volatile int *nothing_count = (int *)0x8000;
+
+ /*
+ * For now, this ring3 code does nothing except
+ * spin its wheels and increment a counter.
+ */
+ while (1) {
+ (*nothing_count)++; // Increment our counter
+ }
+}
+
--
Ulrich Weigand,
IMMD 1, Universitaet Erlangen-Nuernberg,
Martensstr. 3, D-91058 Erlangen, Phone: +49 9131 85-7688