Module Name: src
Committed By: imil
Date: Tue Feb 18 10:16:04 UTC 2025
Modified Files:
src/sys/arch/i386/i386: genassym.cf locore.S machdep.c
Added Files:
src/sys/arch/i386/conf: MICROVM
Log Message:
Add support for non-Xen PVH guests to i386, PR kern/57813
Tested on QEMU by me, Xen PV & PVH by bouyer@.
To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/sys/arch/i386/conf/MICROVM
cvs rdiff -u -r1.135 -r1.136 src/sys/arch/i386/i386/genassym.cf
cvs rdiff -u -r1.198 -r1.199 src/sys/arch/i386/i386/locore.S
cvs rdiff -u -r1.842 -r1.843 src/sys/arch/i386/i386/machdep.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/i386/i386/genassym.cf
diff -u src/sys/arch/i386/i386/genassym.cf:1.135 src/sys/arch/i386/i386/genassym.cf:1.136
--- src/sys/arch/i386/i386/genassym.cf:1.135 Wed Oct 4 20:28:05 2023
+++ src/sys/arch/i386/i386/genassym.cf Tue Feb 18 10:16:03 2025
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.135 2023/10/04 20:28:05 ad Exp $
+# $NetBSD: genassym.cf,v 1.136 2025/02/18 10:16:03 imil Exp $
#
# Copyright (c) 1998, 2006, 2007, 2008, 2023 The NetBSD Foundation, Inc.
@@ -377,6 +377,7 @@ define L2_FRAME L2_FRAME
define VM_GUEST_XENPV VM_GUEST_XENPV
define VM_GUEST_XENPVH VM_GUEST_XENPVH
+define VM_GUEST_GENPVH VM_GUEST_GENPVH
ifdef XEN
define CPU_INFO_VCPU offsetof(struct cpu_info, ci_vcpu)
@@ -391,7 +392,12 @@ define START_INFO_STORE_MFN offsetof(str
define SIF_INITDOMAIN SIF_INITDOMAIN
define EVTCHN_UPCALL_PENDING offsetof(struct vcpu_info, evtchn_upcall_pending)
define EVTCHN_UPCALL_MASK offsetof(struct vcpu_info, evtchn_upcall_mask)
-
+define HVM_START_INFO_SIZE sizeof(struct hvm_start_info)
+define START_INFO_VERSION offsetof(struct hvm_start_info, version)
+define MMAP_PADDR offsetof(struct hvm_start_info, memmap_paddr)
+define MMAP_ENTRIES offsetof(struct hvm_start_info, memmap_entries)
+define MMAP_ENTRY_SIZE sizeof(struct hvm_memmap_table_entry)
+define CMDLINE_PADDR offsetof(struct hvm_start_info, cmdline_paddr)
define HYPERVISOR_sched_op __HYPERVISOR_sched_op
define SCHEDOP_yield SCHEDOP_yield
endif /* XEN */
Index: src/sys/arch/i386/i386/locore.S
diff -u src/sys/arch/i386/i386/locore.S:1.198 src/sys/arch/i386/i386/locore.S:1.199
--- src/sys/arch/i386/i386/locore.S:1.198 Wed Jul 31 20:05:28 2024
+++ src/sys/arch/i386/i386/locore.S Tue Feb 18 10:16:03 2025
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.S,v 1.198 2024/07/31 20:05:28 andvar Exp $ */
+/* $NetBSD: locore.S,v 1.199 2025/02/18 10:16:03 imil Exp $ */
/*
* Copyright-o-rama!
@@ -128,7 +128,7 @@
*/
#include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.198 2024/07/31 20:05:28 andvar Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.199 2025/02/18 10:16:03 imil Exp $");
#include "opt_copy_symtab.h"
#include "opt_ddb.h"
@@ -244,11 +244,12 @@ __KERNEL_RCSID(0, "$NetBSD: locore.S,v 1
#ifdef XEN
#define __ASSEMBLY__
+#include <xen/include/public/arch-x86/cpuid.h>
#include <xen/include/public/elfnote.h>
#include <xen/include/public/xen.h>
#define ELFNOTE(name, type, desctype, descdata...) \
-.pushsection .note.name ; \
+.pushsection .note.name, "a", @note ; \
.align 4 ; \
.long 2f - 1f /* namesz */ ; \
.long 4f - 3f /* descsz */ ; \
@@ -272,7 +273,7 @@ __KERNEL_RCSID(0, "$NetBSD: locore.S,v 1
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, start)
#else
ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0)
- ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, RELOC(start_xenpvh))
+ ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, RELOC(start_pvh))
#endif /* XENPV */
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page)
ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START)
@@ -1193,7 +1194,7 @@ END(start)
#if defined(XEN)
#ifndef XENPV
/* entry point for Xen PVH */
-ENTRY(start_xenpvh)
+ENTRY(start_pvh)
/* Xen doesn't start us with a valid gdt */
movl $RELOC(gdtdesc_xenpvh), %eax
lgdt (%eax)
@@ -1217,6 +1218,93 @@ ENTRY(start_xenpvh)
stosb
/*
+ * Here, we have 2 cases :
+ *
+ * 1) We have been started by Xen
+ * 2) We have been started by another VMM (Qemu, Firecracker, ...)
+ *
+ * The main difference is that, when we are started by Xen,
+ * %ebx (addr of the hvm_start_info structure) is pointing to a
+ * location that will be mapped correctly later.
+ *
+ * In the second case, we have to copy this structure (and all
+ * the information contained in it) to a location that will be
+ * mapped later : __kernel_end
+ *
+ * To distinguish between the 2 cases, we'll use the 'cpuid' instruction
+ */
+
+ push %ebx /* cpuid clobbers %ebx: keep the hvm_start_info address */
+ xorl %eax, %eax
+ cpuid
+ cmpl $0x1, %eax /* Check if we can call CPUID with eax=1 */
+ jb .Lstart_genpvh
+ movl $1, %eax
+ cpuid
+ shr $31, %ecx
+ testb $1, %cl /* Check if bit 31 of ECX (hypervisor) is set */
+ jz .Lstart_genpvh
+ /* A hypervisor is present: read the signature at its first leaf */
+ movl $XEN_CPUID_FIRST_LEAF, %eax
+ cpuid /* Calling cpuid with eax=0x40000000 */
+ cmp $XEN_CPUID_SIGNATURE_EBX, %ebx /* "VneX" */
+ je .Lstart_xen
+
+ /* We have been started by a VMM that is *not* Xen */
+
+.Lstart_genpvh:
+
+ /* First, copy the hvm_start_info structure to __kernel_end */
+ pop %ebx
+ movl %ebx, %esi
+ movl $RELOC(__kernel_end), %edi
+ movl $HVM_START_INFO_SIZE, %ecx
+ shrl $2, %ecx /* bytes -> longwords; assumes the size is a multiple of 4 */
+ rep movsl
+
+ /*
+ * Copy the command line (terminating NUL included) right after the
+ * copied structure: %edi already points just past it.
+ */
+ movl CMDLINE_PADDR(%ebx), %esi
+ movl $RELOC(__kernel_end), %ecx
+ movl %edi, CMDLINE_PADDR(%ecx) /* Set new cmdline_paddr in hvm_start_info */
+ .Lcmdline_copy:
+ movb (%esi), %al
+ movsb
+ cmp $0, %al
+ jne .Lcmdline_copy
+
+ /* Copy memmap_paddr after cmdline (only if hvm_start_info->version != 0) */
+ xorl %eax, %eax
+ cmpl START_INFO_VERSION(%ebx), %eax
+ je .Lreload_ebx
+ movl MMAP_PADDR(%ebx), %esi
+ movl $RELOC(__kernel_end), %ecx
+ movl %edi, MMAP_PADDR(%ecx) /* Set new memmap_paddr in hvm_start_info */
+ movl MMAP_ENTRIES(%ebx), %eax /* Get memmap_entries */
+ movl $MMAP_ENTRY_SIZE, %ebx
+ mull %ebx /* eax * ebx => edx:eax */
+ movl %eax, %ecx
+ shrl $2, %ecx
+ rep movsl
+
+.Lreload_ebx:
+ /* From here on, %ebx designates the relocated copy */
+ movl $RELOC(__kernel_end), %ebx
+
+ /* announce ourself */
+ movl $VM_GUEST_GENPVH, RELOC(vm_guest)
+
+ jmp .Lsave_hvm_start_paddr
+
+.Lstart_xen:
+ /* Xen: the structure is used in place, only restore its address */
+ pop %ebx
+ movl $VM_GUEST_XENPVH, RELOC(vm_guest)
+
+.Lsave_hvm_start_paddr:
+ /*
* save addr of the hvm_start_info structure. This is also the end
* of the symbol table
*/
@@ -1226,22 +1314,25 @@ ENTRY(start_xenpvh)
movl $RELOC(esym),%ebp
movl %eax,(%ebp)
/* get a page for HYPERVISOR_shared_info */
+ /* this is only needed if we are running on Xen */
+ cmpl $VM_GUEST_XENPVH, RELOC(vm_guest)
+ jne .Ladd_hvm_start_info_page
addl $PAGE_SIZE, %ebx
addl $PGOFSET,%ebx
andl $~PGOFSET,%ebx
movl $RELOC(HYPERVISOR_shared_info_pa),%ebp
movl %ebx,(%ebp)
/* XXX assume hvm_start_info+dependant structure fits in a single page */
+.Ladd_hvm_start_info_page:
addl $PAGE_SIZE, %ebx
addl $PGOFSET,%ebx
andl $~PGOFSET,%ebx
addl $KERNBASE,%ebx
movl $RELOC(eblob),%ebp
movl %ebx,(%ebp)
- /* announce ourself */
- movl $VM_GUEST_XENPVH, RELOC(vm_guest)
+
jmp .Lstart_common
-END(start_xenpvh)
+END(start_pvh)
.align 8
gdtdesc_xenpvh:
.word gdt_xenpvhend - gdt_xenpvh
Index: src/sys/arch/i386/i386/machdep.c
diff -u src/sys/arch/i386/i386/machdep.c:1.842 src/sys/arch/i386/i386/machdep.c:1.843
--- src/sys/arch/i386/i386/machdep.c:1.842 Thu Jun 27 23:58:46 2024
+++ src/sys/arch/i386/i386/machdep.c Tue Feb 18 10:16:03 2025
@@ -1,4 +1,4 @@
-/* $NetBSD: machdep.c,v 1.842 2024/06/27 23:58:46 riastradh Exp $ */
+/* $NetBSD: machdep.c,v 1.843 2025/02/18 10:16:03 imil Exp $ */
/*
* Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009, 2017
@@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.842 2024/06/27 23:58:46 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.843 2025/02/18 10:16:03 imil Exp $");
#include "opt_beep.h"
#include "opt_compat_freebsd.h"
@@ -1105,6 +1105,11 @@ init386_ksyms(void)
return;
#endif
+ if (vm_guest == VM_GUEST_GENPVH) {
+ ksyms_addsyms_elf(0, ((int *)&end) + 1, esym);
+ return;
+ }
+
if ((symtab = lookup_bootinfo(BTINFO_SYMTAB)) == NULL) {
ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym);
return;
@@ -1184,7 +1189,7 @@ init386(paddr_t first_avail)
#endif
#ifdef XEN
- if (vm_guest == VM_GUEST_XENPVH)
+ if (vm_guest == VM_GUEST_XENPVH || vm_guest == VM_GUEST_GENPVH)
xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
#endif
Added files:
Index: src/sys/arch/i386/conf/MICROVM
diff -u /dev/null src/sys/arch/i386/conf/MICROVM:1.1
--- /dev/null Tue Feb 18 10:16:04 2025
+++ src/sys/arch/i386/conf/MICROVM Tue Feb 18 10:16:03 2025
@@ -0,0 +1,143 @@
+# $NetBSD: MICROVM,v 1.1 2025/02/18 10:16:03 imil Exp $
+#
+# MICROVM kernel configuration, for use with Qemu microvm machine type
+# or Firecracker.
+# Stripped-down configuration with no PCI, use VirtIO over MMIO virtual
+# bus instead. ACPI is disabled as Firecracker doesn't support it,
+# use legacy MP tables instead.
+#
+# Example QEMU usage on a Linux host to boot a NetBSD guest:
+#
+# qemu-system-x86_64 \
+# -M microvm,x-option-roms=off,rtc=on,acpi=off,pic=off,accel=kvm \
+# -m 256 -cpu host -kernel ${KERNEL} \
+# -append "root=ld0a console=com rw -z" \
+# -display none -device virtio-blk-device,drive=hd0 \
+# -drive file=${IMG},format=raw,id=hd0 \
+# -device virtio-net-device,netdev=net0 \
+# -netdev user,id=net0,ipv6=off,hostfwd=::2200-:22 \
+# -global virtio-mmio.force-legacy=false -serial stdio
+
+machine i386 x86 xen
+include "conf/std" # MI standard options
+include "arch/xen/conf/std.xenversion"
+
+options CPU_IN_CKSUM
+options EXEC_ELF32 # exec ELF binaries
+options EXEC_SCRIPT # exec #! scripts
+options MTRR
+options MULTIPROCESSOR
+
+options CHILD_MAX=1024 # 160 is too few
+options OPEN_MAX=1024 # 128 is too few
+
+mainbus0 at root
+cpu* at mainbus?
+ioapic* at mainbus? apid ?
+
+options INCLUDE_CONFIG_FILE # embed config file in kernel binary
+maxusers 8 # estimated number of users
+
+options INSECURE # disable kernel security levels - X needs this
+
+options RTC_OFFSET=0 # hardware clock is this many mins. west of GMT
+
+options PIPE_SOCKETPAIR # smaller, but slower pipe(2)
+
+# Xen PV support for PVH and HVM guests, needed for PVH boot
+options XENPVHVM
+options XEN
+hypervisor* at mainbus? # Xen hypervisor
+xenbus* at hypervisor? # Xen virtual bus
+xencons* at hypervisor? # Xen virtual console
+
+# Include NetBSD 10 compatibility
+options COMPAT_100
+#
+# Because gcc omits the frame pointer for any -O level, the line below
+# is needed to make backtraces in DDB work.
+#
+makeoptions COPTS="-O2 -fno-omit-frame-pointer"
+
+# File systems
+#include "conf/filesystems.config"
+file-system FFS
+file-system EXT2FS
+file-system KERNFS
+file-system MFS
+file-system TMPFS
+file-system PTYFS
+file-system MSDOSFS
+file-system PROCFS
+
+options DKWEDGE_AUTODISCOVER
+options DKWEDGE_METHOD_GPT
+# File system options
+# ffs
+options FFS_NO_SNAPSHOT # No FFS snapshot support
+options WAPBL # File system journaling support
+
+# Networking options
+#options GATEWAY # packet forwarding
+options INET # IP + ICMP + TCP + UDP
+options INET6 # IPV6
+
+# Kernel root file system and dump configuration.
+config netbsd root on ? type ?
+
+#
+# Device configuration
+#
+
+# ACPI is not configured here (acpi0 is commented out below); use MPBIOS instead
+options MPBIOS # configure CPUs and APICs using MPBIOS
+# Provide bug-for-bug compatibility with Linux in MP Table searching
+# and parsing. Firecracker relies on these bugs.
+options MPTABLE_LINUX_BUG_COMPAT
+
+#pci* at mainbus? bus ?
+#acpi0 at mainbus0
+
+# ISA bus support
+isa0 at mainbus?
+
+# ISA serial interfaces
+com0 at isa? port 0x3f8 irq 4 # Standard PC serial ports
+
+# Virtual bus for non-PCI devices
+pv* at pvbus?
+
+## Virtio devices
+# Use MMIO by default
+virtio* at pv?
+#virtio* at acpi?
+#virtio* at pci? dev ? function ? # Virtio PCI device
+#viomb* at virtio? # Virtio memory balloon device
+
+ld* at virtio? # Virtio disk device
+vioif* at virtio? # Virtio network device
+viornd* at virtio? # Virtio entropy device
+viocon* at virtio?
+
+vio9p* at virtio? # Virtio 9P device
+#vioscsi* at virtio?
+#scsibus* at vioscsi?
+
+pseudo-device md # memory disk device (ramdisk)
+#options MEMORY_DISK_HOOKS # enable md specific hooks
+#options MEMORY_DISK_DYNAMIC # enable dynamic resizing
+#
+pseudo-device vnd # disk-like interface to files
+#options VND_COMPRESSION # compressed vnd(4)
+
+## network pseudo-devices
+pseudo-device bpfilter # Berkeley packet filter
+pseudo-device loop # network loopback
+
+## miscellaneous pseudo-devices
+pseudo-device pty # pseudo-terminals
+# userland interface to drivers, including autoconf and properties retrieval
+pseudo-device drvctl
+
+file-system PUFFS
+pseudo-device putter