This change relinks kernel objects randomly, and unmaps the bootcode component of locore during boot. This makes gadgets harder to find.
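
For concreteness, here is roughly what the new link step amounts to.
This is a simplified sketch of the Makefile.amd64 rule in the diff below
(the object list and LINKFLAGS are abbreviated, and "bsd" stands in for
the kernel target):

	# shuffle every object except the bring-up code, then link;
	# each build thus yields a kernel with different gadget offsets
	echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | sort -R > lorder
	${LD} ${LINKFLAGS} -o bsd locore.o gap.o `cat lorder`

gap.o is assembled from a gap.S generated at build time by makegap.sh
(the script itself is not part of this diff); going by the commented-out
example in the Makefile, the generated file looks something like this,
with $RANDOM expanded by the shell when gap.S is written:

	#include <machine/asm.h>
		.text
		.space	$RANDOM*3, 0xcc		/* few pages of gap after locore.o */
		.align	4096, 0xcc
		.globl	endboot
	_C_LABEL(endboot):
		.space	4096+$RANDOM%4096, 0xcc	/* partial-page gap before rest of .text */
		.align	16, 0xcc

The 0xcc fill is the int3 opcode, so a stray jump into one of the gaps
traps immediately rather than sliding into real code.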
The current linker layout is:

	locore.o [bring-up code + asm runtime]
	rest of kernel .o, in order supplied by config(8)

The new linker layout is:

	locore.o [just the bring-up code]
	gap.o
	rest of kernel .o + new locore2.S [asm runtime], via sort -R

The gap.o deserves some discussion.  This creates 5 random-sized gaps:

	few pages after locore.o .text
	<pagesize pad before other .text
	<pagesize pad before .rodata
	<pagesize pad before .data
	<pagesize pad before .bss

resulting in the following layout:

	boot code
	[few pages of gap]
	endboot:
	[partial page gap]
	rest of .text - randomized order
	[page-alignment] [partial page gap]
	.rodata
	[page-alignment] [partial page gap]
	.data
	[partial page gap]
	.bss

When we supply the .o files to the linker in random order, their text
segments are placed in that random order.  The .rodata/.data/.bss for
each of those is also placed in the same order into their respective
sections.

Once the system is booted, we unmap the locore.o bring-up code and the
first few pages of gap.  (This cannot happen too early; it must be after
"codepatch".)  This bootcode sits at a known location in KVA space, and
at known offsets within this .o text object there are pointers to the
other objects, in particular to main().  By unmapping this bootcode,
those pointers are gone, and due to the randomization the addresses of
gadgets in the remaining objects are not known.  There is no
text-segment knowledge anymore about where these objects are.

Obviously some leakage of KVA addresses occurs, and cleanup will need to
continue to ASLR more of those objects.

There are a few mitigation strategies against the BROP attack
methodology.  One can be summarized as "never reuse an address space".
If a freshly linked kernel of this type were booted each time, we would
be well on the way to satisfying that.  Then other mitigation efforts
come into play.

I've booted around 100 amd64 kernels, so that is fairly well tested.
i386 hasn't been tested as thoroughly yet.

Index: arch/amd64/amd64/autoconf.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/autoconf.c,v
retrieving revision 1.47
diff -u -p -u -r1.47 autoconf.c
--- arch/amd64/amd64/autoconf.c	8 Jun 2016 17:24:44 -0000	1.47
+++ arch/amd64/amd64/autoconf.c	29 May 2017 13:04:34 -0000
@@ -59,6 +59,7 @@
 #include <sys/socketvar.h>
 #include <sys/timeout.h>
 #include <sys/hibernate.h>
+#include <uvm/uvm.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
@@ -105,6 +106,21 @@ void	aesni_setup(void);
 extern int	amd64_has_aesni;
 #endif
 
+void
+unmap_startup(void)
+{
+	extern void *kernel_text, *endboot;
+	vaddr_t p;
+
+	printf("unmap kernel init code %lx-%lx\n",
+	    (vaddr_t)&kernel_text, (vaddr_t)&endboot);
+	p = (vaddr_t)&kernel_text;
+	do {
+		pmap_kremove(p, PAGE_SIZE);
+		p += NBPG;
+	} while (p < (vaddr_t)&endboot);
+}
+
 /*
  * Determine i/o configuration for a machine.
*/ @@ -122,6 +138,8 @@ cpu_configure(void) lapic_set_lvt(); ioapic_enable(); #endif + + unmap_startup(); #ifdef MULTIPROCESSOR cpu_init_idle_pcbs(); Index: arch/amd64/amd64/locore.S =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v retrieving revision 1.84 diff -u -p -u -r1.84 locore.S --- arch/amd64/amd64/locore.S 6 Feb 2017 09:15:51 -0000 1.84 +++ arch/amd64/amd64/locore.S 29 May 2017 20:52:28 -0000 @@ -131,115 +131,13 @@ #include <machine/asm.h> -#define SET_CURPROC(proc,cpu) \ - movq CPUVAR(SELF),cpu ; \ - movq proc,CPUVAR(CURPROC) ; \ - movq cpu,P_CPU(proc) - -#define GET_CURPCB(reg) movq CPUVAR(CURPCB),reg -#define SET_CURPCB(reg) movq reg,CPUVAR(CURPCB) - - /* XXX temporary kluge; these should not be here */ /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */ #include <dev/isa/isareg.h> - -/* - * Initialization - */ - .data - -#if NLAPIC > 0 - .align NBPG, 0xcc - .globl _C_LABEL(local_apic), _C_LABEL(lapic_id), _C_LABEL(lapic_tpr) -_C_LABEL(local_apic): - .space LAPIC_ID -_C_LABEL(lapic_id): - .long 0x00000000 - .space LAPIC_TPRI-(LAPIC_ID+4) -_C_LABEL(lapic_tpr): - .space LAPIC_PPRI-LAPIC_TPRI -_C_LABEL(lapic_ppr): - .space LAPIC_ISR-LAPIC_PPRI -_C_LABEL(lapic_isr): - .space NBPG-LAPIC_ISR -#endif - - .globl _C_LABEL(cpu_id),_C_LABEL(cpu_vendor) - .globl _C_LABEL(cpuid_level),_C_LABEL(cpu_feature) - .globl _C_LABEL(cpu_ebxfeature) - .globl _C_LABEL(cpu_ecxfeature),_C_LABEL(ecpu_ecxfeature) - .globl _C_LABEL(cpu_perf_eax) - .globl _C_LABEL(cpu_perf_ebx) - .globl _C_LABEL(cpu_perf_edx) - .globl _C_LABEL(cpu_apmi_edx) - .globl _C_LABEL(ssym),_C_LABEL(esym),_C_LABEL(boothowto) - .globl _C_LABEL(bootdev) - .globl _C_LABEL(bootinfo), _C_LABEL(bootinfo_size), _C_LABEL(atdevbase) - .globl _C_LABEL(proc0paddr),_C_LABEL(PTDpaddr) - .globl _C_LABEL(biosbasemem),_C_LABEL(biosextmem) - .globl _C_LABEL(bootapiver) - .globl _C_LABEL(pg_nx) -_C_LABEL(cpu_id): .long 0 # saved from `cpuid' instruction -_C_LABEL(cpu_feature): .long 0 # feature flags from 'cpuid' - # instruction -_C_LABEL(cpu_ebxfeature):.long 0 # ext. ebx feature flags from 'cpuid' -_C_LABEL(cpu_ecxfeature):.long 0 # ext. ecx feature flags from 'cpuid' -_C_LABEL(ecpu_ecxfeature):.long 0 # extended ecx feature flags -_C_LABEL(cpu_perf_eax): .long 0 # arch. perf. mon. flags from 'cpuid' -_C_LABEL(cpu_perf_ebx): .long 0 # arch. perf. mon. flags from 'cpuid' -_C_LABEL(cpu_perf_edx): .long 0 # arch. perf. mon. flags from 'cpuid' -_C_LABEL(cpu_apmi_edx): .long 0 # adv. power mgmt. info. from 'cpuid' -_C_LABEL(cpuid_level): .long -1 # max. 
level accepted by 'cpuid' - # instruction -_C_LABEL(cpu_vendor): .space 16 # vendor string returned by `cpuid' - # instruction -_C_LABEL(ssym): .quad 0 # ptr to start of syms -_C_LABEL(esym): .quad 0 # ptr to end of syms -_C_LABEL(atdevbase): .quad 0 # location of start of iomem in virtual -_C_LABEL(bootapiver): .long 0 # /boot API version -_C_LABEL(bootdev): .long 0 # device we booted from -_C_LABEL(proc0paddr): .quad 0 -_C_LABEL(PTDpaddr): .quad 0 # paddr of PTD, for libkvm -#ifndef REALBASEMEM -_C_LABEL(biosbasemem): .long 0 # base memory reported by BIOS -#else -_C_LABEL(biosbasemem): .long REALBASEMEM -#endif -#ifndef REALEXTMEM -_C_LABEL(biosextmem): .long 0 # extended memory reported by BIOS -#else -_C_LABEL(biosextmem): .long REALEXTMEM -#endif -_C_LABEL(pg_nx): .quad 0 # NX PTE bit (if CPU supports) - #define _RELOC(x) ((x) - KERNBASE) #define RELOC(x) _RELOC(_C_LABEL(x)) - .globl gdt64 - -gdt64: - .word gdt64_end-gdt64_start-1 - .quad _RELOC(gdt64_start) -.align 64, 0xcc - -gdt64_start: - .quad 0x0000000000000000 /* always empty */ - .quad 0x00af9a000000ffff /* kernel CS */ - .quad 0x00cf92000000ffff /* kernel DS */ -gdt64_end: - -farjmp64: - .long longmode-KERNBASE - .word GSEL(GCODE_SEL, SEL_KPL) - - .space 512 -tmpstk: - - .globl _C_LABEL(cpu_private) - .comm _C_LABEL(cpu_private),NBPG,NBPG - /* * Some hackage to deal with 64bit symbols in 32 bit mode. * This may not be needed if things are cleaned up a little. @@ -748,475 +646,6 @@ longmode_hi: call _C_LABEL(main) -/*****************************************************************************/ - -/* - * Signal trampoline; copied to top of user stack. - * gdb's backtrace logic matches against the instructions in this. - */ - .section .rodata - .globl _C_LABEL(sigcode) -_C_LABEL(sigcode): - call *%rax - - movq %rsp,%rdi - pushq %rdi /* fake return address */ - movq $SYS_sigreturn,%rax - syscall - .globl _C_LABEL(sigcoderet) -_C_LABEL(sigcoderet): - movq $SYS_exit,%rax - syscall - .globl _C_LABEL(esigcode) -_C_LABEL(esigcode): - - .globl _C_LABEL(sigfill) -_C_LABEL(sigfill): - int3 -_C_LABEL(esigfill): - .globl _C_LABEL(sigfillsiz) -_C_LABEL(sigfillsiz): - .long _C_LABEL(esigfill) - _C_LABEL(sigfill) - - .text -/* - * void lgdt(struct region_descriptor *rdp); - * Change the global descriptor table. - */ -NENTRY(lgdt) - /* Reload the descriptor table. */ - movq %rdi,%rax - lgdt (%rax) - /* Flush the prefetch q. */ - jmp 1f - nop -1: /* Reload "stale" selectors. */ - movl $GSEL(GDATA_SEL, SEL_KPL),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%ss - /* Reload code selector by doing intersegment return. */ - popq %rax - pushq $GSEL(GCODE_SEL, SEL_KPL) - pushq %rax - lretq - -ENTRY(setjmp) - /* - * Only save registers that must be preserved across function - * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15) - * and %rip. - */ - movq %rdi,%rax - movq %rbx,(%rax) - movq %rsp,8(%rax) - movq %rbp,16(%rax) - movq %r12,24(%rax) - movq %r13,32(%rax) - movq %r14,40(%rax) - movq %r15,48(%rax) - movq (%rsp),%rdx - movq %rdx,56(%rax) - xorl %eax,%eax - ret - -ENTRY(longjmp) - movq %rdi,%rax - movq (%rax),%rbx - movq 8(%rax),%rsp - movq 16(%rax),%rbp - movq 24(%rax),%r12 - movq 32(%rax),%r13 - movq 40(%rax),%r14 - movq 48(%rax),%r15 - movq 56(%rax),%rdx - movq %rdx,(%rsp) - xorl %eax,%eax - incl %eax - ret - -/*****************************************************************************/ - -/* - * int cpu_switchto(struct proc *old, struct proc *new) - * Switch from "old" proc to "new". 
- */ -ENTRY(cpu_switchto) - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - - movq %rdi, %r13 - movq %rsi, %r12 - - /* Record new proc. */ - movb $SONPROC,P_STAT(%r12) # p->p_stat = SONPROC - SET_CURPROC(%r12,%rcx) - - movl CPUVAR(CPUID),%edi - - /* If old proc exited, don't bother. */ - testq %r13,%r13 - jz switch_exited - - /* - * Save old context. - * - * Registers: - * %rax, %rcx - scratch - * %r13 - old proc, then old pcb - * %r12 - new proc - * %edi - cpuid - */ - - movq P_ADDR(%r13),%r13 - - /* clear the old pmap's bit for the cpu */ - movq PCB_PMAP(%r13),%rcx - lock - btrq %rdi,PM_CPUS(%rcx) - - /* Save stack pointers. */ - movq %rsp,PCB_RSP(%r13) - movq %rbp,PCB_RBP(%r13) - -switch_exited: - /* did old proc run in userspace? then reset the segment regs */ - btrl $CPUF_USERSEGS_BIT, CPUVAR(FLAGS) - jnc restore_saved - - /* set %ds, %es, and %fs to expected value to prevent info leak */ - movw $(GSEL(GUDATA_SEL, SEL_UPL)),%ax - movw %ax,%ds - movw %ax,%es - movw %ax,%fs - -restore_saved: - /* - * Restore saved context. - * - * Registers: - * %rax, %rcx, %rdx - scratch - * %r13 - new pcb - * %r12 - new process - */ - - /* No interrupts while loading new state. */ - cli - movq P_ADDR(%r12),%r13 - - /* Restore stack pointers. */ - movq PCB_RSP(%r13),%rsp - movq PCB_RBP(%r13),%rbp - - movq CPUVAR(TSS),%rcx - movq PCB_KSTACK(%r13),%rdx - movq %rdx,TSS_RSP0(%rcx) - - movq PCB_CR3(%r13),%rax - movq %rax,%cr3 - - /* Don't bother with the rest if switching to a system process. */ - testl $P_SYSTEM,P_FLAG(%r12) - jnz switch_restored - - /* set the new pmap's bit for the cpu */ - movl CPUVAR(CPUID),%edi - movq PCB_PMAP(%r13),%rcx - lock - btsq %rdi,PM_CPUS(%rcx) -#ifdef DIAGNOSTIC - jc _C_LABEL(switch_pmcpu_set) -#endif - -switch_restored: - /* Restore cr0 (including FPU state). */ - movl PCB_CR0(%r13),%ecx -#ifdef MULTIPROCESSOR - movq PCB_FPCPU(%r13),%r8 - cmpq CPUVAR(SELF),%r8 - jz 1f - orl $CR0_TS,%ecx -1: -#endif - movq %rcx,%cr0 - - SET_CURPCB(%r13) - - /* Interrupts are okay again. */ - sti - -switch_return: - - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx - ret - -ENTRY(cpu_idle_enter) - movq _C_LABEL(cpu_idle_enter_fcn),%rax - cmpq $0,%rax - je 1f - jmpq *%rax -1: - ret - -ENTRY(cpu_idle_cycle) - movq _C_LABEL(cpu_idle_cycle_fcn),%rax - cmpq $0,%rax - je 1f - call *%rax - ret -1: - sti - hlt - ret - -ENTRY(cpu_idle_leave) - movq _C_LABEL(cpu_idle_leave_fcn),%rax - cmpq $0,%rax - je 1f - jmpq *%rax -1: - ret - - .globl _C_LABEL(panic) - -#ifdef DIAGNOSTIC -NENTRY(switch_pmcpu_set) - movabsq $1f,%rdi - call _C_LABEL(panic) - /* NOTREACHED */ -1: .asciz "activate already active pmap" -#endif /* DIAGNOSTIC */ - -/* - * savectx(struct pcb *pcb); - * Update pcb, saving current processor state. - */ -ENTRY(savectx) - /* Save stack pointers. */ - movq %rsp,PCB_RSP(%rdi) - movq %rbp,PCB_RBP(%rdi) - - ret - -IDTVEC(syscall32) - sysret /* go away please */ - -/* - * syscall insn entry. This currently isn't much faster, but - * it can be made faster in the future. - */ -IDTVEC(syscall) - /* - * Enter here with interrupts blocked; %rcx contains the caller's - * %rip and the original rflags has been copied to %r11. %cs and - * %ss have been updated to the kernel segments, but %rsp is still - * the user-space value. - * First order of business is to swap to the kernel gs.base so that - * we can access our struct cpu_info and use the scratch space there - * to switch to our kernel stack. 
Once that's in place we can - * unblock interrupts and save the rest of the syscall frame. - */ - swapgs - movq %r15,CPUVAR(SCRATCH) - movq CPUVAR(CURPCB),%r15 - movq PCB_KSTACK(%r15),%r15 - xchgq %r15,%rsp - sti - - /* - * XXX don't need this whole frame, split of the - * syscall frame and trapframe is needed. - * First, leave some room for the trapno, error, - * ss:rsp, etc, so that all GP registers can be - * saved. Then, fill in the rest. - */ - pushq $(GSEL(GUDATA_SEL, SEL_UPL)) - pushq %r15 - subq $(TF_RSP-TF_TRAPNO),%rsp - movq CPUVAR(SCRATCH),%r15 - subq $32,%rsp - INTR_SAVE_GPRS - movq %r11, TF_RFLAGS(%rsp) /* old rflags from syscall insn */ - movq $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp) - movq %rcx,TF_RIP(%rsp) - movq $2,TF_ERR(%rsp) /* ignored */ - - movq CPUVAR(CURPROC),%r14 - movq %rsp,P_MD_REGS(%r14) # save pointer to frame - andl $~MDP_IRET,P_MD_FLAGS(%r14) - movq %rsp,%rdi - call _C_LABEL(syscall) - -.Lsyscall_check_asts: - /* Check for ASTs on exit to user mode. */ - cli - CHECK_ASTPENDING(%r11) - je 2f - CLEAR_ASTPENDING(%r11) - sti - movq %rsp,%rdi - call _C_LABEL(ast) - jmp .Lsyscall_check_asts - -2: -#ifdef DIAGNOSTIC - cmpl $IPL_NONE,CPUVAR(ILEVEL) - jne .Lsyscall_spl_not_lowered -#endif /* DIAGNOSTIC */ - - /* Could registers have been changed that require an iretq? */ - testl $MDP_IRET, P_MD_FLAGS(%r14) - jne intr_fast_exit - - movq TF_RDI(%rsp),%rdi - movq TF_RSI(%rsp),%rsi - movq TF_R8(%rsp),%r8 - movq TF_R9(%rsp),%r9 - movq TF_R10(%rsp),%r10 - movq TF_R12(%rsp),%r12 - movq TF_R13(%rsp),%r13 - movq TF_R14(%rsp),%r14 - movq TF_R15(%rsp),%r15 - movq TF_RBP(%rsp),%rbp - movq TF_RBX(%rsp),%rbx - - INTR_RESTORE_SELECTORS - - movq TF_RDX(%rsp),%rdx - movq TF_RAX(%rsp),%rax - - movq TF_RIP(%rsp),%rcx - movq TF_RFLAGS(%rsp),%r11 - movq TF_RSP(%rsp),%rsp - sysretq - -#ifdef DIAGNOSTIC -.Lsyscall_spl_not_lowered: - movabsq $4f, %rdi - movl TF_RAX(%rsp),%esi - movl TF_RDI(%rsp),%edx - movl %ebx,%ecx - movl CPUVAR(ILEVEL),%r8d - xorq %rax,%rax - call _C_LABEL(printf) -#ifdef DDB - int $3 -#endif /* DDB */ - movl $IPL_NONE,CPUVAR(ILEVEL) - jmp .Lsyscall_check_asts -4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n" -#endif - - -NENTRY(proc_trampoline) -#ifdef MULTIPROCESSOR - call _C_LABEL(proc_trampoline_mp) -#endif - movl $IPL_NONE,CPUVAR(ILEVEL) - movq %r13,%rdi - call *%r12 - movq CPUVAR(CURPROC),%r14 - jmp .Lsyscall_check_asts - - -/* - * Return via iretq, for real interrupts and signal returns - */ -NENTRY(intr_fast_exit) - movq TF_RDI(%rsp),%rdi - movq TF_RSI(%rsp),%rsi - movq TF_R8(%rsp),%r8 - movq TF_R9(%rsp),%r9 - movq TF_R10(%rsp),%r10 - movq TF_R12(%rsp),%r12 - movq TF_R13(%rsp),%r13 - movq TF_R14(%rsp),%r14 - movq TF_R15(%rsp),%r15 - movq TF_RBP(%rsp),%rbp - movq TF_RBX(%rsp),%rbx - - testq $SEL_RPL,TF_CS(%rsp) - je 5f - - INTR_RESTORE_SELECTORS - -5: movq TF_RDX(%rsp),%rdx - movq TF_RCX(%rsp),%rcx - movq TF_R11(%rsp),%r11 - movq TF_RAX(%rsp),%rax - -#if !defined(GPROF) && defined(DDBPROF) - /* - * If we are returning from a probe trap we need to fix the - * stack layout and emulate the patched instruction. - * - * The code below does that by trashing %rax, so it MUST be - * restored afterward. - */ - cmpl $INTR_FAKE_TRAP, TF_ERR(%rsp) - je .Lprobe_fixup -#endif /* !defined(GPROF) && defined(DDBPROF) */ - - addq $TF_RIP,%rsp - - .globl _C_LABEL(doreti_iret) -_C_LABEL(doreti_iret): - iretq - - -#if !defined(GPROF) && defined(DDBPROF) -.Lprobe_fixup: - /* Reserve enough room to emulate "pushq %rbp". 
*/ - subq $16, %rsp - - /* Shift hardware-saved registers. */ - movq (TF_RIP + 16)(%rsp), %rax - movq %rax, TF_RIP(%rsp) - movq (TF_CS + 16)(%rsp), %rax - movq %rax, TF_CS(%rsp) - movq (TF_RFLAGS + 16)(%rsp), %rax - movq %rax, TF_RFLAGS(%rsp) - movq (TF_RSP + 16)(%rsp), %rax - movq %rax, TF_RSP(%rsp) - movq (TF_SS + 16)(%rsp), %rax - movq %rax, TF_SS(%rsp) - - /* Pull 8 bytes off the stack and store %rbp in the expected location.*/ - movq TF_RSP(%rsp), %rax - subq $8, %rax - movq %rax, TF_RSP(%rsp) - movq %rbp, (%rax) - - /* Write back overwritten %rax */ - movq (TF_RAX + 16)(%rsp),%rax - - addq $TF_RIP,%rsp - iretq -#endif /* !defined(GPROF) && defined(DDBPROF) */ - -ENTRY(pagezero) - movq $-PAGE_SIZE,%rdx - subq %rdx,%rdi - xorq %rax,%rax -1: - movnti %rax,(%rdi,%rdx) - movnti %rax,8(%rdi,%rdx) - movnti %rax,16(%rdi,%rdx) - movnti %rax,24(%rdi,%rdx) - addq $32,%rdx - jne 1b - sfence - ret - .section .codepatch,"a" .align 8, 0xcc .globl _C_LABEL(codepatch_begin) @@ -1228,20 +657,16 @@ _C_LABEL(codepatch_begin): _C_LABEL(codepatch_end): .previous -#if NXEN > 0 - /* Hypercall page needs to be page aligned */ - .text - .align NBPG, 0xcc - .globl _C_LABEL(xen_hypercall_page) -_C_LABEL(xen_hypercall_page): - .skip 0x1000, 0xcc -#endif /* NXEN > 0 */ + .data + +farjmp64: + .long longmode-KERNBASE + .word GSEL(GCODE_SEL, SEL_KPL) + + .globl _C_LABEL(cpu_private) + .comm _C_LABEL(cpu_private),NBPG,NBPG + +/* XXX we want some guard here */ + .space 512 +tmpstk: -#if NHYPERV > 0 - /* Hypercall page needs to be page aligned */ - .text - .align NBPG, 0xcc - .globl _C_LABEL(hv_hypercall_page) -_C_LABEL(hv_hypercall_page): - .skip 0x1000, 0xcc -#endif /* NXEN > 0 */ Index: arch/amd64/amd64/locore2.S =================================================================== RCS file: arch/amd64/amd64/locore2.S diff -N arch/amd64/amd64/locore2.S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ arch/amd64/amd64/locore2.S 29 May 2017 20:51:07 -0000 @@ -0,0 +1,721 @@ +/* $OpenBSD: locore.S,v 1.84 2017/02/06 09:15:51 mpi Exp $ */ +/* $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $ */ + +/* + * Copyright-o-rama! + */ + +/* + * Copyright (c) 2001 Wasabi Systems, Inc. + * All rights reserved. + * + * Written by Frank van der Linden for Wasabi Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed for the NetBSD Project by + * Wasabi Systems, Inc. + * 4. The name of Wasabi Systems, Inc. may not be used to endorse + * or promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +/*- + * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Charles M. Hannum. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)locore.s 7.3 (Berkeley) 5/13/91 + */ + +#include "assym.h" +#include "lapic.h" +#include "ksyms.h" +#include "xen.h" +#include "hyperv.h" + +#include <sys/syscall.h> + +#include <machine/param.h> +#include <machine/segments.h> +#include <machine/specialreg.h> +#include <machine/trap.h> +#include <machine/frameasm.h> + +#define SET_CURPROC(proc,cpu) \ + movq CPUVAR(SELF),cpu ; \ + movq proc,CPUVAR(CURPROC) ; \ + movq cpu,P_CPU(proc) + +#define GET_CURPCB(reg) movq CPUVAR(CURPCB),reg +#define SET_CURPCB(reg) movq reg,CPUVAR(CURPCB) + +#if NLAPIC > 0 +#include <machine/i82489reg.h> +#endif + +/* + * override user-land alignment before including asm.h + */ +#define ALIGN_DATA .align 8 +#define ALIGN_TEXT .align 16,0x90 +#define _ALIGN_TEXT ALIGN_TEXT + +#include <machine/asm.h> + + + +/* + * Initialization + */ + .data + +#if NLAPIC > 0 + .align NBPG, 0xcc + .globl _C_LABEL(local_apic), _C_LABEL(lapic_id), _C_LABEL(lapic_tpr) +_C_LABEL(local_apic): + .space LAPIC_ID +_C_LABEL(lapic_id): + .long 0x00000000 + .space LAPIC_TPRI-(LAPIC_ID+4) +_C_LABEL(lapic_tpr): + .space LAPIC_PPRI-LAPIC_TPRI +_C_LABEL(lapic_ppr): + .space LAPIC_ISR-LAPIC_PPRI +_C_LABEL(lapic_isr): + .space NBPG-LAPIC_ISR +#endif + + .globl _C_LABEL(cpu_id),_C_LABEL(cpu_vendor) + .globl _C_LABEL(cpuid_level),_C_LABEL(cpu_feature) + .globl _C_LABEL(cpu_ebxfeature) + .globl _C_LABEL(cpu_ecxfeature),_C_LABEL(ecpu_ecxfeature) + .globl _C_LABEL(cpu_perf_eax) + .globl _C_LABEL(cpu_perf_ebx) + .globl _C_LABEL(cpu_perf_edx) + .globl _C_LABEL(cpu_apmi_edx) + .globl _C_LABEL(ssym),_C_LABEL(esym),_C_LABEL(boothowto) + .globl _C_LABEL(bootdev) + .globl _C_LABEL(bootinfo), _C_LABEL(bootinfo_size), _C_LABEL(atdevbase) + .globl _C_LABEL(proc0paddr),_C_LABEL(PTDpaddr) + .globl _C_LABEL(biosbasemem),_C_LABEL(biosextmem) + .globl _C_LABEL(bootapiver) + .globl _C_LABEL(pg_nx) +_C_LABEL(cpu_id): .long 0 # saved from `cpuid' instruction +_C_LABEL(cpu_feature): .long 0 # feature flags from 'cpuid' + # instruction +_C_LABEL(cpu_ebxfeature):.long 0 # ext. ebx feature flags from 'cpuid' +_C_LABEL(cpu_ecxfeature):.long 0 # ext. ecx feature flags from 'cpuid' +_C_LABEL(ecpu_ecxfeature):.long 0 # extended ecx feature flags +_C_LABEL(cpu_perf_eax): .long 0 # arch. perf. mon. flags from 'cpuid' +_C_LABEL(cpu_perf_ebx): .long 0 # arch. perf. mon. flags from 'cpuid' +_C_LABEL(cpu_perf_edx): .long 0 # arch. perf. mon. flags from 'cpuid' +_C_LABEL(cpu_apmi_edx): .long 0 # adv. power mgmt. info. from 'cpuid' +_C_LABEL(cpuid_level): .long -1 # max. 
level accepted by 'cpuid' + # instruction +_C_LABEL(cpu_vendor): .space 16 # vendor string returned by `cpuid' + # instruction +_C_LABEL(ssym): .quad 0 # ptr to start of syms +_C_LABEL(esym): .quad 0 # ptr to end of syms +_C_LABEL(atdevbase): .quad 0 # location of start of iomem in virtual +_C_LABEL(bootapiver): .long 0 # /boot API version +_C_LABEL(bootdev): .long 0 # device we booted from +_C_LABEL(proc0paddr): .quad 0 +_C_LABEL(PTDpaddr): .quad 0 # paddr of PTD, for libkvm +#ifndef REALBASEMEM +_C_LABEL(biosbasemem): .long 0 # base memory reported by BIOS +#else +_C_LABEL(biosbasemem): .long REALBASEMEM +#endif +#ifndef REALEXTMEM +_C_LABEL(biosextmem): .long 0 # extended memory reported by BIOS +#else +_C_LABEL(biosextmem): .long REALEXTMEM +#endif +_C_LABEL(pg_nx): .quad 0 # NX PTE bit (if CPU supports) + +#define _RELOC(x) ((x) - KERNBASE) +#define RELOC(x) _RELOC(_C_LABEL(x)) + + .globl gdt64 + +gdt64: + .word gdt64_end-gdt64_start-1 + .quad _RELOC(gdt64_start) +.align 64, 0xcc + +gdt64_start: + .quad 0x0000000000000000 /* always empty */ + .quad 0x00af9a000000ffff /* kernel CS */ + .quad 0x00cf92000000ffff /* kernel DS */ +gdt64_end: + +/* + * Some hackage to deal with 64bit symbols in 32 bit mode. + * This may not be needed if things are cleaned up a little. + */ + +/*****************************************************************************/ + +/* + * Signal trampoline; copied to top of user stack. + * gdb's backtrace logic matches against the instructions in this. + */ + .section .rodata + .globl _C_LABEL(sigcode) +_C_LABEL(sigcode): + call *%rax + + movq %rsp,%rdi + pushq %rdi /* fake return address */ + movq $SYS_sigreturn,%rax + syscall + .globl _C_LABEL(sigcoderet) +_C_LABEL(sigcoderet): + movq $SYS_exit,%rax + syscall + .globl _C_LABEL(esigcode) +_C_LABEL(esigcode): + + .globl _C_LABEL(sigfill) +_C_LABEL(sigfill): + int3 +_C_LABEL(esigfill): + .globl _C_LABEL(sigfillsiz) +_C_LABEL(sigfillsiz): + .long _C_LABEL(esigfill) - _C_LABEL(sigfill) + + .text + ALIGN_TEXT + +/* + * void lgdt(struct region_descriptor *rdp); + * Change the global descriptor table. + */ +NENTRY(lgdt) + /* Reload the descriptor table. */ + movq %rdi,%rax + lgdt (%rax) + /* Flush the prefetch q. */ + jmp 1f + nop +1: /* Reload "stale" selectors. */ + movl $GSEL(GDATA_SEL, SEL_KPL),%eax + movl %eax,%ds + movl %eax,%es + movl %eax,%ss + /* Reload code selector by doing intersegment return. */ + popq %rax + pushq $GSEL(GCODE_SEL, SEL_KPL) + pushq %rax + lretq + +ENTRY(setjmp) + /* + * Only save registers that must be preserved across function + * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15) + * and %rip. + */ + movq %rdi,%rax + movq %rbx,(%rax) + movq %rsp,8(%rax) + movq %rbp,16(%rax) + movq %r12,24(%rax) + movq %r13,32(%rax) + movq %r14,40(%rax) + movq %r15,48(%rax) + movq (%rsp),%rdx + movq %rdx,56(%rax) + xorl %eax,%eax + ret + +ENTRY(longjmp) + movq %rdi,%rax + movq (%rax),%rbx + movq 8(%rax),%rsp + movq 16(%rax),%rbp + movq 24(%rax),%r12 + movq 32(%rax),%r13 + movq 40(%rax),%r14 + movq 48(%rax),%r15 + movq 56(%rax),%rdx + movq %rdx,(%rsp) + xorl %eax,%eax + incl %eax + ret + +/*****************************************************************************/ + +/* + * int cpu_switchto(struct proc *old, struct proc *new) + * Switch from "old" proc to "new". + */ +ENTRY(cpu_switchto) + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movq %rdi, %r13 + movq %rsi, %r12 + + /* Record new proc. 
*/ + movb $SONPROC,P_STAT(%r12) # p->p_stat = SONPROC + SET_CURPROC(%r12,%rcx) + + movl CPUVAR(CPUID),%edi + + /* If old proc exited, don't bother. */ + testq %r13,%r13 + jz switch_exited + + /* + * Save old context. + * + * Registers: + * %rax, %rcx - scratch + * %r13 - old proc, then old pcb + * %r12 - new proc + * %edi - cpuid + */ + + movq P_ADDR(%r13),%r13 + + /* clear the old pmap's bit for the cpu */ + movq PCB_PMAP(%r13),%rcx + lock + btrq %rdi,PM_CPUS(%rcx) + + /* Save stack pointers. */ + movq %rsp,PCB_RSP(%r13) + movq %rbp,PCB_RBP(%r13) + +switch_exited: + /* did old proc run in userspace? then reset the segment regs */ + btrl $CPUF_USERSEGS_BIT, CPUVAR(FLAGS) + jnc restore_saved + + /* set %ds, %es, and %fs to expected value to prevent info leak */ + movw $(GSEL(GUDATA_SEL, SEL_UPL)),%ax + movw %ax,%ds + movw %ax,%es + movw %ax,%fs + +restore_saved: + /* + * Restore saved context. + * + * Registers: + * %rax, %rcx, %rdx - scratch + * %r13 - new pcb + * %r12 - new process + */ + + /* No interrupts while loading new state. */ + cli + movq P_ADDR(%r12),%r13 + + /* Restore stack pointers. */ + movq PCB_RSP(%r13),%rsp + movq PCB_RBP(%r13),%rbp + + movq CPUVAR(TSS),%rcx + movq PCB_KSTACK(%r13),%rdx + movq %rdx,TSS_RSP0(%rcx) + + movq PCB_CR3(%r13),%rax + movq %rax,%cr3 + + /* Don't bother with the rest if switching to a system process. */ + testl $P_SYSTEM,P_FLAG(%r12) + jnz switch_restored + + /* set the new pmap's bit for the cpu */ + movl CPUVAR(CPUID),%edi + movq PCB_PMAP(%r13),%rcx + lock + btsq %rdi,PM_CPUS(%rcx) +#ifdef DIAGNOSTIC + jc _C_LABEL(switch_pmcpu_set) +#endif + +switch_restored: + /* Restore cr0 (including FPU state). */ + movl PCB_CR0(%r13),%ecx +#ifdef MULTIPROCESSOR + movq PCB_FPCPU(%r13),%r8 + cmpq CPUVAR(SELF),%r8 + jz 1f + orl $CR0_TS,%ecx +1: +#endif + movq %rcx,%cr0 + + SET_CURPCB(%r13) + + /* Interrupts are okay again. */ + sti + +switch_return: + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +ENTRY(cpu_idle_enter) + movq _C_LABEL(cpu_idle_enter_fcn),%rax + cmpq $0,%rax + je 1f + jmpq *%rax +1: + ret + +ENTRY(cpu_idle_cycle) + movq _C_LABEL(cpu_idle_cycle_fcn),%rax + cmpq $0,%rax + je 1f + call *%rax + ret +1: + sti + hlt + ret + +ENTRY(cpu_idle_leave) + movq _C_LABEL(cpu_idle_leave_fcn),%rax + cmpq $0,%rax + je 1f + jmpq *%rax +1: + ret + + .globl _C_LABEL(panic) + +#ifdef DIAGNOSTIC +NENTRY(switch_pmcpu_set) + movabsq $1f,%rdi + call _C_LABEL(panic) + /* NOTREACHED */ +1: .asciz "activate already active pmap" +#endif /* DIAGNOSTIC */ + +/* + * savectx(struct pcb *pcb); + * Update pcb, saving current processor state. + */ +ENTRY(savectx) + /* Save stack pointers. */ + movq %rsp,PCB_RSP(%rdi) + movq %rbp,PCB_RBP(%rdi) + + ret + +IDTVEC(syscall32) + sysret /* go away please */ + +/* + * syscall insn entry. This currently isn't much faster, but + * it can be made faster in the future. + */ +IDTVEC(syscall) + /* + * Enter here with interrupts blocked; %rcx contains the caller's + * %rip and the original rflags has been copied to %r11. %cs and + * %ss have been updated to the kernel segments, but %rsp is still + * the user-space value. + * First order of business is to swap to the kernel gs.base so that + * we can access our struct cpu_info and use the scratch space there + * to switch to our kernel stack. Once that's in place we can + * unblock interrupts and save the rest of the syscall frame. 
+ */ + swapgs + movq %r15,CPUVAR(SCRATCH) + movq CPUVAR(CURPCB),%r15 + movq PCB_KSTACK(%r15),%r15 + xchgq %r15,%rsp + sti + + /* + * XXX don't need this whole frame, split of the + * syscall frame and trapframe is needed. + * First, leave some room for the trapno, error, + * ss:rsp, etc, so that all GP registers can be + * saved. Then, fill in the rest. + */ + pushq $(GSEL(GUDATA_SEL, SEL_UPL)) + pushq %r15 + subq $(TF_RSP-TF_TRAPNO),%rsp + movq CPUVAR(SCRATCH),%r15 + subq $32,%rsp + INTR_SAVE_GPRS + movq %r11, TF_RFLAGS(%rsp) /* old rflags from syscall insn */ + movq $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp) + movq %rcx,TF_RIP(%rsp) + movq $2,TF_ERR(%rsp) /* ignored */ + + movq CPUVAR(CURPROC),%r14 + movq %rsp,P_MD_REGS(%r14) # save pointer to frame + andl $~MDP_IRET,P_MD_FLAGS(%r14) + movq %rsp,%rdi + call _C_LABEL(syscall) + +.Lsyscall_check_asts: + /* Check for ASTs on exit to user mode. */ + cli + CHECK_ASTPENDING(%r11) + je 2f + CLEAR_ASTPENDING(%r11) + sti + movq %rsp,%rdi + call _C_LABEL(ast) + jmp .Lsyscall_check_asts + +2: +#ifdef DIAGNOSTIC + cmpl $IPL_NONE,CPUVAR(ILEVEL) + jne .Lsyscall_spl_not_lowered +#endif /* DIAGNOSTIC */ + + /* Could registers have been changed that require an iretq? */ + testl $MDP_IRET, P_MD_FLAGS(%r14) + jne intr_fast_exit + + movq TF_RDI(%rsp),%rdi + movq TF_RSI(%rsp),%rsi + movq TF_R8(%rsp),%r8 + movq TF_R9(%rsp),%r9 + movq TF_R10(%rsp),%r10 + movq TF_R12(%rsp),%r12 + movq TF_R13(%rsp),%r13 + movq TF_R14(%rsp),%r14 + movq TF_R15(%rsp),%r15 + movq TF_RBP(%rsp),%rbp + movq TF_RBX(%rsp),%rbx + + INTR_RESTORE_SELECTORS + + movq TF_RDX(%rsp),%rdx + movq TF_RAX(%rsp),%rax + + movq TF_RIP(%rsp),%rcx + movq TF_RFLAGS(%rsp),%r11 + movq TF_RSP(%rsp),%rsp + sysretq + +#ifdef DIAGNOSTIC +.Lsyscall_spl_not_lowered: + movabsq $4f, %rdi + movl TF_RAX(%rsp),%esi + movl TF_RDI(%rsp),%edx + movl %ebx,%ecx + movl CPUVAR(ILEVEL),%r8d + xorq %rax,%rax + call _C_LABEL(printf) +#ifdef DDB + int $3 +#endif /* DDB */ + movl $IPL_NONE,CPUVAR(ILEVEL) + jmp .Lsyscall_check_asts +4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n" +#endif + + +NENTRY(proc_trampoline) +#ifdef MULTIPROCESSOR + call _C_LABEL(proc_trampoline_mp) +#endif + movl $IPL_NONE,CPUVAR(ILEVEL) + movq %r13,%rdi + call *%r12 + movq CPUVAR(CURPROC),%r14 + jmp .Lsyscall_check_asts + + +/* + * Return via iretq, for real interrupts and signal returns + */ +NENTRY(intr_fast_exit) + movq TF_RDI(%rsp),%rdi + movq TF_RSI(%rsp),%rsi + movq TF_R8(%rsp),%r8 + movq TF_R9(%rsp),%r9 + movq TF_R10(%rsp),%r10 + movq TF_R12(%rsp),%r12 + movq TF_R13(%rsp),%r13 + movq TF_R14(%rsp),%r14 + movq TF_R15(%rsp),%r15 + movq TF_RBP(%rsp),%rbp + movq TF_RBX(%rsp),%rbx + + testq $SEL_RPL,TF_CS(%rsp) + je 5f + + INTR_RESTORE_SELECTORS + +5: movq TF_RDX(%rsp),%rdx + movq TF_RCX(%rsp),%rcx + movq TF_R11(%rsp),%r11 + movq TF_RAX(%rsp),%rax + +#if !defined(GPROF) && defined(DDBPROF) + /* + * If we are returning from a probe trap we need to fix the + * stack layout and emulate the patched instruction. + * + * The code below does that by trashing %rax, so it MUST be + * restored afterward. + */ + cmpl $INTR_FAKE_TRAP, TF_ERR(%rsp) + je .Lprobe_fixup +#endif /* !defined(GPROF) && defined(DDBPROF) */ + + addq $TF_RIP,%rsp + + .globl _C_LABEL(doreti_iret) +_C_LABEL(doreti_iret): + iretq + + +#if !defined(GPROF) && defined(DDBPROF) +.Lprobe_fixup: + /* Reserve enough room to emulate "pushq %rbp". */ + subq $16, %rsp + + /* Shift hardware-saved registers. 
*/ + movq (TF_RIP + 16)(%rsp), %rax + movq %rax, TF_RIP(%rsp) + movq (TF_CS + 16)(%rsp), %rax + movq %rax, TF_CS(%rsp) + movq (TF_RFLAGS + 16)(%rsp), %rax + movq %rax, TF_RFLAGS(%rsp) + movq (TF_RSP + 16)(%rsp), %rax + movq %rax, TF_RSP(%rsp) + movq (TF_SS + 16)(%rsp), %rax + movq %rax, TF_SS(%rsp) + + /* Pull 8 bytes off the stack and store %rbp in the expected location.*/ + movq TF_RSP(%rsp), %rax + subq $8, %rax + movq %rax, TF_RSP(%rsp) + movq %rbp, (%rax) + + /* Write back overwritten %rax */ + movq (TF_RAX + 16)(%rsp),%rax + + addq $TF_RIP,%rsp + iretq +#endif /* !defined(GPROF) && defined(DDBPROF) */ + +ENTRY(pagezero) + movq $-PAGE_SIZE,%rdx + subq %rdx,%rdi + xorq %rax,%rax +1: + movnti %rax,(%rdi,%rdx) + movnti %rax,8(%rdi,%rdx) + movnti %rax,16(%rdi,%rdx) + movnti %rax,24(%rdi,%rdx) + addq $32,%rdx + jne 1b + sfence + ret + +#if NXEN > 0 + /* Hypercall page needs to be page aligned */ + .text + .align NBPG, 0xcc + .globl _C_LABEL(xen_hypercall_page) +_C_LABEL(xen_hypercall_page): + .skip 0x1000, 0xcc +#endif /* NXEN > 0 */ + +#if NHYPERV > 0 + /* Hypercall page needs to be page aligned */ + .text + .align NBPG, 0xcc + .globl _C_LABEL(hv_hypercall_page) +_C_LABEL(hv_hypercall_page): + .skip 0x1000, 0xcc +#endif /* NXEN > 0 */ Index: arch/amd64/conf/Makefile.amd64 =================================================================== RCS file: /cvs/src/sys/arch/amd64/conf/Makefile.amd64,v retrieving revision 1.76 diff -u -p -u -r1.76 Makefile.amd64 --- arch/amd64/conf/Makefile.amd64 8 May 2017 00:13:38 -0000 1.76 +++ arch/amd64/conf/Makefile.amd64 30 May 2017 07:28:14 -0000 @@ -30,6 +30,7 @@ CWARNFLAGS= -Werror -Wall -Wimplicit-fun CMACHFLAGS= -mcmodel=kernel -mno-red-zone -mno-sse2 -mno-sse -mno-3dnow \ -mno-mmx -msoft-float -fno-omit-frame-pointer CMACHFLAGS+= -ffreestanding ${NOPIE_FLAGS} +SORTR= sort -R .if ${IDENT:M-DNO_PROPOLICE} CMACHFLAGS+= -fno-stack-protector .endif @@ -38,6 +39,7 @@ CMACHFLAGS+= -msave-args .endif .if ${IDENT:M-DSMALL_KERNEL} CMACHFLAGS+= -Wa,-n +SORTR= cat .endif DEBUG?= -g @@ -73,12 +75,13 @@ NORMAL_S= ${CC} ${AFLAGS} ${CPPFLAGS} -c # ${SYSTEM_LD_HEAD} # ${SYSTEM_LD} swapxxx.o # ${SYSTEM_LD_TAIL} -SYSTEM_HEAD= locore.o param.o ioconf.o -SYSTEM_OBJ= ${SYSTEM_HEAD} ${OBJS} +SYSTEM_HEAD= locore.o gap.o +SYSTEM_OBJ= ${SYSTEM_HEAD} ${OBJS} param.o ioconf.o SYSTEM_DEP= Makefile ${SYSTEM_OBJ} ${LDSCRIPT} SYSTEM_LD_HEAD= @rm -f $@ SYSTEM_LD= @echo ${LD} ${LINKFLAGS} -o $@ '$${SYSTEM_HEAD} vers.o $${OBJS}'; \ - ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} vers.o ${OBJS} + echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | ${SORTR} > lorder; \ + ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} `cat lorder` SYSTEM_LD_TAIL= @${SIZE} $@; chmod 755 $@ .if ${DEBUG} == "-g" @@ -122,8 +125,16 @@ vers.o: ${SYSTEM_DEP} ${SYSTEM_SWAP_DEP} sh $S/conf/newvers.sh ${CC} ${CFLAGS} ${CPPFLAGS} ${PROF} -c vers.c +gap.S: ${SYSTEM_SWAP_DEP} Makefile + #echo "#include <machine/asm.h>\n\t.text\n\t.space $$RANDOM*3,0xcc\n\t.align 4096,0xcc\n\t.globl endboot\n_C_LABEL(endboot):\n\t.space 4096+$$RANDOM%4096,0xcc\n\t.align 16,0xcc" > gap.S + sh $S/conf/makegap.sh > gap.S + +gap.o: gap.S + ${CC} ${AFLAGS} ${CPPFLAGS} ${PROF} -c gap.S + clean: - rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} param.c + rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} \ + gap.S ldorder param.c cleandir: clean rm -f Makefile *.h ioconf.c options machine ${_mach} vers.c @@ -136,7 +147,8 @@ db_structinfo.h: $S/ddb/db_structinfo.c rm -f db_structinfo.o locore.o: ${_machdir}/${_mach}/locore.S 
assym.h -mutex.o vector.o copy.o spl.o mptramp.o acpi_wakecode.o vmm_support.o: assym.h +locore2.o mutex.o vector.o copy.o spl.o: assym.h +mptramp.o acpi_wakecode.o vmm_support.o: assym.h # The install target can be redefined by putting a # install-kernel-${MACHINE_NAME} target into /etc/mk.conf Index: arch/amd64/conf/files.amd64 =================================================================== RCS file: /cvs/src/sys/arch/amd64/conf/files.amd64,v retrieving revision 1.88 diff -u -p -u -r1.88 files.amd64 --- arch/amd64/conf/files.amd64 30 Apr 2017 13:04:49 -0000 1.88 +++ arch/amd64/conf/files.amd64 28 May 2017 13:19:03 -0000 @@ -11,6 +11,7 @@ file arch/amd64/amd64/machdep.c file arch/amd64/amd64/hibernate_machdep.c hibernate file arch/amd64/amd64/identcpu.c file arch/amd64/amd64/via.c +file arch/amd64/amd64/locore2.S file arch/amd64/amd64/aes_intel.S crypto file arch/amd64/amd64/aesni.c crypto file arch/amd64/amd64/amd64errata.c Index: arch/i386/conf/Makefile.i386 =================================================================== RCS file: /cvs/src/sys/arch/i386/conf/Makefile.i386,v retrieving revision 1.103 diff -u -p -u -r1.103 Makefile.i386 --- arch/i386/conf/Makefile.i386 28 May 2017 13:20:37 -0000 1.103 +++ arch/i386/conf/Makefile.i386 30 May 2017 07:26:53 -0000 @@ -29,9 +29,13 @@ CWARNFLAGS= -Werror -Wall -Wimplicit-fun CMACHFLAGS= CMACHFLAGS+= -ffreestanding ${NOPIE_FLAGS} +SORTR= sort -R .if ${IDENT:M-DNO_PROPOLICE} CMACHFLAGS+= -fno-stack-protector .endif + .if ${IDENT:M-DSMALL_KERNEL} +SORTR= cat +.endif DEBUG?= -g COPTS?= -O2 @@ -72,12 +76,13 @@ NORMAL_S= ${CC} ${AFLAGS} ${CPPFLAGS} -c # ${SYSTEM_LD_HEAD} # ${SYSTEM_LD} swapxxx.o # ${SYSTEM_LD_TAIL} -SYSTEM_HEAD= locore.o param.o ioconf.o -SYSTEM_OBJ= ${SYSTEM_HEAD} ${OBJS} +SYSTEM_HEAD= locore.o gap.o +SYSTEM_OBJ= ${SYSTEM_HEAD} ${OBJS} param.o ioconf.o SYSTEM_DEP= Makefile ${SYSTEM_OBJ} ${LDSCRIPT} SYSTEM_LD_HEAD= @rm -f $@ SYSTEM_LD= @echo ${LD} ${LINKFLAGS} -o $@ '$${SYSTEM_HEAD} vers.o $${OBJS}'; \ - ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} vers.o ${OBJS} + echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | ${SORTR} > lorder; \ + ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} `cat lorder` SYSTEM_LD_TAIL= @${SIZE} $@; chmod 755 $@ .if ${DEBUG} == "-g" @@ -120,6 +125,13 @@ ioconf.o: ioconf.c vers.o: ${SYSTEM_DEP} ${SYSTEM_SWAP_DEP} sh $S/conf/newvers.sh ${CC} ${CFLAGS} ${CPPFLAGS} ${PROF} -c vers.c + +gap.S: ${SYSTEM_SWAP_DEP} Makefile + #echo "#include <machine/asm.h>\n\t.text\n\t.space $$RANDOM*3,0xcc\n\t.align 4096,0xcc\n\t.globl endboot\n_C_LABEL(endboot):\n\t.space 4096+$$RANDOM%4096,0xcc\n\t.align 16,0xcc" > gap.S + sh $S/conf/makegap.sh > gap.S + +gap.o: gap.S + ${CC} ${AFLAGS} ${CPPFLAGS} ${PROF} -c gap.S clean: rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} param.c Index: arch/i386/conf/files.i386 =================================================================== RCS file: /cvs/src/sys/arch/i386/conf/files.i386,v retrieving revision 1.232 diff -u -p -u -r1.232 files.i386 --- arch/i386/conf/files.i386 30 Apr 2017 13:04:49 -0000 1.232 +++ arch/i386/conf/files.i386 29 May 2017 12:27:14 -0000 @@ -23,6 +23,7 @@ file arch/i386/i386/in_cksum.s file arch/i386/i386/machdep.c file arch/i386/i386/hibernate_machdep.c hibernate file arch/i386/i386/via.c +file arch/i386/i386/locore2.S file arch/i386/i386/amd64errata.c !small_kernel file arch/i386/i386/longrun.c !small_kernel file arch/i386/i386/mem.c Index: arch/i386/i386/autoconf.c =================================================================== RCS file: 
/cvs/src/sys/arch/i386/i386/autoconf.c,v retrieving revision 1.101 diff -u -p -u -r1.101 autoconf.c --- arch/i386/i386/autoconf.c 8 Jun 2016 17:24:44 -0000 1.101 +++ arch/i386/i386/autoconf.c 29 May 2017 13:07:46 -0000 @@ -109,6 +109,21 @@ void viac3_crypto_setup(void); extern int i386_has_xcrypt; #endif +void +unmap_startup(void) +{ + extern void *kernel_text, *endboot; + vaddr_t p; + + printf("unmap kernel init code %lx-%lx\n", + (vaddr_t)&kernel_text, (vaddr_t)&endboot); + p = (vaddr_t)&kernel_text; + do { + pmap_kremove(p, PAGE_SIZE); + p += NBPG; + } while (p < (vaddr_t)&endboot); +} + /* * Determine i/o configuration for a machine. */ @@ -154,6 +169,8 @@ cpu_configure(void) proc0.p_addr->u_pcb.pcb_cr0 = rcr0(); + unmap_startup(); + #ifdef MULTIPROCESSOR /* propagate TSS configuration to the idle pcb's. */ cpu_init_idle_pcbs(); @@ -165,6 +182,7 @@ cpu_configure(void) * until we can checksum blocks to figure it out. */ cold = 0; + /* * At this point the RNG is running, and if FSXR is set we can Index: arch/i386/i386/locore.s =================================================================== RCS file: /cvs/src/sys/arch/i386/i386/locore.s,v retrieving revision 1.173 diff -u -p -u -r1.173 locore.s --- arch/i386/i386/locore.s 12 May 2017 08:46:28 -0000 1.173 +++ arch/i386/i386/locore.s 30 May 2017 07:53:26 -0000 @@ -55,206 +55,20 @@ #include <dev/isa/isareg.h> -#if NLAPIC > 0 -#include <machine/i82489reg.h> -#endif - -#ifndef SMALL_KERNEL -/* - * As stac/clac SMAP instructions are 3 bytes, we want the fastest - * 3 byte nop sequence possible here. This will be replaced by - * stac/clac instructions if SMAP is detected after booting. - * - * Intel documents multi-byte NOP sequences as being available - * on all family 0x6 and 0xf processors (ie 686+) - * So use 3 of the single byte nops for compatibility - */ -#define SMAP_NOP .byte 0x90, 0x90, 0x90 -#define SMAP_STAC CODEPATCH_START ;\ - SMAP_NOP ;\ - CODEPATCH_END(CPTAG_STAC) -#define SMAP_CLAC CODEPATCH_START ;\ - SMAP_NOP ;\ - CODEPATCH_END(CPTAG_CLAC) - -#else - -#define SMAP_STAC -#define SMAP_CLAC - -#endif - - /* * override user-land alignment before including asm.h */ #define ALIGN_DATA .align 4 #define ALIGN_TEXT .align 4,0x90 /* 4-byte boundaries, NOP-filled */ -#define SUPERALIGN_TEXT .align 16,0x90 /* 16-byte boundaries better for 486 */ #define _ALIGN_TEXT ALIGN_TEXT #include <machine/asm.h> -#define CPL _C_LABEL(lapic_tpr) - -#define GET_CURPCB(reg) \ - movl CPUVAR(CURPCB), reg - -#define CHECK_ASTPENDING(treg) \ - movl CPUVAR(CURPROC),treg ; \ - cmpl $0, treg ; \ - je 1f ; \ - cmpl $0,P_MD_ASTPENDING(treg) ; \ - 1: - -#define CLEAR_ASTPENDING(cpreg) \ - movl $0,P_MD_ASTPENDING(cpreg) - -/* - * These are used on interrupt or trap entry or exit. 
- */ -#define INTRENTRY \ - cld ; \ - pushl %eax ; \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %ebx ; \ - pushl %ebp ; \ - pushl %esi ; \ - pushl %edi ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %gs ; \ - movl $GSEL(GDATA_SEL, SEL_KPL),%eax ; \ - movw %ax,%ds ; \ - movw %ax,%es ; \ - xorl %eax,%eax ; /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */ \ - movw %ax,%gs ; \ - pushl %fs ; \ - movl $GSEL(GCPU_SEL, SEL_KPL),%eax ; \ - movw %ax,%fs - -#define INTR_RESTORE_ALL \ - popl %fs ; \ - popl %gs ; \ - popl %es ; \ - popl %ds ; \ - popl %edi ; \ - popl %esi ; \ - popl %ebp ; \ - popl %ebx ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax - -#define INTRFASTEXIT \ - INTR_RESTORE_ALL ;\ - addl $8,%esp ; \ - iret - -#define INTR_FAKE_TRAP 0xbadabada - -/* - * PTmap is recursive pagemap at top of virtual address space. - * Within PTmap, the page directory can be found (third indirection). - */ - .globl _C_LABEL(PTmap), _C_LABEL(PTD), _C_LABEL(PTDpde) - .set _C_LABEL(PTmap), (PDSLOT_PTE << PDSHIFT) - .set _C_LABEL(PTD), (_C_LABEL(PTmap) + PDSLOT_PTE * NBPG) - .set _C_LABEL(PTDpde), (_C_LABEL(PTD) + PDSLOT_PTE * 4) # XXX 4 == sizeof pde - -/* - * APTmap, APTD is the alternate recursive pagemap. - * It's used when modifying another process's page tables. - */ - .globl _C_LABEL(APTmap), _C_LABEL(APTD), _C_LABEL(APTDpde) - .set _C_LABEL(APTmap), (PDSLOT_APTE << PDSHIFT) - .set _C_LABEL(APTD), (_C_LABEL(APTmap) + PDSLOT_APTE * NBPG) - # XXX 4 == sizeof pde - .set _C_LABEL(APTDpde), (_C_LABEL(PTD) + PDSLOT_APTE * 4) - /* * Initialization */ .data - .globl _C_LABEL(cpu), _C_LABEL(cpu_id), _C_LABEL(cpu_vendor) - .globl _C_LABEL(cpu_brandstr) - .globl _C_LABEL(cpuid_level) - .globl _C_LABEL(cpu_miscinfo) - .globl _C_LABEL(cpu_feature), _C_LABEL(cpu_ecxfeature) - .globl _C_LABEL(ecpu_feature), _C_LABEL(ecpu_eaxfeature) - .globl _C_LABEL(ecpu_ecxfeature) - .globl _C_LABEL(cpu_cache_eax), _C_LABEL(cpu_cache_ebx) - .globl _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx) - .globl _C_LABEL(cpu_perf_eax) - .globl _C_LABEL(cpu_perf_ebx) - .globl _C_LABEL(cpu_perf_edx) - .globl _C_LABEL(cpu_apmi_edx) - .globl _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem) - .globl _C_LABEL(cpu_pae) - .globl _C_LABEL(esym) - .globl _C_LABEL(ssym) - .globl _C_LABEL(nkptp_max) - .globl _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase) - .globl _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr), _C_LABEL(PTDsize) - .globl _C_LABEL(gdt) - .globl _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv) - .globl _C_LABEL(lapic_tpr) - -#if NLAPIC > 0 - .align NBPG - .globl _C_LABEL(local_apic), _C_LABEL(lapic_id) -_C_LABEL(local_apic): - .space LAPIC_ID -_C_LABEL(lapic_id): - .long 0x00000000 - .space LAPIC_TPRI-(LAPIC_ID+4) -_C_LABEL(lapic_tpr): - .space LAPIC_PPRI-LAPIC_TPRI -_C_LABEL(lapic_ppr): - .space LAPIC_ISR-LAPIC_PPRI -_C_LABEL(lapic_isr): - .space NBPG-LAPIC_ISR -#else -_C_LABEL(lapic_tpr): - .long 0 -#endif - -_C_LABEL(cpu): .long 0 # are we 386, 386sx, 486, 586 or 686 -_C_LABEL(cpu_id): .long 0 # saved from 'cpuid' instruction -_C_LABEL(cpu_pae): .long 0 # are we using PAE paging mode? -_C_LABEL(cpu_miscinfo): .long 0 # misc info (apic/brand id) from 'cpuid' -_C_LABEL(cpu_feature): .long 0 # feature flags from 'cpuid' instruction -_C_LABEL(ecpu_feature): .long 0 # extended feature flags from 'cpuid' -_C_LABEL(cpu_ecxfeature):.long 0 # ecx feature flags from 'cpuid' -_C_LABEL(ecpu_eaxfeature): .long 0 # extended eax feature flags -_C_LABEL(ecpu_ecxfeature): .long 0 # extended ecx feature flags -_C_LABEL(cpuid_level): .long -1 # max. 
lvl accepted by 'cpuid' insn -_C_LABEL(cpu_cache_eax):.long 0 -_C_LABEL(cpu_cache_ebx):.long 0 -_C_LABEL(cpu_cache_ecx):.long 0 -_C_LABEL(cpu_cache_edx):.long 0 -_C_LABEL(cpu_perf_eax): .long 0 # arch. perf. mon. flags from 'cpuid' -_C_LABEL(cpu_perf_ebx): .long 0 # arch. perf. mon. flags from 'cpuid' -_C_LABEL(cpu_perf_edx): .long 0 # arch. perf. mon. flags from 'cpuid' -_C_LABEL(cpu_apmi_edx): .long 0 # adv. power management info. 'cpuid' -_C_LABEL(cpu_vendor): .space 16 # vendor string returned by 'cpuid' instruction -_C_LABEL(cpu_brandstr): .space 48 # brand string returned by 'cpuid' -_C_LABEL(cold): .long 1 # cold till we are not -_C_LABEL(ssym): .long 0 # ptr to start of syms -_C_LABEL(esym): .long 0 # ptr to end of syms -_C_LABEL(cnvmem): .long 0 # conventional memory size -_C_LABEL(extmem): .long 0 # extended memory size -_C_LABEL(atdevbase): .long 0 # location of start of iomem in virtual -_C_LABEL(bootapiver): .long 0 # /boot API version -_C_LABEL(bootargc): .long 0 # /boot argc -_C_LABEL(bootargv): .long 0 # /boot argv -_C_LABEL(bootdev): .long 0 # device we booted from -_C_LABEL(proc0paddr): .long 0 -_C_LABEL(PTDpaddr): .long 0 # paddr of PTD, for libkvm -_C_LABEL(PTDsize): .long NBPG # size of PTD, for libkvm - .space 512 tmpstk: @@ -666,1092 +480,3 @@ begin: call _C_LABEL(main) /* NOTREACHED */ - -NENTRY(proc_trampoline) -#ifdef MULTIPROCESSOR - call _C_LABEL(proc_trampoline_mp) -#endif - movl $IPL_NONE,CPL - pushl %ebx - call *%esi - addl $4,%esp - INTRFASTEXIT - /* NOTREACHED */ - - /* This must come before any use of the CODEPATCH macros */ - .section .codepatch,"a" - .align 8 - .globl _C_LABEL(codepatch_begin) -_C_LABEL(codepatch_begin): - .previous - - .section .codepatchend,"a" - .globl _C_LABEL(codepatch_end) -_C_LABEL(codepatch_end): - .previous - -/*****************************************************************************/ - -/* - * Signal trampoline; copied to top of user stack. - */ - .section .rodata - .globl _C_LABEL(sigcode) -_C_LABEL(sigcode): - call *SIGF_HANDLER(%esp) - leal SIGF_SC(%esp),%eax # scp (the call may have clobbered the - # copy at SIGF_SCP(%esp)) - pushl %eax - pushl %eax # junk to fake return address - movl $SYS_sigreturn,%eax - int $0x80 # enter kernel with args on stack - .globl _C_LABEL(sigcoderet) -_C_LABEL(sigcoderet): - movl $SYS_exit,%eax - int $0x80 # exit if sigreturn fails - .globl _C_LABEL(esigcode) -_C_LABEL(esigcode): - - .globl _C_LABEL(sigfill) -_C_LABEL(sigfill): - int3 -_C_LABEL(esigfill): - - .data - .globl _C_LABEL(sigfillsiz) -_C_LABEL(sigfillsiz): - .long _C_LABEL(esigfill) - _C_LABEL(sigfill) - - .text - -/*****************************************************************************/ - -/* - * The following primitives are used to fill and copy regions of memory. - */ - -/* Frame pointer reserve on stack. */ -#ifdef DDB -#define FPADD 4 -#else -#define FPADD 0 -#endif - -/* - * kcopy(caddr_t from, caddr_t to, size_t len); - * Copy len bytes, abort on fault. - */ -ENTRY(kcopy) -#ifdef DDB - pushl %ebp - movl %esp,%ebp -#endif - pushl %esi - pushl %edi - GET_CURPCB(%eax) # load curpcb into eax and set on-fault - pushl PCB_ONFAULT(%eax) - movl $_C_LABEL(copy_fault), PCB_ONFAULT(%eax) - - movl 16+FPADD(%esp),%esi - movl 20+FPADD(%esp),%edi - movl 24+FPADD(%esp),%ecx - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax # overlapping? - jb 1f - shrl $2,%ecx # nope, copy forward by 32-bit words - rep - movsl - movl 24+FPADD(%esp),%ecx - andl $3,%ecx # any bytes left? - rep - movsb - - GET_CURPCB(%edx) # XXX save curpcb? 
- popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax -#ifdef DDB - leave -#endif - ret - - ALIGN_TEXT -1: addl %ecx,%edi # copy backward - addl %ecx,%esi - std - andl $3,%ecx # any fractional bytes? - decl %edi - decl %esi - rep - movsb - movl 24+FPADD(%esp),%ecx # copy remainder by 32-bit words - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - cld - - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax -#ifdef DDB - leave -#endif - ret - -/*****************************************************************************/ - -/* - * The following primitives are used to copy data in and out of the user's - * address space. - */ - -/* - * copyout(caddr_t from, caddr_t to, size_t len); - * Copy len bytes into the user's address space. - */ -ENTRY(copyout) -#ifdef DDB - pushl %ebp - movl %esp,%ebp -#endif - pushl %esi - pushl %edi - pushl $0 - - movl 16+FPADD(%esp),%esi - movl 20+FPADD(%esp),%edi - movl 24+FPADD(%esp),%eax - - /* - * We check that the end of the destination buffer is not past the end - * of the user's address space. If it's not, then we only need to - * check that each page is writable. The 486 will do this for us; the - * 386 will not. (We assume that pages in user space that are not - * writable by the user are not writable by the kernel either.) - */ - movl %edi,%edx - addl %eax,%edx - jc _C_LABEL(copy_fault) - cmpl $VM_MAXUSER_ADDRESS,%edx - ja _C_LABEL(copy_fault) - - GET_CURPCB(%edx) - movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx) - SMAP_STAC - - /* bcopy(%esi, %edi, %eax); */ - movl %eax,%ecx - shrl $2,%ecx - rep - movsl - movl %eax,%ecx - andl $3,%ecx - rep - movsb - - SMAP_CLAC - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax -#ifdef DDB - leave -#endif - ret - -/* - * copyin(caddr_t from, caddr_t to, size_t len); - * Copy len bytes from the user's address space. - */ -ENTRY(copyin) -#ifdef DDB - pushl %ebp - movl %esp,%ebp -#endif - pushl %esi - pushl %edi - GET_CURPCB(%eax) - pushl $0 - movl $_C_LABEL(copy_fault),PCB_ONFAULT(%eax) - SMAP_STAC - - movl 16+FPADD(%esp),%esi - movl 20+FPADD(%esp),%edi - movl 24+FPADD(%esp),%eax - - /* - * We check that the end of the destination buffer is not past the end - * of the user's address space. If it's not, then we only need to - * check that each page is readable, and the CPU will do that for us. - */ - movl %esi,%edx - addl %eax,%edx - jc _C_LABEL(copy_fault) - cmpl $VM_MAXUSER_ADDRESS,%edx - ja _C_LABEL(copy_fault) - - /* bcopy(%esi, %edi, %eax); */ - movl %eax,%ecx - shrl $2,%ecx - rep - movsl - movb %al,%cl - andb $3,%cl - rep - movsb - - SMAP_CLAC - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - xorl %eax,%eax -#ifdef DDB - leave -#endif - ret - -ENTRY(copy_fault) - SMAP_CLAC - GET_CURPCB(%edx) - popl PCB_ONFAULT(%edx) - popl %edi - popl %esi - movl $EFAULT,%eax -#ifdef DDB - leave -#endif - ret - -/* - * copyoutstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied); - * Copy a NUL-terminated string, at most maxlen characters long, into the - * user's address space. Return the number of characters copied (including the - * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else - * return 0 or EFAULT. 
- */ -ENTRY(copyoutstr) -#ifdef DDB - pushl %ebp - movl %esp,%ebp -#endif - pushl %esi - pushl %edi - - movl 12+FPADD(%esp),%esi # esi = from - movl 16+FPADD(%esp),%edi # edi = to - movl 20+FPADD(%esp),%edx # edx = maxlen - -5: GET_CURPCB(%eax) - movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax) - SMAP_STAC - /* - * Get min(%edx, VM_MAXUSER_ADDRESS-%edi). - */ - movl $VM_MAXUSER_ADDRESS,%eax - subl %edi,%eax - jbe _C_LABEL(copystr_fault) # die if CF == 1 || ZF == 1 - # i.e. make sure that %edi - # is below VM_MAXUSER_ADDRESS - - cmpl %edx,%eax - jae 1f - movl %eax,%edx - movl %eax,20+FPADD(%esp) - -1: incl %edx - -1: decl %edx - jz 2f - lodsb - stosb - testb %al,%al - jnz 1b - - /* Success -- 0 byte reached. */ - decl %edx - xorl %eax,%eax - jmp copystr_return - -2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */ - cmpl $VM_MAXUSER_ADDRESS,%edi - jae _C_LABEL(copystr_fault) - movl $ENAMETOOLONG,%eax - jmp copystr_return - -/* - * copyinstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied); - * Copy a NUL-terminated string, at most maxlen characters long, from the - * user's address space. Return the number of characters copied (including the - * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else - * return 0 or EFAULT. - */ -ENTRY(copyinstr) -#ifdef DDB - pushl %ebp - movl %esp,%ebp -#endif - pushl %esi - pushl %edi - GET_CURPCB(%ecx) - movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx) - SMAP_STAC - - movl 12+FPADD(%esp),%esi # %esi = from - movl 16+FPADD(%esp),%edi # %edi = to - movl 20+FPADD(%esp),%edx # %edx = maxlen - - /* - * Get min(%edx, VM_MAXUSER_ADDRESS-%esi). - */ - movl $VM_MAXUSER_ADDRESS,%eax - subl %esi,%eax - jbe _C_LABEL(copystr_fault) # Error if CF == 1 || ZF == 1 - # i.e. make sure that %esi - # is below VM_MAXUSER_ADDRESS - cmpl %edx,%eax - jae 1f - movl %eax,%edx - movl %eax,20+FPADD(%esp) - -1: incl %edx - -1: decl %edx - jz 2f - lodsb - stosb - testb %al,%al - jnz 1b - - /* Success -- 0 byte reached. */ - decl %edx - xorl %eax,%eax - jmp copystr_return - -2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */ - cmpl $VM_MAXUSER_ADDRESS,%esi - jae _C_LABEL(copystr_fault) - movl $ENAMETOOLONG,%eax - jmp copystr_return - -ENTRY(copystr_fault) - movl $EFAULT,%eax - -copystr_return: - SMAP_CLAC - /* Set *lencopied and return %eax. */ - GET_CURPCB(%ecx) - movl $0,PCB_ONFAULT(%ecx) - movl 20+FPADD(%esp),%ecx - subl %edx,%ecx - movl 24+FPADD(%esp),%edx - testl %edx,%edx - jz 8f - movl %ecx,(%edx) - -8: popl %edi - popl %esi -#ifdef DDB - leave -#endif - ret - -/* - * copystr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied); - * Copy a NUL-terminated string, at most maxlen characters long. Return the - * number of characters copied (including the NUL) in *lencopied. If the - * string is too long, return ENAMETOOLONG; else return 0. - */ -ENTRY(copystr) -#ifdef DDB - pushl %ebp - movl %esp,%ebp -#endif - pushl %esi - pushl %edi - - movl 12+FPADD(%esp),%esi # esi = from - movl 16+FPADD(%esp),%edi # edi = to - movl 20+FPADD(%esp),%edx # edx = maxlen - incl %edx - -1: decl %edx - jz 4f - lodsb - stosb - testb %al,%al - jnz 1b - - /* Success -- 0 byte reached. */ - decl %edx - xorl %eax,%eax - jmp 6f - -4: /* edx is zero -- return ENAMETOOLONG. */ - movl $ENAMETOOLONG,%eax - -6: /* Set *lencopied and return %eax. 
*/ - movl 20+FPADD(%esp),%ecx - subl %edx,%ecx - movl 24+FPADD(%esp),%edx - testl %edx,%edx - jz 7f - movl %ecx,(%edx) - -7: popl %edi - popl %esi -#ifdef DDB - leave -#endif - ret - -/*****************************************************************************/ - -/* - * The following is i386-specific nonsense. - */ - -/* - * void lgdt(struct region_descriptor *rdp); - * Change the global descriptor table. - */ -NENTRY(lgdt) - /* Reload the descriptor table. */ - movl 4(%esp),%eax - lgdt (%eax) - /* Flush the prefetch q. */ - jmp 1f - nop -1: /* Reload "stale" selectors. */ - movl $GSEL(GDATA_SEL, SEL_KPL),%eax - movw %ax,%ds - movw %ax,%es - movw %ax,%ss - movl $GSEL(GCPU_SEL, SEL_KPL),%eax - movw %ax,%fs - /* Reload code selector by doing intersegment return. */ - popl %eax - pushl $GSEL(GCODE_SEL, SEL_KPL) - pushl %eax - lret - -ENTRY(setjmp) - movl 4(%esp),%eax - movl %ebx,(%eax) # save ebx - movl %esp,4(%eax) # save esp - movl %ebp,8(%eax) # save ebp - movl %esi,12(%eax) # save esi - movl %edi,16(%eax) # save edi - movl (%esp),%edx # get rta - movl %edx,20(%eax) # save eip - xorl %eax,%eax # return (0); - ret - -ENTRY(longjmp) - movl 4(%esp),%eax - movl (%eax),%ebx # restore ebx - movl 4(%eax),%esp # restore esp - movl 8(%eax),%ebp # restore ebp - movl 12(%eax),%esi # restore esi - movl 16(%eax),%edi # restore edi - movl 20(%eax),%edx # get rta - movl %edx,(%esp) # put in return frame - xorl %eax,%eax # return (1); - incl %eax - ret - -/*****************************************************************************/ - -/* - * cpu_switchto(struct proc *old, struct proc *new) - * Switch from the "old" proc to the "new" proc. If "old" is NULL, we - * don't need to bother saving old context. - */ -ENTRY(cpu_switchto) - pushl %ebx - pushl %esi - pushl %edi - - movl 16(%esp), %esi - movl 20(%esp), %edi - - /* If old process exited, don't bother. */ - testl %esi,%esi - jz switch_exited - - /* Save old stack pointers. */ - movl P_ADDR(%esi),%ebx - movl %esp,PCB_ESP(%ebx) - movl %ebp,PCB_EBP(%ebx) - -switch_exited: - /* Restore saved context. */ - - /* No interrupts while loading new state. */ - cli - - /* Record new process. */ - movl %edi, CPUVAR(CURPROC) - movb $SONPROC, P_STAT(%edi) - - /* Restore stack pointers. */ - movl P_ADDR(%edi),%ebx - movl PCB_ESP(%ebx),%esp - movl PCB_EBP(%ebx),%ebp - - /* Record new pcb. */ - movl %ebx, CPUVAR(CURPCB) - - /* - * Activate the address space. The pcb copy of %cr3 will - * be refreshed from the pmap, and because we're - * curproc they'll both be reloaded into the CPU. - */ - pushl %edi - pushl %esi - call _C_LABEL(pmap_switch) - addl $8,%esp - - /* Load TSS info. */ - movl CPUVAR(GDT),%eax - movl P_MD_TSS_SEL(%edi),%edx - - /* Switch TSS. */ - andl $~0x0200,4-SEL_KPL(%eax,%edx,1) - ltr %dx - - /* Restore cr0 (including FPU state). */ - movl PCB_CR0(%ebx),%ecx -#ifdef MULTIPROCESSOR - /* - * If our floating point registers are on a different CPU, - * clear CR0_TS so we'll trap rather than reuse bogus state. - */ - movl CPUVAR(SELF), %esi - cmpl PCB_FPCPU(%ebx), %esi - jz 1f - orl $CR0_TS,%ecx -1: -#endif - movl %ecx,%cr0 - - /* Interrupts are okay again. 
*/ - sti - - popl %edi - popl %esi - popl %ebx - ret - -ENTRY(cpu_idle_enter) - movl _C_LABEL(cpu_idle_enter_fcn),%eax - cmpl $0,%eax - je 1f - jmpl *%eax -1: - ret - -ENTRY(cpu_idle_cycle) - movl _C_LABEL(cpu_idle_cycle_fcn),%eax - cmpl $0,%eax - je 1f - call *%eax - ret -1: - sti - hlt - ret - -ENTRY(cpu_idle_leave) - movl _C_LABEL(cpu_idle_leave_fcn),%eax - cmpl $0,%eax - je 1f - jmpl *%eax -1: - ret - -/* - * savectx(struct pcb *pcb); - * Update pcb, saving current processor state. - */ -ENTRY(savectx) - movl 4(%esp),%edx # edx = p->p_addr - - /* Save stack pointers. */ - movl %esp,PCB_ESP(%edx) - movl %ebp,PCB_EBP(%edx) - - movl PCB_FLAGS(%edx),%ecx - orl $PCB_SAVECTX,%ecx - movl %ecx,PCB_FLAGS(%edx) - - ret - -/*****************************************************************************/ - -/* - * Trap and fault vector routines - * - * On exit from the kernel to user mode, we always need to check for ASTs. In - * addition, we need to do this atomically; otherwise an interrupt may occur - * which causes an AST, but it won't get processed until the next kernel entry - * (possibly the next clock tick). Thus, we disable interrupt before checking, - * and only enable them again on the final `iret' or before calling the AST - * handler. - */ -#define IDTVEC(name) ALIGN_TEXT; .globl X##name; X##name: - -#define TRAP(a) pushl $(a) ; jmp _C_LABEL(alltraps) -#define ZTRAP(a) pushl $0 ; TRAP(a) - - - .text -IDTVEC(div) - ZTRAP(T_DIVIDE) -IDTVEC(dbg) - subl $4,%esp - pushl %eax - movl %dr6,%eax - movl %eax,4(%esp) - andb $~0xf,%al - movl %eax,%dr6 - popl %eax - TRAP(T_TRCTRAP) -IDTVEC(nmi) - ZTRAP(T_NMI) -IDTVEC(bpt) - ZTRAP(T_BPTFLT) -IDTVEC(ofl) - ZTRAP(T_OFLOW) -IDTVEC(bnd) - ZTRAP(T_BOUND) -IDTVEC(ill) - ZTRAP(T_PRIVINFLT) -IDTVEC(dna) -#if NNPX > 0 - pushl $0 # dummy error code - pushl $T_DNA - INTRENTRY -#ifdef MULTIPROCESSOR - pushl CPUVAR(SELF) -#else - pushl $_C_LABEL(cpu_info_primary) -#endif - call *_C_LABEL(npxdna_func) - addl $4,%esp - testl %eax,%eax - jz calltrap - INTRFASTEXIT -#else - ZTRAP(T_DNA) -#endif -IDTVEC(dble) - TRAP(T_DOUBLEFLT) -IDTVEC(fpusegm) - ZTRAP(T_FPOPFLT) -IDTVEC(tss) - TRAP(T_TSSFLT) -IDTVEC(missing) - TRAP(T_SEGNPFLT) -IDTVEC(stk) - TRAP(T_STKFLT) -IDTVEC(prot) - TRAP(T_PROTFLT) -IDTVEC(f00f_redirect) - pushl $T_PAGEFLT - INTRENTRY - testb $PGEX_U,TF_ERR(%esp) - jnz calltrap - movl %cr2,%eax - subl _C_LABEL(idt),%eax - cmpl $(6*8),%eax - jne calltrap - movb $T_PRIVINFLT,TF_TRAPNO(%esp) - jmp calltrap -IDTVEC(page) - TRAP(T_PAGEFLT) -IDTVEC(rsvd) - ZTRAP(T_RESERVED) -IDTVEC(mchk) - ZTRAP(T_MACHK) -IDTVEC(simd) - ZTRAP(T_XFTRAP) -IDTVEC(intrspurious) - /* - * The Pentium Pro local APIC may erroneously call this vector for a - * default IR7. Just ignore it. - * - * (The local APIC does this when CPL is raised while it's on the - * way to delivering an interrupt.. presumably enough has been set - * up that it's inconvenient to abort delivery completely..) - */ - iret -IDTVEC(fpu) -#if NNPX > 0 - /* - * Handle like an interrupt so that we can call npxintr to clear the - * error. It would be better to handle npx interrupts as traps but - * this is difficult for nested interrupts. 
- */ - subl $8,%esp /* space for tf_{err,trapno} */ - INTRENTRY - pushl CPL # if_ppl in intrframe - pushl %esp # push address of intrframe - incl _C_LABEL(uvmexp)+V_TRAP - call _C_LABEL(npxintr) - addl $8,%esp # pop address and if_ppl - INTRFASTEXIT -#else - ZTRAP(T_ARITHTRAP) -#endif -IDTVEC(align) - ZTRAP(T_ALIGNFLT) - /* 18 - 31 reserved for future exp */ - -/* - * If an error is detected during trap, syscall, or interrupt exit, trap() will - * change %eip to point to one of these labels. We clean up the stack, if - * necessary, and resume as if we were handling a general protection fault. - * This will cause the process to get a SIGBUS. - */ -NENTRY(resume_iret) - ZTRAP(T_PROTFLT) -NENTRY(resume_pop_ds) - pushl %es - movl $GSEL(GDATA_SEL, SEL_KPL),%eax - movw %ax,%es -NENTRY(resume_pop_es) - pushl %gs - xorl %eax,%eax /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */ - movw %ax,%gs -NENTRY(resume_pop_gs) - pushl %fs - movl $GSEL(GCPU_SEL, SEL_KPL),%eax - movw %ax,%fs -NENTRY(resume_pop_fs) - movl $T_PROTFLT,TF_TRAPNO(%esp) - sti - jmp calltrap - -/* - * All traps go through here. Call the generic trap handler, and - * check for ASTs afterwards. - */ -NENTRY(alltraps) - INTRENTRY - sti -calltrap: -#ifdef DIAGNOSTIC - movl CPL,%ebx -#endif /* DIAGNOSTIC */ -#if !defined(GPROF) && defined(DDBPROF) - cmpl $T_BPTFLT,TF_TRAPNO(%esp) - jne .Lreal_trap - - pushl %esp - call _C_LABEL(db_prof_hook) - addl $4,%esp - cmpl $1,%eax - jne .Lreal_trap - - /* - * Abuse the error field to indicate that INTRFASTEXIT needs - * to emulate the patched instruction. - */ - movl $INTR_FAKE_TRAP, TF_ERR(%esp) - jz 2f -.Lreal_trap: -#endif /* !defined(GPROF) && defined(DDBPROF) */ - pushl %esp - call _C_LABEL(trap) - addl $4,%esp -2: /* Check for ASTs on exit to user mode. */ - cli - CHECK_ASTPENDING(%ecx) - je 1f - testb $SEL_RPL,TF_CS(%esp) -#ifdef VM86 - jnz 5f - testl $PSL_VM,TF_EFLAGS(%esp) -#endif - jz 1f -5: CLEAR_ASTPENDING(%ecx) - sti - pushl %esp - call _C_LABEL(ast) - addl $4,%esp - jmp 2b -1: -#if !defined(GPROF) && defined(DDBPROF) - /* - * If we are returning from a probe trap we need to fix the - * stack layout and emulate the patched instruction. - * - * The code below does that by trashing %eax, so it MUST be - * restored afterward. - */ - cmpl $INTR_FAKE_TRAP, TF_ERR(%esp) - je .Lprobe_fixup -#endif /* !defined(GPROF) && defined(DDBPROF) */ -#ifndef DIAGNOSTIC - INTRFASTEXIT -#else - cmpl CPL,%ebx - jne 3f - INTRFASTEXIT -3: sti - pushl $4f - call _C_LABEL(printf) - addl $4,%esp -#if defined(DDB) && 0 - int $3 -#endif /* DDB */ - movl %ebx,CPL - jmp 2b -4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n" -#endif /* DIAGNOSTIC */ - -#if !defined(GPROF) && defined(DDBPROF) -.Lprobe_fixup: - /* Restore all register unwinding the stack. */ - INTR_RESTORE_ALL - - /* - * Use the space left by ``err'' and ``trapno'' to emulate - * "pushl %ebp". - * - * Temporarily save %eax. - */ - movl %eax,0(%esp) - - /* Shift hardware-saved registers: eip, cs, eflags */ - movl 8(%esp),%eax - movl %eax,4(%esp) - movl 12(%esp),%eax - movl %eax,8(%esp) - movl 16(%esp),%eax - movl %eax,12(%esp) - - /* Store %ebp in the expected location to finish the emulation. */ - movl %ebp,16(%esp) - - popl %eax - iret -#endif /* !defined(GPROF) && defined(DDBPROF) */ -/* - * Trap gate entry for syscall - */ -IDTVEC(syscall) - subl $8,%esp /* space for tf_{err,trapno} */ - INTRENTRY - pushl %esp - call _C_LABEL(syscall) - addl $4,%esp -2: /* Check for ASTs on exit to user mode. 
*/ - cli - CHECK_ASTPENDING(%ecx) - je 1f - /* Always returning to user mode here. */ - CLEAR_ASTPENDING(%ecx) - sti - pushl %esp - call _C_LABEL(ast) - addl $4,%esp - jmp 2b -1: INTRFASTEXIT - -#include <i386/i386/vector.s> -#include <i386/isa/icu.s> - -/* - * bzero (void *b, size_t len) - * write len zero bytes to the string b. - */ - -ENTRY(bzero) - pushl %edi - movl 8(%esp),%edi - movl 12(%esp),%edx - - xorl %eax,%eax /* set fill data to 0 */ - - /* - * if the string is too short, it's really not worth the overhead - * of aligning to word boundaries, etc. So we jump to a plain - * unaligned set. - */ - cmpl $16,%edx - jb 7f - - movl %edi,%ecx /* compute misalignment */ - negl %ecx - andl $3,%ecx - subl %ecx,%edx - rep /* zero until word aligned */ - stosb - - cmpl $CPUCLASS_486,_C_LABEL(cpu_class) - jne 8f - - movl %edx,%ecx - shrl $6,%ecx - jz 8f - andl $63,%edx -1: movl %eax,(%edi) - movl %eax,4(%edi) - movl %eax,8(%edi) - movl %eax,12(%edi) - movl %eax,16(%edi) - movl %eax,20(%edi) - movl %eax,24(%edi) - movl %eax,28(%edi) - movl %eax,32(%edi) - movl %eax,36(%edi) - movl %eax,40(%edi) - movl %eax,44(%edi) - movl %eax,48(%edi) - movl %eax,52(%edi) - movl %eax,56(%edi) - movl %eax,60(%edi) - addl $64,%edi - decl %ecx - jnz 1b - -8: movl %edx,%ecx /* zero by words */ - shrl $2,%ecx - andl $3,%edx - rep - stosl - -7: movl %edx,%ecx /* zero remainder bytes */ - rep - stosb - - popl %edi - ret - -#if !defined(SMALL_KERNEL) -ENTRY(sse2_pagezero) - pushl %ebx - movl 8(%esp),%ecx - movl %ecx,%eax - addl $4096,%eax - xor %ebx,%ebx -1: - movnti %ebx,(%ecx) - addl $4,%ecx - cmpl %ecx,%eax - jne 1b - sfence - popl %ebx - ret - -ENTRY(i686_pagezero) - pushl %edi - pushl %ebx - - movl 12(%esp), %edi - movl $1024, %ecx - - ALIGN_TEXT -1: - xorl %eax, %eax - repe - scasl - jnz 2f - - popl %ebx - popl %edi - ret - - ALIGN_TEXT - -2: - incl %ecx - subl $4, %edi - - movl %ecx, %edx - cmpl $16, %ecx - - jge 3f - - movl %edi, %ebx - andl $0x3f, %ebx - shrl %ebx - shrl %ebx - movl $16, %ecx - subl %ebx, %ecx - -3: - subl %ecx, %edx - rep - stosl - - movl %edx, %ecx - testl %edx, %edx - jnz 1b - - popl %ebx - popl %edi - ret -#endif - -/* - * int cpu_paenable(void *); - */ -ENTRY(cpu_paenable) - movl $-1, %eax - testl $CPUID_PAE, _C_LABEL(cpu_feature) - jz 1f - - pushl %esi - pushl %edi - movl 12(%esp), %esi - movl %cr3, %edi - orl $0xfe0, %edi /* PDPT will be in the last four slots! */ - movl %edi, %cr3 - addl $KERNBASE, %edi /* and make it back virtual again */ - movl $8, %ecx - rep - movsl - - movl $MSR_EFER, %ecx - rdmsr - orl $EFER_NXE, %eax - wrmsr - - movl %cr4, %eax - orl $CR4_PAE, %eax - movl %eax, %cr4 /* BANG!!! */ - - movl 12(%esp), %eax - subl $KERNBASE, %eax - movl %eax, %cr3 /* reload real PDPT */ - movl $4*NBPG, %eax - movl %eax, _C_LABEL(PTDsize) - - xorl %eax, %eax - popl %edi - popl %esi -1: - ret - -#if NLAPIC > 0 -#include <i386/i386/apicvec.s> -#endif - -#include <i386/i386/mutex.S> - -.globl _C_LABEL(_stac) -_C_LABEL(_stac): - stac - -.globl _C_LABEL(_clac) -_C_LABEL(_clac): - clac Index: arch/i386/i386/locore2.S =================================================================== RCS file: arch/i386/i386/locore2.S diff -N arch/i386/i386/locore2.S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ arch/i386/i386/locore2.S 30 May 2017 07:52:22 -0000 @@ -0,0 +1,1346 @@ +/* $OpenBSD: locore.s,v 1.173 2017/05/12 08:46:28 mpi Exp $ */ +/* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ + +/*- + * Copyright (c) 1993, 1994, 1995 Charles M. Hannum. All rights reserved. 
+ * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)locore.s 7.3 (Berkeley) 5/13/91 + */ + +#include "npx.h" +#include "assym.h" +#include "apm.h" +#include "lapic.h" +#include "ksyms.h" + +#include <sys/errno.h> +#include <sys/syscall.h> + +#include <machine/codepatch.h> +#include <machine/cputypes.h> +#include <machine/param.h> +#include <machine/pte.h> +#include <machine/segments.h> +#include <machine/specialreg.h> +#include <machine/trap.h> + +#include <dev/isa/isareg.h> + +#if NLAPIC > 0 +#include <machine/i82489reg.h> +#endif + +#ifndef SMALL_KERNEL +/* + * As stac/clac SMAP instructions are 3 bytes, we want the fastest + * 3 byte nop sequence possible here. This will be replaced by + * stac/clac instructions if SMAP is detected after booting. + * + * Intel documents multi-byte NOP sequences as being available + * on all family 0x6 and 0xf processors (ie 686+) + * So use 3 of the single byte nops for compatibility + */ +#define SMAP_NOP .byte 0x90, 0x90, 0x90 +#define SMAP_STAC CODEPATCH_START ;\ + SMAP_NOP ;\ + CODEPATCH_END(CPTAG_STAC) +#define SMAP_CLAC CODEPATCH_START ;\ + SMAP_NOP ;\ + CODEPATCH_END(CPTAG_CLAC) + +#else + +#define SMAP_STAC +#define SMAP_CLAC + +#endif + + +/* + * override user-land alignment before including asm.h + */ + +#define ALIGN_DATA .align 4 +#define ALIGN_TEXT .align 4,0x90 /* 4-byte boundaries, NOP-filled */ +#define SUPERALIGN_TEXT .align 16,0x90 /* 16-byte boundaries better for 486 */ +#define _ALIGN_TEXT ALIGN_TEXT +#include <machine/asm.h> + +#define CPL _C_LABEL(lapic_tpr) + +#define GET_CURPCB(reg) \ + movl CPUVAR(CURPCB), reg + +#define CHECK_ASTPENDING(treg) \ + movl CPUVAR(CURPROC),treg ; \ + cmpl $0, treg ; \ + je 1f ; \ + cmpl $0,P_MD_ASTPENDING(treg) ; \ + 1: + +#define CLEAR_ASTPENDING(cpreg) \ + movl $0,P_MD_ASTPENDING(cpreg) + +/* + * These are used on interrupt or trap entry or exit. 
+ */ +#define INTRENTRY \ + cld ; \ + pushl %eax ; \ + pushl %ecx ; \ + pushl %edx ; \ + pushl %ebx ; \ + pushl %ebp ; \ + pushl %esi ; \ + pushl %edi ; \ + pushl %ds ; \ + pushl %es ; \ + pushl %gs ; \ + movl $GSEL(GDATA_SEL, SEL_KPL),%eax ; \ + movw %ax,%ds ; \ + movw %ax,%es ; \ + xorl %eax,%eax ; /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */ \ + movw %ax,%gs ; \ + pushl %fs ; \ + movl $GSEL(GCPU_SEL, SEL_KPL),%eax ; \ + movw %ax,%fs + +#define INTR_RESTORE_ALL \ + popl %fs ; \ + popl %gs ; \ + popl %es ; \ + popl %ds ; \ + popl %edi ; \ + popl %esi ; \ + popl %ebp ; \ + popl %ebx ; \ + popl %edx ; \ + popl %ecx ; \ + popl %eax + +#define INTRFASTEXIT \ + INTR_RESTORE_ALL ;\ + addl $8,%esp ; \ + iret + +#define INTR_FAKE_TRAP 0xbadabada + +/* + * PTmap is recursive pagemap at top of virtual address space. + * Within PTmap, the page directory can be found (third indirection). + */ + .globl _C_LABEL(PTmap), _C_LABEL(PTD), _C_LABEL(PTDpde) + .set _C_LABEL(PTmap), (PDSLOT_PTE << PDSHIFT) + .set _C_LABEL(PTD), (_C_LABEL(PTmap) + PDSLOT_PTE * NBPG) + .set _C_LABEL(PTDpde), (_C_LABEL(PTD) + PDSLOT_PTE * 4) # XXX 4 == sizeof pde + +/* + * APTmap, APTD is the alternate recursive pagemap. + * It's used when modifying another process's page tables. + */ + .globl _C_LABEL(APTmap), _C_LABEL(APTD), _C_LABEL(APTDpde) + .set _C_LABEL(APTmap), (PDSLOT_APTE << PDSHIFT) + .set _C_LABEL(APTD), (_C_LABEL(APTmap) + PDSLOT_APTE * NBPG) + # XXX 4 == sizeof pde + .set _C_LABEL(APTDpde), (_C_LABEL(PTD) + PDSLOT_APTE * 4) + + + .data + + .globl _C_LABEL(cpu), _C_LABEL(cpu_id), _C_LABEL(cpu_vendor) + .globl _C_LABEL(cpu_brandstr) + .globl _C_LABEL(cpuid_level) + .globl _C_LABEL(cpu_miscinfo) + .globl _C_LABEL(cpu_feature), _C_LABEL(cpu_ecxfeature) + .globl _C_LABEL(ecpu_feature), _C_LABEL(ecpu_eaxfeature) + .globl _C_LABEL(ecpu_ecxfeature) + .globl _C_LABEL(cpu_cache_eax), _C_LABEL(cpu_cache_ebx) + .globl _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx) + .globl _C_LABEL(cpu_perf_eax) + .globl _C_LABEL(cpu_perf_ebx) + .globl _C_LABEL(cpu_perf_edx) + .globl _C_LABEL(cpu_apmi_edx) + .globl _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem) + .globl _C_LABEL(cpu_pae) + .globl _C_LABEL(esym) + .globl _C_LABEL(ssym) + .globl _C_LABEL(nkptp_max) + .globl _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase) + .globl _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr), _C_LABEL(PTDsize) + .globl _C_LABEL(gdt) + .globl _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv) + .globl _C_LABEL(lapic_tpr) + +#if NLAPIC > 0 + .align NBPG + .globl _C_LABEL(local_apic) +_C_LABEL(local_apic): + .space LAPIC_ID + .globl _C_LABEL(lapic_ppr) +_C_LABEL(lapic_id): + .long 0x00000000 + .space LAPIC_TPRI-(LAPIC_ID+4) +_C_LABEL(lapic_tpr): + .space LAPIC_PPRI-LAPIC_TPRI +_C_LABEL(lapic_ppr): + .space LAPIC_ISR-LAPIC_PPRI +_C_LABEL(lapic_isr): + .space NBPG-LAPIC_ISR +#else +_C_LABEL(lapic_tpr): + .long 0 +#endif + +_C_LABEL(cpu): .long 0 # are we 386, 386sx, 486, 586 or 686 +_C_LABEL(cpu_id): .long 0 # saved from 'cpuid' instruction +_C_LABEL(cpu_pae): .long 0 # are we using PAE paging mode? +_C_LABEL(cpu_miscinfo): .long 0 # misc info (apic/brand id) from 'cpuid' +_C_LABEL(cpu_feature): .long 0 # feature flags from 'cpuid' instruction +_C_LABEL(ecpu_feature): .long 0 # extended feature flags from 'cpuid' +_C_LABEL(cpu_ecxfeature):.long 0 # ecx feature flags from 'cpuid' +_C_LABEL(ecpu_eaxfeature): .long 0 # extended eax feature flags +_C_LABEL(ecpu_ecxfeature): .long 0 # extended ecx feature flags +_C_LABEL(cpuid_level): .long -1 # max. 
lvl accepted by 'cpuid' insn +_C_LABEL(cpu_cache_eax):.long 0 +_C_LABEL(cpu_cache_ebx):.long 0 +_C_LABEL(cpu_cache_ecx):.long 0 +_C_LABEL(cpu_cache_edx):.long 0 +_C_LABEL(cpu_perf_eax): .long 0 # arch. perf. mon. flags from 'cpuid' +_C_LABEL(cpu_perf_ebx): .long 0 # arch. perf. mon. flags from 'cpuid' +_C_LABEL(cpu_perf_edx): .long 0 # arch. perf. mon. flags from 'cpuid' +_C_LABEL(cpu_apmi_edx): .long 0 # adv. power management info. 'cpuid' +_C_LABEL(cpu_vendor): .space 16 # vendor string returned by 'cpuid' instruction +_C_LABEL(cpu_brandstr): .space 48 # brand string returned by 'cpuid' +_C_LABEL(cold): .long 1 # cold till we are not +_C_LABEL(ssym): .long 0 # ptr to start of syms +_C_LABEL(esym): .long 0 # ptr to end of syms +_C_LABEL(cnvmem): .long 0 # conventional memory size +_C_LABEL(extmem): .long 0 # extended memory size +_C_LABEL(atdevbase): .long 0 # location of start of iomem in virtual +_C_LABEL(bootapiver): .long 0 # /boot API version +_C_LABEL(bootargc): .long 0 # /boot argc +_C_LABEL(bootargv): .long 0 # /boot argv +_C_LABEL(bootdev): .long 0 # device we booted from +_C_LABEL(proc0paddr): .long 0 +_C_LABEL(PTDpaddr): .long 0 # paddr of PTD, for libkvm +_C_LABEL(PTDsize): .long NBPG # size of PTD, for libkvm + + .text + +NENTRY(proc_trampoline) +#ifdef MULTIPROCESSOR + call _C_LABEL(proc_trampoline_mp) +#endif + movl $IPL_NONE,CPL + pushl %ebx + call *%esi + addl $4,%esp + INTRFASTEXIT + /* NOTREACHED */ + + /* This must come before any use of the CODEPATCH macros */ + .section .codepatch,"a" + .align 8 + .globl _C_LABEL(codepatch_begin) +_C_LABEL(codepatch_begin): + .previous + + .section .codepatchend,"a" + .globl _C_LABEL(codepatch_end) +_C_LABEL(codepatch_end): + .previous + +/*****************************************************************************/ + +/* + * Signal trampoline; copied to top of user stack. + */ + .section .rodata + .globl _C_LABEL(sigcode) +_C_LABEL(sigcode): + call *SIGF_HANDLER(%esp) + leal SIGF_SC(%esp),%eax # scp (the call may have clobbered the + # copy at SIGF_SCP(%esp)) + pushl %eax + pushl %eax # junk to fake return address + movl $SYS_sigreturn,%eax + int $0x80 # enter kernel with args on stack + .globl _C_LABEL(sigcoderet) +_C_LABEL(sigcoderet): + movl $SYS_exit,%eax + int $0x80 # exit if sigreturn fails + .globl _C_LABEL(esigcode) +_C_LABEL(esigcode): + + .globl _C_LABEL(sigfill) +_C_LABEL(sigfill): + int3 +_C_LABEL(esigfill): + + .data + .globl _C_LABEL(sigfillsiz) +_C_LABEL(sigfillsiz): + .long _C_LABEL(esigfill) - _C_LABEL(sigfill) + + .text + +/*****************************************************************************/ + +/* + * The following primitives are used to fill and copy regions of memory. + */ + +/* Frame pointer reserve on stack. */ +#ifdef DDB +#define FPADD 4 +#else +#define FPADD 0 +#endif + +/* + * kcopy(caddr_t from, caddr_t to, size_t len); + * Copy len bytes, abort on fault. + */ +ENTRY(kcopy) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif + pushl %esi + pushl %edi + GET_CURPCB(%eax) # load curpcb into eax and set on-fault + pushl PCB_ONFAULT(%eax) + movl $_C_LABEL(copy_fault), PCB_ONFAULT(%eax) + + movl 16+FPADD(%esp),%esi + movl 20+FPADD(%esp),%edi + movl 24+FPADD(%esp),%ecx + movl %edi,%eax + subl %esi,%eax + cmpl %ecx,%eax # overlapping? + jb 1f + shrl $2,%ecx # nope, copy forward by 32-bit words + rep + movsl + movl 24+FPADD(%esp),%ecx + andl $3,%ecx # any bytes left? + rep + movsb + + GET_CURPCB(%edx) # XXX save curpcb? 
+ popl PCB_ONFAULT(%edx) + popl %edi + popl %esi + xorl %eax,%eax +#ifdef DDB + leave +#endif + ret + + ALIGN_TEXT +1: addl %ecx,%edi # copy backward + addl %ecx,%esi + std + andl $3,%ecx # any fractional bytes? + decl %edi + decl %esi + rep + movsb + movl 24+FPADD(%esp),%ecx # copy remainder by 32-bit words + shrl $2,%ecx + subl $3,%esi + subl $3,%edi + rep + movsl + cld + + GET_CURPCB(%edx) + popl PCB_ONFAULT(%edx) + popl %edi + popl %esi + xorl %eax,%eax +#ifdef DDB + leave +#endif + ret + +/*****************************************************************************/ + +/* + * The following primitives are used to copy data in and out of the user's + * address space. + */ + +/* + * copyout(caddr_t from, caddr_t to, size_t len); + * Copy len bytes into the user's address space. + */ +ENTRY(copyout) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif + pushl %esi + pushl %edi + pushl $0 + + movl 16+FPADD(%esp),%esi + movl 20+FPADD(%esp),%edi + movl 24+FPADD(%esp),%eax + + /* + * We check that the end of the destination buffer is not past the end + * of the user's address space. If it's not, then we only need to + * check that each page is writable. The 486 will do this for us; the + * 386 will not. (We assume that pages in user space that are not + * writable by the user are not writable by the kernel either.) + */ + movl %edi,%edx + addl %eax,%edx + jc _C_LABEL(copy_fault) + cmpl $VM_MAXUSER_ADDRESS,%edx + ja _C_LABEL(copy_fault) + + GET_CURPCB(%edx) + movl $_C_LABEL(copy_fault),PCB_ONFAULT(%edx) + SMAP_STAC + + /* bcopy(%esi, %edi, %eax); */ + movl %eax,%ecx + shrl $2,%ecx + rep + movsl + movl %eax,%ecx + andl $3,%ecx + rep + movsb + + SMAP_CLAC + popl PCB_ONFAULT(%edx) + popl %edi + popl %esi + xorl %eax,%eax +#ifdef DDB + leave +#endif + ret + +/* + * copyin(caddr_t from, caddr_t to, size_t len); + * Copy len bytes from the user's address space. + */ +ENTRY(copyin) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif + pushl %esi + pushl %edi + GET_CURPCB(%eax) + pushl $0 + movl $_C_LABEL(copy_fault),PCB_ONFAULT(%eax) + SMAP_STAC + + movl 16+FPADD(%esp),%esi + movl 20+FPADD(%esp),%edi + movl 24+FPADD(%esp),%eax + + /* + * We check that the end of the destination buffer is not past the end + * of the user's address space. If it's not, then we only need to + * check that each page is readable, and the CPU will do that for us. + */ + movl %esi,%edx + addl %eax,%edx + jc _C_LABEL(copy_fault) + cmpl $VM_MAXUSER_ADDRESS,%edx + ja _C_LABEL(copy_fault) + + /* bcopy(%esi, %edi, %eax); */ + movl %eax,%ecx + shrl $2,%ecx + rep + movsl + movb %al,%cl + andb $3,%cl + rep + movsb + + SMAP_CLAC + GET_CURPCB(%edx) + popl PCB_ONFAULT(%edx) + popl %edi + popl %esi + xorl %eax,%eax +#ifdef DDB + leave +#endif + ret + +ENTRY(copy_fault) + SMAP_CLAC + GET_CURPCB(%edx) + popl PCB_ONFAULT(%edx) + popl %edi + popl %esi + movl $EFAULT,%eax +#ifdef DDB + leave +#endif + ret + +/* + * copyoutstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied); + * Copy a NUL-terminated string, at most maxlen characters long, into the + * user's address space. Return the number of characters copied (including the + * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else + * return 0 or EFAULT. 
+ */ +ENTRY(copyoutstr) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif + pushl %esi + pushl %edi + + movl 12+FPADD(%esp),%esi # esi = from + movl 16+FPADD(%esp),%edi # edi = to + movl 20+FPADD(%esp),%edx # edx = maxlen + +5: GET_CURPCB(%eax) + movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax) + SMAP_STAC + /* + * Get min(%edx, VM_MAXUSER_ADDRESS-%edi). + */ + movl $VM_MAXUSER_ADDRESS,%eax + subl %edi,%eax + jbe _C_LABEL(copystr_fault) # die if CF == 1 || ZF == 1 + # i.e. make sure that %edi + # is below VM_MAXUSER_ADDRESS + + cmpl %edx,%eax + jae 1f + movl %eax,%edx + movl %eax,20+FPADD(%esp) + +1: incl %edx + +1: decl %edx + jz 2f + lodsb + stosb + testb %al,%al + jnz 1b + + /* Success -- 0 byte reached. */ + decl %edx + xorl %eax,%eax + jmp copystr_return + +2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */ + cmpl $VM_MAXUSER_ADDRESS,%edi + jae _C_LABEL(copystr_fault) + movl $ENAMETOOLONG,%eax + jmp copystr_return + +/* + * copyinstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied); + * Copy a NUL-terminated string, at most maxlen characters long, from the + * user's address space. Return the number of characters copied (including the + * NUL) in *lencopied. If the string is too long, return ENAMETOOLONG; else + * return 0 or EFAULT. + */ +ENTRY(copyinstr) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif + pushl %esi + pushl %edi + GET_CURPCB(%ecx) + movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx) + SMAP_STAC + + movl 12+FPADD(%esp),%esi # %esi = from + movl 16+FPADD(%esp),%edi # %edi = to + movl 20+FPADD(%esp),%edx # %edx = maxlen + + /* + * Get min(%edx, VM_MAXUSER_ADDRESS-%esi). + */ + movl $VM_MAXUSER_ADDRESS,%eax + subl %esi,%eax + jbe _C_LABEL(copystr_fault) # Error if CF == 1 || ZF == 1 + # i.e. make sure that %esi + # is below VM_MAXUSER_ADDRESS + cmpl %edx,%eax + jae 1f + movl %eax,%edx + movl %eax,20+FPADD(%esp) + +1: incl %edx + +1: decl %edx + jz 2f + lodsb + stosb + testb %al,%al + jnz 1b + + /* Success -- 0 byte reached. */ + decl %edx + xorl %eax,%eax + jmp copystr_return + +2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */ + cmpl $VM_MAXUSER_ADDRESS,%esi + jae _C_LABEL(copystr_fault) + movl $ENAMETOOLONG,%eax + jmp copystr_return + +ENTRY(copystr_fault) + movl $EFAULT,%eax + +copystr_return: + SMAP_CLAC + /* Set *lencopied and return %eax. */ + GET_CURPCB(%ecx) + movl $0,PCB_ONFAULT(%ecx) + movl 20+FPADD(%esp),%ecx + subl %edx,%ecx + movl 24+FPADD(%esp),%edx + testl %edx,%edx + jz 8f + movl %ecx,(%edx) + +8: popl %edi + popl %esi +#ifdef DDB + leave +#endif + ret + +/* + * copystr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied); + * Copy a NUL-terminated string, at most maxlen characters long. Return the + * number of characters copied (including the NUL) in *lencopied. If the + * string is too long, return ENAMETOOLONG; else return 0. + */ +ENTRY(copystr) +#ifdef DDB + pushl %ebp + movl %esp,%ebp +#endif + pushl %esi + pushl %edi + + movl 12+FPADD(%esp),%esi # esi = from + movl 16+FPADD(%esp),%edi # edi = to + movl 20+FPADD(%esp),%edx # edx = maxlen + incl %edx + +1: decl %edx + jz 4f + lodsb + stosb + testb %al,%al + jnz 1b + + /* Success -- 0 byte reached. */ + decl %edx + xorl %eax,%eax + jmp 6f + +4: /* edx is zero -- return ENAMETOOLONG. */ + movl $ENAMETOOLONG,%eax + +6: /* Set *lencopied and return %eax. 
*/ + movl 20+FPADD(%esp),%ecx + subl %edx,%ecx + movl 24+FPADD(%esp),%edx + testl %edx,%edx + jz 7f + movl %ecx,(%edx) + +7: popl %edi + popl %esi +#ifdef DDB + leave +#endif + ret + +/*****************************************************************************/ + +/* + * The following is i386-specific nonsense. + */ + +/* + * void lgdt(struct region_descriptor *rdp); + * Change the global descriptor table. + */ +NENTRY(lgdt) + /* Reload the descriptor table. */ + movl 4(%esp),%eax + lgdt (%eax) + /* Flush the prefetch q. */ + jmp 1f + nop +1: /* Reload "stale" selectors. */ + movl $GSEL(GDATA_SEL, SEL_KPL),%eax + movw %ax,%ds + movw %ax,%es + movw %ax,%ss + movl $GSEL(GCPU_SEL, SEL_KPL),%eax + movw %ax,%fs + /* Reload code selector by doing intersegment return. */ + popl %eax + pushl $GSEL(GCODE_SEL, SEL_KPL) + pushl %eax + lret + +ENTRY(setjmp) + movl 4(%esp),%eax + movl %ebx,(%eax) # save ebx + movl %esp,4(%eax) # save esp + movl %ebp,8(%eax) # save ebp + movl %esi,12(%eax) # save esi + movl %edi,16(%eax) # save edi + movl (%esp),%edx # get rta + movl %edx,20(%eax) # save eip + xorl %eax,%eax # return (0); + ret + +ENTRY(longjmp) + movl 4(%esp),%eax + movl (%eax),%ebx # restore ebx + movl 4(%eax),%esp # restore esp + movl 8(%eax),%ebp # restore ebp + movl 12(%eax),%esi # restore esi + movl 16(%eax),%edi # restore edi + movl 20(%eax),%edx # get rta + movl %edx,(%esp) # put in return frame + xorl %eax,%eax # return (1); + incl %eax + ret + +/*****************************************************************************/ + +/* + * cpu_switchto(struct proc *old, struct proc *new) + * Switch from the "old" proc to the "new" proc. If "old" is NULL, we + * don't need to bother saving old context. + */ +ENTRY(cpu_switchto) + pushl %ebx + pushl %esi + pushl %edi + + movl 16(%esp), %esi + movl 20(%esp), %edi + + /* If old process exited, don't bother. */ + testl %esi,%esi + jz switch_exited + + /* Save old stack pointers. */ + movl P_ADDR(%esi),%ebx + movl %esp,PCB_ESP(%ebx) + movl %ebp,PCB_EBP(%ebx) + +switch_exited: + /* Restore saved context. */ + + /* No interrupts while loading new state. */ + cli + + /* Record new process. */ + movl %edi, CPUVAR(CURPROC) + movb $SONPROC, P_STAT(%edi) + + /* Restore stack pointers. */ + movl P_ADDR(%edi),%ebx + movl PCB_ESP(%ebx),%esp + movl PCB_EBP(%ebx),%ebp + + /* Record new pcb. */ + movl %ebx, CPUVAR(CURPCB) + + /* + * Activate the address space. The pcb copy of %cr3 will + * be refreshed from the pmap, and because we're + * curproc they'll both be reloaded into the CPU. + */ + pushl %edi + pushl %esi + call _C_LABEL(pmap_switch) + addl $8,%esp + + /* Load TSS info. */ + movl CPUVAR(GDT),%eax + movl P_MD_TSS_SEL(%edi),%edx + + /* Switch TSS. */ + andl $~0x0200,4-SEL_KPL(%eax,%edx,1) + ltr %dx + + /* Restore cr0 (including FPU state). */ + movl PCB_CR0(%ebx),%ecx +#ifdef MULTIPROCESSOR + /* + * If our floating point registers are on a different CPU, + * clear CR0_TS so we'll trap rather than reuse bogus state. + */ + movl CPUVAR(SELF), %esi + cmpl PCB_FPCPU(%ebx), %esi + jz 1f + orl $CR0_TS,%ecx +1: +#endif + movl %ecx,%cr0 + + /* Interrupts are okay again. 
*/ + sti + + popl %edi + popl %esi + popl %ebx + ret + +ENTRY(cpu_idle_enter) + movl _C_LABEL(cpu_idle_enter_fcn),%eax + cmpl $0,%eax + je 1f + jmpl *%eax +1: + ret + +ENTRY(cpu_idle_cycle) + movl _C_LABEL(cpu_idle_cycle_fcn),%eax + cmpl $0,%eax + je 1f + call *%eax + ret +1: + sti + hlt + ret + +ENTRY(cpu_idle_leave) + movl _C_LABEL(cpu_idle_leave_fcn),%eax + cmpl $0,%eax + je 1f + jmpl *%eax +1: + ret + +/* + * savectx(struct pcb *pcb); + * Update pcb, saving current processor state. + */ +ENTRY(savectx) + movl 4(%esp),%edx # edx = p->p_addr + + /* Save stack pointers. */ + movl %esp,PCB_ESP(%edx) + movl %ebp,PCB_EBP(%edx) + + movl PCB_FLAGS(%edx),%ecx + orl $PCB_SAVECTX,%ecx + movl %ecx,PCB_FLAGS(%edx) + + ret + +/*****************************************************************************/ + +/* + * Trap and fault vector routines + * + * On exit from the kernel to user mode, we always need to check for ASTs. In + * addition, we need to do this atomically; otherwise an interrupt may occur + * which causes an AST, but it won't get processed until the next kernel entry + * (possibly the next clock tick). Thus, we disable interrupt before checking, + * and only enable them again on the final `iret' or before calling the AST + * handler. + */ +#define IDTVEC(name) ALIGN_TEXT; .globl X##name; X##name: + +#define TRAP(a) pushl $(a) ; jmp _C_LABEL(alltraps) +#define ZTRAP(a) pushl $0 ; TRAP(a) + + + .text +IDTVEC(div) + ZTRAP(T_DIVIDE) +IDTVEC(dbg) + subl $4,%esp + pushl %eax + movl %dr6,%eax + movl %eax,4(%esp) + andb $~0xf,%al + movl %eax,%dr6 + popl %eax + TRAP(T_TRCTRAP) +IDTVEC(nmi) + ZTRAP(T_NMI) +IDTVEC(bpt) + ZTRAP(T_BPTFLT) +IDTVEC(ofl) + ZTRAP(T_OFLOW) +IDTVEC(bnd) + ZTRAP(T_BOUND) +IDTVEC(ill) + ZTRAP(T_PRIVINFLT) +IDTVEC(dna) +#if NNPX > 0 + pushl $0 # dummy error code + pushl $T_DNA + INTRENTRY +#ifdef MULTIPROCESSOR + pushl CPUVAR(SELF) +#else + pushl $_C_LABEL(cpu_info_primary) +#endif + call *_C_LABEL(npxdna_func) + addl $4,%esp + testl %eax,%eax + jz calltrap + INTRFASTEXIT +#else + ZTRAP(T_DNA) +#endif +IDTVEC(dble) + TRAP(T_DOUBLEFLT) +IDTVEC(fpusegm) + ZTRAP(T_FPOPFLT) +IDTVEC(tss) + TRAP(T_TSSFLT) +IDTVEC(missing) + TRAP(T_SEGNPFLT) +IDTVEC(stk) + TRAP(T_STKFLT) +IDTVEC(prot) + TRAP(T_PROTFLT) +IDTVEC(f00f_redirect) + pushl $T_PAGEFLT + INTRENTRY + testb $PGEX_U,TF_ERR(%esp) + jnz calltrap + movl %cr2,%eax + subl _C_LABEL(idt),%eax + cmpl $(6*8),%eax + jne calltrap + movb $T_PRIVINFLT,TF_TRAPNO(%esp) + jmp calltrap +IDTVEC(page) + TRAP(T_PAGEFLT) +IDTVEC(rsvd) + ZTRAP(T_RESERVED) +IDTVEC(mchk) + ZTRAP(T_MACHK) +IDTVEC(simd) + ZTRAP(T_XFTRAP) +IDTVEC(intrspurious) + /* + * The Pentium Pro local APIC may erroneously call this vector for a + * default IR7. Just ignore it. + * + * (The local APIC does this when CPL is raised while it's on the + * way to delivering an interrupt.. presumably enough has been set + * up that it's inconvenient to abort delivery completely..) + */ + iret +IDTVEC(fpu) +#if NNPX > 0 + /* + * Handle like an interrupt so that we can call npxintr to clear the + * error. It would be better to handle npx interrupts as traps but + * this is difficult for nested interrupts. 
+ */ + subl $8,%esp /* space for tf_{err,trapno} */ + INTRENTRY + pushl CPL # if_ppl in intrframe + pushl %esp # push address of intrframe + incl _C_LABEL(uvmexp)+V_TRAP + call _C_LABEL(npxintr) + addl $8,%esp # pop address and if_ppl + INTRFASTEXIT +#else + ZTRAP(T_ARITHTRAP) +#endif +IDTVEC(align) + ZTRAP(T_ALIGNFLT) + /* 18 - 31 reserved for future exp */ + +/* + * If an error is detected during trap, syscall, or interrupt exit, trap() will + * change %eip to point to one of these labels. We clean up the stack, if + * necessary, and resume as if we were handling a general protection fault. + * This will cause the process to get a SIGBUS. + */ +NENTRY(resume_iret) + ZTRAP(T_PROTFLT) +NENTRY(resume_pop_ds) + pushl %es + movl $GSEL(GDATA_SEL, SEL_KPL),%eax + movw %ax,%es +NENTRY(resume_pop_es) + pushl %gs + xorl %eax,%eax /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */ + movw %ax,%gs +NENTRY(resume_pop_gs) + pushl %fs + movl $GSEL(GCPU_SEL, SEL_KPL),%eax + movw %ax,%fs +NENTRY(resume_pop_fs) + movl $T_PROTFLT,TF_TRAPNO(%esp) + sti + jmp calltrap + +/* + * All traps go through here. Call the generic trap handler, and + * check for ASTs afterwards. + */ +NENTRY(alltraps) + INTRENTRY + sti +calltrap: +#ifdef DIAGNOSTIC + movl CPL,%ebx +#endif /* DIAGNOSTIC */ +#if !defined(GPROF) && defined(DDBPROF) + cmpl $T_BPTFLT,TF_TRAPNO(%esp) + jne .Lreal_trap + + pushl %esp + call _C_LABEL(db_prof_hook) + addl $4,%esp + cmpl $1,%eax + jne .Lreal_trap + + /* + * Abuse the error field to indicate that INTRFASTEXIT needs + * to emulate the patched instruction. + */ + movl $INTR_FAKE_TRAP, TF_ERR(%esp) + jz 2f +.Lreal_trap: +#endif /* !defined(GPROF) && defined(DDBPROF) */ + pushl %esp + call _C_LABEL(trap) + addl $4,%esp +2: /* Check for ASTs on exit to user mode. */ + cli + CHECK_ASTPENDING(%ecx) + je 1f + testb $SEL_RPL,TF_CS(%esp) +#ifdef VM86 + jnz 5f + testl $PSL_VM,TF_EFLAGS(%esp) +#endif + jz 1f +5: CLEAR_ASTPENDING(%ecx) + sti + pushl %esp + call _C_LABEL(ast) + addl $4,%esp + jmp 2b +1: +#if !defined(GPROF) && defined(DDBPROF) + /* + * If we are returning from a probe trap we need to fix the + * stack layout and emulate the patched instruction. + * + * The code below does that by trashing %eax, so it MUST be + * restored afterward. + */ + cmpl $INTR_FAKE_TRAP, TF_ERR(%esp) + je .Lprobe_fixup +#endif /* !defined(GPROF) && defined(DDBPROF) */ +#ifndef DIAGNOSTIC + INTRFASTEXIT +#else + cmpl CPL,%ebx + jne 3f + INTRFASTEXIT +3: sti + pushl $4f + call _C_LABEL(printf) + addl $4,%esp +#if defined(DDB) && 0 + int $3 +#endif /* DDB */ + movl %ebx,CPL + jmp 2b +4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n" +#endif /* DIAGNOSTIC */ + +#if !defined(GPROF) && defined(DDBPROF) +.Lprobe_fixup: + /* Restore all register unwinding the stack. */ + INTR_RESTORE_ALL + + /* + * Use the space left by ``err'' and ``trapno'' to emulate + * "pushl %ebp". + * + * Temporarily save %eax. + */ + movl %eax,0(%esp) + + /* Shift hardware-saved registers: eip, cs, eflags */ + movl 8(%esp),%eax + movl %eax,4(%esp) + movl 12(%esp),%eax + movl %eax,8(%esp) + movl 16(%esp),%eax + movl %eax,12(%esp) + + /* Store %ebp in the expected location to finish the emulation. */ + movl %ebp,16(%esp) + + popl %eax + iret +#endif /* !defined(GPROF) && defined(DDBPROF) */ +/* + * Trap gate entry for syscall + */ +IDTVEC(syscall) + subl $8,%esp /* space for tf_{err,trapno} */ + INTRENTRY + pushl %esp + call _C_LABEL(syscall) + addl $4,%esp +2: /* Check for ASTs on exit to user mode. 
*/ + cli + CHECK_ASTPENDING(%ecx) + je 1f + /* Always returning to user mode here. */ + CLEAR_ASTPENDING(%ecx) + sti + pushl %esp + call _C_LABEL(ast) + addl $4,%esp + jmp 2b +1: INTRFASTEXIT + +#include <i386/i386/vector.s> +#include <i386/isa/icu.s> + +/* + * bzero (void *b, size_t len) + * write len zero bytes to the string b. + */ + +ENTRY(bzero) + pushl %edi + movl 8(%esp),%edi + movl 12(%esp),%edx + + xorl %eax,%eax /* set fill data to 0 */ + + /* + * if the string is too short, it's really not worth the overhead + * of aligning to word boundaries, etc. So we jump to a plain + * unaligned set. + */ + cmpl $16,%edx + jb 7f + + movl %edi,%ecx /* compute misalignment */ + negl %ecx + andl $3,%ecx + subl %ecx,%edx + rep /* zero until word aligned */ + stosb + + cmpl $CPUCLASS_486,_C_LABEL(cpu_class) + jne 8f + + movl %edx,%ecx + shrl $6,%ecx + jz 8f + andl $63,%edx +1: movl %eax,(%edi) + movl %eax,4(%edi) + movl %eax,8(%edi) + movl %eax,12(%edi) + movl %eax,16(%edi) + movl %eax,20(%edi) + movl %eax,24(%edi) + movl %eax,28(%edi) + movl %eax,32(%edi) + movl %eax,36(%edi) + movl %eax,40(%edi) + movl %eax,44(%edi) + movl %eax,48(%edi) + movl %eax,52(%edi) + movl %eax,56(%edi) + movl %eax,60(%edi) + addl $64,%edi + decl %ecx + jnz 1b + +8: movl %edx,%ecx /* zero by words */ + shrl $2,%ecx + andl $3,%edx + rep + stosl + +7: movl %edx,%ecx /* zero remainder bytes */ + rep + stosb + + popl %edi + ret + +#if !defined(SMALL_KERNEL) +ENTRY(sse2_pagezero) + pushl %ebx + movl 8(%esp),%ecx + movl %ecx,%eax + addl $4096,%eax + xor %ebx,%ebx +1: + movnti %ebx,(%ecx) + addl $4,%ecx + cmpl %ecx,%eax + jne 1b + sfence + popl %ebx + ret + +ENTRY(i686_pagezero) + pushl %edi + pushl %ebx + + movl 12(%esp), %edi + movl $1024, %ecx + + ALIGN_TEXT +1: + xorl %eax, %eax + repe + scasl + jnz 2f + + popl %ebx + popl %edi + ret + + ALIGN_TEXT + +2: + incl %ecx + subl $4, %edi + + movl %ecx, %edx + cmpl $16, %ecx + + jge 3f + + movl %edi, %ebx + andl $0x3f, %ebx + shrl %ebx + shrl %ebx + movl $16, %ecx + subl %ebx, %ecx + +3: + subl %ecx, %edx + rep + stosl + + movl %edx, %ecx + testl %edx, %edx + jnz 1b + + popl %ebx + popl %edi + ret +#endif + +/* + * int cpu_paenable(void *); + */ +ENTRY(cpu_paenable) + movl $-1, %eax + testl $CPUID_PAE, _C_LABEL(cpu_feature) + jz 1f + + pushl %esi + pushl %edi + movl 12(%esp), %esi + movl %cr3, %edi + orl $0xfe0, %edi /* PDPT will be in the last four slots! */ + movl %edi, %cr3 + addl $KERNBASE, %edi /* and make it back virtual again */ + movl $8, %ecx + rep + movsl + + movl $MSR_EFER, %ecx + rdmsr + orl $EFER_NXE, %eax + wrmsr + + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 /* BANG!!! 
*/
+
+	movl	12(%esp), %eax
+	subl	$KERNBASE, %eax
+	movl	%eax, %cr3		/* reload real PDPT */
+	movl	$4*NBPG, %eax
+	movl	%eax, _C_LABEL(PTDsize)
+
+	xorl	%eax, %eax
+	popl	%edi
+	popl	%esi
+1:
+	ret
+
+#if NLAPIC > 0
+#include <i386/i386/apicvec.s>
+#endif
+
+#include <i386/i386/mutex.S>
+
+.globl _C_LABEL(_stac)
+_C_LABEL(_stac):
+	stac
+
+.globl _C_LABEL(_clac)
+_C_LABEL(_clac):
+	clac
Index: conf/makegap.sh
===================================================================
RCS file: conf/makegap.sh
diff -N conf/makegap.sh
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ conf/makegap.sh	30 May 2017 07:33:02 -0000
@@ -0,0 +1,28 @@
+#!/bin/sh -
+
+cat << __EOF__
+#include <machine/asm.h>
+#include <machine/param.h>
+
+	.text
+	.space	$RANDOM*3, 0xcc
+	.align	PAGE_SIZE, 0xcc
+
+	.globl	endboot
+_C_LABEL(endboot):
+	.space	PAGE_SIZE + $RANDOM % PAGE_SIZE, 0xcc
+	.align	16, 0xcc
+
+	/*
+	 * Randomly bias future data, bss, and rodata objects;
+	 * this does not help for objects in locore.S, though.
+	 */
+	.data
+	.space	$RANDOM % PAGE_SIZE
+
+	.bss
+	.space	$RANDOM % PAGE_SIZE
+
+	.section .rodata
+	.space	$RANDOM % PAGE_SIZE
+__EOF__
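
To make the mechanics concrete, here is a rough sketch of the build
glue that consumes makegap.sh.  The fragment below is hypothetical
(the actual config(8)/Makefile changes are not part of this diff):
gap.S is regenerated on every build so each link gets fresh gap
sizes, gap.o is placed immediately after locore.o, and the remaining
objects are handed to the linker in random order, assuming a sort(1)
that supports -R:

	# hypothetical build glue -- not part of this diff
	sh $S/conf/makegap.sh > gap.S
	${CC} ${AFLAGS} ${CPPFLAGS} -c gap.S -o gap.o

	# locore.o stays first so the bring-up code lands at its fixed
	# boot address; all other objects (locore2.o among them) are
	# shuffled on every link
	${LD} ${LDFLAGS} -o bsd locore.o gap.o \
	    `echo ${OBJS} | tr ' ' '\n' | sort -R | tr '\n' ' '`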
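
Note that the 0xcc fill byte used throughout makegap.sh is the int3
opcode, so any stray control transfer into a gap traps immediately
rather than sliding into real instructions.  Two quick, hypothetical
sanity checks on a freshly linked kernel:

	# endboot (defined by the generated gap.S) should sit a few
	# pages into .text, at a different offset on every relink
	nm bsd | grep ' endboot$'

	# dump the text section and eyeball the runs of 0xcc padding
	objdump -s -j .text bsd | less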