On Tue, May 30, 2017 at 02:16:45AM -0600, Theo de Raadt wrote:
> This change relinks kernel objects randomly, and unmaps the bootcode
> component of locore during boot.  This makes gadgets harder to find.
> 
> The current linker layout is:
> 
>       locore.o [bring-up code + asm runtime]
>       rest of kernel .o, in order supplied by config(8)
> 
> The new linker layout is:
> 
>       locore.o [just the bring-up code]
>       gap.o
>       rest of kernel .o + new locore2.S [asm runtime], via sort -R
> 
> The gap.o part needs some discussion.  This creates 5 random-sized
> gaps:
>       Few pages after locore.o .text
>       <pagesize pad before other .text
>       <pagesize pad before .rodata
>       <pagesize pad before .data
>       <pagesize pad before .bss
> 
> resulting in the following layout:
> 
>       boot code
>       [few pages of gap]
> endboot:
>       [partial page gap]
>       rest of .text - randomized order
>       [page-alignment]
>       [partial page gap]
>       .rodata
>       [page-alignment]
>       [partial page gap]
>       .data
>       [partial page gap]
>       .bss
> 
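
makegap.sh itself isn't in this diff, so the following is only a
sketch of what it presumably emits, reconstructed from the
commented-out echo line left in Makefile.amd64 further down.  The
endboot label and the size expressions come straight from that
comment; the <pagesize pads in front of .rodata/.data/.bss are
omitted here and would need the matching .section directives:

    #!/bin/sh
    # Guessed shape of makegap.sh (.text part only, from the
    # commented-out echo in the Makefile diff, not the real script).
    # RANDOM comes from ksh(1) and is expanded at build time, so
    # every kernel build gets different gap sizes.
    printf '#include <machine/asm.h>\n'
    printf '\t.text\n'
    printf '\t.space\t%d, 0xcc\t/* few pages of random gap */\n' $((RANDOM * 3))
    printf '\t.align\t4096, 0xcc\n'
    printf '\t.globl\tendboot\n'
    printf '_C_LABEL(endboot):\n'
    printf '\t.space\t%d, 0xcc\t/* <pagesize pad */\n' $((4096 + RANDOM % 4096))
    printf '\t.align\t16, 0xcc\n'
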
> When we supply the .o files to the linker in random order, their text
> segments are placed in that random order.  The .rodata/.data/.bss for
> each of those is also placed in the same order into their respective
> sections.
> 
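
The build machinery for that is pleasingly small; with ${SYSTEM_HEAD}
expanded, the new SYSTEM_LD rule further down boils down to the
following (SMALL_KERNEL sets SORTR=cat, i.e. no shuffle):

    # shuffle everything except the locore.o/gap.o head objects,
    # then hand the randomized list to ld
    echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | sort -R > lorder
    ld ${LINKFLAGS} -o bsd locore.o gap.o `cat lorder`
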
> Once the system is booted, we unmap the locore.o bring-up code and the
> first few pages of gap.  (Cannot be too early, must be after "codepatch")
> 
> This bootcode is at a known location in KVA space.  At known offsets
> within this .o text object there are pointers into the other .o
> files, in particular to main().  By unmapping this bootcode, the
> addresses of gadgets in the remaining objects become unknown: due to
> the randomized link order, there is no text-segment knowledge
> anymore about where those objects are.  Obviously some leakage of
> KVA addresses still occurs, and cleanup will need to continue to
> ASLR more of those objects.
> 
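
A quick way to see the effect (hypothetical commands, not part of the
diff): relink twice and compare the symbol tables.  Everything linked
after gap.o should move between the two kernels, while the bring-up
code stays put:

    # addresses of the randomized objects (e.g. main) should differ
    # between any two freshly linked kernels
    nm bsd.0 | sort -k 3 > syms.0
    nm bsd.1 | sort -k 3 > syms.1
    diff syms.0 syms.1 | grep -w main
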
> There are a few mitigation strategies against the BROP attack
> methodology.  One can be summarized as "never reuse an address
> space".  If a freshly linked kernel of this type were booted each
> time, we would be well on the way to satisfying that.  Then other
> mitigation efforts come into play.
> 
> I've booted around 100 amd64 kernels, so that is fairly well tested.
> i386 hasn't been tested as thoroughly yet.
> 

Not sure if you're looking for oks, but the diff reads ok to me.

-ml

> Index: arch/amd64/amd64/autoconf.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/autoconf.c,v
> retrieving revision 1.47
> diff -u -p -u -r1.47 autoconf.c
> --- arch/amd64/amd64/autoconf.c       8 Jun 2016 17:24:44 -0000       1.47
> +++ arch/amd64/amd64/autoconf.c       29 May 2017 13:04:34 -0000
> @@ -59,6 +59,7 @@
>  #include <sys/socketvar.h>
>  #include <sys/timeout.h>
>  #include <sys/hibernate.h>
> +#include <uvm/uvm.h>
>  
>  #include <net/if.h>
>  #include <net/if_types.h>
> @@ -105,6 +106,21 @@ void             aesni_setup(void);
>  extern int   amd64_has_aesni;
>  #endif
>  
> +void
> +unmap_startup(void)
> +{
> +     extern void *kernel_text, *endboot;
> +     vaddr_t p;
> +
> +     printf("unmap kernel init code %lx-%lx\n",
> +         (vaddr_t)&kernel_text, (vaddr_t)&endboot);
> +     p = (vaddr_t)&kernel_text;
> +     do {
> +             pmap_kremove(p, PAGE_SIZE);
> +             p += NBPG;
> +     } while (p < (vaddr_t)&endboot);
> +}
> +
>  /*
>   * Determine i/o configuration for a machine.
>   */
> @@ -122,6 +138,8 @@ cpu_configure(void)
>       lapic_set_lvt();
>       ioapic_enable();
>  #endif
> +
> +     unmap_startup();
>  
>  #ifdef MULTIPROCESSOR
>       cpu_init_idle_pcbs();
> Index: arch/amd64/amd64/locore.S
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
> retrieving revision 1.84
> diff -u -p -u -r1.84 locore.S
> --- arch/amd64/amd64/locore.S 6 Feb 2017 09:15:51 -0000       1.84
> +++ arch/amd64/amd64/locore.S 29 May 2017 20:52:28 -0000
> @@ -131,115 +131,13 @@
>  
>  #include <machine/asm.h>
>  
> -#define SET_CURPROC(proc,cpu)                        \
> -     movq    CPUVAR(SELF),cpu        ;       \
> -     movq    proc,CPUVAR(CURPROC)      ;     \
> -     movq    cpu,P_CPU(proc)
> -
> -#define GET_CURPCB(reg)                      movq    CPUVAR(CURPCB),reg      
> -#define SET_CURPCB(reg)                      movq    reg,CPUVAR(CURPCB)
> -
> -
>  /* XXX temporary kluge; these should not be here */
>  /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
>  #include <dev/isa/isareg.h>
>  
> -
> -/*
> - * Initialization
> - */
> -     .data
> -
> -#if NLAPIC > 0 
> -     .align  NBPG, 0xcc
> -     .globl _C_LABEL(local_apic), _C_LABEL(lapic_id), _C_LABEL(lapic_tpr)
> -_C_LABEL(local_apic):
> -     .space  LAPIC_ID
> -_C_LABEL(lapic_id):
> -     .long   0x00000000
> -     .space  LAPIC_TPRI-(LAPIC_ID+4)
> -_C_LABEL(lapic_tpr):
> -     .space  LAPIC_PPRI-LAPIC_TPRI
> -_C_LABEL(lapic_ppr):
> -     .space  LAPIC_ISR-LAPIC_PPRI 
> -_C_LABEL(lapic_isr):
> -     .space  NBPG-LAPIC_ISR
> -#endif
> -
> -     .globl  _C_LABEL(cpu_id),_C_LABEL(cpu_vendor)
> -     .globl  _C_LABEL(cpuid_level),_C_LABEL(cpu_feature)
> -     .globl  _C_LABEL(cpu_ebxfeature)
> -     .globl  _C_LABEL(cpu_ecxfeature),_C_LABEL(ecpu_ecxfeature)
> -     .globl  _C_LABEL(cpu_perf_eax)
> -     .globl  _C_LABEL(cpu_perf_ebx)
> -     .globl  _C_LABEL(cpu_perf_edx)
> -     .globl  _C_LABEL(cpu_apmi_edx)
> -     .globl  _C_LABEL(ssym),_C_LABEL(esym),_C_LABEL(boothowto)
> -     .globl  _C_LABEL(bootdev)
> -     .globl  _C_LABEL(bootinfo), _C_LABEL(bootinfo_size), _C_LABEL(atdevbase)
> -     .globl  _C_LABEL(proc0paddr),_C_LABEL(PTDpaddr)
> -     .globl  _C_LABEL(biosbasemem),_C_LABEL(biosextmem)
> -     .globl  _C_LABEL(bootapiver)
> -     .globl  _C_LABEL(pg_nx)
> -_C_LABEL(cpu_id):    .long   0       # saved from `cpuid' instruction
> -_C_LABEL(cpu_feature):       .long   0       # feature flags from 'cpuid'
> -                                     #   instruction
> -_C_LABEL(cpu_ebxfeature):.long       0       # ext. ebx feature flags from 'cpuid'
> -_C_LABEL(cpu_ecxfeature):.long       0       # ext. ecx feature flags from 'cpuid'
> -_C_LABEL(ecpu_ecxfeature):.long      0       # extended ecx feature flags
> -_C_LABEL(cpu_perf_eax):      .long   0       # arch. perf. mon. flags from 'cpuid'
> -_C_LABEL(cpu_perf_ebx):      .long   0       # arch. perf. mon. flags from 'cpuid'
> -_C_LABEL(cpu_perf_edx):      .long   0       # arch. perf. mon. flags from 'cpuid'
> -_C_LABEL(cpu_apmi_edx):      .long   0       # adv. power mgmt. info. from 'cpuid'
> -_C_LABEL(cpuid_level):       .long   -1      # max. level accepted by 'cpuid'
> -                                     #   instruction
> -_C_LABEL(cpu_vendor):        .space  16      # vendor string returned by `cpuid'
> -                                     #   instruction
> -_C_LABEL(ssym):              .quad   0       # ptr to start of syms
> -_C_LABEL(esym):              .quad   0       # ptr to end of syms
> -_C_LABEL(atdevbase): .quad   0       # location of start of iomem in virtual
> -_C_LABEL(bootapiver):        .long   0       # /boot API version
> -_C_LABEL(bootdev):   .long   0       # device we booted from
> -_C_LABEL(proc0paddr):        .quad   0
> -_C_LABEL(PTDpaddr):  .quad   0       # paddr of PTD, for libkvm
> -#ifndef REALBASEMEM
> -_C_LABEL(biosbasemem):       .long   0       # base memory reported by BIOS
> -#else
> -_C_LABEL(biosbasemem):       .long   REALBASEMEM
> -#endif
> -#ifndef REALEXTMEM
> -_C_LABEL(biosextmem):        .long   0       # extended memory reported by BIOS
> -#else
> -_C_LABEL(biosextmem):        .long   REALEXTMEM
> -#endif
> -_C_LABEL(pg_nx):     .quad   0       # NX PTE bit (if CPU supports)
> -
>  #define      _RELOC(x)       ((x) - KERNBASE)
>  #define      RELOC(x)        _RELOC(_C_LABEL(x))
>  
> -     .globl  gdt64
> -
> -gdt64:
> -     .word   gdt64_end-gdt64_start-1
> -     .quad   _RELOC(gdt64_start)
> -.align 64, 0xcc
> -
> -gdt64_start:
> -     .quad 0x0000000000000000        /* always empty */
> -     .quad 0x00af9a000000ffff        /* kernel CS */
> -     .quad 0x00cf92000000ffff        /* kernel DS */
> -gdt64_end:
> -
> -farjmp64:
> -     .long   longmode-KERNBASE
> -     .word   GSEL(GCODE_SEL, SEL_KPL)
> -
> -     .space 512
> -tmpstk:
> -
> -     .globl _C_LABEL(cpu_private)
> -     .comm _C_LABEL(cpu_private),NBPG,NBPG
> -
>  /*
>   * Some hackage to deal with 64bit symbols in 32 bit mode.
>   * This may not be needed if things are cleaned up a little.
> @@ -748,475 +646,6 @@ longmode_hi:
>  
>       call    _C_LABEL(main)
>  
> -/*****************************************************************************/
> -
> -/*
> - * Signal trampoline; copied to top of user stack.
> - * gdb's backtrace logic matches against the instructions in this.
> - */
> -     .section .rodata
> -     .globl  _C_LABEL(sigcode)
> -_C_LABEL(sigcode):
> -     call    *%rax
> -
> -     movq    %rsp,%rdi
> -     pushq   %rdi                    /* fake return address */
> -     movq    $SYS_sigreturn,%rax
> -     syscall
> -     .globl  _C_LABEL(sigcoderet)
> -_C_LABEL(sigcoderet):
> -     movq    $SYS_exit,%rax
> -     syscall
> -     .globl  _C_LABEL(esigcode)
> -_C_LABEL(esigcode):
> -
> -     .globl  _C_LABEL(sigfill)
> -_C_LABEL(sigfill):
> -     int3
> -_C_LABEL(esigfill):
> -     .globl  _C_LABEL(sigfillsiz)
> -_C_LABEL(sigfillsiz):
> -     .long   _C_LABEL(esigfill) - _C_LABEL(sigfill)
> -
> -     .text
> -/*
> - * void lgdt(struct region_descriptor *rdp);
> - * Change the global descriptor table.
> - */
> -NENTRY(lgdt)
> -     /* Reload the descriptor table. */
> -     movq    %rdi,%rax
> -     lgdt    (%rax)
> -     /* Flush the prefetch q. */
> -     jmp     1f
> -     nop
> -1:   /* Reload "stale" selectors. */
> -     movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
> -     movl    %eax,%ds
> -     movl    %eax,%es
> -     movl    %eax,%ss
> -     /* Reload code selector by doing intersegment return. */
> -     popq    %rax
> -     pushq   $GSEL(GCODE_SEL, SEL_KPL)
> -     pushq   %rax
> -     lretq
> -
> -ENTRY(setjmp)
> -     /*
> -      * Only save registers that must be preserved across function
> -      * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
> -      * and %rip.
> -      */
> -     movq    %rdi,%rax
> -     movq    %rbx,(%rax)
> -     movq    %rsp,8(%rax)
> -     movq    %rbp,16(%rax)
> -     movq    %r12,24(%rax)
> -     movq    %r13,32(%rax)
> -     movq    %r14,40(%rax)
> -     movq    %r15,48(%rax)
> -     movq    (%rsp),%rdx
> -     movq    %rdx,56(%rax)
> -     xorl    %eax,%eax
> -     ret
> -
> -ENTRY(longjmp)
> -     movq    %rdi,%rax
> -     movq    (%rax),%rbx
> -     movq    8(%rax),%rsp
> -     movq    16(%rax),%rbp
> -     movq    24(%rax),%r12
> -     movq    32(%rax),%r13
> -     movq    40(%rax),%r14
> -     movq    48(%rax),%r15
> -     movq    56(%rax),%rdx
> -     movq    %rdx,(%rsp)
> -     xorl    %eax,%eax
> -     incl    %eax
> -     ret
> -
> -/*****************************************************************************/
> -
> -/*
> - * int cpu_switchto(struct proc *old, struct proc *new)
> - * Switch from "old" proc to "new".
> - */
> -ENTRY(cpu_switchto)
> -     pushq   %rbx
> -     pushq   %rbp
> -     pushq   %r12
> -     pushq   %r13
> -     pushq   %r14
> -     pushq   %r15
> -
> -     movq    %rdi, %r13
> -     movq    %rsi, %r12
> -
> -     /* Record new proc. */
> -     movb    $SONPROC,P_STAT(%r12)   # p->p_stat = SONPROC
> -     SET_CURPROC(%r12,%rcx)
> -
> -     movl    CPUVAR(CPUID),%edi
> -
> -     /* If old proc exited, don't bother. */
> -     testq   %r13,%r13
> -     jz      switch_exited
> -
> -     /*
> -      * Save old context.
> -      *
> -      * Registers:
> -      *   %rax, %rcx - scratch
> -      *   %r13 - old proc, then old pcb
> -      *   %r12 - new proc
> -      *   %edi - cpuid
> -      */
> -
> -     movq    P_ADDR(%r13),%r13
> -
> -     /* clear the old pmap's bit for the cpu */
> -     movq    PCB_PMAP(%r13),%rcx
> -     lock
> -     btrq    %rdi,PM_CPUS(%rcx)
> -
> -     /* Save stack pointers. */
> -     movq    %rsp,PCB_RSP(%r13)
> -     movq    %rbp,PCB_RBP(%r13)
> -
> -switch_exited:
> -     /* did old proc run in userspace?  then reset the segment regs */
> -     btrl    $CPUF_USERSEGS_BIT, CPUVAR(FLAGS)
> -     jnc     restore_saved
> -
> -     /* set %ds, %es, and %fs to expected value to prevent info leak */
> -     movw    $(GSEL(GUDATA_SEL, SEL_UPL)),%ax
> -     movw    %ax,%ds
> -     movw    %ax,%es
> -     movw    %ax,%fs
> -
> -restore_saved:
> -     /*
> -      * Restore saved context.
> -      *
> -      * Registers:
> -      *   %rax, %rcx, %rdx - scratch
> -      *   %r13 - new pcb
> -      *   %r12 - new process
> -      */
> -
> -     /* No interrupts while loading new state. */
> -     cli
> -     movq    P_ADDR(%r12),%r13
> -
> -     /* Restore stack pointers. */
> -     movq    PCB_RSP(%r13),%rsp
> -     movq    PCB_RBP(%r13),%rbp
> -
> -     movq    CPUVAR(TSS),%rcx
> -     movq    PCB_KSTACK(%r13),%rdx
> -     movq    %rdx,TSS_RSP0(%rcx)
> -
> -     movq    PCB_CR3(%r13),%rax
> -     movq    %rax,%cr3
> -
> -     /* Don't bother with the rest if switching to a system process. */
> -     testl   $P_SYSTEM,P_FLAG(%r12)
> -     jnz     switch_restored
> -
> -     /* set the new pmap's bit for the cpu */
> -     movl    CPUVAR(CPUID),%edi
> -     movq    PCB_PMAP(%r13),%rcx
> -     lock
> -     btsq    %rdi,PM_CPUS(%rcx)
> -#ifdef DIAGNOSTIC
> -     jc      _C_LABEL(switch_pmcpu_set)
> -#endif
> -
> -switch_restored:
> -     /* Restore cr0 (including FPU state). */
> -     movl    PCB_CR0(%r13),%ecx
> -#ifdef MULTIPROCESSOR
> -     movq    PCB_FPCPU(%r13),%r8
> -     cmpq    CPUVAR(SELF),%r8
> -     jz      1f
> -     orl     $CR0_TS,%ecx
> -1:
> -#endif
> -     movq    %rcx,%cr0
> -
> -     SET_CURPCB(%r13)
> -
> -     /* Interrupts are okay again. */
> -     sti
> -
> -switch_return:
> -
> -     popq    %r15
> -     popq    %r14
> -     popq    %r13
> -     popq    %r12
> -     popq    %rbp
> -     popq    %rbx
> -     ret
> -
> -ENTRY(cpu_idle_enter)
> -     movq    _C_LABEL(cpu_idle_enter_fcn),%rax
> -     cmpq    $0,%rax
> -     je      1f
> -     jmpq    *%rax
> -1:
> -     ret
> -
> -ENTRY(cpu_idle_cycle)
> -     movq    _C_LABEL(cpu_idle_cycle_fcn),%rax
> -     cmpq    $0,%rax
> -     je      1f
> -     call    *%rax
> -     ret
> -1:
> -     sti
> -     hlt
> -     ret
> -
> -ENTRY(cpu_idle_leave)
> -     movq    _C_LABEL(cpu_idle_leave_fcn),%rax
> -     cmpq    $0,%rax
> -     je      1f
> -     jmpq    *%rax
> -1:
> -     ret
> -
> -     .globl  _C_LABEL(panic)
> -
> -#ifdef DIAGNOSTIC
> -NENTRY(switch_pmcpu_set)
> -     movabsq $1f,%rdi
> -     call    _C_LABEL(panic)
> -     /* NOTREACHED */
> -1:   .asciz  "activate already active pmap"
> -#endif /* DIAGNOSTIC */
> -
> -/*
> - * savectx(struct pcb *pcb);
> - * Update pcb, saving current processor state.
> - */
> -ENTRY(savectx)
> -     /* Save stack pointers. */
> -     movq    %rsp,PCB_RSP(%rdi)
> -     movq    %rbp,PCB_RBP(%rdi)
> -
> -     ret
> -
> -IDTVEC(syscall32)
> -     sysret          /* go away please */
> -
> -/*
> - * syscall insn entry. This currently isn't much faster, but
> - * it can be made faster in the future.
> - */
> -IDTVEC(syscall)
> -     /*
> -      * Enter here with interrupts blocked; %rcx contains the caller's
> -      * %rip and the original rflags has been copied to %r11.  %cs and
> -      * %ss have been updated to the kernel segments, but %rsp is still
> -      * the user-space value.
> -      * First order of business is to swap to the kernel gs.base so that
> -      * we can access our struct cpu_info and use the scratch space there
> -      * to switch to our kernel stack.  Once that's in place we can
> -      * unblock interrupts and save the rest of the syscall frame.
> -      */
> -     swapgs
> -     movq    %r15,CPUVAR(SCRATCH)
> -     movq    CPUVAR(CURPCB),%r15
> -     movq    PCB_KSTACK(%r15),%r15
> -     xchgq   %r15,%rsp
> -     sti
> -
> -     /*
> -      * XXX don't need this whole frame, split of the
> -      * syscall frame and trapframe is needed.
> -      * First, leave some room for the trapno, error,
> -      * ss:rsp, etc, so that all GP registers can be
> -      * saved. Then, fill in the rest.
> -      */
> -     pushq   $(GSEL(GUDATA_SEL, SEL_UPL))
> -     pushq   %r15
> -     subq    $(TF_RSP-TF_TRAPNO),%rsp
> -     movq    CPUVAR(SCRATCH),%r15
> -     subq    $32,%rsp
> -     INTR_SAVE_GPRS
> -     movq    %r11, TF_RFLAGS(%rsp)   /* old rflags from syscall insn */
> -     movq    $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
> -     movq    %rcx,TF_RIP(%rsp)
> -     movq    $2,TF_ERR(%rsp)         /* ignored */
> -
> -     movq    CPUVAR(CURPROC),%r14
> -     movq    %rsp,P_MD_REGS(%r14)    # save pointer to frame
> -     andl    $~MDP_IRET,P_MD_FLAGS(%r14)
> -     movq    %rsp,%rdi
> -     call    _C_LABEL(syscall)
> -
> -.Lsyscall_check_asts:
> -     /* Check for ASTs on exit to user mode. */
> -     cli
> -     CHECK_ASTPENDING(%r11)
> -     je      2f
> -     CLEAR_ASTPENDING(%r11)
> -     sti
> -     movq    %rsp,%rdi
> -     call    _C_LABEL(ast)
> -     jmp     .Lsyscall_check_asts
> -
> -2:
> -#ifdef DIAGNOSTIC
> -     cmpl    $IPL_NONE,CPUVAR(ILEVEL)
> -     jne     .Lsyscall_spl_not_lowered
> -#endif /* DIAGNOSTIC */
> -
> -     /* Could registers have been changed that require an iretq? */
> -     testl   $MDP_IRET, P_MD_FLAGS(%r14)
> -     jne     intr_fast_exit
> -
> -     movq    TF_RDI(%rsp),%rdi
> -     movq    TF_RSI(%rsp),%rsi
> -     movq    TF_R8(%rsp),%r8
> -     movq    TF_R9(%rsp),%r9
> -     movq    TF_R10(%rsp),%r10
> -     movq    TF_R12(%rsp),%r12
> -     movq    TF_R13(%rsp),%r13
> -     movq    TF_R14(%rsp),%r14
> -     movq    TF_R15(%rsp),%r15
> -     movq    TF_RBP(%rsp),%rbp
> -     movq    TF_RBX(%rsp),%rbx
> -
> -     INTR_RESTORE_SELECTORS
> -
> -     movq    TF_RDX(%rsp),%rdx
> -     movq    TF_RAX(%rsp),%rax
> -
> -     movq    TF_RIP(%rsp),%rcx
> -     movq    TF_RFLAGS(%rsp),%r11
> -     movq    TF_RSP(%rsp),%rsp
> -     sysretq
> -
> -#ifdef DIAGNOSTIC
> -.Lsyscall_spl_not_lowered:
> -     movabsq $4f, %rdi
> -     movl    TF_RAX(%rsp),%esi
> -     movl    TF_RDI(%rsp),%edx
> -     movl    %ebx,%ecx
> -     movl    CPUVAR(ILEVEL),%r8d
> -     xorq    %rax,%rax
> -     call    _C_LABEL(printf)
> -#ifdef DDB
> -     int     $3
> -#endif /* DDB */
> -     movl    $IPL_NONE,CPUVAR(ILEVEL)
> -     jmp     .Lsyscall_check_asts
> -4:   .asciz  "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
> -#endif
> -
> -
> -NENTRY(proc_trampoline)
> -#ifdef MULTIPROCESSOR
> -     call    _C_LABEL(proc_trampoline_mp)
> -#endif
> -     movl    $IPL_NONE,CPUVAR(ILEVEL)
> -     movq    %r13,%rdi
> -     call    *%r12
> -     movq    CPUVAR(CURPROC),%r14
> -     jmp     .Lsyscall_check_asts
> -
> -
> -/*
> - * Return via iretq, for real interrupts and signal returns
> - */
> -NENTRY(intr_fast_exit)
> -     movq    TF_RDI(%rsp),%rdi
> -     movq    TF_RSI(%rsp),%rsi
> -     movq    TF_R8(%rsp),%r8
> -     movq    TF_R9(%rsp),%r9
> -     movq    TF_R10(%rsp),%r10
> -     movq    TF_R12(%rsp),%r12
> -     movq    TF_R13(%rsp),%r13
> -     movq    TF_R14(%rsp),%r14
> -     movq    TF_R15(%rsp),%r15
> -     movq    TF_RBP(%rsp),%rbp
> -     movq    TF_RBX(%rsp),%rbx
> -
> -     testq   $SEL_RPL,TF_CS(%rsp)
> -     je      5f
> -
> -     INTR_RESTORE_SELECTORS
> -
> -5:   movq    TF_RDX(%rsp),%rdx
> -     movq    TF_RCX(%rsp),%rcx
> -     movq    TF_R11(%rsp),%r11
> -     movq    TF_RAX(%rsp),%rax
> -
> -#if !defined(GPROF) && defined(DDBPROF)
> -     /*
> -      * If we are returning from a probe trap we need to fix the
> -      * stack layout and emulate the patched instruction.
> -      *
> -      * The code below does that by trashing %rax, so it MUST be
> -      * restored afterward.
> -      */
> -     cmpl    $INTR_FAKE_TRAP, TF_ERR(%rsp)
> -     je      .Lprobe_fixup
> -#endif /* !defined(GPROF) && defined(DDBPROF) */
> -
> -     addq    $TF_RIP,%rsp
> -
> -     .globl  _C_LABEL(doreti_iret)
> -_C_LABEL(doreti_iret):
> -     iretq
> -
> -
> -#if !defined(GPROF) && defined(DDBPROF)
> -.Lprobe_fixup:
> -     /* Reserve enough room to emulate "pushq %rbp". */
> -     subq    $16, %rsp
> -
> -     /* Shift hardware-saved registers. */
> -     movq    (TF_RIP + 16)(%rsp), %rax
> -     movq    %rax, TF_RIP(%rsp)
> -     movq    (TF_CS + 16)(%rsp), %rax
> -     movq    %rax, TF_CS(%rsp)
> -     movq    (TF_RFLAGS + 16)(%rsp), %rax
> -     movq    %rax, TF_RFLAGS(%rsp)
> -     movq    (TF_RSP + 16)(%rsp), %rax
> -     movq    %rax, TF_RSP(%rsp)
> -     movq    (TF_SS + 16)(%rsp), %rax
> -     movq    %rax, TF_SS(%rsp)
> -
> -     /* Pull 8 bytes off the stack and store %rbp in the expected location.*/
> -     movq    TF_RSP(%rsp), %rax
> -     subq    $8, %rax
> -     movq    %rax, TF_RSP(%rsp)
> -     movq    %rbp, (%rax)
> -
> -     /* Write back overwritten %rax */
> -     movq    (TF_RAX + 16)(%rsp),%rax
> -
> -     addq    $TF_RIP,%rsp
> -     iretq
> -#endif /* !defined(GPROF) && defined(DDBPROF) */
> -
> -ENTRY(pagezero)
> -     movq    $-PAGE_SIZE,%rdx
> -     subq    %rdx,%rdi
> -     xorq    %rax,%rax
> -1:
> -     movnti  %rax,(%rdi,%rdx)
> -     movnti  %rax,8(%rdi,%rdx)
> -     movnti  %rax,16(%rdi,%rdx)
> -     movnti  %rax,24(%rdi,%rdx)
> -     addq    $32,%rdx
> -     jne     1b
> -     sfence
> -     ret
> -
>       .section .codepatch,"a"
>       .align  8, 0xcc
>       .globl _C_LABEL(codepatch_begin)
> @@ -1228,20 +657,16 @@ _C_LABEL(codepatch_begin):
>  _C_LABEL(codepatch_end):
>       .previous
>  
> -#if NXEN > 0
> -     /* Hypercall page needs to be page aligned */
> -     .text
> -     .align  NBPG, 0xcc
> -     .globl  _C_LABEL(xen_hypercall_page)
> -_C_LABEL(xen_hypercall_page):
> -     .skip   0x1000, 0xcc
> -#endif /* NXEN > 0 */
> +     .data
> +
> +farjmp64:
> +     .long   longmode-KERNBASE
> +     .word   GSEL(GCODE_SEL, SEL_KPL)
> +
> +     .globl _C_LABEL(cpu_private)
> +     .comm _C_LABEL(cpu_private),NBPG,NBPG
> +
> +/* XXX we want some guard here */
> +     .space 512
> +tmpstk:
>  
> -#if NHYPERV > 0
> -     /* Hypercall page needs to be page aligned */
> -     .text
> -     .align  NBPG, 0xcc
> -     .globl  _C_LABEL(hv_hypercall_page)
> -_C_LABEL(hv_hypercall_page):
> -     .skip   0x1000, 0xcc
> -#endif /* NXEN > 0 */
> Index: arch/amd64/amd64/locore2.S
> ===================================================================
> RCS file: arch/amd64/amd64/locore2.S
> diff -N arch/amd64/amd64/locore2.S
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ arch/amd64/amd64/locore2.S        29 May 2017 20:51:07 -0000
> @@ -0,0 +1,721 @@
> +/*   $OpenBSD: locore.S,v 1.84 2017/02/06 09:15:51 mpi Exp $ */
> +/*   $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $     */
> +
> +/*
> + * Copyright-o-rama!
> + */
> +
> +/*
> + * Copyright (c) 2001 Wasabi Systems, Inc.
> + * All rights reserved.
> + *
> + * Written by Frank van der Linden for Wasabi Systems, Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + * 3. All advertising materials mentioning features or use of this software
> + *    must display the following acknowledgement:
> + *      This product includes software developed for the NetBSD Project by
> + *      Wasabi Systems, Inc.
> + * 4. The name of Wasabi Systems, Inc. may not be used to endorse
> + *    or promote products derived from this software without specific prior
> + *    written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
> + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> + * POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +
> +/*-
> + * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
> + * All rights reserved.
> + *
> + * This code is derived from software contributed to The NetBSD Foundation
> + * by Charles M. Hannum.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
> + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
> + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> + * POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +/*-
> + * Copyright (c) 1990 The Regents of the University of California.
> + * All rights reserved.
> + *
> + * This code is derived from software contributed to Berkeley by
> + * William Jolitz.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + * 3. Neither the name of the University nor the names of its contributors
> + *    may be used to endorse or promote products derived from this software
> + *    without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + *   @(#)locore.s    7.3 (Berkeley) 5/13/91
> + */
> +
> +#include "assym.h"
> +#include "lapic.h"
> +#include "ksyms.h"
> +#include "xen.h"
> +#include "hyperv.h"
> +
> +#include <sys/syscall.h>
> +
> +#include <machine/param.h>
> +#include <machine/segments.h>
> +#include <machine/specialreg.h>
> +#include <machine/trap.h>
> +#include <machine/frameasm.h>
> +
> +#define SET_CURPROC(proc,cpu)                        \
> +     movq    CPUVAR(SELF),cpu        ;       \
> +     movq    proc,CPUVAR(CURPROC)      ;     \
> +     movq    cpu,P_CPU(proc)
> +
> +#define GET_CURPCB(reg)                      movq    CPUVAR(CURPCB),reg      
> +#define SET_CURPCB(reg)                      movq    reg,CPUVAR(CURPCB)
> +
> +#if NLAPIC > 0
> +#include <machine/i82489reg.h>
> +#endif
> +
> +/*
> + * override user-land alignment before including asm.h
> + */
> +#define      ALIGN_DATA      .align  8
> +#define ALIGN_TEXT   .align 16,0x90
> +#define _ALIGN_TEXT  ALIGN_TEXT
> +
> +#include <machine/asm.h>
> +
> +
> +
> +/*
> + * Initialization
> + */
> +     .data
> +
> +#if NLAPIC > 0 
> +     .align  NBPG, 0xcc
> +     .globl _C_LABEL(local_apic), _C_LABEL(lapic_id), _C_LABEL(lapic_tpr)
> +_C_LABEL(local_apic):
> +     .space  LAPIC_ID
> +_C_LABEL(lapic_id):
> +     .long   0x00000000
> +     .space  LAPIC_TPRI-(LAPIC_ID+4)
> +_C_LABEL(lapic_tpr):
> +     .space  LAPIC_PPRI-LAPIC_TPRI
> +_C_LABEL(lapic_ppr):
> +     .space  LAPIC_ISR-LAPIC_PPRI 
> +_C_LABEL(lapic_isr):
> +     .space  NBPG-LAPIC_ISR
> +#endif
> +
> +     .globl  _C_LABEL(cpu_id),_C_LABEL(cpu_vendor)
> +     .globl  _C_LABEL(cpuid_level),_C_LABEL(cpu_feature)
> +     .globl  _C_LABEL(cpu_ebxfeature)
> +     .globl  _C_LABEL(cpu_ecxfeature),_C_LABEL(ecpu_ecxfeature)
> +     .globl  _C_LABEL(cpu_perf_eax)
> +     .globl  _C_LABEL(cpu_perf_ebx)
> +     .globl  _C_LABEL(cpu_perf_edx)
> +     .globl  _C_LABEL(cpu_apmi_edx)
> +     .globl  _C_LABEL(ssym),_C_LABEL(esym),_C_LABEL(boothowto)
> +     .globl  _C_LABEL(bootdev)
> +     .globl  _C_LABEL(bootinfo), _C_LABEL(bootinfo_size), _C_LABEL(atdevbase)
> +     .globl  _C_LABEL(proc0paddr),_C_LABEL(PTDpaddr)
> +     .globl  _C_LABEL(biosbasemem),_C_LABEL(biosextmem)
> +     .globl  _C_LABEL(bootapiver)
> +     .globl  _C_LABEL(pg_nx)
> +_C_LABEL(cpu_id):    .long   0       # saved from `cpuid' instruction
> +_C_LABEL(cpu_feature):       .long   0       # feature flags from 'cpuid'
> +                                     #   instruction
> +_C_LABEL(cpu_ebxfeature):.long       0       # ext. ebx feature flags from 'cpuid'
> +_C_LABEL(cpu_ecxfeature):.long       0       # ext. ecx feature flags from 'cpuid'
> +_C_LABEL(ecpu_ecxfeature):.long      0       # extended ecx feature flags
> +_C_LABEL(cpu_perf_eax):      .long   0       # arch. perf. mon. flags from 'cpuid'
> +_C_LABEL(cpu_perf_ebx):      .long   0       # arch. perf. mon. flags from 'cpuid'
> +_C_LABEL(cpu_perf_edx):      .long   0       # arch. perf. mon. flags from 'cpuid'
> +_C_LABEL(cpu_apmi_edx):      .long   0       # adv. power mgmt. info. from 'cpuid'
> +_C_LABEL(cpuid_level):       .long   -1      # max. level accepted by 'cpuid'
> +                                     #   instruction
> +_C_LABEL(cpu_vendor):        .space  16      # vendor string returned by `cpuid'
> +                                     #   instruction
> +_C_LABEL(ssym):              .quad   0       # ptr to start of syms
> +_C_LABEL(esym):              .quad   0       # ptr to end of syms
> +_C_LABEL(atdevbase): .quad   0       # location of start of iomem in virtual
> +_C_LABEL(bootapiver):        .long   0       # /boot API version
> +_C_LABEL(bootdev):   .long   0       # device we booted from
> +_C_LABEL(proc0paddr):        .quad   0
> +_C_LABEL(PTDpaddr):  .quad   0       # paddr of PTD, for libkvm
> +#ifndef REALBASEMEM
> +_C_LABEL(biosbasemem):       .long   0       # base memory reported by BIOS
> +#else
> +_C_LABEL(biosbasemem):       .long   REALBASEMEM
> +#endif
> +#ifndef REALEXTMEM
> +_C_LABEL(biosextmem):        .long   0       # extended memory reported by BIOS
> +#else
> +_C_LABEL(biosextmem):        .long   REALEXTMEM
> +#endif
> +_C_LABEL(pg_nx):     .quad   0       # NX PTE bit (if CPU supports)
> +
> +#define      _RELOC(x)       ((x) - KERNBASE)
> +#define      RELOC(x)        _RELOC(_C_LABEL(x))
> +
> +     .globl  gdt64
> +
> +gdt64:
> +     .word   gdt64_end-gdt64_start-1
> +     .quad   _RELOC(gdt64_start)
> +.align 64, 0xcc
> +
> +gdt64_start:
> +     .quad 0x0000000000000000        /* always empty */
> +     .quad 0x00af9a000000ffff        /* kernel CS */
> +     .quad 0x00cf92000000ffff        /* kernel DS */
> +gdt64_end:
> +
> +/*
> + * Some hackage to deal with 64bit symbols in 32 bit mode.
> + * This may not be needed if things are cleaned up a little.
> + */
> +
> +/*****************************************************************************/
> +
> +/*
> + * Signal trampoline; copied to top of user stack.
> + * gdb's backtrace logic matches against the instructions in this.
> + */
> +     .section .rodata
> +     .globl  _C_LABEL(sigcode)
> +_C_LABEL(sigcode):
> +     call    *%rax
> +
> +     movq    %rsp,%rdi
> +     pushq   %rdi                    /* fake return address */
> +     movq    $SYS_sigreturn,%rax
> +     syscall
> +     .globl  _C_LABEL(sigcoderet)
> +_C_LABEL(sigcoderet):
> +     movq    $SYS_exit,%rax
> +     syscall
> +     .globl  _C_LABEL(esigcode)
> +_C_LABEL(esigcode):
> +
> +     .globl  _C_LABEL(sigfill)
> +_C_LABEL(sigfill):
> +     int3
> +_C_LABEL(esigfill):
> +     .globl  _C_LABEL(sigfillsiz)
> +_C_LABEL(sigfillsiz):
> +     .long   _C_LABEL(esigfill) - _C_LABEL(sigfill)
> +
> +     .text
> +     ALIGN_TEXT
> +
> +/*
> + * void lgdt(struct region_descriptor *rdp);
> + * Change the global descriptor table.
> + */
> +NENTRY(lgdt)
> +     /* Reload the descriptor table. */
> +     movq    %rdi,%rax
> +     lgdt    (%rax)
> +     /* Flush the prefetch q. */
> +     jmp     1f
> +     nop
> +1:   /* Reload "stale" selectors. */
> +     movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
> +     movl    %eax,%ds
> +     movl    %eax,%es
> +     movl    %eax,%ss
> +     /* Reload code selector by doing intersegment return. */
> +     popq    %rax
> +     pushq   $GSEL(GCODE_SEL, SEL_KPL)
> +     pushq   %rax
> +     lretq
> +
> +ENTRY(setjmp)
> +     /*
> +      * Only save registers that must be preserved across function
> +      * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
> +      * and %rip.
> +      */
> +     movq    %rdi,%rax
> +     movq    %rbx,(%rax)
> +     movq    %rsp,8(%rax)
> +     movq    %rbp,16(%rax)
> +     movq    %r12,24(%rax)
> +     movq    %r13,32(%rax)
> +     movq    %r14,40(%rax)
> +     movq    %r15,48(%rax)
> +     movq    (%rsp),%rdx
> +     movq    %rdx,56(%rax)
> +     xorl    %eax,%eax
> +     ret
> +
> +ENTRY(longjmp)
> +     movq    %rdi,%rax
> +     movq    (%rax),%rbx
> +     movq    8(%rax),%rsp
> +     movq    16(%rax),%rbp
> +     movq    24(%rax),%r12
> +     movq    32(%rax),%r13
> +     movq    40(%rax),%r14
> +     movq    48(%rax),%r15
> +     movq    56(%rax),%rdx
> +     movq    %rdx,(%rsp)
> +     xorl    %eax,%eax
> +     incl    %eax
> +     ret
> +
> +/*****************************************************************************/
> +
> +/*
> + * int cpu_switchto(struct proc *old, struct proc *new)
> + * Switch from "old" proc to "new".
> + */
> +ENTRY(cpu_switchto)
> +     pushq   %rbx
> +     pushq   %rbp
> +     pushq   %r12
> +     pushq   %r13
> +     pushq   %r14
> +     pushq   %r15
> +
> +     movq    %rdi, %r13
> +     movq    %rsi, %r12
> +
> +     /* Record new proc. */
> +     movb    $SONPROC,P_STAT(%r12)   # p->p_stat = SONPROC
> +     SET_CURPROC(%r12,%rcx)
> +
> +     movl    CPUVAR(CPUID),%edi
> +
> +     /* If old proc exited, don't bother. */
> +     testq   %r13,%r13
> +     jz      switch_exited
> +
> +     /*
> +      * Save old context.
> +      *
> +      * Registers:
> +      *   %rax, %rcx - scratch
> +      *   %r13 - old proc, then old pcb
> +      *   %r12 - new proc
> +      *   %edi - cpuid
> +      */
> +
> +     movq    P_ADDR(%r13),%r13
> +
> +     /* clear the old pmap's bit for the cpu */
> +     movq    PCB_PMAP(%r13),%rcx
> +     lock
> +     btrq    %rdi,PM_CPUS(%rcx)
> +
> +     /* Save stack pointers. */
> +     movq    %rsp,PCB_RSP(%r13)
> +     movq    %rbp,PCB_RBP(%r13)
> +
> +switch_exited:
> +     /* did old proc run in userspace?  then reset the segment regs */
> +     btrl    $CPUF_USERSEGS_BIT, CPUVAR(FLAGS)
> +     jnc     restore_saved
> +
> +     /* set %ds, %es, and %fs to expected value to prevent info leak */
> +     movw    $(GSEL(GUDATA_SEL, SEL_UPL)),%ax
> +     movw    %ax,%ds
> +     movw    %ax,%es
> +     movw    %ax,%fs
> +
> +restore_saved:
> +     /*
> +      * Restore saved context.
> +      *
> +      * Registers:
> +      *   %rax, %rcx, %rdx - scratch
> +      *   %r13 - new pcb
> +      *   %r12 - new process
> +      */
> +
> +     /* No interrupts while loading new state. */
> +     cli
> +     movq    P_ADDR(%r12),%r13
> +
> +     /* Restore stack pointers. */
> +     movq    PCB_RSP(%r13),%rsp
> +     movq    PCB_RBP(%r13),%rbp
> +
> +     movq    CPUVAR(TSS),%rcx
> +     movq    PCB_KSTACK(%r13),%rdx
> +     movq    %rdx,TSS_RSP0(%rcx)
> +
> +     movq    PCB_CR3(%r13),%rax
> +     movq    %rax,%cr3
> +
> +     /* Don't bother with the rest if switching to a system process. */
> +     testl   $P_SYSTEM,P_FLAG(%r12)
> +     jnz     switch_restored
> +
> +     /* set the new pmap's bit for the cpu */
> +     movl    CPUVAR(CPUID),%edi
> +     movq    PCB_PMAP(%r13),%rcx
> +     lock
> +     btsq    %rdi,PM_CPUS(%rcx)
> +#ifdef DIAGNOSTIC
> +     jc      _C_LABEL(switch_pmcpu_set)
> +#endif
> +
> +switch_restored:
> +     /* Restore cr0 (including FPU state). */
> +     movl    PCB_CR0(%r13),%ecx
> +#ifdef MULTIPROCESSOR
> +     movq    PCB_FPCPU(%r13),%r8
> +     cmpq    CPUVAR(SELF),%r8
> +     jz      1f
> +     orl     $CR0_TS,%ecx
> +1:
> +#endif
> +     movq    %rcx,%cr0
> +
> +     SET_CURPCB(%r13)
> +
> +     /* Interrupts are okay again. */
> +     sti
> +
> +switch_return:
> +
> +     popq    %r15
> +     popq    %r14
> +     popq    %r13
> +     popq    %r12
> +     popq    %rbp
> +     popq    %rbx
> +     ret
> +
> +ENTRY(cpu_idle_enter)
> +     movq    _C_LABEL(cpu_idle_enter_fcn),%rax
> +     cmpq    $0,%rax
> +     je      1f
> +     jmpq    *%rax
> +1:
> +     ret
> +
> +ENTRY(cpu_idle_cycle)
> +     movq    _C_LABEL(cpu_idle_cycle_fcn),%rax
> +     cmpq    $0,%rax
> +     je      1f
> +     call    *%rax
> +     ret
> +1:
> +     sti
> +     hlt
> +     ret
> +
> +ENTRY(cpu_idle_leave)
> +     movq    _C_LABEL(cpu_idle_leave_fcn),%rax
> +     cmpq    $0,%rax
> +     je      1f
> +     jmpq    *%rax
> +1:
> +     ret
> +
> +     .globl  _C_LABEL(panic)
> +
> +#ifdef DIAGNOSTIC
> +NENTRY(switch_pmcpu_set)
> +     movabsq $1f,%rdi
> +     call    _C_LABEL(panic)
> +     /* NOTREACHED */
> +1:   .asciz  "activate already active pmap"
> +#endif /* DIAGNOSTIC */
> +
> +/*
> + * savectx(struct pcb *pcb);
> + * Update pcb, saving current processor state.
> + */
> +ENTRY(savectx)
> +     /* Save stack pointers. */
> +     movq    %rsp,PCB_RSP(%rdi)
> +     movq    %rbp,PCB_RBP(%rdi)
> +
> +     ret
> +
> +IDTVEC(syscall32)
> +     sysret          /* go away please */
> +
> +/*
> + * syscall insn entry. This currently isn't much faster, but
> + * it can be made faster in the future.
> + */
> +IDTVEC(syscall)
> +     /*
> +      * Enter here with interrupts blocked; %rcx contains the caller's
> +      * %rip and the original rflags has been copied to %r11.  %cs and
> +      * %ss have been updated to the kernel segments, but %rsp is still
> +      * the user-space value.
> +      * First order of business is to swap to the kernel gs.base so that
> +      * we can access our struct cpu_info and use the scratch space there
> +      * to switch to our kernel stack.  Once that's in place we can
> +      * unblock interrupts and save the rest of the syscall frame.
> +      */
> +     swapgs
> +     movq    %r15,CPUVAR(SCRATCH)
> +     movq    CPUVAR(CURPCB),%r15
> +     movq    PCB_KSTACK(%r15),%r15
> +     xchgq   %r15,%rsp
> +     sti
> +
> +     /*
> +      * XXX don't need this whole frame, split of the
> +      * syscall frame and trapframe is needed.
> +      * First, leave some room for the trapno, error,
> +      * ss:rsp, etc, so that all GP registers can be
> +      * saved. Then, fill in the rest.
> +      */
> +     pushq   $(GSEL(GUDATA_SEL, SEL_UPL))
> +     pushq   %r15
> +     subq    $(TF_RSP-TF_TRAPNO),%rsp
> +     movq    CPUVAR(SCRATCH),%r15
> +     subq    $32,%rsp
> +     INTR_SAVE_GPRS
> +     movq    %r11, TF_RFLAGS(%rsp)   /* old rflags from syscall insn */
> +     movq    $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
> +     movq    %rcx,TF_RIP(%rsp)
> +     movq    $2,TF_ERR(%rsp)         /* ignored */
> +
> +     movq    CPUVAR(CURPROC),%r14
> +     movq    %rsp,P_MD_REGS(%r14)    # save pointer to frame
> +     andl    $~MDP_IRET,P_MD_FLAGS(%r14)
> +     movq    %rsp,%rdi
> +     call    _C_LABEL(syscall)
> +
> +.Lsyscall_check_asts:
> +     /* Check for ASTs on exit to user mode. */
> +     cli
> +     CHECK_ASTPENDING(%r11)
> +     je      2f
> +     CLEAR_ASTPENDING(%r11)
> +     sti
> +     movq    %rsp,%rdi
> +     call    _C_LABEL(ast)
> +     jmp     .Lsyscall_check_asts
> +
> +2:
> +#ifdef DIAGNOSTIC
> +     cmpl    $IPL_NONE,CPUVAR(ILEVEL)
> +     jne     .Lsyscall_spl_not_lowered
> +#endif /* DIAGNOSTIC */
> +
> +     /* Could registers have been changed that require an iretq? */
> +     testl   $MDP_IRET, P_MD_FLAGS(%r14)
> +     jne     intr_fast_exit
> +
> +     movq    TF_RDI(%rsp),%rdi
> +     movq    TF_RSI(%rsp),%rsi
> +     movq    TF_R8(%rsp),%r8
> +     movq    TF_R9(%rsp),%r9
> +     movq    TF_R10(%rsp),%r10
> +     movq    TF_R12(%rsp),%r12
> +     movq    TF_R13(%rsp),%r13
> +     movq    TF_R14(%rsp),%r14
> +     movq    TF_R15(%rsp),%r15
> +     movq    TF_RBP(%rsp),%rbp
> +     movq    TF_RBX(%rsp),%rbx
> +
> +     INTR_RESTORE_SELECTORS
> +
> +     movq    TF_RDX(%rsp),%rdx
> +     movq    TF_RAX(%rsp),%rax
> +
> +     movq    TF_RIP(%rsp),%rcx
> +     movq    TF_RFLAGS(%rsp),%r11
> +     movq    TF_RSP(%rsp),%rsp
> +     sysretq
> +
> +#ifdef DIAGNOSTIC
> +.Lsyscall_spl_not_lowered:
> +     movabsq $4f, %rdi
> +     movl    TF_RAX(%rsp),%esi
> +     movl    TF_RDI(%rsp),%edx
> +     movl    %ebx,%ecx
> +     movl    CPUVAR(ILEVEL),%r8d
> +     xorq    %rax,%rax
> +     call    _C_LABEL(printf)
> +#ifdef DDB
> +     int     $3
> +#endif /* DDB */
> +     movl    $IPL_NONE,CPUVAR(ILEVEL)
> +     jmp     .Lsyscall_check_asts
> +4:   .asciz  "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
> +#endif
> +
> +
> +NENTRY(proc_trampoline)
> +#ifdef MULTIPROCESSOR
> +     call    _C_LABEL(proc_trampoline_mp)
> +#endif
> +     movl    $IPL_NONE,CPUVAR(ILEVEL)
> +     movq    %r13,%rdi
> +     call    *%r12
> +     movq    CPUVAR(CURPROC),%r14
> +     jmp     .Lsyscall_check_asts
> +
> +
> +/*
> + * Return via iretq, for real interrupts and signal returns
> + */
> +NENTRY(intr_fast_exit)
> +     movq    TF_RDI(%rsp),%rdi
> +     movq    TF_RSI(%rsp),%rsi
> +     movq    TF_R8(%rsp),%r8
> +     movq    TF_R9(%rsp),%r9
> +     movq    TF_R10(%rsp),%r10
> +     movq    TF_R12(%rsp),%r12
> +     movq    TF_R13(%rsp),%r13
> +     movq    TF_R14(%rsp),%r14
> +     movq    TF_R15(%rsp),%r15
> +     movq    TF_RBP(%rsp),%rbp
> +     movq    TF_RBX(%rsp),%rbx
> +
> +     testq   $SEL_RPL,TF_CS(%rsp)
> +     je      5f
> +
> +     INTR_RESTORE_SELECTORS
> +
> +5:   movq    TF_RDX(%rsp),%rdx
> +     movq    TF_RCX(%rsp),%rcx
> +     movq    TF_R11(%rsp),%r11
> +     movq    TF_RAX(%rsp),%rax
> +
> +#if !defined(GPROF) && defined(DDBPROF)
> +     /*
> +      * If we are returning from a probe trap we need to fix the
> +      * stack layout and emulate the patched instruction.
> +      *
> +      * The code below does that by trashing %rax, so it MUST be
> +      * restored afterward.
> +      */
> +     cmpl    $INTR_FAKE_TRAP, TF_ERR(%rsp)
> +     je      .Lprobe_fixup
> +#endif /* !defined(GPROF) && defined(DDBPROF) */
> +
> +     addq    $TF_RIP,%rsp
> +
> +     .globl  _C_LABEL(doreti_iret)
> +_C_LABEL(doreti_iret):
> +     iretq
> +
> +
> +#if !defined(GPROF) && defined(DDBPROF)
> +.Lprobe_fixup:
> +     /* Reserve enough room to emulate "pushq %rbp". */
> +     subq    $16, %rsp
> +
> +     /* Shift hardware-saved registers. */
> +     movq    (TF_RIP + 16)(%rsp), %rax
> +     movq    %rax, TF_RIP(%rsp)
> +     movq    (TF_CS + 16)(%rsp), %rax
> +     movq    %rax, TF_CS(%rsp)
> +     movq    (TF_RFLAGS + 16)(%rsp), %rax
> +     movq    %rax, TF_RFLAGS(%rsp)
> +     movq    (TF_RSP + 16)(%rsp), %rax
> +     movq    %rax, TF_RSP(%rsp)
> +     movq    (TF_SS + 16)(%rsp), %rax
> +     movq    %rax, TF_SS(%rsp)
> +
> +     /* Pull 8 bytes off the stack and store %rbp in the expected location.*/
> +     movq    TF_RSP(%rsp), %rax
> +     subq    $8, %rax
> +     movq    %rax, TF_RSP(%rsp)
> +     movq    %rbp, (%rax)
> +
> +     /* Write back overwritten %rax */
> +     movq    (TF_RAX + 16)(%rsp),%rax
> +
> +     addq    $TF_RIP,%rsp
> +     iretq
> +#endif /* !defined(GPROF) && defined(DDBPROF) */
> +
> +ENTRY(pagezero)
> +     movq    $-PAGE_SIZE,%rdx
> +     subq    %rdx,%rdi
> +     xorq    %rax,%rax
> +1:
> +     movnti  %rax,(%rdi,%rdx)
> +     movnti  %rax,8(%rdi,%rdx)
> +     movnti  %rax,16(%rdi,%rdx)
> +     movnti  %rax,24(%rdi,%rdx)
> +     addq    $32,%rdx
> +     jne     1b
> +     sfence
> +     ret
> +
> +#if NXEN > 0
> +     /* Hypercall page needs to be page aligned */
> +     .text
> +     .align  NBPG, 0xcc
> +     .globl  _C_LABEL(xen_hypercall_page)
> +_C_LABEL(xen_hypercall_page):
> +     .skip   0x1000, 0xcc
> +#endif /* NXEN > 0 */
> +
> +#if NHYPERV > 0
> +     /* Hypercall page needs to be page aligned */
> +     .text
> +     .align  NBPG, 0xcc
> +     .globl  _C_LABEL(hv_hypercall_page)
> +_C_LABEL(hv_hypercall_page):
> +     .skip   0x1000, 0xcc
> +#endif /* NHYPERV > 0 */
> Index: arch/amd64/conf/Makefile.amd64
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/conf/Makefile.amd64,v
> retrieving revision 1.76
> diff -u -p -u -r1.76 Makefile.amd64
> --- arch/amd64/conf/Makefile.amd64    8 May 2017 00:13:38 -0000       1.76
> +++ arch/amd64/conf/Makefile.amd64    30 May 2017 07:28:14 -0000
> @@ -30,6 +30,7 @@ CWARNFLAGS= -Werror -Wall -Wimplicit-fun
>  CMACHFLAGS=  -mcmodel=kernel -mno-red-zone -mno-sse2 -mno-sse -mno-3dnow \
>               -mno-mmx -msoft-float -fno-omit-frame-pointer
>  CMACHFLAGS+= -ffreestanding ${NOPIE_FLAGS}
> +SORTR=               sort -R
>  .if ${IDENT:M-DNO_PROPOLICE}
>  CMACHFLAGS+= -fno-stack-protector
>  .endif
> @@ -38,6 +39,7 @@ CMACHFLAGS+=        -msave-args
>  .endif
>  .if ${IDENT:M-DSMALL_KERNEL}
>  CMACHFLAGS+= -Wa,-n
> +SORTR=               cat
>  .endif
>  
>  DEBUG?=              -g
> @@ -73,12 +75,13 @@ NORMAL_S= ${CC} ${AFLAGS} ${CPPFLAGS} -c
>  #    ${SYSTEM_LD_HEAD}
>  #    ${SYSTEM_LD} swapxxx.o
>  #    ${SYSTEM_LD_TAIL}
> -SYSTEM_HEAD= locore.o param.o ioconf.o
> -SYSTEM_OBJ=  ${SYSTEM_HEAD} ${OBJS}
> +SYSTEM_HEAD= locore.o gap.o
> +SYSTEM_OBJ=  ${SYSTEM_HEAD} ${OBJS} param.o ioconf.o
>  SYSTEM_DEP=  Makefile ${SYSTEM_OBJ} ${LDSCRIPT}
>  SYSTEM_LD_HEAD=      @rm -f $@
>  SYSTEM_LD=   @echo ${LD} ${LINKFLAGS} -o $@ '$${SYSTEM_HEAD} vers.o $${OBJS}'; \
> -             ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} vers.o ${OBJS}
> +             echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | ${SORTR} > lorder; \
> +             ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} `cat lorder`
>  SYSTEM_LD_TAIL=      @${SIZE} $@; chmod 755 $@
>  
>  .if ${DEBUG} == "-g"
> @@ -122,8 +125,16 @@ vers.o: ${SYSTEM_DEP} ${SYSTEM_SWAP_DEP}
>       sh $S/conf/newvers.sh
>       ${CC} ${CFLAGS} ${CPPFLAGS} ${PROF} -c vers.c
>  
> +gap.S: ${SYSTEM_SWAP_DEP} Makefile
> +     #echo "#include <machine/asm.h>\n\t.text\n\t.space $$RANDOM*3,0xcc\n\t.align 4096,0xcc\n\t.globl endboot\n_C_LABEL(endboot):\n\t.space 4096+$$RANDOM%4096,0xcc\n\t.align 16,0xcc" > gap.S
> +     sh $S/conf/makegap.sh > gap.S
> +
> +gap.o:       gap.S
> +     ${CC} ${AFLAGS} ${CPPFLAGS} ${PROF} -c gap.S
> +
>  clean:
> -     rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} param.c
> +     rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} \
> +         gap.S lorder param.c
>  
>  cleandir: clean
>       rm -f Makefile *.h ioconf.c options machine ${_mach} vers.c
> @@ -136,7 +147,8 @@ db_structinfo.h: $S/ddb/db_structinfo.c 
>       rm -f db_structinfo.o
>  
>  locore.o: ${_machdir}/${_mach}/locore.S assym.h
> -mutex.o vector.o copy.o spl.o mptramp.o acpi_wakecode.o vmm_support.o: assym.h
> +locore2.o mutex.o vector.o copy.o spl.o: assym.h
> +mptramp.o acpi_wakecode.o vmm_support.o: assym.h
>  
>  # The install target can be redefined by putting a
>  # install-kernel-${MACHINE_NAME} target into /etc/mk.conf
> Index: arch/amd64/conf/files.amd64
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/conf/files.amd64,v
> retrieving revision 1.88
> diff -u -p -u -r1.88 files.amd64
> --- arch/amd64/conf/files.amd64       30 Apr 2017 13:04:49 -0000      1.88
> +++ arch/amd64/conf/files.amd64       28 May 2017 13:19:03 -0000
> @@ -11,6 +11,7 @@ file        arch/amd64/amd64/machdep.c
>  file arch/amd64/amd64/hibernate_machdep.c    hibernate
>  file arch/amd64/amd64/identcpu.c
>  file arch/amd64/amd64/via.c
> +file arch/amd64/amd64/locore2.S
>  file arch/amd64/amd64/aes_intel.S            crypto
>  file arch/amd64/amd64/aesni.c                crypto
>  file arch/amd64/amd64/amd64errata.c
> Index: arch/i386/conf/Makefile.i386
> ===================================================================
> RCS file: /cvs/src/sys/arch/i386/conf/Makefile.i386,v
> retrieving revision 1.103
> diff -u -p -u -r1.103 Makefile.i386
> --- arch/i386/conf/Makefile.i386      28 May 2017 13:20:37 -0000      1.103
> +++ arch/i386/conf/Makefile.i386      30 May 2017 07:26:53 -0000
> @@ -29,9 +29,13 @@ CWARNFLAGS=        -Werror -Wall -Wimplicit-fun
>  
>  CMACHFLAGS=
>  CMACHFLAGS+= -ffreestanding ${NOPIE_FLAGS}
> +SORTR=               sort -R
>  .if ${IDENT:M-DNO_PROPOLICE}
>  CMACHFLAGS+= -fno-stack-protector
>  .endif
> +.if ${IDENT:M-DSMALL_KERNEL}
> +SORTR=               cat
> +.endif
>  
>  DEBUG?=              -g
>  COPTS?=              -O2
> @@ -72,12 +76,13 @@ NORMAL_S= ${CC} ${AFLAGS} ${CPPFLAGS} -c
>  #    ${SYSTEM_LD_HEAD}
>  #    ${SYSTEM_LD} swapxxx.o
>  #    ${SYSTEM_LD_TAIL}
> -SYSTEM_HEAD= locore.o param.o ioconf.o
> -SYSTEM_OBJ=  ${SYSTEM_HEAD} ${OBJS}
> +SYSTEM_HEAD= locore.o gap.o
> +SYSTEM_OBJ=  ${SYSTEM_HEAD} ${OBJS} param.o ioconf.o
>  SYSTEM_DEP=  Makefile ${SYSTEM_OBJ} ${LDSCRIPT}
>  SYSTEM_LD_HEAD=      @rm -f $@
>  SYSTEM_LD=   @echo ${LD} ${LINKFLAGS} -o $@ '$${SYSTEM_HEAD} vers.o $${OBJS}'; \
> -             ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} vers.o ${OBJS}
> +             echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | ${SORTR} > lorder; \
> +             ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} `cat lorder`
>  SYSTEM_LD_TAIL=      @${SIZE} $@; chmod 755 $@
>  
>  .if ${DEBUG} == "-g"
> @@ -120,6 +125,13 @@ ioconf.o: ioconf.c
>  vers.o: ${SYSTEM_DEP} ${SYSTEM_SWAP_DEP}
>       sh $S/conf/newvers.sh
>       ${CC} ${CFLAGS} ${CPPFLAGS} ${PROF} -c vers.c
> +
> +gap.S:       ${SYSTEM_SWAP_DEP} Makefile
> +     #echo "#include <machine/asm.h>\n\t.text\n\t.space $$RANDOM*3,0xcc\n\t.align 4096,0xcc\n\t.globl endboot\n_C_LABEL(endboot):\n\t.space 4096+$$RANDOM%4096,0xcc\n\t.align 16,0xcc" > gap.S
> +     sh $S/conf/makegap.sh > gap.S
> +
> +gap.o:       gap.S
> +     ${CC} ${AFLAGS} ${CPPFLAGS} ${PROF} -c gap.S
>  
>  clean:
>       rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} param.c
> Index: arch/i386/conf/files.i386
> ===================================================================
> RCS file: /cvs/src/sys/arch/i386/conf/files.i386,v
> retrieving revision 1.232
> diff -u -p -u -r1.232 files.i386
> --- arch/i386/conf/files.i386 30 Apr 2017 13:04:49 -0000      1.232
> +++ arch/i386/conf/files.i386 29 May 2017 12:27:14 -0000
> @@ -23,6 +23,7 @@ file        arch/i386/i386/in_cksum.s
>  file arch/i386/i386/machdep.c
>  file arch/i386/i386/hibernate_machdep.c hibernate
>  file arch/i386/i386/via.c
> +file arch/i386/i386/locore2.S
>  file arch/i386/i386/amd64errata.c    !small_kernel
>  file arch/i386/i386/longrun.c        !small_kernel
>  file arch/i386/i386/mem.c
> Index: arch/i386/i386/autoconf.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/i386/i386/autoconf.c,v
> retrieving revision 1.101
> diff -u -p -u -r1.101 autoconf.c
> --- arch/i386/i386/autoconf.c 8 Jun 2016 17:24:44 -0000       1.101
> +++ arch/i386/i386/autoconf.c 29 May 2017 13:07:46 -0000
> @@ -109,6 +109,21 @@ void             viac3_crypto_setup(void);
>  extern int   i386_has_xcrypt;
>  #endif
>  
> +void
> +unmap_startup(void)
> +{
> +     extern void *kernel_text, *endboot;
> +     vaddr_t p;
> +
> +     printf("unmap kernel init code %lx-%lx\n",
> +         (vaddr_t)&kernel_text, (vaddr_t)&endboot);
> +     p = (vaddr_t)&kernel_text;
> +     do {
> +             pmap_kremove(p, PAGE_SIZE);
> +             p += NBPG;
> +     } while (p < (vaddr_t)&endboot);
> +}
> +
>  /*
>   * Determine i/o configuration for a machine.
>   */
> @@ -154,6 +169,8 @@ cpu_configure(void)
>  
>       proc0.p_addr->u_pcb.pcb_cr0 = rcr0();
>  
> +     unmap_startup();
> +
>  #ifdef MULTIPROCESSOR
>       /* propagate TSS configuration to the idle pcb's. */
>       cpu_init_idle_pcbs();
> @@ -165,6 +182,7 @@ cpu_configure(void)
>        * until we can checksum blocks to figure it out.
>        */
>       cold = 0;
> +
>  
>       /*
>        * At this point the RNG is running, and if FSXR is set we can
> Index: arch/i386/i386/locore.s
> ===================================================================
> RCS file: /cvs/src/sys/arch/i386/i386/locore.s,v
> retrieving revision 1.173
> diff -u -p -u -r1.173 locore.s
> --- arch/i386/i386/locore.s   12 May 2017 08:46:28 -0000      1.173
> +++ arch/i386/i386/locore.s   30 May 2017 07:53:26 -0000
> @@ -55,206 +55,20 @@
>  
>  #include <dev/isa/isareg.h>
>  
> -#if NLAPIC > 0
> -#include <machine/i82489reg.h>
> -#endif
> -
> -#ifndef SMALL_KERNEL
> -/*
> - * As stac/clac SMAP instructions are 3 bytes, we want the fastest
> - * 3 byte nop sequence possible here.  This will be replaced by
> - * stac/clac instructions if SMAP is detected after booting.
> - *
> - * Intel documents multi-byte NOP sequences as being available
> - * on all family 0x6 and 0xf processors (ie 686+)
> - * So use 3 of the single byte nops for compatibility
> - */
> -#define SMAP_NOP     .byte 0x90, 0x90, 0x90
> -#define SMAP_STAC    CODEPATCH_START                 ;\
> -                     SMAP_NOP                        ;\
> -                     CODEPATCH_END(CPTAG_STAC)
> -#define SMAP_CLAC    CODEPATCH_START                 ;\
> -                     SMAP_NOP                        ;\
> -                     CODEPATCH_END(CPTAG_CLAC)
> -
> -#else
> -
> -#define SMAP_STAC
> -#define SMAP_CLAC
> -
> -#endif
> -
> -
>  /*
>   * override user-land alignment before including asm.h
>   */
>  
>  #define      ALIGN_DATA      .align  4
>  #define      ALIGN_TEXT      .align  4,0x90  /* 4-byte boundaries, NOP-filled */
> -#define      SUPERALIGN_TEXT .align  16,0x90 /* 16-byte boundaries better for 486 */
>  #define _ALIGN_TEXT  ALIGN_TEXT
>  #include <machine/asm.h>
>  
> -#define CPL _C_LABEL(lapic_tpr)
> -
> -#define      GET_CURPCB(reg)                                 \
> -     movl    CPUVAR(CURPCB), reg
> -
> -#define      CHECK_ASTPENDING(treg)                          \
> -     movl    CPUVAR(CURPROC),treg            ;       \
> -     cmpl    $0, treg                        ;       \
> -     je      1f                              ;       \
> -     cmpl    $0,P_MD_ASTPENDING(treg)        ;       \
> -     1:
> -
> -#define      CLEAR_ASTPENDING(cpreg)                         \
> -     movl    $0,P_MD_ASTPENDING(cpreg)
> -
> -/*
> - * These are used on interrupt or trap entry or exit.
> - */
> -#define      INTRENTRY \
> -     cld                     ; \
> -     pushl   %eax            ; \
> -     pushl   %ecx            ; \
> -     pushl   %edx            ; \
> -     pushl   %ebx            ; \
> -     pushl   %ebp            ; \
> -     pushl   %esi            ; \
> -     pushl   %edi            ; \
> -     pushl   %ds             ; \
> -     pushl   %es             ; \
> -     pushl   %gs             ; \
> -     movl    $GSEL(GDATA_SEL, SEL_KPL),%eax  ; \
> -     movw    %ax,%ds         ; \
> -     movw    %ax,%es         ; \
> -     xorl    %eax,%eax       ; /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */ \
> -     movw    %ax,%gs         ; \
> -     pushl   %fs             ; \
> -     movl    $GSEL(GCPU_SEL, SEL_KPL),%eax   ; \
> -     movw    %ax,%fs
> -
> -#define      INTR_RESTORE_ALL \
> -     popl    %fs             ; \
> -     popl    %gs             ; \
> -     popl    %es             ; \
> -     popl    %ds             ; \
> -     popl    %edi            ; \
> -     popl    %esi            ; \
> -     popl    %ebp            ; \
> -     popl    %ebx            ; \
> -     popl    %edx            ; \
> -     popl    %ecx            ; \
> -     popl    %eax
> -
> -#define      INTRFASTEXIT \
> -     INTR_RESTORE_ALL        ;\
> -     addl    $8,%esp         ; \
> -     iret
> -
> -#define      INTR_FAKE_TRAP  0xbadabada
> -
> -/*
> - * PTmap is recursive pagemap at top of virtual address space.
> - * Within PTmap, the page directory can be found (third indirection).
> - */
> -     .globl  _C_LABEL(PTmap), _C_LABEL(PTD), _C_LABEL(PTDpde)
> -     .set    _C_LABEL(PTmap), (PDSLOT_PTE << PDSHIFT)
> -     .set    _C_LABEL(PTD), (_C_LABEL(PTmap) + PDSLOT_PTE * NBPG)
> -     .set    _C_LABEL(PTDpde), (_C_LABEL(PTD) + PDSLOT_PTE * 4)      # XXX 4 == sizeof pde
> -
> -/*
> - * APTmap, APTD is the alternate recursive pagemap.
> - * It's used when modifying another process's page tables.
> - */
> -     .globl  _C_LABEL(APTmap), _C_LABEL(APTD), _C_LABEL(APTDpde)
> -     .set    _C_LABEL(APTmap), (PDSLOT_APTE << PDSHIFT)
> -     .set    _C_LABEL(APTD), (_C_LABEL(APTmap) + PDSLOT_APTE * NBPG)
> -     # XXX 4 == sizeof pde
> -     .set    _C_LABEL(APTDpde), (_C_LABEL(PTD) + PDSLOT_APTE * 4)
> -
>  /*
>   * Initialization
>   */
>       .data
>  
> -     .globl  _C_LABEL(cpu), _C_LABEL(cpu_id), _C_LABEL(cpu_vendor)
> -     .globl  _C_LABEL(cpu_brandstr)
> -     .globl  _C_LABEL(cpuid_level)
> -     .globl  _C_LABEL(cpu_miscinfo)
> -     .globl  _C_LABEL(cpu_feature), _C_LABEL(cpu_ecxfeature)
> -     .globl  _C_LABEL(ecpu_feature), _C_LABEL(ecpu_eaxfeature)
> -     .globl  _C_LABEL(ecpu_ecxfeature)
> -     .globl  _C_LABEL(cpu_cache_eax), _C_LABEL(cpu_cache_ebx)
> -     .globl  _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx)
> -     .globl  _C_LABEL(cpu_perf_eax)
> -     .globl  _C_LABEL(cpu_perf_ebx)
> -     .globl  _C_LABEL(cpu_perf_edx)
> -     .globl  _C_LABEL(cpu_apmi_edx)
> -     .globl  _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem)
> -     .globl  _C_LABEL(cpu_pae)
> -     .globl  _C_LABEL(esym)
> -     .globl  _C_LABEL(ssym)
> -     .globl  _C_LABEL(nkptp_max)
> -     .globl  _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase)
> -     .globl  _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr), _C_LABEL(PTDsize)
> -     .globl  _C_LABEL(gdt)
> -     .globl  _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv)
> -     .globl  _C_LABEL(lapic_tpr)
> -
> -#if NLAPIC > 0
> -     .align NBPG
> -     .globl _C_LABEL(local_apic), _C_LABEL(lapic_id)
> -_C_LABEL(local_apic):
> -     .space  LAPIC_ID
> -_C_LABEL(lapic_id):
> -     .long   0x00000000
> -     .space  LAPIC_TPRI-(LAPIC_ID+4)
> -_C_LABEL(lapic_tpr):
> -     .space  LAPIC_PPRI-LAPIC_TPRI
> -_C_LABEL(lapic_ppr):
> -     .space  LAPIC_ISR-LAPIC_PPRI
> -_C_LABEL(lapic_isr):
> -     .space  NBPG-LAPIC_ISR
> -#else
> -_C_LABEL(lapic_tpr):
> -     .long   0
> -#endif
> -
> -_C_LABEL(cpu):               .long   0       # are we 386, 386sx, 486, 586 or 686
> -_C_LABEL(cpu_id):    .long   0       # saved from 'cpuid' instruction
> -_C_LABEL(cpu_pae):   .long   0       # are we using PAE paging mode?
> -_C_LABEL(cpu_miscinfo):      .long   0       # misc info (apic/brand id) from 'cpuid'
> -_C_LABEL(cpu_feature):       .long   0       # feature flags from 'cpuid' instruction
> -_C_LABEL(ecpu_feature): .long        0       # extended feature flags from 'cpuid'
> -_C_LABEL(cpu_ecxfeature):.long       0       # ecx feature flags from 'cpuid'
> -_C_LABEL(ecpu_eaxfeature): .long 0   # extended eax feature flags
> -_C_LABEL(ecpu_ecxfeature): .long 0   # extended ecx feature flags
> -_C_LABEL(cpuid_level):       .long   -1      # max. lvl accepted by 'cpuid' insn
> -_C_LABEL(cpu_cache_eax):.long        0
> -_C_LABEL(cpu_cache_ebx):.long        0
> -_C_LABEL(cpu_cache_ecx):.long        0
> -_C_LABEL(cpu_cache_edx):.long        0
> -_C_LABEL(cpu_perf_eax):      .long   0       # arch. perf. mon. flags from 'cpuid'
> -_C_LABEL(cpu_perf_ebx):      .long   0       # arch. perf. mon. flags from 'cpuid'
> -_C_LABEL(cpu_perf_edx):      .long   0       # arch. perf. mon. flags from 'cpuid'
> -_C_LABEL(cpu_apmi_edx):      .long   0       # adv. power management info. 'cpuid'
> -_C_LABEL(cpu_vendor): .space 16      # vendor string returned by 'cpuid' instruction
> -_C_LABEL(cpu_brandstr):      .space 48 # brand string returned by 'cpuid'
> -_C_LABEL(cold):              .long   1       # cold till we are not
> -_C_LABEL(ssym):              .long   0       # ptr to start of syms
> -_C_LABEL(esym):              .long   0       # ptr to end of syms
> -_C_LABEL(cnvmem):    .long   0       # conventional memory size
> -_C_LABEL(extmem):    .long   0       # extended memory size
> -_C_LABEL(atdevbase): .long   0       # location of start of iomem in virtual
> -_C_LABEL(bootapiver):        .long   0       # /boot API version
> -_C_LABEL(bootargc):  .long   0       # /boot argc
> -_C_LABEL(bootargv):  .long   0       # /boot argv
> -_C_LABEL(bootdev):   .long   0       # device we booted from
> -_C_LABEL(proc0paddr):        .long   0
> -_C_LABEL(PTDpaddr):  .long   0       # paddr of PTD, for libkvm
> -_C_LABEL(PTDsize):   .long   NBPG    # size of PTD, for libkvm
> -
>       .space 512
>  tmpstk:
>  
> @@ -666,1092 +480,3 @@ begin:
>  
>       call    _C_LABEL(main)
>       /* NOTREACHED */
> -
> -NENTRY(proc_trampoline)
> -#ifdef MULTIPROCESSOR
> -     call    _C_LABEL(proc_trampoline_mp)
> -#endif
> -     movl    $IPL_NONE,CPL
> -     pushl   %ebx
> -     call    *%esi
> -     addl    $4,%esp
> -     INTRFASTEXIT
> -     /* NOTREACHED */
> -
> -     /* This must come before any use of the CODEPATCH macros */
> -       .section .codepatch,"a"
> -       .align  8
> -       .globl _C_LABEL(codepatch_begin)
> -_C_LABEL(codepatch_begin):
> -       .previous
> -
> -       .section .codepatchend,"a"
> -       .globl _C_LABEL(codepatch_end)
> -_C_LABEL(codepatch_end):
> -       .previous
> -
> -/*****************************************************************************/
> -
> -/*
> - * Signal trampoline; copied to top of user stack.
> - */
> -     .section .rodata
> -     .globl  _C_LABEL(sigcode)
> -_C_LABEL(sigcode):
> -     call    *SIGF_HANDLER(%esp)
> -     leal    SIGF_SC(%esp),%eax      # scp (the call may have clobbered the
> -                                     # copy at SIGF_SCP(%esp))
> -     pushl   %eax
> -     pushl   %eax                    # junk to fake return address
> -     movl    $SYS_sigreturn,%eax
> -     int     $0x80                   # enter kernel with args on stack
> -     .globl  _C_LABEL(sigcoderet)
> -_C_LABEL(sigcoderet):
> -     movl    $SYS_exit,%eax
> -     int     $0x80                   # exit if sigreturn fails
> -     .globl  _C_LABEL(esigcode)
> -_C_LABEL(esigcode):
> -
> -     .globl  _C_LABEL(sigfill)
> -_C_LABEL(sigfill):
> -     int3
> -_C_LABEL(esigfill):
> -
> -     .data
> -     .globl  _C_LABEL(sigfillsiz)
> -_C_LABEL(sigfillsiz):
> -     .long   _C_LABEL(esigfill) - _C_LABEL(sigfill)
> -
> -     .text
> -
> -/*****************************************************************************/
> -
> -/*
> - * The following primitives are used to fill and copy regions of memory.
> - */
> -
> -/* Frame pointer reserve on stack. */
> -#ifdef DDB
> -#define FPADD 4
> -#else
> -#define FPADD 0
> -#endif
> -
> -/*
> - * kcopy(caddr_t from, caddr_t to, size_t len);
> - * Copy len bytes, abort on fault.
> - */
> -ENTRY(kcopy)
> -#ifdef DDB
> -     pushl   %ebp
> -     movl    %esp,%ebp
> -#endif
> -     pushl   %esi
> -     pushl   %edi
> -     GET_CURPCB(%eax)                # load curpcb into eax and set on-fault
> -     pushl   PCB_ONFAULT(%eax)
> -     movl    $_C_LABEL(copy_fault), PCB_ONFAULT(%eax)
> -
> -     movl    16+FPADD(%esp),%esi
> -     movl    20+FPADD(%esp),%edi
> -     movl    24+FPADD(%esp),%ecx
> -     movl    %edi,%eax
> -     subl    %esi,%eax
> -     cmpl    %ecx,%eax               # overlapping?
> -     jb      1f
> -     shrl    $2,%ecx                 # nope, copy forward by 32-bit words
> -     rep
> -     movsl
> -     movl    24+FPADD(%esp),%ecx
> -     andl    $3,%ecx                 # any bytes left?
> -     rep
> -     movsb
> -
> -     GET_CURPCB(%edx)                # XXX save curpcb?
> -     popl    PCB_ONFAULT(%edx)
> -     popl    %edi
> -     popl    %esi
> -     xorl    %eax,%eax
> -#ifdef DDB
> -     leave
> -#endif
> -     ret
> -
> -     ALIGN_TEXT
> -1:   addl    %ecx,%edi               # copy backward
> -     addl    %ecx,%esi
> -     std
> -     andl    $3,%ecx                 # any fractional bytes?
> -     decl    %edi
> -     decl    %esi
> -     rep
> -     movsb
> -     movl    24+FPADD(%esp),%ecx     # copy remainder by 32-bit words
> -     shrl    $2,%ecx
> -     subl    $3,%esi
> -     subl    $3,%edi
> -     rep
> -     movsl
> -     cld
> -
> -     GET_CURPCB(%edx)
> -     popl    PCB_ONFAULT(%edx)
> -     popl    %edi
> -     popl    %esi
> -     xorl    %eax,%eax
> -#ifdef DDB
> -     leave
> -#endif
> -     ret
> -     
> -/*****************************************************************************/
> -
> -/*
> - * The following primitives are used to copy data in and out of the user's
> - * address space.
> - */
> -
> -/*
> - * copyout(caddr_t from, caddr_t to, size_t len);
> - * Copy len bytes into the user's address space.
> - */
> -ENTRY(copyout)
> -#ifdef DDB
> -     pushl   %ebp
> -     movl    %esp,%ebp
> -#endif
> -     pushl   %esi
> -     pushl   %edi
> -     pushl   $0      
> -     
> -     movl    16+FPADD(%esp),%esi
> -     movl    20+FPADD(%esp),%edi
> -     movl    24+FPADD(%esp),%eax
> -
> -     /*
> -      * We check that the end of the destination buffer is not past the end
> -      * of the user's address space.  If it's not, then we only need to
> -      * check that each page is writable.  The 486 will do this for us; the
> -      * 386 will not.  (We assume that pages in user space that are not
> -      * writable by the user are not writable by the kernel either.)
> -      */
> -     movl    %edi,%edx
> -     addl    %eax,%edx
> -     jc      _C_LABEL(copy_fault)
> -     cmpl    $VM_MAXUSER_ADDRESS,%edx
> -     ja      _C_LABEL(copy_fault)
> -
> -     GET_CURPCB(%edx)
> -     movl    $_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
> -     SMAP_STAC
> -
> -     /* bcopy(%esi, %edi, %eax); */
> -     movl    %eax,%ecx
> -     shrl    $2,%ecx
> -     rep
> -     movsl
> -     movl    %eax,%ecx
> -     andl    $3,%ecx
> -     rep
> -     movsb
> -
> -     SMAP_CLAC
> -     popl    PCB_ONFAULT(%edx)
> -     popl    %edi
> -     popl    %esi
> -     xorl    %eax,%eax
> -#ifdef DDB
> -     leave
> -#endif
> -     ret
> -
> -/*
> - * copyin(caddr_t from, caddr_t to, size_t len);
> - * Copy len bytes from the user's address space.
> - */
> -ENTRY(copyin)
> -#ifdef DDB
> -     pushl   %ebp
> -     movl    %esp,%ebp
> -#endif
> -     pushl   %esi
> -     pushl   %edi
> -     GET_CURPCB(%eax)
> -     pushl   $0
> -     movl    $_C_LABEL(copy_fault),PCB_ONFAULT(%eax)
> -     SMAP_STAC
> -     
> -     movl    16+FPADD(%esp),%esi
> -     movl    20+FPADD(%esp),%edi
> -     movl    24+FPADD(%esp),%eax
> -
> -     /*
> -      * We check that the end of the destination buffer is not past the end
> -      * of the user's address space.  If it's not, then we only need to
> -      * check that each page is readable, and the CPU will do that for us.
> -      */
> -     movl    %esi,%edx
> -     addl    %eax,%edx
> -     jc      _C_LABEL(copy_fault)
> -     cmpl    $VM_MAXUSER_ADDRESS,%edx
> -     ja      _C_LABEL(copy_fault)
> -
> -     /* bcopy(%esi, %edi, %eax); */
> -     movl    %eax,%ecx
> -     shrl    $2,%ecx
> -     rep
> -     movsl
> -     movb    %al,%cl
> -     andb    $3,%cl
> -     rep
> -     movsb
> -
> -     SMAP_CLAC
> -     GET_CURPCB(%edx)
> -     popl    PCB_ONFAULT(%edx)
> -     popl    %edi
> -     popl    %esi
> -     xorl    %eax,%eax
> -#ifdef DDB
> -     leave
> -#endif
> -     ret
> -
> -ENTRY(copy_fault)
> -     SMAP_CLAC
> -     GET_CURPCB(%edx)
> -     popl    PCB_ONFAULT(%edx)
> -     popl    %edi
> -     popl    %esi
> -     movl    $EFAULT,%eax
> -#ifdef DDB
> -     leave
> -#endif
> -     ret
> -
> -/*
> - * copyoutstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
> - * Copy a NUL-terminated string, at most maxlen characters long, into the
> - * user's address space.  Return the number of characters copied (including the
> - * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
> - * return 0 or EFAULT.
> - */
> -ENTRY(copyoutstr)
> -#ifdef DDB
> -     pushl   %ebp
> -     movl    %esp,%ebp
> -#endif
> -     pushl   %esi
> -     pushl   %edi
> -
> -     movl    12+FPADD(%esp),%esi             # esi = from
> -     movl    16+FPADD(%esp),%edi             # edi = to
> -     movl    20+FPADD(%esp),%edx             # edx = maxlen
> -
> -5:   GET_CURPCB(%eax)
> -     movl    $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax)
> -     SMAP_STAC
> -     /*
> -      * Get min(%edx, VM_MAXUSER_ADDRESS-%edi).
> -      */
> -     movl    $VM_MAXUSER_ADDRESS,%eax
> -     subl    %edi,%eax
> -     jbe     _C_LABEL(copystr_fault)         # die if CF == 1 || ZF == 1
> -                                             # i.e. make sure that %edi
> -                                             # is below VM_MAXUSER_ADDRESS
> -
> -     cmpl    %edx,%eax
> -     jae     1f
> -     movl    %eax,%edx
> -     movl    %eax,20+FPADD(%esp)
> -
> -1:   incl    %edx
> -
> -1:   decl    %edx
> -     jz      2f
> -     lodsb
> -     stosb
> -     testb   %al,%al
> -     jnz     1b
> -
> -     /* Success -- 0 byte reached. */
> -     decl    %edx
> -     xorl    %eax,%eax
> -     jmp     copystr_return
> -
> -2:   /* edx is zero -- return EFAULT or ENAMETOOLONG. */
> -     cmpl    $VM_MAXUSER_ADDRESS,%edi
> -     jae     _C_LABEL(copystr_fault)
> -     movl    $ENAMETOOLONG,%eax
> -     jmp     copystr_return
> -
> -/*
> - * copyinstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
> - * Copy a NUL-terminated string, at most maxlen characters long, from the
> - * user's address space.  Return the number of characters copied (including the
> - * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
> - * return 0 or EFAULT.
> - */
> -ENTRY(copyinstr)
> -#ifdef DDB
> -     pushl   %ebp
> -     movl    %esp,%ebp
> -#endif
> -     pushl   %esi
> -     pushl   %edi
> -     GET_CURPCB(%ecx)
> -     movl    $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx)
> -     SMAP_STAC
> -
> -     movl    12+FPADD(%esp),%esi             # %esi = from
> -     movl    16+FPADD(%esp),%edi             # %edi = to
> -     movl    20+FPADD(%esp),%edx             # %edx = maxlen
> -
> -     /*
> -      * Get min(%edx, VM_MAXUSER_ADDRESS-%esi).
> -      */
> -     movl    $VM_MAXUSER_ADDRESS,%eax
> -     subl    %esi,%eax
> -     jbe     _C_LABEL(copystr_fault)         # Error if CF == 1 || ZF == 1
> -                                             # i.e. make sure that %esi
> -                                             # is below VM_MAXUSER_ADDRESS
> -     cmpl    %edx,%eax
> -     jae     1f
> -     movl    %eax,%edx
> -     movl    %eax,20+FPADD(%esp)
> -
> -1:   incl    %edx
> -
> -1:   decl    %edx
> -     jz      2f
> -     lodsb
> -     stosb
> -     testb   %al,%al
> -     jnz     1b
> -
> -     /* Success -- 0 byte reached. */
> -     decl    %edx
> -     xorl    %eax,%eax
> -     jmp     copystr_return
> -
> -2:   /* edx is zero -- return EFAULT or ENAMETOOLONG. */
> -     cmpl    $VM_MAXUSER_ADDRESS,%esi
> -     jae     _C_LABEL(copystr_fault)
> -     movl    $ENAMETOOLONG,%eax
> -     jmp     copystr_return
> -
> -ENTRY(copystr_fault)
> -     movl    $EFAULT,%eax
> -
> -copystr_return:
> -     SMAP_CLAC
> -     /* Set *lencopied and return %eax. */
> -     GET_CURPCB(%ecx)
> -     movl    $0,PCB_ONFAULT(%ecx)
> -     movl    20+FPADD(%esp),%ecx
> -     subl    %edx,%ecx
> -     movl    24+FPADD(%esp),%edx
> -     testl   %edx,%edx
> -     jz      8f
> -     movl    %ecx,(%edx)
> -
> -8:   popl    %edi
> -     popl    %esi
> -#ifdef DDB
> -     leave
> -#endif
> -     ret
> -
> -/*
> - * copystr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
> - * Copy a NUL-terminated string, at most maxlen characters long.  Return the
> - * number of characters copied (including the NUL) in *lencopied.  If the
> - * string is too long, return ENAMETOOLONG; else return 0.
> - */
> -ENTRY(copystr)
> -#ifdef DDB
> -     pushl   %ebp
> -     movl    %esp,%ebp
> -#endif
> -     pushl   %esi
> -     pushl   %edi
> -
> -     movl    12+FPADD(%esp),%esi             # esi = from
> -     movl    16+FPADD(%esp),%edi             # edi = to
> -     movl    20+FPADD(%esp),%edx             # edx = maxlen
> -     incl    %edx
> -
> -1:   decl    %edx
> -     jz      4f
> -     lodsb
> -     stosb
> -     testb   %al,%al
> -     jnz     1b
> -
> -     /* Success -- 0 byte reached. */
> -     decl    %edx
> -     xorl    %eax,%eax
> -     jmp     6f
> -
> -4:   /* edx is zero -- return ENAMETOOLONG. */
> -     movl    $ENAMETOOLONG,%eax
> -
> -6:   /* Set *lencopied and return %eax. */
> -     movl    20+FPADD(%esp),%ecx
> -     subl    %edx,%ecx
> -     movl    24+FPADD(%esp),%edx
> -     testl   %edx,%edx
> -     jz      7f
> -     movl    %ecx,(%edx)
> -
> -7:   popl    %edi
> -     popl    %esi
> -#ifdef DDB
> -     leave
> -#endif
> -     ret
> -
> -/*****************************************************************************/
> -
> -/*
> - * The following is i386-specific nonsense.
> - */
> -
> -/*
> - * void lgdt(struct region_descriptor *rdp);
> - * Change the global descriptor table.
> - */
> -NENTRY(lgdt)
> -     /* Reload the descriptor table. */
> -     movl    4(%esp),%eax
> -     lgdt    (%eax)
> -     /* Flush the prefetch q. */
> -     jmp     1f
> -     nop
> -1:   /* Reload "stale" selectors. */
> -     movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
> -     movw    %ax,%ds
> -     movw    %ax,%es
> -     movw    %ax,%ss
> -     movl    $GSEL(GCPU_SEL, SEL_KPL),%eax
> -     movw    %ax,%fs
> -     /* Reload code selector by doing intersegment return. */
> -     popl    %eax
> -     pushl   $GSEL(GCODE_SEL, SEL_KPL)
> -     pushl   %eax
> -     lret
> -
> -ENTRY(setjmp)
> -     movl    4(%esp),%eax
> -     movl    %ebx,(%eax)             # save ebx
> -     movl    %esp,4(%eax)            # save esp
> -     movl    %ebp,8(%eax)            # save ebp
> -     movl    %esi,12(%eax)           # save esi
> -     movl    %edi,16(%eax)           # save edi
> -     movl    (%esp),%edx             # get rta
> -     movl    %edx,20(%eax)           # save eip
> -     xorl    %eax,%eax               # return (0);
> -     ret
> -
> -ENTRY(longjmp)
> -     movl    4(%esp),%eax
> -     movl    (%eax),%ebx             # restore ebx
> -     movl    4(%eax),%esp            # restore esp
> -     movl    8(%eax),%ebp            # restore ebp
> -     movl    12(%eax),%esi           # restore esi
> -     movl    16(%eax),%edi           # restore edi
> -     movl    20(%eax),%edx           # get rta
> -     movl    %edx,(%esp)             # put in return frame
> -     xorl    %eax,%eax               # return (1);
> -     incl    %eax
> -     ret
> -
> -/*****************************************************************************/
> -             
> -/*
> - * cpu_switchto(struct proc *old, struct proc *new)
> - * Switch from the "old" proc to the "new" proc. If "old" is NULL, we
> - * don't need to bother saving old context.
> - */
> -ENTRY(cpu_switchto)
> -     pushl   %ebx
> -     pushl   %esi
> -     pushl   %edi
> -
> -     movl    16(%esp), %esi
> -     movl    20(%esp), %edi
> -
> -     /* If old process exited, don't bother. */
> -     testl   %esi,%esi
> -     jz      switch_exited
> -
> -     /* Save old stack pointers. */
> -     movl    P_ADDR(%esi),%ebx
> -     movl    %esp,PCB_ESP(%ebx)
> -     movl    %ebp,PCB_EBP(%ebx)
> -
> -switch_exited:
> -     /* Restore saved context. */
> -
> -     /* No interrupts while loading new state. */
> -     cli
> -
> -     /* Record new process. */
> -     movl    %edi, CPUVAR(CURPROC)
> -     movb    $SONPROC, P_STAT(%edi)
> -
> -     /* Restore stack pointers. */
> -     movl    P_ADDR(%edi),%ebx
> -     movl    PCB_ESP(%ebx),%esp
> -     movl    PCB_EBP(%ebx),%ebp
> -
> -     /* Record new pcb. */
> -     movl    %ebx, CPUVAR(CURPCB)
> -
> -     /*
> -      * Activate the address space.  The pcb copy of %cr3 will
> -      * be refreshed from the pmap, and because we're
> -      * curproc they'll both be reloaded into the CPU.
> -      */
> -     pushl   %edi
> -     pushl   %esi
> -     call    _C_LABEL(pmap_switch)
> -     addl    $8,%esp
> -
> -     /* Load TSS info. */
> -     movl    CPUVAR(GDT),%eax
> -     movl    P_MD_TSS_SEL(%edi),%edx
> -
> -     /* Switch TSS. */
> -     andl    $~0x0200,4-SEL_KPL(%eax,%edx,1)
> -     ltr     %dx
> -
> -     /* Restore cr0 (including FPU state). */
> -     movl    PCB_CR0(%ebx),%ecx
> -#ifdef MULTIPROCESSOR
> -     /*
> -      * If our floating point registers are on a different CPU,
> -      * clear CR0_TS so we'll trap rather than reuse bogus state.
> -      */
> -     movl    CPUVAR(SELF), %esi
> -     cmpl    PCB_FPCPU(%ebx), %esi
> -     jz      1f
> -     orl     $CR0_TS,%ecx
> -1:   
> -#endif       
> -     movl    %ecx,%cr0
> -
> -     /* Interrupts are okay again. */
> -     sti
> -
> -     popl    %edi
> -     popl    %esi
> -     popl    %ebx
> -     ret
> -
> -ENTRY(cpu_idle_enter)
> -     movl    _C_LABEL(cpu_idle_enter_fcn),%eax
> -     cmpl    $0,%eax
> -     je      1f
> -     jmpl    *%eax
> -1:
> -     ret
> -
> -ENTRY(cpu_idle_cycle)
> -     movl    _C_LABEL(cpu_idle_cycle_fcn),%eax
> -     cmpl    $0,%eax
> -     je      1f
> -     call    *%eax
> -     ret
> -1:
> -     sti
> -     hlt
> -     ret
> -
> -ENTRY(cpu_idle_leave)
> -     movl    _C_LABEL(cpu_idle_leave_fcn),%eax
> -     cmpl    $0,%eax
> -     je      1f
> -     jmpl    *%eax
> -1:
> -     ret
> -
> -/*
> - * savectx(struct pcb *pcb);
> - * Update pcb, saving current processor state.
> - */
> -ENTRY(savectx)
> -     movl    4(%esp),%edx            # edx = p->p_addr
> -
> -     /* Save stack pointers. */
> -     movl    %esp,PCB_ESP(%edx)
> -     movl    %ebp,PCB_EBP(%edx)
> -
> -     movl    PCB_FLAGS(%edx),%ecx
> -     orl     $PCB_SAVECTX,%ecx
> -     movl    %ecx,PCB_FLAGS(%edx)
> -
> -     ret
> -
> -/*****************************************************************************/
> -
> -/*
> - * Trap and fault vector routines
> - *
> - * On exit from the kernel to user mode, we always need to check for ASTs.  In
> - * addition, we need to do this atomically; otherwise an interrupt may occur
> - * which causes an AST, but it won't get processed until the next kernel entry
> - * (possibly the next clock tick).  Thus, we disable interrupt before checking,
> - * and only enable them again on the final `iret' or before calling the AST
> - * handler.
> - */
> -#define      IDTVEC(name)    ALIGN_TEXT; .globl X##name; X##name:
> -
> -#define      TRAP(a)         pushl $(a) ; jmp _C_LABEL(alltraps)
> -#define      ZTRAP(a)        pushl $0 ; TRAP(a)
> -
> -
> -     .text
> -IDTVEC(div)
> -     ZTRAP(T_DIVIDE)
> -IDTVEC(dbg)
> -     subl    $4,%esp
> -     pushl   %eax
> -     movl    %dr6,%eax
> -     movl    %eax,4(%esp)
> -     andb    $~0xf,%al
> -     movl    %eax,%dr6
> -     popl    %eax
> -     TRAP(T_TRCTRAP)
> -IDTVEC(nmi)
> -     ZTRAP(T_NMI)
> -IDTVEC(bpt)
> -     ZTRAP(T_BPTFLT)
> -IDTVEC(ofl)
> -     ZTRAP(T_OFLOW)
> -IDTVEC(bnd)
> -     ZTRAP(T_BOUND)
> -IDTVEC(ill)
> -     ZTRAP(T_PRIVINFLT)
> -IDTVEC(dna)
> -#if NNPX > 0
> -     pushl   $0                      # dummy error code
> -     pushl   $T_DNA
> -     INTRENTRY
> -#ifdef MULTIPROCESSOR
> -     pushl   CPUVAR(SELF)
> -#else
> -     pushl   $_C_LABEL(cpu_info_primary)
> -#endif
> -     call    *_C_LABEL(npxdna_func)
> -     addl    $4,%esp
> -     testl   %eax,%eax
> -     jz      calltrap
> -     INTRFASTEXIT
> -#else
> -     ZTRAP(T_DNA)
> -#endif
> -IDTVEC(dble)
> -     TRAP(T_DOUBLEFLT)
> -IDTVEC(fpusegm)
> -     ZTRAP(T_FPOPFLT)
> -IDTVEC(tss)
> -     TRAP(T_TSSFLT)
> -IDTVEC(missing)
> -     TRAP(T_SEGNPFLT)
> -IDTVEC(stk)
> -     TRAP(T_STKFLT)
> -IDTVEC(prot)
> -     TRAP(T_PROTFLT)
> -IDTVEC(f00f_redirect)
> -     pushl   $T_PAGEFLT
> -     INTRENTRY
> -     testb   $PGEX_U,TF_ERR(%esp)
> -     jnz     calltrap
> -     movl    %cr2,%eax
> -     subl    _C_LABEL(idt),%eax
> -     cmpl    $(6*8),%eax
> -     jne     calltrap
> -     movb    $T_PRIVINFLT,TF_TRAPNO(%esp)
> -     jmp     calltrap
> -IDTVEC(page)
> -     TRAP(T_PAGEFLT)
> -IDTVEC(rsvd)
> -     ZTRAP(T_RESERVED)
> -IDTVEC(mchk)
> -     ZTRAP(T_MACHK)
> -IDTVEC(simd)
> -     ZTRAP(T_XFTRAP)
> -IDTVEC(intrspurious)
> -     /*
> -      * The Pentium Pro local APIC may erroneously call this vector for a
> -      * default IR7.  Just ignore it.
> -      *
> -      * (The local APIC does this when CPL is raised while it's on the
> -      * way to delivering an interrupt.. presumably enough has been set
> -      * up that it's inconvenient to abort delivery completely..)
> -      */
> -     iret
> -IDTVEC(fpu)
> -#if NNPX > 0
> -     /*
> -      * Handle like an interrupt so that we can call npxintr to clear the
> -      * error.  It would be better to handle npx interrupts as traps but
> -      * this is difficult for nested interrupts.
> -      */
> -     subl    $8,%esp                 /* space for tf_{err,trapno} */
> -     INTRENTRY
> -     pushl   CPL                     # if_ppl in intrframe
> -     pushl   %esp                    # push address of intrframe
> -     incl    _C_LABEL(uvmexp)+V_TRAP
> -     call    _C_LABEL(npxintr)
> -     addl    $8,%esp                 # pop address and if_ppl
> -     INTRFASTEXIT
> -#else
> -     ZTRAP(T_ARITHTRAP)
> -#endif
> -IDTVEC(align)
> -     ZTRAP(T_ALIGNFLT)
> -     /* 18 - 31 reserved for future exp */
> -
> -/*
> - * If an error is detected during trap, syscall, or interrupt exit, trap() will
> - * change %eip to point to one of these labels.  We clean up the stack, if
> - * necessary, and resume as if we were handling a general protection fault.
> - * This will cause the process to get a SIGBUS.
> - */
> -NENTRY(resume_iret)
> -     ZTRAP(T_PROTFLT)
> -NENTRY(resume_pop_ds)
> -     pushl   %es
> -     movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
> -     movw    %ax,%es
> -NENTRY(resume_pop_es)
> -     pushl   %gs
> -     xorl    %eax,%eax       /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */
> -     movw    %ax,%gs
> -NENTRY(resume_pop_gs)
> -     pushl   %fs
> -     movl    $GSEL(GCPU_SEL, SEL_KPL),%eax
> -     movw    %ax,%fs
> -NENTRY(resume_pop_fs)
> -     movl    $T_PROTFLT,TF_TRAPNO(%esp)
> -     sti
> -     jmp     calltrap
> -
> -/*
> - * All traps go through here. Call the generic trap handler, and
> - * check for ASTs afterwards.
> - */
> -NENTRY(alltraps)
> -     INTRENTRY
> -     sti
> -calltrap:
> -#ifdef DIAGNOSTIC
> -     movl    CPL,%ebx
> -#endif /* DIAGNOSTIC */
> -#if !defined(GPROF) && defined(DDBPROF)
> -     cmpl    $T_BPTFLT,TF_TRAPNO(%esp)
> -     jne     .Lreal_trap
> -
> -     pushl   %esp
> -     call    _C_LABEL(db_prof_hook)
> -     addl    $4,%esp
> -     cmpl    $1,%eax
> -     jne     .Lreal_trap
> -
> -     /*
> -      * Abuse the error field to indicate that INTRFASTEXIT needs
> -      * to emulate the patched instruction.
> -      */
> -     movl    $INTR_FAKE_TRAP, TF_ERR(%esp)
> -     jz      2f
> -.Lreal_trap:
> -#endif /* !defined(GPROF) && defined(DDBPROF) */
> -     pushl   %esp
> -     call    _C_LABEL(trap)
> -     addl    $4,%esp
> -2:   /* Check for ASTs on exit to user mode. */
> -     cli
> -     CHECK_ASTPENDING(%ecx)
> -     je      1f
> -     testb   $SEL_RPL,TF_CS(%esp)
> -#ifdef VM86
> -     jnz     5f
> -     testl   $PSL_VM,TF_EFLAGS(%esp)
> -#endif
> -     jz      1f
> -5:   CLEAR_ASTPENDING(%ecx)
> -     sti
> -     pushl   %esp
> -     call    _C_LABEL(ast)
> -     addl    $4,%esp
> -     jmp     2b
> -1:
> -#if !defined(GPROF) && defined(DDBPROF)
> -     /*
> -      * If we are returning from a probe trap we need to fix the
> -      * stack layout and emulate the patched instruction.
> -      *
> -      * The code below does that by trashing %eax, so it MUST be
> -      * restored afterward.
> -      */
> -     cmpl    $INTR_FAKE_TRAP, TF_ERR(%esp)
> -     je      .Lprobe_fixup
> -#endif /* !defined(GPROF) && defined(DDBPROF) */
> -#ifndef DIAGNOSTIC
> -     INTRFASTEXIT
> -#else
> -     cmpl    CPL,%ebx
> -     jne     3f
> -     INTRFASTEXIT
> -3:   sti
> -     pushl   $4f
> -     call    _C_LABEL(printf)
> -     addl    $4,%esp
> -#if defined(DDB) && 0
> -     int     $3
> -#endif /* DDB */
> -     movl    %ebx,CPL
> -     jmp     2b
> -4:   .asciz  "WARNING: SPL NOT LOWERED ON TRAP EXIT\n"
> -#endif /* DIAGNOSTIC */
> -
> -#if !defined(GPROF) && defined(DDBPROF)
> -.Lprobe_fixup:
> -     /* Restore all register unwinding the stack. */
> -     INTR_RESTORE_ALL
> -
> -     /*
> -      * Use the space left by ``err'' and ``trapno'' to emulate
> -      * "pushl %ebp".
> -      *
> -      * Temporarily save %eax.
> -      */
> -     movl    %eax,0(%esp)
> -
> -     /* Shift hardware-saved registers: eip, cs, eflags */
> -     movl    8(%esp),%eax
> -     movl    %eax,4(%esp)
> -     movl    12(%esp),%eax
> -     movl    %eax,8(%esp)
> -     movl    16(%esp),%eax
> -     movl    %eax,12(%esp)
> -
> -     /* Store %ebp in the expected location to finish the emulation. */
> -     movl    %ebp,16(%esp)
> -
> -     popl    %eax
> -     iret
> -#endif /* !defined(GPROF) && defined(DDBPROF) */
> -/*
> - * Trap gate entry for syscall
> - */
> -IDTVEC(syscall)
> -     subl    $8,%esp                 /* space for tf_{err,trapno} */
> -     INTRENTRY
> -     pushl   %esp
> -     call    _C_LABEL(syscall)
> -     addl    $4,%esp
> -2:   /* Check for ASTs on exit to user mode. */
> -     cli
> -     CHECK_ASTPENDING(%ecx)
> -     je      1f
> -     /* Always returning to user mode here. */
> -     CLEAR_ASTPENDING(%ecx)
> -     sti
> -     pushl   %esp
> -     call    _C_LABEL(ast)
> -     addl    $4,%esp
> -     jmp     2b
> -1:   INTRFASTEXIT
> -
> -#include <i386/i386/vector.s>
> -#include <i386/isa/icu.s>
> -
> -/*
> - * bzero (void *b, size_t len)
> - *   write len zero bytes to the string b.
> - */
> -
> -ENTRY(bzero)
> -     pushl   %edi
> -     movl    8(%esp),%edi
> -     movl    12(%esp),%edx
> -
> -     xorl    %eax,%eax               /* set fill data to 0 */
> -
> -     /*
> -      * if the string is too short, it's really not worth the overhead
> -      * of aligning to word boundaries, etc.  So we jump to a plain
> -      * unaligned set.
> -      */
> -     cmpl    $16,%edx
> -     jb      7f
> -
> -     movl    %edi,%ecx               /* compute misalignment */
> -     negl    %ecx
> -     andl    $3,%ecx
> -     subl    %ecx,%edx
> -     rep                             /* zero until word aligned */
> -     stosb
> -
> -     cmpl    $CPUCLASS_486,_C_LABEL(cpu_class)
> -     jne     8f
> -
> -     movl    %edx,%ecx
> -     shrl    $6,%ecx
> -     jz      8f
> -     andl    $63,%edx
> -1:   movl    %eax,(%edi)
> -     movl    %eax,4(%edi)
> -     movl    %eax,8(%edi)
> -     movl    %eax,12(%edi)
> -     movl    %eax,16(%edi)
> -     movl    %eax,20(%edi)
> -     movl    %eax,24(%edi)
> -     movl    %eax,28(%edi)
> -     movl    %eax,32(%edi)
> -     movl    %eax,36(%edi)
> -     movl    %eax,40(%edi)
> -     movl    %eax,44(%edi)
> -     movl    %eax,48(%edi)
> -     movl    %eax,52(%edi)
> -     movl    %eax,56(%edi)
> -     movl    %eax,60(%edi)
> -     addl    $64,%edi
> -     decl    %ecx
> -     jnz     1b
> -
> -8:   movl    %edx,%ecx               /* zero by words */
> -     shrl    $2,%ecx
> -     andl    $3,%edx
> -     rep
> -     stosl
> -
> -7:   movl    %edx,%ecx               /* zero remainder bytes */
> -     rep
> -     stosb
> -
> -     popl    %edi
> -     ret
> -
> -#if !defined(SMALL_KERNEL)
> -ENTRY(sse2_pagezero)
> -     pushl   %ebx
> -     movl    8(%esp),%ecx
> -     movl    %ecx,%eax
> -     addl    $4096,%eax
> -     xor     %ebx,%ebx
> -1:
> -     movnti  %ebx,(%ecx)
> -     addl    $4,%ecx
> -     cmpl    %ecx,%eax
> -     jne     1b
> -     sfence
> -     popl    %ebx
> -     ret
> -
> -ENTRY(i686_pagezero)
> -     pushl   %edi
> -     pushl   %ebx
> -
> -     movl    12(%esp), %edi
> -     movl    $1024, %ecx
> -
> -     ALIGN_TEXT
> -1:
> -     xorl    %eax, %eax
> -     repe
> -     scasl
> -     jnz     2f
> -
> -     popl    %ebx
> -     popl    %edi
> -     ret
> -
> -     ALIGN_TEXT
> -
> -2:
> -     incl    %ecx
> -     subl    $4, %edi
> -
> -     movl    %ecx, %edx
> -     cmpl    $16, %ecx
> -
> -     jge     3f
> -
> -     movl    %edi, %ebx
> -     andl    $0x3f, %ebx
> -     shrl    %ebx
> -     shrl    %ebx
> -     movl    $16, %ecx
> -     subl    %ebx, %ecx
> -
> -3:
> -     subl    %ecx, %edx
> -     rep
> -     stosl
> -
> -     movl    %edx, %ecx
> -     testl   %edx, %edx
> -     jnz     1b
> -
> -     popl    %ebx
> -     popl    %edi
> -     ret
> -#endif
> -
> -/*
> - * int cpu_paenable(void *);
> - */
> -ENTRY(cpu_paenable)
> -     movl    $-1, %eax
> -     testl   $CPUID_PAE, _C_LABEL(cpu_feature)
> -     jz      1f
> -
> -     pushl   %esi
> -     pushl   %edi
> -     movl    12(%esp), %esi
> -     movl    %cr3, %edi
> -     orl     $0xfe0, %edi    /* PDPT will be in the last four slots! */
> -     movl    %edi, %cr3
> -     addl    $KERNBASE, %edi /* and make it back virtual again */
> -     movl    $8, %ecx
> -     rep
> -     movsl
> -
> -     movl    $MSR_EFER, %ecx
> -     rdmsr
> -     orl     $EFER_NXE, %eax
> -     wrmsr
> -
> -     movl    %cr4, %eax
> -     orl     $CR4_PAE, %eax
> -     movl    %eax, %cr4      /* BANG!!! */
> -
> -     movl    12(%esp), %eax
> -     subl    $KERNBASE, %eax
> -     movl    %eax, %cr3      /* reload real PDPT */
> -     movl    $4*NBPG, %eax
> -     movl    %eax, _C_LABEL(PTDsize)
> -
> -     xorl    %eax, %eax
> -     popl    %edi
> -     popl    %esi
> -1:
> -     ret
> -
> -#if NLAPIC > 0
> -#include <i386/i386/apicvec.s>
> -#endif
> -
> -#include <i386/i386/mutex.S>
> -
> -.globl _C_LABEL(_stac)
> -_C_LABEL(_stac):
> -     stac
> -
> -.globl _C_LABEL(_clac)
> -_C_LABEL(_clac):
> -     clac
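
That's the end of the locore.s removals; the macros, data, and asm
runtime deleted above reappear essentially verbatim in the new
locore2.S below.  For anyone re-reading the moved string-copy
routines, here is a rough C rendering of the copystr() contract
documented in the comments -- a sketch of the semantics only, not the
kernel implementation:

	#include <errno.h>
	#include <stddef.h>

	/* illustrative stand-in, not the kernel's copystr() */
	int
	copystr_sketch(const char *from, char *to, size_t maxlen,
	    size_t *lencopied)
	{
		size_t i;

		for (i = 0; i < maxlen; i++) {
			if ((to[i] = from[i]) == '\0') {
				if (lencopied != NULL)
					*lencopied = i + 1;	/* count the NUL */
				return (0);
			}
		}
		if (lencopied != NULL)
			*lencopied = i;		/* maxlen bytes, no NUL seen */
		return (ENAMETOOLONG);
	}

The same shape holds for copyinstr/copyoutstr, which additionally
bound the user-side pointer by VM_MAXUSER_ADDRESS and may fail with
EFAULT.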
> Index: arch/i386/i386/locore2.S
> ===================================================================
> RCS file: arch/i386/i386/locore2.S
> diff -N arch/i386/i386/locore2.S
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ arch/i386/i386/locore2.S  30 May 2017 07:52:22 -0000
> @@ -0,0 +1,1346 @@
> +/*   $OpenBSD: locore.s,v 1.173 2017/05/12 08:46:28 mpi Exp $        */
> +/*   $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $    */
> +
> +/*-
> + * Copyright (c) 1993, 1994, 1995 Charles M. Hannum.  All rights reserved.
> + * Copyright (c) 1990 The Regents of the University of California.
> + * All rights reserved.
> + *
> + * This code is derived from software contributed to Berkeley by
> + * William Jolitz.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + * 3. Neither the name of the University nor the names of its contributors
> + *    may be used to endorse or promote products derived from this software
> + *    without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + *   @(#)locore.s    7.3 (Berkeley) 5/13/91
> + */
> +
> +#include "npx.h"
> +#include "assym.h"
> +#include "apm.h"
> +#include "lapic.h"
> +#include "ksyms.h"
> +
> +#include <sys/errno.h>
> +#include <sys/syscall.h>
> +
> +#include <machine/codepatch.h>
> +#include <machine/cputypes.h>
> +#include <machine/param.h>
> +#include <machine/pte.h>
> +#include <machine/segments.h>
> +#include <machine/specialreg.h>
> +#include <machine/trap.h>
> +
> +#include <dev/isa/isareg.h>
> +
> +#if NLAPIC > 0
> +#include <machine/i82489reg.h>
> +#endif
> +
> +#ifndef SMALL_KERNEL
> +/*
> + * As stac/clac SMAP instructions are 3 bytes, we want the fastest
> + * 3 byte nop sequence possible here.  This will be replaced by
> + * stac/clac instructions if SMAP is detected after booting.
> + *
> + * Intel documents multi-byte NOP sequences as being available
> + * on all family 0x6 and 0xf processors (ie 686+)
> + * So use 3 of the single byte nops for compatibility
> + */
> +#define SMAP_NOP     .byte 0x90, 0x90, 0x90
> +#define SMAP_STAC    CODEPATCH_START                 ;\
> +                     SMAP_NOP                        ;\
> +                     CODEPATCH_END(CPTAG_STAC)
> +#define SMAP_CLAC    CODEPATCH_START                 ;\
> +                     SMAP_NOP                        ;\
> +                     CODEPATCH_END(CPTAG_CLAC)
> +
> +#else
> +
> +#define SMAP_STAC
> +#define SMAP_CLAC
> +
> +#endif
> +
> +
> +/*
> + * override user-land alignment before including asm.h
> + */
> +
> +#define      ALIGN_DATA      .align  4
> +#define      ALIGN_TEXT      .align  4,0x90  /* 4-byte boundaries, NOP-filled */
> +#define      SUPERALIGN_TEXT .align  16,0x90 /* 16-byte boundaries better for 486 */
> +#define _ALIGN_TEXT  ALIGN_TEXT
> +#include <machine/asm.h>
> +
> +#define CPL _C_LABEL(lapic_tpr)
> +
> +#define      GET_CURPCB(reg)                                 \
> +     movl    CPUVAR(CURPCB), reg
> +
> +#define      CHECK_ASTPENDING(treg)                          \
> +     movl    CPUVAR(CURPROC),treg            ;       \
> +     cmpl    $0, treg                        ;       \
> +     je      1f                              ;       \
> +     cmpl    $0,P_MD_ASTPENDING(treg)        ;       \
> +     1:
> +
> +#define      CLEAR_ASTPENDING(cpreg)                         \
> +     movl    $0,P_MD_ASTPENDING(cpreg)
> +
> +/*
> + * These are used on interrupt or trap entry or exit.
> + */
> +#define      INTRENTRY \
> +     cld                     ; \
> +     pushl   %eax            ; \
> +     pushl   %ecx            ; \
> +     pushl   %edx            ; \
> +     pushl   %ebx            ; \
> +     pushl   %ebp            ; \
> +     pushl   %esi            ; \
> +     pushl   %edi            ; \
> +     pushl   %ds             ; \
> +     pushl   %es             ; \
> +     pushl   %gs             ; \
> +     movl    $GSEL(GDATA_SEL, SEL_KPL),%eax  ; \
> +     movw    %ax,%ds         ; \
> +     movw    %ax,%es         ; \
> +     xorl    %eax,%eax       ; /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */ \
> +     movw    %ax,%gs         ; \
> +     pushl   %fs             ; \
> +     movl    $GSEL(GCPU_SEL, SEL_KPL),%eax   ; \
> +     movw    %ax,%fs
> +
> +#define      INTR_RESTORE_ALL \
> +     popl    %fs             ; \
> +     popl    %gs             ; \
> +     popl    %es             ; \
> +     popl    %ds             ; \
> +     popl    %edi            ; \
> +     popl    %esi            ; \
> +     popl    %ebp            ; \
> +     popl    %ebx            ; \
> +     popl    %edx            ; \
> +     popl    %ecx            ; \
> +     popl    %eax
> +
> +#define      INTRFASTEXIT \
> +     INTR_RESTORE_ALL        ;\
> +     addl    $8,%esp         ; \
> +     iret
> +
> +#define      INTR_FAKE_TRAP  0xbadabada
> +
> +/*
> + * PTmap is recursive pagemap at top of virtual address space.
> + * Within PTmap, the page directory can be found (third indirection).
> + */
> +     .globl  _C_LABEL(PTmap), _C_LABEL(PTD), _C_LABEL(PTDpde)
> +     .set    _C_LABEL(PTmap), (PDSLOT_PTE << PDSHIFT)
> +     .set    _C_LABEL(PTD), (_C_LABEL(PTmap) + PDSLOT_PTE * NBPG)
> +     .set    _C_LABEL(PTDpde), (_C_LABEL(PTD) + PDSLOT_PTE * 4)      # XXX 4 == sizeof pde
> +
> +/*
> + * APTmap, APTD is the alternate recursive pagemap.
> + * It's used when modifying another process's page tables.
> + */
> +     .globl  _C_LABEL(APTmap), _C_LABEL(APTD), _C_LABEL(APTDpde)
> +     .set    _C_LABEL(APTmap), (PDSLOT_APTE << PDSHIFT)
> +     .set    _C_LABEL(APTD), (_C_LABEL(APTmap) + PDSLOT_APTE * NBPG)
> +     # XXX 4 == sizeof pde
> +     .set    _C_LABEL(APTDpde), (_C_LABEL(PTD) + PDSLOT_APTE * 4)
> +
> +
> +     .data
> +
> +     .globl  _C_LABEL(cpu), _C_LABEL(cpu_id), _C_LABEL(cpu_vendor)
> +     .globl  _C_LABEL(cpu_brandstr)
> +     .globl  _C_LABEL(cpuid_level)
> +     .globl  _C_LABEL(cpu_miscinfo)
> +     .globl  _C_LABEL(cpu_feature), _C_LABEL(cpu_ecxfeature)
> +     .globl  _C_LABEL(ecpu_feature), _C_LABEL(ecpu_eaxfeature)
> +     .globl  _C_LABEL(ecpu_ecxfeature)
> +     .globl  _C_LABEL(cpu_cache_eax), _C_LABEL(cpu_cache_ebx)
> +     .globl  _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx)
> +     .globl  _C_LABEL(cpu_perf_eax)
> +     .globl  _C_LABEL(cpu_perf_ebx)
> +     .globl  _C_LABEL(cpu_perf_edx)
> +     .globl  _C_LABEL(cpu_apmi_edx)
> +     .globl  _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem)
> +     .globl  _C_LABEL(cpu_pae)
> +     .globl  _C_LABEL(esym)
> +     .globl  _C_LABEL(ssym)
> +     .globl  _C_LABEL(nkptp_max)
> +     .globl  _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase)
> +     .globl  _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr), _C_LABEL(PTDsize)
> +     .globl  _C_LABEL(gdt)
> +     .globl  _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv)
> +     .globl  _C_LABEL(lapic_tpr)
> +
> +#if NLAPIC > 0
> +     .align NBPG
> +     .globl _C_LABEL(local_apic)
> +_C_LABEL(local_apic):
> +     .space  LAPIC_ID
> +     .globl  _C_LABEL(lapic_ppr)
> +_C_LABEL(lapic_id):
> +     .long   0x00000000
> +     .space  LAPIC_TPRI-(LAPIC_ID+4)
> +_C_LABEL(lapic_tpr):
> +     .space  LAPIC_PPRI-LAPIC_TPRI
> +_C_LABEL(lapic_ppr):
> +     .space  LAPIC_ISR-LAPIC_PPRI
> +_C_LABEL(lapic_isr):
> +     .space  NBPG-LAPIC_ISR
> +#else
> +_C_LABEL(lapic_tpr):
> +     .long   0
> +#endif
> +
> +_C_LABEL(cpu):               .long   0       # are we 386, 386sx, 486, 586 or 686
> +_C_LABEL(cpu_id):    .long   0       # saved from 'cpuid' instruction
> +_C_LABEL(cpu_pae):   .long   0       # are we using PAE paging mode?
> +_C_LABEL(cpu_miscinfo):      .long   0       # misc info (apic/brand id) from 'cpuid'
> +_C_LABEL(cpu_feature):       .long   0       # feature flags from 'cpuid' instruction
> +_C_LABEL(ecpu_feature): .long        0       # extended feature flags from 'cpuid'
> +_C_LABEL(cpu_ecxfeature):.long       0       # ecx feature flags from 'cpuid'
> +_C_LABEL(ecpu_eaxfeature): .long 0   # extended eax feature flags
> +_C_LABEL(ecpu_ecxfeature): .long 0   # extended ecx feature flags
> +_C_LABEL(cpuid_level):       .long   -1      # max. lvl accepted by 'cpuid' insn
> +_C_LABEL(cpu_cache_eax):.long        0
> +_C_LABEL(cpu_cache_ebx):.long        0
> +_C_LABEL(cpu_cache_ecx):.long        0
> +_C_LABEL(cpu_cache_edx):.long        0
> +_C_LABEL(cpu_perf_eax):      .long   0       # arch. perf. mon. flags from 'cpuid'
> +_C_LABEL(cpu_perf_ebx):      .long   0       # arch. perf. mon. flags from 'cpuid'
> +_C_LABEL(cpu_perf_edx):      .long   0       # arch. perf. mon. flags from 'cpuid'
> +_C_LABEL(cpu_apmi_edx):      .long   0       # adv. power management info. 'cpuid'
> +_C_LABEL(cpu_vendor): .space 16      # vendor string returned by 'cpuid' instruction
> +_C_LABEL(cpu_brandstr):      .space 48 # brand string returned by 'cpuid'
> +_C_LABEL(cold):              .long   1       # cold till we are not
> +_C_LABEL(ssym):              .long   0       # ptr to start of syms
> +_C_LABEL(esym):              .long   0       # ptr to end of syms
> +_C_LABEL(cnvmem):    .long   0       # conventional memory size
> +_C_LABEL(extmem):    .long   0       # extended memory size
> +_C_LABEL(atdevbase): .long   0       # location of start of iomem in virtual
> +_C_LABEL(bootapiver):        .long   0       # /boot API version
> +_C_LABEL(bootargc):  .long   0       # /boot argc
> +_C_LABEL(bootargv):  .long   0       # /boot argv
> +_C_LABEL(bootdev):   .long   0       # device we booted from
> +_C_LABEL(proc0paddr):        .long   0
> +_C_LABEL(PTDpaddr):  .long   0       # paddr of PTD, for libkvm
> +_C_LABEL(PTDsize):   .long   NBPG    # size of PTD, for libkvm
> +
> +     .text
> +
> +NENTRY(proc_trampoline)
> +#ifdef MULTIPROCESSOR
> +     call    _C_LABEL(proc_trampoline_mp)
> +#endif
> +     movl    $IPL_NONE,CPL
> +     pushl   %ebx
> +     call    *%esi
> +     addl    $4,%esp
> +     INTRFASTEXIT
> +     /* NOTREACHED */
> +
> +     /* This must come before any use of the CODEPATCH macros */
> +       .section .codepatch,"a"
> +       .align  8
> +       .globl _C_LABEL(codepatch_begin)
> +_C_LABEL(codepatch_begin):
> +       .previous
> +
> +       .section .codepatchend,"a"
> +       .globl _C_LABEL(codepatch_end)
> +_C_LABEL(codepatch_end):
> +       .previous
> +
> +/*****************************************************************************/
> +
> +/*
> + * Signal trampoline; copied to top of user stack.
> + */
> +     .section .rodata
> +     .globl  _C_LABEL(sigcode)
> +_C_LABEL(sigcode):
> +     call    *SIGF_HANDLER(%esp)
> +     leal    SIGF_SC(%esp),%eax      # scp (the call may have clobbered the
> +                                     # copy at SIGF_SCP(%esp))
> +     pushl   %eax
> +     pushl   %eax                    # junk to fake return address
> +     movl    $SYS_sigreturn,%eax
> +     int     $0x80                   # enter kernel with args on stack
> +     .globl  _C_LABEL(sigcoderet)
> +_C_LABEL(sigcoderet):
> +     movl    $SYS_exit,%eax
> +     int     $0x80                   # exit if sigreturn fails
> +     .globl  _C_LABEL(esigcode)
> +_C_LABEL(esigcode):
> +
> +     .globl  _C_LABEL(sigfill)
> +_C_LABEL(sigfill):
> +     int3
> +_C_LABEL(esigfill):
> +
> +     .data
> +     .globl  _C_LABEL(sigfillsiz)
> +_C_LABEL(sigfillsiz):
> +     .long   _C_LABEL(esigfill) - _C_LABEL(sigfill)
> +
> +     .text
> +
> +/*****************************************************************************/
> +
> +/*
> + * The following primitives are used to fill and copy regions of memory.
> + */
> +
> +/* Frame pointer reserve on stack. */
> +#ifdef DDB
> +#define FPADD 4
> +#else
> +#define FPADD 0
> +#endif
> +
> +/*
> + * kcopy(caddr_t from, caddr_t to, size_t len);
> + * Copy len bytes, abort on fault.
> + */
> +ENTRY(kcopy)
> +#ifdef DDB
> +     pushl   %ebp
> +     movl    %esp,%ebp
> +#endif
> +     pushl   %esi
> +     pushl   %edi
> +     GET_CURPCB(%eax)                # load curpcb into eax and set on-fault
> +     pushl   PCB_ONFAULT(%eax)
> +     movl    $_C_LABEL(copy_fault), PCB_ONFAULT(%eax)
> +
> +     movl    16+FPADD(%esp),%esi
> +     movl    20+FPADD(%esp),%edi
> +     movl    24+FPADD(%esp),%ecx
> +     movl    %edi,%eax
> +     subl    %esi,%eax
> +     cmpl    %ecx,%eax               # overlapping?
> +     jb      1f
> +     shrl    $2,%ecx                 # nope, copy forward by 32-bit words
> +     rep
> +     movsl
> +     movl    24+FPADD(%esp),%ecx
> +     andl    $3,%ecx                 # any bytes left?
> +     rep
> +     movsb
> +
> +     GET_CURPCB(%edx)                # XXX save curpcb?
> +     popl    PCB_ONFAULT(%edx)
> +     popl    %edi
> +     popl    %esi
> +     xorl    %eax,%eax
> +#ifdef DDB
> +     leave
> +#endif
> +     ret
> +
> +     ALIGN_TEXT
> +1:   addl    %ecx,%edi               # copy backward
> +     addl    %ecx,%esi
> +     std
> +     andl    $3,%ecx                 # any fractional bytes?
> +     decl    %edi
> +     decl    %esi
> +     rep
> +     movsb
> +     movl    24+FPADD(%esp),%ecx     # copy remainder by 32-bit words
> +     shrl    $2,%ecx
> +     subl    $3,%esi
> +     subl    $3,%edi
> +     rep
> +     movsl
> +     cld
> +
> +     GET_CURPCB(%edx)
> +     popl    PCB_ONFAULT(%edx)
> +     popl    %edi
> +     popl    %esi
> +     xorl    %eax,%eax
> +#ifdef DDB
> +     leave
> +#endif
> +     ret
> +     
> +/*****************************************************************************/
> +
> +/*
> + * The following primitives are used to copy data in and out of the user's
> + * address space.
> + */
> +
> +/*
> + * copyout(caddr_t from, caddr_t to, size_t len);
> + * Copy len bytes into the user's address space.
> + */
> +ENTRY(copyout)
> +#ifdef DDB
> +     pushl   %ebp
> +     movl    %esp,%ebp
> +#endif
> +     pushl   %esi
> +     pushl   %edi
> +     pushl   $0      
> +     
> +     movl    16+FPADD(%esp),%esi
> +     movl    20+FPADD(%esp),%edi
> +     movl    24+FPADD(%esp),%eax
> +
> +     /*
> +      * We check that the end of the destination buffer is not past the end
> +      * of the user's address space.  If it's not, then we only need to
> +      * check that each page is writable.  The 486 will do this for us; the
> +      * 386 will not.  (We assume that pages in user space that are not
> +      * writable by the user are not writable by the kernel either.)
> +      */
> +     movl    %edi,%edx
> +     addl    %eax,%edx
> +     jc      _C_LABEL(copy_fault)
> +     cmpl    $VM_MAXUSER_ADDRESS,%edx
> +     ja      _C_LABEL(copy_fault)
> +
> +     GET_CURPCB(%edx)
> +     movl    $_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
> +     SMAP_STAC
> +
> +     /* bcopy(%esi, %edi, %eax); */
> +     movl    %eax,%ecx
> +     shrl    $2,%ecx
> +     rep
> +     movsl
> +     movl    %eax,%ecx
> +     andl    $3,%ecx
> +     rep
> +     movsb
> +
> +     SMAP_CLAC
> +     popl    PCB_ONFAULT(%edx)
> +     popl    %edi
> +     popl    %esi
> +     xorl    %eax,%eax
> +#ifdef DDB
> +     leave
> +#endif
> +     ret
> +
> +/*
> + * copyin(caddr_t from, caddr_t to, size_t len);
> + * Copy len bytes from the user's address space.
> + */
> +ENTRY(copyin)
> +#ifdef DDB
> +     pushl   %ebp
> +     movl    %esp,%ebp
> +#endif
> +     pushl   %esi
> +     pushl   %edi
> +     GET_CURPCB(%eax)
> +     pushl   $0
> +     movl    $_C_LABEL(copy_fault),PCB_ONFAULT(%eax)
> +     SMAP_STAC
> +     
> +     movl    16+FPADD(%esp),%esi
> +     movl    20+FPADD(%esp),%edi
> +     movl    24+FPADD(%esp),%eax
> +
> +     /*
> +      * We check that the end of the destination buffer is not past the end
> +      * of the user's address space.  If it's not, then we only need to
> +      * check that each page is readable, and the CPU will do that for us.
> +      */
> +     movl    %esi,%edx
> +     addl    %eax,%edx
> +     jc      _C_LABEL(copy_fault)
> +     cmpl    $VM_MAXUSER_ADDRESS,%edx
> +     ja      _C_LABEL(copy_fault)
> +
> +     /* bcopy(%esi, %edi, %eax); */
> +     movl    %eax,%ecx
> +     shrl    $2,%ecx
> +     rep
> +     movsl
> +     movb    %al,%cl
> +     andb    $3,%cl
> +     rep
> +     movsb
> +
> +     SMAP_CLAC
> +     GET_CURPCB(%edx)
> +     popl    PCB_ONFAULT(%edx)
> +     popl    %edi
> +     popl    %esi
> +     xorl    %eax,%eax
> +#ifdef DDB
> +     leave
> +#endif
> +     ret
> +
> +ENTRY(copy_fault)
> +     SMAP_CLAC
> +     GET_CURPCB(%edx)
> +     popl    PCB_ONFAULT(%edx)
> +     popl    %edi
> +     popl    %esi
> +     movl    $EFAULT,%eax
> +#ifdef DDB
> +     leave
> +#endif
> +     ret
> +
> +/*
> + * copyoutstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
> + * Copy a NUL-terminated string, at most maxlen characters long, into the
> + * user's address space.  Return the number of characters copied (including the
> + * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
> + * return 0 or EFAULT.
> + */
> +ENTRY(copyoutstr)
> +#ifdef DDB
> +     pushl   %ebp
> +     movl    %esp,%ebp
> +#endif
> +     pushl   %esi
> +     pushl   %edi
> +
> +     movl    12+FPADD(%esp),%esi             # esi = from
> +     movl    16+FPADD(%esp),%edi             # edi = to
> +     movl    20+FPADD(%esp),%edx             # edx = maxlen
> +
> +5:   GET_CURPCB(%eax)
> +     movl    $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax)
> +     SMAP_STAC
> +     /*
> +      * Get min(%edx, VM_MAXUSER_ADDRESS-%edi).
> +      */
> +     movl    $VM_MAXUSER_ADDRESS,%eax
> +     subl    %edi,%eax
> +     jbe     _C_LABEL(copystr_fault)         # die if CF == 1 || ZF == 1
> +                                             # i.e. make sure that %edi
> +                                             # is below VM_MAXUSER_ADDRESS
> +
> +     cmpl    %edx,%eax
> +     jae     1f
> +     movl    %eax,%edx
> +     movl    %eax,20+FPADD(%esp)
> +
> +1:   incl    %edx
> +
> +1:   decl    %edx
> +     jz      2f
> +     lodsb
> +     stosb
> +     testb   %al,%al
> +     jnz     1b
> +
> +     /* Success -- 0 byte reached. */
> +     decl    %edx
> +     xorl    %eax,%eax
> +     jmp     copystr_return
> +
> +2:   /* edx is zero -- return EFAULT or ENAMETOOLONG. */
> +     cmpl    $VM_MAXUSER_ADDRESS,%edi
> +     jae     _C_LABEL(copystr_fault)
> +     movl    $ENAMETOOLONG,%eax
> +     jmp     copystr_return
> +
> +/*
> + * copyinstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
> + * Copy a NUL-terminated string, at most maxlen characters long, from the
> + * user's address space.  Return the number of characters copied (including the
> + * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
> + * return 0 or EFAULT.
> + */
> +ENTRY(copyinstr)
> +#ifdef DDB
> +     pushl   %ebp
> +     movl    %esp,%ebp
> +#endif
> +     pushl   %esi
> +     pushl   %edi
> +     GET_CURPCB(%ecx)
> +     movl    $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx)
> +     SMAP_STAC
> +
> +     movl    12+FPADD(%esp),%esi             # %esi = from
> +     movl    16+FPADD(%esp),%edi             # %edi = to
> +     movl    20+FPADD(%esp),%edx             # %edx = maxlen
> +
> +     /*
> +      * Get min(%edx, VM_MAXUSER_ADDRESS-%esi).
> +      */
> +     movl    $VM_MAXUSER_ADDRESS,%eax
> +     subl    %esi,%eax
> +     jbe     _C_LABEL(copystr_fault)         # Error if CF == 1 || ZF == 1
> +                                             # i.e. make sure that %esi
> +                                             # is below VM_MAXUSER_ADDRESS
> +     cmpl    %edx,%eax
> +     jae     1f
> +     movl    %eax,%edx
> +     movl    %eax,20+FPADD(%esp)
> +
> +1:   incl    %edx
> +
> +1:   decl    %edx
> +     jz      2f
> +     lodsb
> +     stosb
> +     testb   %al,%al
> +     jnz     1b
> +
> +     /* Success -- 0 byte reached. */
> +     decl    %edx
> +     xorl    %eax,%eax
> +     jmp     copystr_return
> +
> +2:   /* edx is zero -- return EFAULT or ENAMETOOLONG. */
> +     cmpl    $VM_MAXUSER_ADDRESS,%esi
> +     jae     _C_LABEL(copystr_fault)
> +     movl    $ENAMETOOLONG,%eax
> +     jmp     copystr_return
> +
> +ENTRY(copystr_fault)
> +     movl    $EFAULT,%eax
> +
> +copystr_return:
> +     SMAP_CLAC
> +     /* Set *lencopied and return %eax. */
> +     GET_CURPCB(%ecx)
> +     movl    $0,PCB_ONFAULT(%ecx)
> +     movl    20+FPADD(%esp),%ecx
> +     subl    %edx,%ecx
> +     movl    24+FPADD(%esp),%edx
> +     testl   %edx,%edx
> +     jz      8f
> +     movl    %ecx,(%edx)
> +
> +8:   popl    %edi
> +     popl    %esi
> +#ifdef DDB
> +     leave
> +#endif
> +     ret
> +
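
One detail worth calling out in the shared epilogue: when the clamp fired,
the maxlen argument at 20+FPADD(%esp) was overwritten with the clamped value,
so the length computation here stays consistent.  In C terms (a sketch;
argument names illustrative, "remaining" is what's left in %edx):

    /* copystr_return's bookkeeping, in C terms (sketch). */
    static void
    copystr_return_sketch(struct pcb *pcb, size_t clamped_maxlen,
        size_t remaining, size_t *lencopied)
    {
            pcb->pcb_onfault = NULL;        /* movl $0,PCB_ONFAULT(%ecx) */
            if (lencopied != NULL)          /* 24+FPADD(%esp) may be NULL */
                    *lencopied = clamped_maxlen - remaining;
    }
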
> +/*
> + * copystr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
> + * Copy a NUL-terminated string, at most maxlen characters long.  Return the
> + * number of characters copied (including the NUL) in *lencopied.  If the
> + * string is too long, return ENAMETOOLONG; else return 0.
> + */
> +ENTRY(copystr)
> +#ifdef DDB
> +     pushl   %ebp
> +     movl    %esp,%ebp
> +#endif
> +     pushl   %esi
> +     pushl   %edi
> +
> +     movl    12+FPADD(%esp),%esi             # esi = from
> +     movl    16+FPADD(%esp),%edi             # edi = to
> +     movl    20+FPADD(%esp),%edx             # edx = maxlen
> +     incl    %edx
> +
> +1:   decl    %edx
> +     jz      4f
> +     lodsb
> +     stosb
> +     testb   %al,%al
> +     jnz     1b
> +
> +     /* Success -- 0 byte reached. */
> +     decl    %edx
> +     xorl    %eax,%eax
> +     jmp     6f
> +
> +4:   /* edx is zero -- return ENAMETOOLONG. */
> +     movl    $ENAMETOOLONG,%eax
> +
> +6:   /* Set *lencopied and return %eax. */
> +     movl    20+FPADD(%esp),%ecx
> +     subl    %edx,%ecx
> +     movl    24+FPADD(%esp),%edx
> +     testl   %edx,%edx
> +     jz      7f
> +     movl    %ecx,(%edx)
> +
> +7:   popl    %edi
> +     popl    %esi
> +#ifdef DDB
> +     leave
> +#endif
> +     ret
> +
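
For contrast, the kernel-to-kernel copystr() has no boundary check and no
fault handler at all; it is essentially (sketch, function name illustrative):

    /* C-level equivalent of copystr() (sketch). */
    int
    copystr_sketch(const char *from, char *to, size_t maxlen,
        size_t *lencopied)
    {
            size_t i;

            for (i = 0; i < maxlen; i++) {
                    if ((to[i] = from[i]) == '\0') {
                            if (lencopied)
                                    *lencopied = i + 1;     /* include NUL */
                            return 0;
                    }
            }
            if (lencopied)
                    *lencopied = i;
            return ENAMETOOLONG;
    }
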
> +/*****************************************************************************/
> +
> +/*
> + * The following is i386-specific nonsense.
> + */
> +
> +/*
> + * void lgdt(struct region_descriptor *rdp);
> + * Change the global descriptor table.
> + */
> +NENTRY(lgdt)
> +     /* Reload the descriptor table. */
> +     movl    4(%esp),%eax
> +     lgdt    (%eax)
> +     /* Flush the prefetch q. */
> +     jmp     1f
> +     nop
> +1:   /* Reload "stale" selectors. */
> +     movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
> +     movw    %ax,%ds
> +     movw    %ax,%es
> +     movw    %ax,%ss
> +     movl    $GSEL(GCPU_SEL, SEL_KPL),%eax
> +     movw    %ax,%fs
> +     /* Reload code selector by doing intersegment return. */
> +     popl    %eax
> +     pushl   $GSEL(GCODE_SEL, SEL_KPL)
> +     pushl   %eax
> +     lret
> +
> +ENTRY(setjmp)
> +     movl    4(%esp),%eax
> +     movl    %ebx,(%eax)             # save ebx
> +     movl    %esp,4(%eax)            # save esp
> +     movl    %ebp,8(%eax)            # save ebp
> +     movl    %esi,12(%eax)           # save esi
> +     movl    %edi,16(%eax)           # save edi
> +     movl    (%esp),%edx             # get rta
> +     movl    %edx,20(%eax)           # save eip
> +     xorl    %eax,%eax               # return (0);
> +     ret
> +
> +ENTRY(longjmp)
> +     movl    4(%esp),%eax
> +     movl    (%eax),%ebx             # restore ebx
> +     movl    4(%eax),%esp            # restore esp
> +     movl    8(%eax),%ebp            # restore ebp
> +     movl    12(%eax),%esi           # restore esi
> +     movl    16(%eax),%edi           # restore edi
> +     movl    20(%eax),%edx           # get rta
> +     movl    %edx,(%esp)             # put in return frame
> +     xorl    %eax,%eax               # return (1);
> +     incl    %eax
> +     ret
> +
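
These are the kernel-internal setjmp()/longjmp() (ddb and friends), saving
just the callee-saved registers plus %esp and the return %eip.  Typical use,
assuming the usual label_t jump-buffer type; the two helpers here are
hypothetical:

    /* Illustrative use of the kernel setjmp()/longjmp() pair. */
    label_t jbuf;

    if (setjmp(&jbuf) == 0) {
            /* first return: registers and return address now live in jbuf */
            risky_operation();      /* may call longjmp(&jbuf) to bail out */
    } else {
            /* longjmp(&jbuf) landed here: setjmp appeared to return 1 */
            recover();
    }
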
> +/*****************************************************************************/
> +             
> +/*
> + * cpu_switchto(struct proc *old, struct proc *new)
> + * Switch from the "old" proc to the "new" proc. If "old" is NULL, we
> + * don't need to bother saving old context.
> + */
> +ENTRY(cpu_switchto)
> +     pushl   %ebx
> +     pushl   %esi
> +     pushl   %edi
> +
> +     movl    16(%esp), %esi
> +     movl    20(%esp), %edi
> +
> +     /* If old process exited, don't bother. */
> +     testl   %esi,%esi
> +     jz      switch_exited
> +
> +     /* Save old stack pointers. */
> +     movl    P_ADDR(%esi),%ebx
> +     movl    %esp,PCB_ESP(%ebx)
> +     movl    %ebp,PCB_EBP(%ebx)
> +
> +switch_exited:
> +     /* Restore saved context. */
> +
> +     /* No interrupts while loading new state. */
> +     cli
> +
> +     /* Record new process. */
> +     movl    %edi, CPUVAR(CURPROC)
> +     movb    $SONPROC, P_STAT(%edi)
> +
> +     /* Restore stack pointers. */
> +     movl    P_ADDR(%edi),%ebx
> +     movl    PCB_ESP(%ebx),%esp
> +     movl    PCB_EBP(%ebx),%ebp
> +
> +     /* Record new pcb. */
> +     movl    %ebx, CPUVAR(CURPCB)
> +
> +     /*
> +      * Activate the address space.  The pcb copy of %cr3 will
> +      * be refreshed from the pmap, and because we're
> +      * curproc they'll both be reloaded into the CPU.
> +      */
> +     pushl   %edi
> +     pushl   %esi
> +     call    _C_LABEL(pmap_switch)
> +     addl    $8,%esp
> +
> +     /* Load TSS info. */
> +     movl    CPUVAR(GDT),%eax
> +     movl    P_MD_TSS_SEL(%edi),%edx
> +
> +     /* Switch TSS. */
> +     andl    $~0x0200,4-SEL_KPL(%eax,%edx,1)
> +     ltr     %dx
> +
> +     /* Restore cr0 (including FPU state). */
> +     movl    PCB_CR0(%ebx),%ecx
> +#ifdef MULTIPROCESSOR
> +     /*
> +      * If our floating point registers are on a different CPU,
> +      * clear CR0_TS so we'll trap rather than reuse bogus state.
> +      */
> +     movl    CPUVAR(SELF), %esi
> +     cmpl    PCB_FPCPU(%ebx), %esi
> +     jz      1f
> +     orl     $CR0_TS,%ecx
> +1:   
> +#endif       
> +     movl    %ecx,%cr0
> +
> +     /* Interrupts are okay again. */
> +     sti
> +
> +     popl    %edi
> +     popl    %esi
> +     popl    %ebx
> +     ret
> +
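
The switch path is short; step by step it is roughly the following, as a C
sketch -- the save/load helpers are illustrative, pmap_switch() is the real
call made above:

    /* cpu_switchto() step by step, in C terms (sketch). */
    void
    cpu_switchto_sketch(struct proc *old, struct proc *new)
    {
            if (old != NULL)        /* exited procs have nothing to save */
                    save_stack_pointers(&old->p_addr->u_pcb);

            intr_disable();                         /* cli */
            curcpu()->ci_curproc = new;
            new->p_stat = SONPROC;
            load_stack_pointers(&new->p_addr->u_pcb);
            curcpu()->ci_curpcb = &new->p_addr->u_pcb;
            pmap_switch(old, new);  /* refresh %cr3 from the new pmap */
            /*
             * Reload the TSS and %cr0; on MP, CR0_TS is forced on when
             * the FPU state lives on another CPU, so the next FPU use
             * traps instead of silently reusing stale registers.
             */
            intr_enable();                          /* sti */
    }
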
> +ENTRY(cpu_idle_enter)
> +     movl    _C_LABEL(cpu_idle_enter_fcn),%eax
> +     cmpl    $0,%eax
> +     je      1f
> +     jmpl    *%eax
> +1:
> +     ret
> +
> +ENTRY(cpu_idle_cycle)
> +     movl    _C_LABEL(cpu_idle_cycle_fcn),%eax
> +     cmpl    $0,%eax
> +     je      1f
> +     call    *%eax
> +     ret
> +1:
> +     sti
> +     hlt
> +     ret
> +
> +ENTRY(cpu_idle_leave)
> +     movl    _C_LABEL(cpu_idle_leave_fcn),%eax
> +     cmpl    $0,%eax
> +     je      1f
> +     jmpl    *%eax
> +1:
> +     ret
> +
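
The three idle entry points share one pattern: call a replaceable function
pointer if set, otherwise fall back to a default.  For cpu_idle_cycle() the
default is the classic sti;hlt pair -- safe because sti enables interrupts
only after the following instruction, so nothing can sneak in between the
sti and the hlt.  Roughly (helpers illustrative):

    /* cpu_idle_cycle()'s dispatch, in C terms (sketch). */
    void
    cpu_idle_cycle_sketch(void)
    {
            if (cpu_idle_cycle_fcn != NULL) {
                    (*cpu_idle_cycle_fcn)();  /* e.g. an mwait-based idle */
                    return;
            }
            intr_enable();  /* sti: takes effect after the next insn... */
            cpu_halt();     /* ...so this hlt cannot miss a wakeup */
    }
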
> +/*
> + * savectx(struct pcb *pcb);
> + * Update pcb, saving current processor state.
> + */
> +ENTRY(savectx)
> +     movl    4(%esp),%edx            # edx = p->p_addr
> +
> +     /* Save stack pointers. */
> +     movl    %esp,PCB_ESP(%edx)
> +     movl    %ebp,PCB_EBP(%edx)
> +
> +     movl    PCB_FLAGS(%edx),%ecx
> +     orl     $PCB_SAVECTX,%ecx
> +     movl    %ecx,PCB_FLAGS(%edx)
> +
> +     ret
> +
> +/*****************************************************************************/
> +
> +/*
> + * Trap and fault vector routines
> + *
> + * On exit from the kernel to user mode, we always need to check for ASTs.  In
> + * addition, we need to do this atomically; otherwise an interrupt may occur
> + * which causes an AST, but it won't get processed until the next kernel entry
> + * (possibly the next clock tick).  Thus, we disable interrupts before checking,
> + * and only enable them again on the final `iret' or before calling the AST
> + * handler.
> + */
> +#define      IDTVEC(name)    ALIGN_TEXT; .globl X##name; X##name:
> +
> +#define      TRAP(a)         pushl $(a) ; jmp _C_LABEL(alltraps)
> +#define      ZTRAP(a)        pushl $0 ; TRAP(a)
> +
> +
> +     .text
> +IDTVEC(div)
> +     ZTRAP(T_DIVIDE)
> +IDTVEC(dbg)
> +     subl    $4,%esp
> +     pushl   %eax
> +     movl    %dr6,%eax
> +     movl    %eax,4(%esp)
> +     andb    $~0xf,%al
> +     movl    %eax,%dr6
> +     popl    %eax
> +     TRAP(T_TRCTRAP)
> +IDTVEC(nmi)
> +     ZTRAP(T_NMI)
> +IDTVEC(bpt)
> +     ZTRAP(T_BPTFLT)
> +IDTVEC(ofl)
> +     ZTRAP(T_OFLOW)
> +IDTVEC(bnd)
> +     ZTRAP(T_BOUND)
> +IDTVEC(ill)
> +     ZTRAP(T_PRIVINFLT)
> +IDTVEC(dna)
> +#if NNPX > 0
> +     pushl   $0                      # dummy error code
> +     pushl   $T_DNA
> +     INTRENTRY
> +#ifdef MULTIPROCESSOR
> +     pushl   CPUVAR(SELF)
> +#else
> +     pushl   $_C_LABEL(cpu_info_primary)
> +#endif
> +     call    *_C_LABEL(npxdna_func)
> +     addl    $4,%esp
> +     testl   %eax,%eax
> +     jz      calltrap
> +     INTRFASTEXIT
> +#else
> +     ZTRAP(T_DNA)
> +#endif
> +IDTVEC(dble)
> +     TRAP(T_DOUBLEFLT)
> +IDTVEC(fpusegm)
> +     ZTRAP(T_FPOPFLT)
> +IDTVEC(tss)
> +     TRAP(T_TSSFLT)
> +IDTVEC(missing)
> +     TRAP(T_SEGNPFLT)
> +IDTVEC(stk)
> +     TRAP(T_STKFLT)
> +IDTVEC(prot)
> +     TRAP(T_PROTFLT)
> +IDTVEC(f00f_redirect)
> +     pushl   $T_PAGEFLT
> +     INTRENTRY
> +     testb   $PGEX_U,TF_ERR(%esp)
> +     jnz     calltrap
> +     movl    %cr2,%eax
> +     subl    _C_LABEL(idt),%eax
> +     cmpl    $(6*8),%eax
> +     jne     calltrap
> +     movb    $T_PRIVINFLT,TF_TRAPNO(%esp)
> +     jmp     calltrap
> +IDTVEC(page)
> +     TRAP(T_PAGEFLT)
> +IDTVEC(rsvd)
> +     ZTRAP(T_RESERVED)
> +IDTVEC(mchk)
> +     ZTRAP(T_MACHK)
> +IDTVEC(simd)
> +     ZTRAP(T_XFTRAP)
> +IDTVEC(intrspurious)
> +     /*
> +      * The Pentium Pro local APIC may erroneously call this vector for a
> +      * default IR7.  Just ignore it.
> +      *
> +      * (The local APIC does this when CPL is raised while it's on the
> +      * way to delivering an interrupt.. presumably enough has been set
> +      * up that it's inconvenient to abort delivery completely..)
> +      */
> +     iret
> +IDTVEC(fpu)
> +#if NNPX > 0
> +     /*
> +      * Handle like an interrupt so that we can call npxintr to clear the
> +      * error.  It would be better to handle npx interrupts as traps but
> +      * this is difficult for nested interrupts.
> +      */
> +     subl    $8,%esp                 /* space for tf_{err,trapno} */
> +     INTRENTRY
> +     pushl   CPL                     # if_ppl in intrframe
> +     pushl   %esp                    # push address of intrframe
> +     incl    _C_LABEL(uvmexp)+V_TRAP
> +     call    _C_LABEL(npxintr)
> +     addl    $8,%esp                 # pop address and if_ppl
> +     INTRFASTEXIT
> +#else
> +     ZTRAP(T_ARITHTRAP)
> +#endif
> +IDTVEC(align)
> +     ZTRAP(T_ALIGNFLT)
> +     /* 18 - 31 reserved for future exp */
> +
> +/*
> + * If an error is detected during trap, syscall, or interrupt exit, trap() will
> + * change %eip to point to one of these labels.  We clean up the stack, if
> + * necessary, and resume as if we were handling a general protection fault.
> + * This will cause the process to get a SIGBUS.
> + */
> +NENTRY(resume_iret)
> +     ZTRAP(T_PROTFLT)
> +NENTRY(resume_pop_ds)
> +     pushl   %es
> +     movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
> +     movw    %ax,%es
> +NENTRY(resume_pop_es)
> +     pushl   %gs
> +     xorl    %eax,%eax       /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */
> +     movw    %ax,%gs
> +NENTRY(resume_pop_gs)
> +     pushl   %fs
> +     movl    $GSEL(GCPU_SEL, SEL_KPL),%eax
> +     movw    %ax,%fs
> +NENTRY(resume_pop_fs)
> +     movl    $T_PROTFLT,TF_TRAPNO(%esp)
> +     sti
> +     jmp     calltrap
> +
> +/*
> + * All traps go through here. Call the generic trap handler, and
> + * check for ASTs afterwards.
> + */
> +NENTRY(alltraps)
> +     INTRENTRY
> +     sti
> +calltrap:
> +#ifdef DIAGNOSTIC
> +     movl    CPL,%ebx
> +#endif /* DIAGNOSTIC */
> +#if !defined(GPROF) && defined(DDBPROF)
> +     cmpl    $T_BPTFLT,TF_TRAPNO(%esp)
> +     jne     .Lreal_trap
> +
> +     pushl   %esp
> +     call    _C_LABEL(db_prof_hook)
> +     addl    $4,%esp
> +     cmpl    $1,%eax
> +     jne     .Lreal_trap
> +
> +     /*
> +      * Abuse the error field to indicate that INTRFASTEXIT needs
> +      * to emulate the patched instruction.
> +      */
> +     movl    $INTR_FAKE_TRAP, TF_ERR(%esp)
> +     jz      2f
> +.Lreal_trap:
> +#endif /* !defined(GPROF) && defined(DDBPROF) */
> +     pushl   %esp
> +     call    _C_LABEL(trap)
> +     addl    $4,%esp
> +2:   /* Check for ASTs on exit to user mode. */
> +     cli
> +     CHECK_ASTPENDING(%ecx)
> +     je      1f
> +     testb   $SEL_RPL,TF_CS(%esp)
> +#ifdef VM86
> +     jnz     5f
> +     testl   $PSL_VM,TF_EFLAGS(%esp)
> +#endif
> +     jz      1f
> +5:   CLEAR_ASTPENDING(%ecx)
> +     sti
> +     pushl   %esp
> +     call    _C_LABEL(ast)
> +     addl    $4,%esp
> +     jmp     2b
> +1:
> +#if !defined(GPROF) && defined(DDBPROF)
> +     /*
> +      * If we are returning from a probe trap we need to fix the
> +      * stack layout and emulate the patched instruction.
> +      *
> +      * The code below does that by trashing %eax, so it MUST be
> +      * restored afterward.
> +      */
> +     cmpl    $INTR_FAKE_TRAP, TF_ERR(%esp)
> +     je      .Lprobe_fixup
> +#endif /* !defined(GPROF) && defined(DDBPROF) */
> +#ifndef DIAGNOSTIC
> +     INTRFASTEXIT
> +#else
> +     cmpl    CPL,%ebx
> +     jne     3f
> +     INTRFASTEXIT
> +3:   sti
> +     pushl   $4f
> +     call    _C_LABEL(printf)
> +     addl    $4,%esp
> +#if defined(DDB) && 0
> +     int     $3
> +#endif /* DDB */
> +     movl    %ebx,CPL
> +     jmp     2b
> +4:   .asciz  "WARNING: SPL NOT LOWERED ON TRAP EXIT\n"
> +#endif /* DIAGNOSTIC */
> +
> +#if !defined(GPROF) && defined(DDBPROF)
> +.Lprobe_fixup:
> +     /* Restore all registers, unwinding the stack. */
> +     INTR_RESTORE_ALL
> +
> +     /*
> +      * Use the space left by ``err'' and ``trapno'' to emulate
> +      * "pushl %ebp".
> +      *
> +      * Temporarily save %eax.
> +      */
> +     movl    %eax,0(%esp)
> +
> +     /* Shift hardware-saved registers: eip, cs, eflags */
> +     movl    8(%esp),%eax
> +     movl    %eax,4(%esp)
> +     movl    12(%esp),%eax
> +     movl    %eax,8(%esp)
> +     movl    16(%esp),%eax
> +     movl    %eax,12(%esp)
> +
> +     /* Store %ebp in the expected location to finish the emulation. */
> +     movl    %ebp,16(%esp)
> +
> +     popl    %eax
> +     iret
> +#endif /* !defined(GPROF) && defined(DDBPROF) */
> +/*
> + * Trap gate entry for syscall
> + */
> +IDTVEC(syscall)
> +     subl    $8,%esp                 /* space for tf_{err,trapno} */
> +     INTRENTRY
> +     pushl   %esp
> +     call    _C_LABEL(syscall)
> +     addl    $4,%esp
> +2:   /* Check for ASTs on exit to user mode. */
> +     cli
> +     CHECK_ASTPENDING(%ecx)
> +     je      1f
> +     /* Always returning to user mode here. */
> +     CLEAR_ASTPENDING(%ecx)
> +     sti
> +     pushl   %esp
> +     call    _C_LABEL(ast)
> +     addl    $4,%esp
> +     jmp     2b
> +1:   INTRFASTEXIT
> +
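
Both alltraps and the syscall gate end in this same pattern: interrupts go
off before ASTPENDING is tested, so no interrupt can post an AST between the
check and the iret.  (alltraps additionally tests that we're returning to
user mode; the syscall gate always is.)  In C terms, with illustrative
helper names:

    /* The AST-on-exit loop shared by trap and syscall return (sketch). */
    static void
    ast_exit_loop_sketch(struct trapframe *frame)
    {
            for (;;) {
                    intr_disable();                 /* cli */
                    if (!astpending(curcpu()))
                            return;                 /* INTRFASTEXIT, intrs off */
                    clear_astpending(curcpu());
                    intr_enable();                  /* sti */
                    ast(frame);     /* may post another AST; loop again */
            }
    }
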
> +#include <i386/i386/vector.s>
> +#include <i386/isa/icu.s>
> +
> +/*
> + * bzero (void *b, size_t len)
> + *   write len zero bytes to the string b.
> + */
> +
> +ENTRY(bzero)
> +     pushl   %edi
> +     movl    8(%esp),%edi
> +     movl    12(%esp),%edx
> +
> +     xorl    %eax,%eax               /* set fill data to 0 */
> +
> +     /*
> +      * if the string is too short, it's really not worth the overhead
> +      * of aligning to word boundaries, etc.  So we jump to a plain
> +      * unaligned set.
> +      */
> +     cmpl    $16,%edx
> +     jb      7f
> +
> +     movl    %edi,%ecx               /* compute misalignment */
> +     negl    %ecx
> +     andl    $3,%ecx
> +     subl    %ecx,%edx
> +     rep                             /* zero until word aligned */
> +     stosb
> +
> +     cmpl    $CPUCLASS_486,_C_LABEL(cpu_class)
> +     jne     8f
> +
> +     movl    %edx,%ecx
> +     shrl    $6,%ecx
> +     jz      8f
> +     andl    $63,%edx
> +1:   movl    %eax,(%edi)
> +     movl    %eax,4(%edi)
> +     movl    %eax,8(%edi)
> +     movl    %eax,12(%edi)
> +     movl    %eax,16(%edi)
> +     movl    %eax,20(%edi)
> +     movl    %eax,24(%edi)
> +     movl    %eax,28(%edi)
> +     movl    %eax,32(%edi)
> +     movl    %eax,36(%edi)
> +     movl    %eax,40(%edi)
> +     movl    %eax,44(%edi)
> +     movl    %eax,48(%edi)
> +     movl    %eax,52(%edi)
> +     movl    %eax,56(%edi)
> +     movl    %eax,60(%edi)
> +     addl    $64,%edi
> +     decl    %ecx
> +     jnz     1b
> +
> +8:   movl    %edx,%ecx               /* zero by words */
> +     shrl    $2,%ecx
> +     andl    $3,%edx
> +     rep
> +     stosl
> +
> +7:   movl    %edx,%ecx               /* zero remainder bytes */
> +     rep
> +     stosb
> +
> +     popl    %edi
> +     ret
> +
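
The structure of bzero() is the usual three-phase fill: byte-fill to a word
boundary (skipped entirely for short buffers), word fill (with the unrolled
64-byte loop on 486-class CPUs, where rep stosl was slow), then tail bytes.
Roughly, as a standalone C sketch:

    /* bzero()'s phase structure in C terms (sketch). */
    #include <stddef.h>
    #include <stdint.h>

    void
    bzero_sketch(void *b, size_t len)
    {
            unsigned char *p = b;

            if (len >= 16) {                /* short buffers: bytes only */
                    while ((uintptr_t)p & 3) {      /* align to a word */
                            *p++ = 0;
                            len--;
                    }
                    while (len >= 4) {      /* word fill ("rep stosl") */
                            *(uint32_t *)p = 0;
                            p += 4;
                            len -= 4;
                    }
            }
            while (len > 0) {               /* remaining tail bytes */
                    *p++ = 0;
                    len--;
            }
    }
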
> +#if !defined(SMALL_KERNEL)
> +ENTRY(sse2_pagezero)
> +     pushl   %ebx
> +     movl    8(%esp),%ecx
> +     movl    %ecx,%eax
> +     addl    $4096,%eax
> +     xor     %ebx,%ebx
> +1:
> +     movnti  %ebx,(%ecx)
> +     addl    $4,%ecx
> +     cmpl    %ecx,%eax
> +     jne     1b
> +     sfence
> +     popl    %ebx
> +     ret
> +
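
sse2_pagezero() uses movnti so the zeroes go straight to memory without
pulling the whole page through the cache; the sfence at the end orders the
non-temporal stores.  With compiler intrinsics that is roughly (userland-
style sketch):

    /* sse2_pagezero() with intrinsics (sketch). */
    #include <emmintrin.h>

    void
    sse2_pagezero_sketch(void *page)
    {
            int *p = page;
            int *end = (int *)((char *)page + 4096);

            while (p < end)
                    _mm_stream_si32(p++, 0); /* movnti: cache-bypassing */
            _mm_sfence();            /* order the non-temporal stores */
    }
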
> +ENTRY(i686_pagezero)
> +     pushl   %edi
> +     pushl   %ebx
> +
> +     movl    12(%esp), %edi
> +     movl    $1024, %ecx
> +
> +     ALIGN_TEXT
> +1:
> +     xorl    %eax, %eax
> +     repe
> +     scasl
> +     jnz     2f
> +
> +     popl    %ebx
> +     popl    %edi
> +     ret
> +
> +     ALIGN_TEXT
> +
> +2:
> +     incl    %ecx
> +     subl    $4, %edi
> +
> +     movl    %ecx, %edx
> +     cmpl    $16, %ecx
> +
> +     jge     3f
> +
> +     movl    %edi, %ebx
> +     andl    $0x3f, %ebx
> +     shrl    %ebx
> +     shrl    %ebx
> +     movl    $16, %ecx
> +     subl    %ebx, %ecx
> +
> +3:
> +     subl    %ecx, %edx
> +     rep
> +     stosl
> +
> +     movl    %edx, %ecx
> +     testl   %edx, %edx
> +     jnz     1b
> +
> +     popl    %ebx
> +     popl    %edi
> +     ret
> +#endif
> +
> +/*
> + * int cpu_paenable(void *);
> + */
> +ENTRY(cpu_paenable)
> +     movl    $-1, %eax
> +     testl   $CPUID_PAE, _C_LABEL(cpu_feature)
> +     jz      1f
> +
> +     pushl   %esi
> +     pushl   %edi
> +     movl    12(%esp), %esi
> +     movl    %cr3, %edi
> +     orl     $0xfe0, %edi    /* PDPT will be in the last four slots! */
> +     movl    %edi, %cr3
> +     addl    $KERNBASE, %edi /* and make it back virtual again */
> +     movl    $8, %ecx
> +     rep
> +     movsl
> +
> +     movl    $MSR_EFER, %ecx
> +     rdmsr
> +     orl     $EFER_NXE, %eax
> +     wrmsr
> +
> +     movl    %cr4, %eax
> +     orl     $CR4_PAE, %eax
> +     movl    %eax, %cr4      /* BANG!!! */
> +
> +     movl    12(%esp), %eax
> +     subl    $KERNBASE, %eax
> +     movl    %eax, %cr3      /* reload real PDPT */
> +     movl    $4*NBPG, %eax
> +     movl    %eax, _C_LABEL(PTDsize)
> +
> +     xorl    %eax, %eax
> +     popl    %edi
> +     popl    %esi
> +1:
> +     ret
> +
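
The PAE enable sequence above is order-sensitive; in C terms the steps are
roughly as follows (a sketch: copy_pdpt_into_pagedir() is hypothetical, and
the CR/MSR accessors are assumed to be the usual cpufunc-style wrappers):

    /* cpu_paenable()'s sequence in C terms (sketch). */
    int
    cpu_paenable_sketch(void *pdpt)
    {
            if ((cpu_feature & CPUID_PAE) == 0)
                    return -1;              /* no PAE, nothing to do */

            /* 1. copy the 8 words of PDPT into the last 4 slots of the
             *    current page directory (the "orl $0xfe0" trick) */
            copy_pdpt_into_pagedir(pdpt);
            /* 2. enable no-execute before any NX bits become visible */
            wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
            /* 3. flip the paging mode; tables are reinterpreted here */
            lcr4(rcr4() | CR4_PAE);
            /* 4. point %cr3 at the real PDPT (physical address) */
            lcr3((paddr_t)pdpt - KERNBASE);
            return 0;
    }
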
> +#if NLAPIC > 0
> +#include <i386/i386/apicvec.s>
> +#endif
> +
> +#include <i386/i386/mutex.S>
> +
> +.globl _C_LABEL(_stac)
> +_C_LABEL(_stac):
> +     stac
> +
> +.globl _C_LABEL(_clac)
> +_C_LABEL(_clac):
> +     clac
> Index: conf/makegap.sh
> ===================================================================
> RCS file: conf/makegap.sh
> diff -N conf/makegap.sh
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ conf/makegap.sh   30 May 2017 07:33:02 -0000
> @@ -0,0 +1,28 @@
> +#!/bin/sh -
> +
> +cat << __EOF__
> +#include <machine/asm.h>
> +#include <machine/param.h>
> +
> +     .text
> +     .space  $RANDOM*3, 0xcc
> +     .align  PAGE_SIZE, 0xcc
> +
> +     .globl  endboot
> +_C_LABEL(endboot):
> +     .space  PAGE_SIZE + $RANDOM % PAGE_SIZE,  0xcc
> +     .align  16, 0xcc
> +
> +     /*
> +      * Randomly bias future data, bss, and rodata objects;
> +      * this does not help for objects in locore.S, though.
> +      */
> +     .data
> +     .space  $RANDOM % PAGE_SIZE
> +
> +     .bss
> +     .space  $RANDOM % PAGE_SIZE
> +
> +     .section .rodata
> +     .space  $RANDOM % PAGE_SIZE
> +__EOF__
> 
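
Since ksh's $RANDOM is 0..32767, the generated gaps are bounded; a quick C
rendering of the ranges involved (PAGE_SIZE assumed to be 4096 here):

    /* Bounds of the gaps makegap.sh generates (illustrative). */
    #include <stdio.h>

    #define PAGE_SIZE   4096
    #define RANDOM_MAX  32767       /* ksh's $RANDOM range is 0..32767 */

    int
    main(void)
    {
            printf("lead .text gap: 0..%d bytes, then page-aligned\n",
                RANDOM_MAX * 3);                        /* up to ~96K */
            printf("gap after endboot: %d..%d bytes\n",
                PAGE_SIZE, PAGE_SIZE + PAGE_SIZE - 1);  /* 1..<2 pages */
            printf("per-section bias: 0..%d bytes\n", PAGE_SIZE - 1);
            return 0;
    }
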
