Linus,

Please pull the latest x86-asm-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-asm-for-linus

   # HEAD: 53938ee427bf27525a63721b7e25d86b8f31f161 scripts/decode_stacktrace.sh: Fix address line detection on x86

The main changes in this development cycle were:

 - A large number of call stack dumping/printing improvements: higher robustness,
   better cross-context dumping, improved output, etc. (Josh Poimboeuf)

 - vDSO getcpu() performance improvement for future Intel CPUs with the RDPID 
   instruction (Andy Lutomirski)

 - Add two new Intel AVX512 features and the CPUID support infrastructure for them:
   AVX512IFMA and AVX512VBMI. (Gayatri Kammela, He Chen)

 - More copy-user unification (Borislav Petkov)

 - Entry code assembly macro simplifications (Alexander Kuleshov)

 - vDSO C/R support improvements (Dmitry Safonov)

 - Misc fixes and cleanups (Borislav Petkov, Paul Bolle)

  out-of-topic modifications in x86-asm-for-linus:
  --------------------------------------------------
  kernel/sysctl.c                    # 0ee1dd9f5e7e: x86/dumpstack: Remove raw stack dump
  mm/page_alloc.c                    # adb1fe9ae2ee: mm/page_alloc: Remove kernel address exposure in free_reserved_area()
  scripts/decode_stacktrace.sh       # 53938ee427bf: scripts/decode_stacktrace.sh: Fix address line detection on x86
  scripts/faddr2line                 # efdb4167e676: scripts/faddr2line: Fix "size mismatch" error
  tools/testing/selftests/x86/Makefile # 3200ca806942: selftests/x86: Add test_vdso to test getcpu()
  tools/testing/selftests/x86/test_vdso.c # 3200ca806942: selftests/x86: Add test_vdso to test getcpu()

 Thanks,

        Ingo

------------------>
Alexander Kuleshov (3):
      x86/entry/64: Remove unused 'addskip' parameter of the ALLOC_PT_GPREGS_ON_STACK macro
      entry/64: Remove unused ZERO_EXTRA_REGS macro
      x86/entry64: Remove unused audit related macros

Andy Lutomirski (2):
      x86/vdso: Use RDPID in preference to LSL when available
      selftests/x86: Add test_vdso to test getcpu()

Borislav Petkov (2):
      x86/copy_user: Unify the code by removing the 64-bit asm _copy_*_user() variants
      x86/boot/64: Use defines for page size

Dmitry Safonov (2):
      x86/prctl/uapi: Remove #ifdef for CHECKPOINT_RESTORE
      x86/vdso: Set vDSO pointer only after success

Gayatri Kammela (1):
      x86/cpufeatures: Enable new AVX512 cpu features

He Chen (2):
      x86/cpuid: Cleanup cpuid_regs definitions
      x86/cpuid: Provide get_scattered_cpuid_leaf()

Josh Poimboeuf (26):
      x86/entry/32, x86/boot/32: Use local labels
      x86/entry/32: Rename 'error_code' to 'common_exception'
      x86/entry/32: Fix the end of the stack for newly forked tasks
      x86/boot/32: Fix the end of the stack for idle tasks
      x86/boot/smp/32: Fix initial idle stack location on 32-bit kernels
      x86/boot/64: Use a common function for starting CPUs
      x86/boot/64: Put a real return address on the idle task stack
      x86/boot: Fix the end of the stack for idle tasks
      x86/boot: Move the _stext marker to before the boot code
      x86/entry/unwind: Create stack frames for saved interrupt registers
      x86/unwind: Create stack frames for saved syscall registers
      x86/dumpstack: Print stack identifier on its own line
      x86/dumpstack: Print any pt_regs found on the stack
      x86/dumpstack: Fix duplicate RIP address display in __show_regs()
      x86/dumpstack: Print orig_ax in __show_regs()
      scripts/faddr2line: Fix "size mismatch" error
      x86/dumpstack: Remove kernel text addresses from stack dump
      x86/dumpstack: Remove raw stack dump
      mm/page_alloc: Remove kernel address exposure in free_reserved_area()
      x86/unwind: Warn on bad frame pointer
      x86/dumpstack: Warn on stack recursion
      x86/unwind: Detect bad stack return address
      x86/unwind: Ensure stack grows down
      x86/dumpstack: Handle NULL stack pointer in show_trace_log_lvl()
      x86/dumpstack: Make stack name tags more comprehensible
      scripts/decode_stacktrace.sh: Fix address line detection on x86

Paul Bolle (2):
      x86/decoder: Use stdout if insn decoder test is successful
      x86/decoder: Use stderr if insn sanity test fails


 Documentation/kernel-parameters.txt       |   3 -
 Documentation/sysctl/kernel.txt           |   8 --
 Documentation/x86/x86_64/boot-options.txt |   4 -
 arch/x86/entry/calling.h                  |  33 ++++--
 arch/x86/entry/entry_32.S                 | 141 +++++++++++++++++---------
 arch/x86/entry/entry_64.S                 |  16 ++-
 arch/x86/entry/vdso/vma.c                 |  10 +-
 arch/x86/events/intel/pt.c                |  45 ++++-----
 arch/x86/include/asm/cpufeatures.h        |   3 +
 arch/x86/include/asm/kdebug.h             |   1 -
 arch/x86/include/asm/processor.h          |  14 +++
 arch/x86/include/asm/stacktrace.h         |   8 +-
 arch/x86/include/asm/unwind.h             |  16 ++-
 arch/x86/include/asm/vgtod.h              |   7 +-
 arch/x86/include/uapi/asm/prctl.h         |   8 +-
 arch/x86/kernel/cpu/scattered.c           |  57 +++++++----
 arch/x86/kernel/cpuid.c                   |   4 -
 arch/x86/kernel/dumpstack.c               |  68 ++++++-------
 arch/x86/kernel/dumpstack_32.c            |  56 +++--------
 arch/x86/kernel/dumpstack_64.c            |  79 +++------------
 arch/x86/kernel/fpu/xstate.c              |   2 +
 arch/x86/kernel/head_32.S                 |  49 +++++----
 arch/x86/kernel/head_64.S                 |  52 +++++-----
 arch/x86/kernel/process_32.c              |   7 +-
 arch/x86/kernel/process_64.c              |  13 ++-
 arch/x86/kernel/smpboot.c                 |   4 +-
 arch/x86/kernel/unwind_frame.c            | 161 ++++++++++++++++++++++++++++--
 arch/x86/kernel/vmlinux.lds.S             |   2 +-
 arch/x86/lib/copy_user_64.S               |  47 ---------
 arch/x86/lib/usercopy.c                   |  49 +++++++++
 arch/x86/lib/usercopy_32.c                |  49 ---------
 arch/x86/mm/fault.c                       |   3 +-
 arch/x86/platform/uv/uv_nmi.c             |   4 +-
 arch/x86/tools/insn_sanity.c              |   3 +-
 arch/x86/tools/test_get_len.c             |   2 +-
 kernel/sysctl.c                           |   7 --
 mm/page_alloc.c                           |   4 +-
 scripts/decode_stacktrace.sh              |   3 +-
 scripts/faddr2line                        |  33 +++---
 tools/testing/selftests/x86/Makefile      |   2 +-
 tools/testing/selftests/x86/test_vdso.c   | 123 +++++++++++++++++++++++
 41 files changed, 703 insertions(+), 497 deletions(-)
 create mode 100644 tools/testing/selftests/x86/test_vdso.c

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 37babf91f2cb..049a9172ed22 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1958,9 +1958,6 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
                        kmemcheck=2 (one-shot mode)
                        Default: 2 (one-shot mode)
 
-       kstack=N        [X86] Print N words from the kernel stack
-                       in oops dumps.
-
        kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
                        Default is 0 (don't ignore, but inject #GP)
 
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index ffab8b5caa60..065f18478c1c 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -40,7 +40,6 @@ Currently, these files might (depending on your configuration)
 - hung_task_warnings
 - kexec_load_disabled
 - kptr_restrict
-- kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/debugging-modules.txt
 - modules_disabled
@@ -395,13 +394,6 @@ When kptr_restrict is set to (2), kernel pointers printed 
using
 
 ==============================================================
 
-kstack_depth_to_print: (X86 only)
-
-Controls the number of words to print when dumping the raw
-kernel stack.
-
-==============================================================
-
 l2cr: (PPC only)
 
 This flag controls the L2 cache of G3 processor boards. If
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 0965a71f9942..61b611e9eeaf 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -277,10 +277,6 @@ IOMMU (input/output memory management unit)
     space might stop working. Use this option if you have devices that
     are accessed from userspace directly on some PCI host bridge.
 
-Debugging
-
-  kstack=N     Print N words from the kernel stack in oops dumps.
-
 Miscellaneous
 
        nogbpages
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9a9e5884066c..05ed3d393da7 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -90,8 +90,8 @@ For 32-bit we have the following conventions - kernel is 
built with
 
 #define SIZEOF_PTREGS  21*8
 
-       .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
-       addq    $-(15*8+\addskip), %rsp
+       .macro ALLOC_PT_GPREGS_ON_STACK
+       addq    $-(15*8), %rsp
        .endm
 
        .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
@@ -147,15 +147,6 @@ For 32-bit we have the following conventions - kernel is 
built with
        movq 5*8+\offset(%rsp), %rbx
        .endm
 
-       .macro ZERO_EXTRA_REGS
-       xorl    %r15d, %r15d
-       xorl    %r14d, %r14d
-       xorl    %r13d, %r13d
-       xorl    %r12d, %r12d
-       xorl    %ebp, %ebp
-       xorl    %ebx, %ebx
-       .endm
-
        .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, 
rstor_r8910=1, rstor_rdx=1
        .if \rstor_r11
        movq 6*8(%rsp), %r11
@@ -201,6 +192,26 @@ For 32-bit we have the following conventions - kernel is 
built with
        .byte 0xf1
        .endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+#ifdef CONFIG_FRAME_POINTER
+       .if \ptregs_offset
+               leaq \ptregs_offset(%rsp), %rbp
+       .else
+               mov %rsp, %rbp
+       .endif
+       orq     $0x1, %rbp
+#endif
+.endm
+
 #endif /* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 21b352a11b49..acc0c6f36f3f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -45,6 +45,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/export.h>
+#include <asm/frame.h>
 
        .section .entry.text, "ax"
 
@@ -175,6 +176,22 @@
        SET_KERNEL_GS %edx
 .endm
 
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
+ * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original rbp.
+ */
+.macro ENCODE_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
+       mov %esp, %ebp
+       orl $0x1, %ebp
+#endif
+.endm
+
 .macro RESTORE_INT_REGS
        popl    %ebx
        popl    %ecx
@@ -238,6 +255,23 @@ ENTRY(__switch_to_asm)
 END(__switch_to_asm)
 
 /*
+ * The unwinder expects the last frame on the stack to always be at the same
+ * offset from the end of the page, which allows it to validate the stack.
+ * Calling schedule_tail() directly would break that convention because its an
+ * asmlinkage function so its argument has to be pushed on the stack.  This
+ * wrapper creates a proper "end of stack" frame header before the call.
+ */
+ENTRY(schedule_tail_wrapper)
+       FRAME_BEGIN
+
+       pushl   %eax
+       call    schedule_tail
+       popl    %eax
+
+       FRAME_END
+       ret
+ENDPROC(schedule_tail_wrapper)
+/*
  * A newly forked process directly context switches into this address.
  *
  * eax: prev task we switched from
@@ -245,9 +279,7 @@ END(__switch_to_asm)
  * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
-       pushl   %eax
-       call    schedule_tail
-       popl    %eax
+       call    schedule_tail_wrapper
 
        testl   %ebx, %ebx
        jnz     1f              /* kernel threads are uncommon */
@@ -307,13 +339,13 @@ END(ret_from_exception)
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
        DISABLE_INTERRUPTS(CLBR_ANY)
-need_resched:
+.Lneed_resched:
        cmpl    $0, PER_CPU_VAR(__preempt_count)
        jnz     restore_all
        testl   $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception 
path) ?
        jz      restore_all
        call    preempt_schedule_irq
-       jmp     need_resched
+       jmp     .Lneed_resched
 END(resume_kernel)
 #endif
 
@@ -334,7 +366,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
  */
 ENTRY(xen_sysenter_target)
        addl    $5*4, %esp                      /* remove xen-provided frame */
-       jmp     sysenter_past_esp
+       jmp     .Lsysenter_past_esp
 #endif
 
 /*
@@ -371,7 +403,7 @@ ENTRY(xen_sysenter_target)
  */
 ENTRY(entry_SYSENTER_32)
        movl    TSS_sysenter_sp0(%esp), %esp
-sysenter_past_esp:
+.Lsysenter_past_esp:
        pushl   $__USER_DS              /* pt_regs->ss */
        pushl   %ebp                    /* pt_regs->sp (stashed in bp) */
        pushfl                          /* pt_regs->flags (except IF = 0) */
@@ -504,9 +536,9 @@ ENTRY(entry_INT80_32)
 
 restore_all:
        TRACE_IRQS_IRET
-restore_all_notrace:
+.Lrestore_all_notrace:
 #ifdef CONFIG_X86_ESPFIX32
-       ALTERNATIVE     "jmp restore_nocheck", "", X86_BUG_ESPFIX
+       ALTERNATIVE     "jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX
 
        movl    PT_EFLAGS(%esp), %eax           # mix EFLAGS, SS and CS
        /*
@@ -518,22 +550,23 @@ ENTRY(entry_INT80_32)
        movb    PT_CS(%esp), %al
        andl    $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), 
%eax
        cmpl    $((SEGMENT_LDT << 8) | USER_RPL), %eax
-       je ldt_ss                               # returning to user-space with 
LDT SS
+       je .Lldt_ss                             # returning to user-space with 
LDT SS
 #endif
-restore_nocheck:
+.Lrestore_nocheck:
        RESTORE_REGS 4                          # skip orig_eax/error_code
-irq_return:
+.Lirq_return:
        INTERRUPT_RETURN
+
 .section .fixup, "ax"
 ENTRY(iret_exc )
        pushl   $0                              # no error code
        pushl   $do_iret_error
-       jmp     error_code
+       jmp     common_exception
 .previous
-       _ASM_EXTABLE(irq_return, iret_exc)
+       _ASM_EXTABLE(.Lirq_return, iret_exc)
 
 #ifdef CONFIG_X86_ESPFIX32
-ldt_ss:
+.Lldt_ss:
 /*
  * Setup and switch to ESPFIX stack
  *
@@ -562,7 +595,7 @@ ENTRY(iret_exc      )
         */
        DISABLE_INTERRUPTS(CLBR_EAX)
        lss     (%esp), %esp                    /* switch to espfix segment */
-       jmp     restore_nocheck
+       jmp     .Lrestore_nocheck
 #endif
 ENDPROC(entry_INT80_32)
 
@@ -624,6 +657,7 @@ END(irq_entries_start)
        ASM_CLAC
        addl    $-0x80, (%esp)                  /* Adjust vector into the 
[-256, -1] range */
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        TRACE_IRQS_OFF
        movl    %esp, %eax
        call    do_IRQ
@@ -635,6 +669,7 @@ ENTRY(name)                         \
        ASM_CLAC;                       \
        pushl   $~(nr);                 \
        SAVE_ALL;                       \
+       ENCODE_FRAME_POINTER;           \
        TRACE_IRQS_OFF                  \
        movl    %esp, %eax;             \
        call    fn;                     \
@@ -659,7 +694,7 @@ ENTRY(coprocessor_error)
        ASM_CLAC
        pushl   $0
        pushl   $do_coprocessor_error
-       jmp     error_code
+       jmp     common_exception
 END(coprocessor_error)
 
 ENTRY(simd_coprocessor_error)
@@ -673,14 +708,14 @@ ENTRY(simd_coprocessor_error)
 #else
        pushl   $do_simd_coprocessor_error
 #endif
-       jmp     error_code
+       jmp     common_exception
 END(simd_coprocessor_error)
 
 ENTRY(device_not_available)
        ASM_CLAC
        pushl   $-1                             # mark this as an int
        pushl   $do_device_not_available
-       jmp     error_code
+       jmp     common_exception
 END(device_not_available)
 
 #ifdef CONFIG_PARAVIRT
@@ -694,59 +729,59 @@ ENTRY(overflow)
        ASM_CLAC
        pushl   $0
        pushl   $do_overflow
-       jmp     error_code
+       jmp     common_exception
 END(overflow)
 
 ENTRY(bounds)
        ASM_CLAC
        pushl   $0
        pushl   $do_bounds
-       jmp     error_code
+       jmp     common_exception
 END(bounds)
 
 ENTRY(invalid_op)
        ASM_CLAC
        pushl   $0
        pushl   $do_invalid_op
-       jmp     error_code
+       jmp     common_exception
 END(invalid_op)
 
 ENTRY(coprocessor_segment_overrun)
        ASM_CLAC
        pushl   $0
        pushl   $do_coprocessor_segment_overrun
-       jmp     error_code
+       jmp     common_exception
 END(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
        ASM_CLAC
        pushl   $do_invalid_TSS
-       jmp     error_code
+       jmp     common_exception
 END(invalid_TSS)
 
 ENTRY(segment_not_present)
        ASM_CLAC
        pushl   $do_segment_not_present
-       jmp     error_code
+       jmp     common_exception
 END(segment_not_present)
 
 ENTRY(stack_segment)
        ASM_CLAC
        pushl   $do_stack_segment
-       jmp     error_code
+       jmp     common_exception
 END(stack_segment)
 
 ENTRY(alignment_check)
        ASM_CLAC
        pushl   $do_alignment_check
-       jmp     error_code
+       jmp     common_exception
 END(alignment_check)
 
 ENTRY(divide_error)
        ASM_CLAC
        pushl   $0                              # no error code
        pushl   $do_divide_error
-       jmp     error_code
+       jmp     common_exception
 END(divide_error)
 
 #ifdef CONFIG_X86_MCE
@@ -754,7 +789,7 @@ ENTRY(machine_check)
        ASM_CLAC
        pushl   $0
        pushl   machine_check_vector
-       jmp     error_code
+       jmp     common_exception
 END(machine_check)
 #endif
 
@@ -762,13 +797,14 @@ ENTRY(spurious_interrupt_bug)
        ASM_CLAC
        pushl   $0
        pushl   $do_spurious_interrupt_bug
-       jmp     error_code
+       jmp     common_exception
 END(spurious_interrupt_bug)
 
 #ifdef CONFIG_XEN
 ENTRY(xen_hypervisor_callback)
        pushl   $-1                             /* orig_ax = -1 => not a system 
call */
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        TRACE_IRQS_OFF
 
        /*
@@ -823,6 +859,7 @@ ENTRY(xen_failsafe_callback)
        jmp     iret_exc
 5:     pushl   $-1                             /* orig_ax = -1 => not a system 
call */
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        jmp     ret_from_exception
 
 .section .fixup, "ax"
@@ -882,7 +919,7 @@ ENTRY(ftrace_caller)
        popl    %edx
        popl    %ecx
        popl    %eax
-ftrace_ret:
+.Lftrace_ret:
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 .globl ftrace_graph_call
 ftrace_graph_call:
@@ -952,7 +989,7 @@ GLOBAL(ftrace_regs_call)
        popl    %gs
        addl    $8, %esp                        /* Skip orig_ax and ip */
        popf                                    /* Pop flags at end (no addl to 
corrupt flags) */
-       jmp     ftrace_ret
+       jmp     .Lftrace_ret
 
        popf
        jmp     ftrace_stub
@@ -963,7 +1000,7 @@ ENTRY(mcount)
        jb      ftrace_stub                     /* Paging not enabled yet? */
 
        cmpl    $ftrace_stub, ftrace_trace_function
-       jnz     trace
+       jnz     .Ltrace
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        cmpl    $ftrace_stub, ftrace_graph_return
        jnz     ftrace_graph_caller
@@ -976,7 +1013,7 @@ ENTRY(mcount)
        ret
 
        /* taken from glibc */
-trace:
+.Ltrace:
        pushl   %eax
        pushl   %ecx
        pushl   %edx
@@ -1027,7 +1064,7 @@ END(ftrace_graph_caller)
 ENTRY(trace_page_fault)
        ASM_CLAC
        pushl   $trace_do_page_fault
-       jmp     error_code
+       jmp     common_exception
 END(trace_page_fault)
 #endif
 
@@ -1035,7 +1072,10 @@ ENTRY(page_fault)
        ASM_CLAC
        pushl   $do_page_fault
        ALIGN
-error_code:
+       jmp common_exception
+END(page_fault)
+
+common_exception:
        /* the function address is in %gs's slot on the stack */
        pushl   %fs
        pushl   %es
@@ -1047,6 +1087,7 @@ ENTRY(page_fault)
        pushl   %edx
        pushl   %ecx
        pushl   %ebx
+       ENCODE_FRAME_POINTER
        cld
        movl    $(__KERNEL_PERCPU), %ecx
        movl    %ecx, %fs
@@ -1064,7 +1105,7 @@ ENTRY(page_fault)
        movl    %esp, %eax                      # pt_regs pointer
        call    *%edi
        jmp     ret_from_exception
-END(page_fault)
+END(common_exception)
 
 ENTRY(debug)
        /*
@@ -1079,6 +1120,7 @@ ENTRY(debug)
        ASM_CLAC
        pushl   $-1                             # mark this as an int
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        xorl    %edx, %edx                      # error code 0
        movl    %esp, %eax                      # pt_regs pointer
 
@@ -1094,11 +1136,11 @@ ENTRY(debug)
 
 .Ldebug_from_sysenter_stack:
        /* We're on the SYSENTER stack.  Switch off. */
-       movl    %esp, %ebp
+       movl    %esp, %ebx
        movl    PER_CPU_VAR(cpu_current_top_of_stack), %esp
        TRACE_IRQS_OFF
        call    do_debug
-       movl    %ebp, %esp
+       movl    %ebx, %esp
        jmp     ret_from_exception
 END(debug)
 
@@ -1116,11 +1158,12 @@ ENTRY(nmi)
        movl    %ss, %eax
        cmpw    $__ESPFIX_SS, %ax
        popl    %eax
-       je      nmi_espfix_stack
+       je      .Lnmi_espfix_stack
 #endif
 
        pushl   %eax                            # pt_regs->orig_ax
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        xorl    %edx, %edx                      # zero error code
        movl    %esp, %eax                      # pt_regs pointer
 
@@ -1132,21 +1175,21 @@ ENTRY(nmi)
 
        /* Not on SYSENTER stack. */
        call    do_nmi
-       jmp     restore_all_notrace
+       jmp     .Lrestore_all_notrace
 
 .Lnmi_from_sysenter_stack:
        /*
         * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
         * is using the thread stack right now, so it's safe for us to use it.
         */
-       movl    %esp, %ebp
+       movl    %esp, %ebx
        movl    PER_CPU_VAR(cpu_current_top_of_stack), %esp
        call    do_nmi
-       movl    %ebp, %esp
-       jmp     restore_all_notrace
+       movl    %ebx, %esp
+       jmp     .Lrestore_all_notrace
 
 #ifdef CONFIG_X86_ESPFIX32
-nmi_espfix_stack:
+.Lnmi_espfix_stack:
        /*
         * create the pointer to lss back
         */
@@ -1159,12 +1202,13 @@ ENTRY(nmi)
        .endr
        pushl   %eax
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        FIXUP_ESPFIX_STACK                      # %eax == %esp
        xorl    %edx, %edx                      # zero error code
        call    do_nmi
        RESTORE_REGS
        lss     12+4(%esp), %esp                # back to espfix stack
-       jmp     irq_return
+       jmp     .Lirq_return
 #endif
 END(nmi)
 
@@ -1172,6 +1216,7 @@ ENTRY(int3)
        ASM_CLAC
        pushl   $-1                             # mark this as an int
        SAVE_ALL
+       ENCODE_FRAME_POINTER
        TRACE_IRQS_OFF
        xorl    %edx, %edx                      # zero error code
        movl    %esp, %eax                      # pt_regs pointer
@@ -1181,14 +1226,14 @@ END(int3)
 
 ENTRY(general_protection)
        pushl   $do_general_protection
-       jmp     error_code
+       jmp     common_exception
 END(general_protection)
 
 #ifdef CONFIG_KVM_GUEST
 ENTRY(async_page_fault)
        ASM_CLAC
        pushl   $do_async_page_fault
-       jmp     error_code
+       jmp     common_exception
 END(async_page_fault)
 #endif
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ef766a358b37..5b219707c2f2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -38,12 +38,6 @@
 #include <asm/export.h>
 #include <linux/err.h>
 
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_X86_64                      
(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_64BIT                     0x80000000
-#define __AUDIT_ARCH_LE                                0x40000000
-
 .code64
 .section .entry.text, "ax"
 
@@ -469,6 +463,7 @@ END(irq_entries_start)
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
+       ENCODE_FRAME_POINTER
 
        testb   $3, CS(%rsp)
        jz      1f
@@ -985,6 +980,7 @@ ENTRY(xen_failsafe_callback)
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
+       ENCODE_FRAME_POINTER
        jmp     error_exit
 END(xen_failsafe_callback)
 
@@ -1028,6 +1024,7 @@ ENTRY(paranoid_entry)
        cld
        SAVE_C_REGS 8
        SAVE_EXTRA_REGS 8
+       ENCODE_FRAME_POINTER 8
        movl    $1, %ebx
        movl    $MSR_GS_BASE, %ecx
        rdmsr
@@ -1075,6 +1072,7 @@ ENTRY(error_entry)
        cld
        SAVE_C_REGS 8
        SAVE_EXTRA_REGS 8
+       ENCODE_FRAME_POINTER 8
        xorl    %ebx, %ebx
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
@@ -1257,6 +1255,7 @@ ENTRY(nmi)
        pushq   %r13            /* pt_regs->r13 */
        pushq   %r14            /* pt_regs->r14 */
        pushq   %r15            /* pt_regs->r15 */
+       ENCODE_FRAME_POINTER
 
        /*
         * At this point we no longer need to worry about stack damage
@@ -1270,11 +1269,10 @@ ENTRY(nmi)
 
        /*
         * Return back to user mode.  We must *not* do the normal exit
-        * work, because we don't want to enable interrupts.  Fortunately,
-        * do_nmi doesn't modify pt_regs.
+        * work, because we don't want to enable interrupts.
         */
        SWAPGS
-       jmp     restore_c_regs_and_iret
+       jmp     restore_regs_and_iret
 
 .Lnmi_from_kernel:
        /*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 23c881caabd1..e739002427ed 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -161,8 +161,6 @@ static int map_vdso(const struct vdso_image *image, 
unsigned long addr)
        }
 
        text_start = addr - image->sym_vvar_start;
-       current->mm->context.vdso = (void __user *)text_start;
-       current->mm->context.vdso_image = image;
 
        /*
         * MAYWRITE to allow gdb to COW and set breakpoints
@@ -189,14 +187,12 @@ static int map_vdso(const struct vdso_image *image, 
unsigned long addr)
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                do_munmap(mm, text_start, image->size);
+       } else {
+               current->mm->context.vdso = (void __user *)text_start;
+               current->mm->context.vdso_image = image;
        }
 
 up_fail:
-       if (ret) {
-               current->mm->context.vdso = NULL;
-               current->mm->context.vdso_image = NULL;
-       }
-
        up_write(&mm->mmap_sem);
        return ret;
 }
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index c5047b8f777b..1c1b9fe705c8 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -36,13 +36,6 @@ static DEFINE_PER_CPU(struct pt, pt_ctx);
 
 static struct pt_pmu pt_pmu;
 
-enum cpuid_regs {
-       CR_EAX = 0,
-       CR_ECX,
-       CR_EDX,
-       CR_EBX
-};
-
 /*
  * Capabilities of Intel PT hardware, such as number of address bits or
  * supported output schemes, are cached and exported to userspace as "caps"
@@ -64,21 +57,21 @@ static struct pt_cap_desc {
        u8              reg;
        u32             mask;
 } pt_caps[] = {
-       PT_CAP(max_subleaf,             0, CR_EAX, 0xffffffff),
-       PT_CAP(cr3_filtering,           0, CR_EBX, BIT(0)),
-       PT_CAP(psb_cyc,                 0, CR_EBX, BIT(1)),
-       PT_CAP(ip_filtering,            0, CR_EBX, BIT(2)),
-       PT_CAP(mtc,                     0, CR_EBX, BIT(3)),
-       PT_CAP(ptwrite,                 0, CR_EBX, BIT(4)),
-       PT_CAP(power_event_trace,       0, CR_EBX, BIT(5)),
-       PT_CAP(topa_output,             0, CR_ECX, BIT(0)),
-       PT_CAP(topa_multiple_entries,   0, CR_ECX, BIT(1)),
-       PT_CAP(single_range_output,     0, CR_ECX, BIT(2)),
-       PT_CAP(payloads_lip,            0, CR_ECX, BIT(31)),
-       PT_CAP(num_address_ranges,      1, CR_EAX, 0x3),
-       PT_CAP(mtc_periods,             1, CR_EAX, 0xffff0000),
-       PT_CAP(cycle_thresholds,        1, CR_EBX, 0xffff),
-       PT_CAP(psb_periods,             1, CR_EBX, 0xffff0000),
+       PT_CAP(max_subleaf,             0, CPUID_EAX, 0xffffffff),
+       PT_CAP(cr3_filtering,           0, CPUID_EBX, BIT(0)),
+       PT_CAP(psb_cyc,                 0, CPUID_EBX, BIT(1)),
+       PT_CAP(ip_filtering,            0, CPUID_EBX, BIT(2)),
+       PT_CAP(mtc,                     0, CPUID_EBX, BIT(3)),
+       PT_CAP(ptwrite,                 0, CPUID_EBX, BIT(4)),
+       PT_CAP(power_event_trace,       0, CPUID_EBX, BIT(5)),
+       PT_CAP(topa_output,             0, CPUID_ECX, BIT(0)),
+       PT_CAP(topa_multiple_entries,   0, CPUID_ECX, BIT(1)),
+       PT_CAP(single_range_output,     0, CPUID_ECX, BIT(2)),
+       PT_CAP(payloads_lip,            0, CPUID_ECX, BIT(31)),
+       PT_CAP(num_address_ranges,      1, CPUID_EAX, 0x3),
+       PT_CAP(mtc_periods,             1, CPUID_EAX, 0xffff0000),
+       PT_CAP(cycle_thresholds,        1, CPUID_EBX, 0xffff),
+       PT_CAP(psb_periods,             1, CPUID_EBX, 0xffff0000),
 };
 
 static u32 pt_cap_get(enum pt_capabilities cap)
@@ -213,10 +206,10 @@ static int __init pt_pmu_hw_init(void)
 
        for (i = 0; i < PT_CPUID_LEAVES; i++) {
                cpuid_count(20, i,
-                           &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
+                           &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM]);
        }
 
        ret = -ENOMEM;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a39629206864..eac7572bf8bb 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -226,6 +226,7 @@
 #define X86_FEATURE_RDSEED     ( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX                ( 9*32+19) /* The ADCX and ADOX 
instructions */
 #define X86_FEATURE_SMAP       ( 9*32+20) /* Supervisor Mode Access Prevention 
*/
+#define X86_FEATURE_AVX512IFMA  ( 9*32+21) /* AVX-512 Integer Fused 
Multiply-Add instructions */
 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB       ( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
@@ -279,8 +280,10 @@
 #define X86_FEATURE_AVIC       (15*32+13) /* Virtual Interrupt Controller */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+#define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
 #define X86_FEATURE_PKU                (16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE      (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_RDPID      (16*32+ 22) /* RDPID instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index d31881188431..29a594a3b82a 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -21,7 +21,6 @@ enum die_val {
        DIE_NMIUNKNOWN,
 };
 
-extern void printk_address(unsigned long address);
 extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 984a7bf17f6a..e7f8c62701d4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -137,6 +137,17 @@ struct cpuinfo_x86 {
        u32                     microcode;
 };
 
+struct cpuid_regs {
+       u32 eax, ebx, ecx, edx;
+};
+
+enum cpuid_regs_idx {
+       CPUID_EAX = 0,
+       CPUID_EBX,
+       CPUID_ECX,
+       CPUID_EDX,
+};
+
 #define X86_VENDOR_INTEL       0
 #define X86_VENDOR_CYRIX       1
 #define X86_VENDOR_AMD         2
@@ -178,6 +189,9 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 void print_cpu_msr(struct cpuinfo_x86 *);
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+                                   unsigned int sub_leaf,
+                                   enum cpuid_regs_idx reg);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
 
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 37f2e0b377ad..a3269c897ec5 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -30,8 +30,7 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
 int get_stack_info(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info, unsigned long *visit_mask);
 
-void stack_type_str(enum stack_type type, const char **begin,
-                   const char **end);
+const char *stack_type_name(enum stack_type type);
 
 static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
 {
@@ -43,8 +42,6 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
                addr + len > begin && addr + len <= end);
 }
 
-extern int kstack_depth_to_print;
-
 #ifdef CONFIG_X86_32
 #define STACKSLOTS_PER_LINE 8
 #else
@@ -86,9 +83,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        unsigned long *stack, char *log_lvl);
 
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       unsigned long *sp, char *log_lvl);
-
 extern unsigned int code_bytes;
 
 /* The form of the top of the frame on the stack */
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 46de9ac4b990..c5a7f3a930dd 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -13,6 +13,7 @@ struct unwind_state {
        int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
        unsigned long *bp;
+       struct pt_regs *regs;
 #else
        unsigned long *sp;
 #endif
@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
        if (unwind_done(state))
                return NULL;
 
-       return state->bp + 1;
+       return state->regs ? &state->regs->ip : state->bp + 1;
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+       if (unwind_done(state))
+               return NULL;
+
+       return state->regs;
 }
 
 #else /* !CONFIG_FRAME_POINTER */
@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
        return NULL;
 }
 
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+       return NULL;
+}
+
 #endif /* CONFIG_FRAME_POINTER */
 
 #endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index e728699db774..3a01996db58f 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void)
         * works on all CPUs.  This is volatile so that it orders
         * correctly wrt barrier() and to keep gcc from cleverly
         * hoisting it out of the calling function.
+        *
+        * If RDPID is available, use it.
         */
-       asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+       alternative_io ("lsl %[p],%[seg]",
+                       ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+                       X86_FEATURE_RDPID,
+                       [p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
 
        return p;
 }
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index ae135de547f5..835aa51c7f6e 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,10 +6,8 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
-# define ARCH_MAP_VDSO_X32     0x2001
-# define ARCH_MAP_VDSO_32      0x2002
-# define ARCH_MAP_VDSO_64      0x2003
-#endif
+#define ARCH_MAP_VDSO_X32      0x2001
+#define ARCH_MAP_VDSO_32       0x2002
+#define ARCH_MAP_VDSO_64       0x2003
 
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 1db8dc490b66..d1316f9c8329 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -17,11 +17,17 @@ struct cpuid_bit {
        u32 sub_leaf;
 };
 
-enum cpuid_regs {
-       CR_EAX = 0,
-       CR_ECX,
-       CR_EDX,
-       CR_EBX
+/* Please keep the leaf sorted by cpuid_bit.level for faster search. */
+static const struct cpuid_bit cpuid_bits[] = {
+       { X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
+       { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
+       { X86_FEATURE_INTEL_PT,         CPUID_EBX, 25, 0x00000007, 0 },
+       { X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
+       { X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
+       { X86_FEATURE_HW_PSTATE,        CPUID_EDX,  7, 0x80000007, 0 },
+       { X86_FEATURE_CPB,              CPUID_EDX,  9, 0x80000007, 0 },
+       { X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },
+       { 0, 0, 0, 0, 0 }
 };
 
 void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
@@ -30,18 +36,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
        u32 regs[4];
        const struct cpuid_bit *cb;
 
-       static const struct cpuid_bit cpuid_bits[] = {
-               { X86_FEATURE_INTEL_PT,         CR_EBX,25, 0x00000007, 0 },
-               { X86_FEATURE_AVX512_4VNNIW,    CR_EDX, 2, 0x00000007, 0 },
-               { X86_FEATURE_AVX512_4FMAPS,    CR_EDX, 3, 0x00000007, 0 },
-               { X86_FEATURE_APERFMPERF,       CR_ECX, 0, 0x00000006, 0 },
-               { X86_FEATURE_EPB,              CR_ECX, 3, 0x00000006, 0 },
-               { X86_FEATURE_HW_PSTATE,        CR_EDX, 7, 0x80000007, 0 },
-               { X86_FEATURE_CPB,              CR_EDX, 9, 0x80000007, 0 },
-               { X86_FEATURE_PROC_FEEDBACK,    CR_EDX,11, 0x80000007, 0 },
-               { 0, 0, 0, 0, 0 }
-       };
-
        for (cb = cpuid_bits; cb->feature; cb++) {
 
                /* Verify that the level is valid */
@@ -50,10 +44,35 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
                    max_level > (cb->level | 0xffff))
                        continue;
 
-               cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX],
-                           &regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]);
+               cpuid_count(cb->level, cb->sub_leaf, &regs[CPUID_EAX],
+                           &regs[CPUID_EBX], &regs[CPUID_ECX],
+                           &regs[CPUID_EDX]);
 
                if (regs[cb->reg] & (1 << cb->bit))
                        set_cpu_cap(c, cb->feature);
        }
 }
+
+u32 get_scattered_cpuid_leaf(unsigned int level, unsigned int sub_leaf,
+                            enum cpuid_regs_idx reg)
+{
+       const struct cpuid_bit *cb;
+       u32 cpuid_val = 0;
+
+       for (cb = cpuid_bits; cb->feature; cb++) {
+
+               if (level > cb->level)
+                       continue;
+
+               if (level < cb->level)
+                       break;
+
+               if (reg == cb->reg && sub_leaf == cb->sub_leaf) {
+                       if (cpu_has(&boot_cpu_data, cb->feature))
+                               cpuid_val |= BIT(cb->bit);
+               }
+       }
+
+       return cpuid_val;
+}
+EXPORT_SYMBOL_GPL(get_scattered_cpuid_leaf);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 2836de390f95..9095c80723d6 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -46,10 +46,6 @@
 
 static struct class *cpuid_class;
 
-struct cpuid_regs {
-       u32 eax, ebx, ecx, edx;
-};
-
 static void cpuid_smp_cpuid(void *cmd_block)
 {
        struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9b7cf5c28f5f..0e5c9d0f6c28 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,7 +22,6 @@
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
 unsigned int code_bytes = 64;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
 
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
@@ -46,14 +45,7 @@ static void printk_stack_address(unsigned long address, int reliable,
                                 char *log_lvl)
 {
        touch_nmi_watchdog();
-       printk("%s [<%p>] %s%pB\n",
-               log_lvl, (void *)address, reliable ? "" : "? ",
-               (void *)address);
-}
-
-void printk_address(unsigned long address)
-{
-       pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
+       printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
@@ -67,6 +59,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
        printk("%sCall Trace:\n", log_lvl);
 
        unwind_start(&state, task, regs, stack);
+       stack = stack ? : get_stack_pointer(task, regs);
 
        /*
         * Iterate through the stacks, starting with the current stack pointer.
@@ -82,8 +75,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
         * - softirq stack
         * - hardirq stack
         */
-       for (; stack; stack = stack_info.next_sp) {
-               const char *str_begin, *str_end;
+       for (regs = NULL; stack; stack = stack_info.next_sp) {
+               const char *stack_name;
 
                /*
                 * If we overflowed the task stack into a guard page, jump back
@@ -95,9 +88,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                if (get_stack_info(stack, task, &stack_info, &visit_mask))
                        break;
 
-               stack_type_str(stack_info.type, &str_begin, &str_end);
-               if (str_begin)
-                       printk("%s <%s> ", log_lvl, str_begin);
+               stack_name = stack_type_name(stack_info.type);
+               if (stack_name)
+                       printk("%s <%s>\n", log_lvl, stack_name);
 
                /*
                 * Scan the stack, printing any text addresses we find.  At the
@@ -119,6 +112,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        if (!__kernel_text_address(addr))
                                continue;
 
+                       /*
+                        * Don't print regs->ip again if it was already printed
+                        * by __show_regs() below.
+                        */
+                       if (regs && stack == &regs->ip) {
+                               unwind_next_frame(&state);
+                               continue;
+                       }
+
                        if (stack == ret_addr_p)
                                reliable = 1;
 
@@ -146,10 +148,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                         * of the addresses will just be printed as unreliable.
                         */
                        unwind_next_frame(&state);
+
+                       /* if the frame has entry regs, print them */
+                       regs = unwind_get_entry_regs(&state);
+                       if (regs)
+                               __show_regs(regs, 0);
                }
 
-               if (str_end)
-                       printk("%s <%s> ", log_lvl, str_end);
+               if (stack_name)
+                       printk("%s </%s>\n", log_lvl, stack_name);
        }
 }
 
@@ -164,12 +171,12 @@ void show_stack(struct task_struct *task, unsigned long *sp)
        if (!sp && task == current)
                sp = get_stack_pointer(current, NULL);
 
-       show_stack_log_lvl(task, NULL, sp, "");
+       show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
 }
 
 void show_stack_regs(struct pt_regs *regs)
 {
-       show_stack_log_lvl(current, regs, NULL, "");
+       show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
 }
 
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -261,14 +268,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
                sp = kernel_stack_pointer(regs);
                savesegment(ss, ss);
        }
-       printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
-       print_symbol("%s", regs->ip);
-       printk(" SS:ESP %04x:%08lx\n", ss, sp);
+       printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
+              (void *)regs->ip, ss, sp);
 #else
        /* Executive summary in case the oops scrolled away */
-       printk(KERN_ALERT "RIP ");
-       printk_address(regs->ip);
-       printk(" RSP <%016lx>\n", regs->sp);
+       printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
 #endif
        return 0;
 }
@@ -291,22 +295,6 @@ void die(const char *str, struct pt_regs *regs, long err)
        oops_end(flags, regs, sig);
 }
 
-static int __init kstack_setup(char *s)
-{
-       ssize_t ret;
-       unsigned long val;
-
-       if (!s)
-               return -EINVAL;
-
-       ret = kstrtoul(s, 0, &val);
-       if (ret)
-               return ret;
-       kstack_depth_to_print = val;
-       return 0;
-}
-early_param("kstack", kstack_setup);
-
 static int __init code_bytes_setup(char *s)
 {
        ssize_t ret;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 06eb322b5f9f..bb3b5b9a6899 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,18 +16,15 @@
 
 #include <asm/stacktrace.h>
 
-void stack_type_str(enum stack_type type, const char **begin, const char **end)
+const char *stack_type_name(enum stack_type type)
 {
-       switch (type) {
-       case STACK_TYPE_IRQ:
-       case STACK_TYPE_SOFTIRQ:
-               *begin = "IRQ";
-               *end   = "EOI";
-               break;
-       default:
-               *begin = NULL;
-               *end   = NULL;
-       }
+       if (type == STACK_TYPE_IRQ)
+               return "IRQ";
+
+       if (type == STACK_TYPE_SOFTIRQ)
+               return "SOFTIRQ";
+
+       return NULL;
 }
 
 static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
@@ -109,8 +106,10 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
         * just break out and report an unknown stack type.
         */
        if (visit_mask) {
-               if (*visit_mask & (1UL << info->type))
+               if (*visit_mask & (1UL << info->type)) {
+                       printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
                        goto unknown;
+               }
                *visit_mask |= 1UL << info->type;
        }
 
@@ -121,36 +120,6 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        return -EINVAL;
 }
 
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       unsigned long *sp, char *log_lvl)
-{
-       unsigned long *stack;
-       int i;
-
-       if (!try_get_task_stack(task))
-               return;
-
-       sp = sp ? : get_stack_pointer(task, regs);
-
-       stack = sp;
-       for (i = 0; i < kstack_depth_to_print; i++) {
-               if (kstack_end(stack))
-                       break;
-               if ((i % STACKSLOTS_PER_LINE) == 0) {
-                       if (i != 0)
-                               pr_cont("\n");
-                       printk("%s %08lx", log_lvl, *stack++);
-               } else
-                       pr_cont(" %08lx", *stack++);
-               touch_nmi_watchdog();
-       }
-       pr_cont("\n");
-       show_trace_log_lvl(task, regs, sp, log_lvl);
-
-       put_task_stack(task);
-}
-
-
 void show_regs(struct pt_regs *regs)
 {
        int i;
@@ -168,8 +137,7 @@ void show_regs(struct pt_regs *regs)
                unsigned char c;
                u8 *ip;
 
-               pr_emerg("Stack:\n");
-               show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
+               show_trace_log_lvl(current, regs, NULL, KERN_EMERG);
 
                pr_emerg("Code:");
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 36cf1a498227..fac189efcc34 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -28,23 +28,17 @@ static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
        [DEBUG_STACK - 1]                       = DEBUG_STKSZ
 };
 
-void stack_type_str(enum stack_type type, const char **begin, const char **end)
+const char *stack_type_name(enum stack_type type)
 {
        BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
 
-       switch (type) {
-       case STACK_TYPE_IRQ:
-               *begin = "IRQ";
-               *end   = "EOI";
-               break;
-       case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
-               *begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
-               *end   = "EOE";
-               break;
-       default:
-               *begin = NULL;
-               *end   = NULL;
-       }
+       if (type == STACK_TYPE_IRQ)
+               return "IRQ";
+
+       if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
+               return exception_stack_names[type - STACK_TYPE_EXCEPTION];
+
+       return NULL;
 }
 
 static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
@@ -128,8 +122,10 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
         * just break out and report an unknown stack type.
         */
        if (visit_mask) {
-               if (*visit_mask & (1UL << info->type))
+               if (*visit_mask & (1UL << info->type)) {
+                       printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
                        goto unknown;
+               }
                *visit_mask |= 1UL << info->type;
        }
 
@@ -140,56 +136,6 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        return -EINVAL;
 }
 
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                       unsigned long *sp, char *log_lvl)
-{
-       unsigned long *irq_stack_end;
-       unsigned long *irq_stack;
-       unsigned long *stack;
-       int i;
-
-       if (!try_get_task_stack(task))
-               return;
-
-       irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
-       irq_stack     = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
-
-       sp = sp ? : get_stack_pointer(task, regs);
-
-       stack = sp;
-       for (i = 0; i < kstack_depth_to_print; i++) {
-               unsigned long word;
-
-               if (stack >= irq_stack && stack <= irq_stack_end) {
-                       if (stack == irq_stack_end) {
-                               stack = (unsigned long *) (irq_stack_end[-1]);
-                               pr_cont(" <EOI> ");
-                       }
-               } else {
-               if (kstack_end(stack))
-                       break;
-               }
-
-               if (probe_kernel_address(stack, word))
-                       break;
-
-               if ((i % STACKSLOTS_PER_LINE) == 0) {
-                       if (i != 0)
-                               pr_cont("\n");
-                       printk("%s %016lx", log_lvl, word);
-               } else
-                       pr_cont(" %016lx", word);
-
-               stack++;
-               touch_nmi_watchdog();
-       }
-
-       pr_cont("\n");
-       show_trace_log_lvl(task, regs, sp, log_lvl);
-
-       put_task_stack(task);
-}
-
 void show_regs(struct pt_regs *regs)
 {
        int i;
@@ -207,8 +153,7 @@ void show_regs(struct pt_regs *regs)
                unsigned char c;
                u8 *ip;
 
-               printk(KERN_DEFAULT "Stack:\n");
-               show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
+               show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
 
                printk(KERN_DEFAULT "Code: ");
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 095ef7ddd6ae..ce47452879fd 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -65,6 +65,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
        setup_clear_cpu_cap(X86_FEATURE_AVX);
        setup_clear_cpu_cap(X86_FEATURE_AVX2);
        setup_clear_cpu_cap(X86_FEATURE_AVX512F);
+       setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
        setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
        setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
        setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
@@ -73,6 +74,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
        setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
        setup_clear_cpu_cap(X86_FEATURE_MPX);
        setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
+       setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
        setup_clear_cpu_cap(X86_FEATURE_PKU);
        setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
        setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b6b2f0264af3..df541ac2071e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -63,6 +63,8 @@
 #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
 #endif
 
+#define SIZEOF_PTREGS 17*4
+
 /*
  * Number of possible pages in the lowmem region.
  *
@@ -248,19 +250,19 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 #ifdef CONFIG_PARAVIRT
        /* This is can only trip for a broken bootloader... */
        cmpw $0x207, pa(boot_params + BP_version)
-       jb default_entry
+       jb .Ldefault_entry
 
        /* Paravirt-compatible boot parameters.  Look to see what architecture
                we're booting under. */
        movl pa(boot_params + BP_hardware_subarch), %eax
        cmpl $num_subarch_entries, %eax
-       jae bad_subarch
+       jae .Lbad_subarch
 
        movl pa(subarch_entries)(,%eax,4), %eax
        subl $__PAGE_OFFSET, %eax
        jmp *%eax
 
-bad_subarch:
+.Lbad_subarch:
 WEAK(lguest_entry)
 WEAK(xen_entry)
        /* Unknown implementation; there's really
@@ -270,14 +272,14 @@ WEAK(xen_entry)
        __INITDATA
 
 subarch_entries:
-       .long default_entry             /* normal x86/PC */
+       .long .Ldefault_entry           /* normal x86/PC */
        .long lguest_entry              /* lguest hypervisor */
        .long xen_entry                 /* Xen hypervisor */
-       .long default_entry             /* Moorestown MID */
+       .long .Ldefault_entry           /* Moorestown MID */
 num_subarch_entries = (. - subarch_entries) / 4
 .previous
 #else
-       jmp default_entry
+       jmp .Ldefault_entry
 #endif /* CONFIG_PARAVIRT */
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -289,7 +291,8 @@ num_subarch_entries = (. - subarch_entries) / 4
 ENTRY(start_cpu0)
        movl initial_stack, %ecx
        movl %ecx, %esp
-       jmp  *(initial_code)
+       call *(initial_code)
+1:     jmp 1b
 ENDPROC(start_cpu0)
 #endif
 
@@ -317,7 +320,7 @@ ENTRY(startup_32_smp)
        call load_ucode_ap
 #endif
 
-default_entry:
+.Ldefault_entry:
 #define CR0_STATE      (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
                         X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
                         X86_CR0_PG)
@@ -347,7 +350,7 @@ ENTRY(startup_32_smp)
        pushfl
        popl %eax                       # get EFLAGS
        testl $X86_EFLAGS_ID,%eax       # did EFLAGS.ID remained set?
-       jz enable_paging                # hw disallowed setting of ID bit
+       jz .Lenable_paging              # hw disallowed setting of ID bit
                                        # which means no CPUID and no CR4
 
        xorl %eax,%eax
@@ -357,13 +360,13 @@ ENTRY(startup_32_smp)
        movl $1,%eax
        cpuid
        andl $~1,%edx                   # Ignore CPUID.FPU
-       jz enable_paging                # No flags or only CPUID.FPU = no CR4
+       jz .Lenable_paging              # No flags or only CPUID.FPU = no CR4
 
        movl pa(mmu_cr4_features),%eax
        movl %eax,%cr4
 
        testb $X86_CR4_PAE, %al         # check if PAE is enabled
-       jz enable_paging
+       jz .Lenable_paging
 
        /* Check if extended functions are implemented */
        movl $0x80000000, %eax
@@ -371,7 +374,7 @@ ENTRY(startup_32_smp)
        /* Value must be in the range 0x80000001 to 0x8000ffff */
        subl $0x80000001, %eax
        cmpl $(0x8000ffff-0x80000001), %eax
-       ja enable_paging
+       ja .Lenable_paging
 
        /* Clear bogus XD_DISABLE bits */
        call verify_cpu
@@ -380,7 +383,7 @@ ENTRY(startup_32_smp)
        cpuid
        /* Execute Disable bit supported? */
        btl $(X86_FEATURE_NX & 31), %edx
-       jnc enable_paging
+       jnc .Lenable_paging
 
        /* Setup EFER (Extended Feature Enable Register) */
        movl $MSR_EFER, %ecx
@@ -390,7 +393,7 @@ ENTRY(startup_32_smp)
        /* Make changes effective */
        wrmsr
 
-enable_paging:
+.Lenable_paging:
 
 /*
  * Enable paging
@@ -419,7 +422,7 @@ ENTRY(startup_32_smp)
  */
        movb $4,X86                     # at least 486
        cmpl $-1,X86_CPUID
-       je is486
+       je .Lis486
 
        /* get vendor info */
        xorl %eax,%eax                  # call CPUID with 0 -> return vendor ID
@@ -430,7 +433,7 @@ ENTRY(startup_32_smp)
        movl %ecx,X86_VENDOR_ID+8       # last 4 chars
 
        orl %eax,%eax                   # do we have processor info as well?
-       je is486
+       je .Lis486
 
        movl $1,%eax            # Use the CPUID instruction to get CPU type
        cpuid
@@ -444,7 +447,7 @@ ENTRY(startup_32_smp)
        movb %cl,X86_MASK
        movl %edx,X86_CAPABILITY
 
-is486:
+.Lis486:
        movl $0x50022,%ecx      # set AM, WP, NE and MP
        movl %cr0,%eax
        andl $0x80000011,%eax   # Save PG,PE,ET
@@ -470,8 +473,9 @@ ENTRY(startup_32_smp)
        xorl %eax,%eax                  # Clear LDT
        lldt %ax
 
-       pushl $0                # fake return address for unwinder
-       jmp *(initial_code)
+       call *(initial_code)
+1:     jmp 1b
+ENDPROC(startup_32_smp)
 
 #include "verify_cpu.S"
 
@@ -706,7 +710,12 @@ ENTRY(initial_page_table)
 .data
 .balign 4
 ENTRY(initial_stack)
-       .long init_thread_union+THREAD_SIZE
+       /*
+        * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
+        * unwinder reliably detect the end of the stack.
+        */
+       .long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
+             TOP_OF_KERNEL_STACK_PADDING;
 
 __INITRODATA
 int_msg:
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b4421cc191b0..a15d381e6020 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -66,13 +66,8 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
         * tables and then reload them.
         */
 
-       /*
-        * Setup stack for verify_cpu(). "-8" because initial_stack is defined
-        * this way, see below. Our best guess is a NULL ptr for stack
-        * termination heuristics and we don't want to break anything which
-        * might depend on it (kgdb, ...).
-        */
-       leaq    (__end_init_task - 8)(%rip), %rsp
+       /* Set up the stack for verify_cpu(), similar to initial_stack below */
+       leaq    (__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
 
        /* Sanitize CPU configuration */
        call verify_cpu
@@ -117,20 +112,20 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
        movq    %rdi, %rax
        shrq    $PGDIR_SHIFT, %rax
 
-       leaq    (4096 + _KERNPG_TABLE)(%rbx), %rdx
+       leaq    (PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
        movq    %rdx, 0(%rbx,%rax,8)
        movq    %rdx, 8(%rbx,%rax,8)
 
-       addq    $4096, %rdx
+       addq    $PAGE_SIZE, %rdx
        movq    %rdi, %rax
        shrq    $PUD_SHIFT, %rax
        andl    $(PTRS_PER_PUD-1), %eax
-       movq    %rdx, 4096(%rbx,%rax,8)
+       movq    %rdx, PAGE_SIZE(%rbx,%rax,8)
        incl    %eax
        andl    $(PTRS_PER_PUD-1), %eax
-       movq    %rdx, 4096(%rbx,%rax,8)
+       movq    %rdx, PAGE_SIZE(%rbx,%rax,8)
 
-       addq    $8192, %rbx
+       addq    $PAGE_SIZE * 2, %rbx
        movq    %rdi, %rax
        shrq    $PMD_SHIFT, %rdi
        addq    $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
@@ -265,13 +260,17 @@ ENTRY(secondary_startup_64)
        movl    $MSR_GS_BASE,%ecx
        movl    initial_gs(%rip),%eax
        movl    initial_gs+4(%rip),%edx
-       wrmsr   
+       wrmsr
 
        /* rsi is pointer to real mode structure with interesting info.
           pass it to C */
        movq    %rsi, %rdi
-       
-       /* Finally jump to run C code and to be on real kernel address
+       jmp     start_cpu
+ENDPROC(secondary_startup_64)
+
+ENTRY(start_cpu)
+       /*
+        * Jump to run C code and to be on a real kernel address.
         * Since we are running on identity-mapped space we have to jump
         * to the full 64bit address, this is only possible as indirect
         * jump.  In addition we need to ensure %cs is set so we make this
@@ -295,12 +294,13 @@ ENTRY(secondary_startup_64)
         *      REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
         *              address given in m16:64.
         */
-       movq    initial_code(%rip),%rax
-       pushq   $0              # fake return address to stop unwinder
+       call    1f              # put return address on stack for unwinder
+1:     xorq    %rbp, %rbp      # clear frame pointer
+       movq    initial_code(%rip), %rax
        pushq   $__KERNEL_CS    # set correct cs
        pushq   %rax            # target address in negative space
        lretq
-ENDPROC(secondary_startup_64)
+ENDPROC(start_cpu)
 
 #include "verify_cpu.S"
 
@@ -308,15 +308,11 @@ ENDPROC(secondary_startup_64)
 /*
  * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
  * up already except stack. We just set up stack here. Then call
- * start_secondary().
+ * start_secondary() via start_cpu().
  */
 ENTRY(start_cpu0)
-       movq initial_stack(%rip),%rsp
-       movq    initial_code(%rip),%rax
-       pushq   $0              # fake return address to stop unwinder
-       pushq   $__KERNEL_CS    # set correct cs
-       pushq   %rax            # target address in negative space
-       lretq
+       movq    initial_stack(%rip), %rsp
+       jmp     start_cpu
 ENDPROC(start_cpu0)
 #endif
 
@@ -328,7 +324,11 @@ ENDPROC(start_cpu0)
        GLOBAL(initial_gs)
        .quad   INIT_PER_CPU_VAR(irq_stack_union)
        GLOBAL(initial_stack)
-       .quad  init_thread_union+THREAD_SIZE-8
+       /*
+        * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
+        * unwinder reliably detect the end of the stack.
+        */
+       .quad  init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
        __FINITDATA
 
 bad_address:
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index bd7be8efdc4c..e3223bc78cb6 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -72,10 +72,9 @@ void __show_regs(struct pt_regs *regs, int all)
                savesegment(gs, gs);
        }
 
-       printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
-                       (u16)regs->cs, regs->ip, regs->flags,
-                       smp_processor_id());
-       print_symbol("EIP is at %s\n", regs->ip);
+       printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
+       printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
+               smp_processor_id());
 
        printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
                regs->ax, regs->bx, regs->cx, regs->dx);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3760b3c1ca0..c99f1ca35eb5 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -61,10 +61,15 @@ void __show_regs(struct pt_regs *regs, int all)
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;
 
-       printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
-       printk_address(regs->ip);
-       printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
-                       regs->sp, regs->flags);
+       printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
+               (void *)regs->ip);
+       printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
+               regs->sp, regs->flags);
+       if (regs->orig_ax != -1)
+               pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
+       else
+               pr_cont("\n");
+
        printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->ax, regs->bx, regs->cx);
        printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 42f5eb7b4f6c..95d6fc549ad5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -964,9 +964,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
        int cpu0_nmi_registered = 0;
        unsigned long timeout;
 
-       idle->thread.sp = (unsigned long) (((struct pt_regs *)
-                         (THREAD_SIZE +  task_stack_page(idle))) - 1);
-
+       idle->thread.sp = (unsigned long)task_pt_regs(idle);
        early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
        initial_code = (unsigned long)start_secondary;
        initial_stack  = idle->thread.sp;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a2456d4d286a..ea7b7f9a3b9e 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -14,13 +14,55 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
        if (unwind_done(state))
                return 0;
 
+       if (state->regs && user_mode(state->regs))
+               return 0;
+
        addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
                                     addr_p);
 
-       return __kernel_text_address(addr) ? addr : 0;
+       if (!__kernel_text_address(addr)) {
+               printk_deferred_once(KERN_WARNING
+                       "WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n",
+                       (void *)addr, addr_p, state->task->comm,
+                       state->task->pid);
+               return 0;
+       }
+
+       return addr;
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
+static size_t regs_size(struct pt_regs *regs)
+{
+       /* x86_32 regs from kernel mode are two words shorter: */
+       if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
+               return sizeof(*regs) - 2*sizeof(long);
+
+       return sizeof(*regs);
+}
+
+static bool is_last_task_frame(struct unwind_state *state)
+{
+       unsigned long bp = (unsigned long)state->bp;
+       unsigned long regs = (unsigned long)task_pt_regs(state->task);
+
+       return bp == regs - FRAME_HEADER_SIZE;
+}
+
+/*
+ * This determines if the frame pointer actually contains an encoded pointer to
+ * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
+ */
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+       unsigned long regs = (unsigned long)bp;
+
+       if (!(regs & 0x1))
+               return NULL;
+
+       return (struct pt_regs *)(regs & ~0x1);
+}
+
 static bool update_stack_state(struct unwind_state *state, void *addr,
                               size_t len)
 {
@@ -43,26 +85,117 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 
 bool unwind_next_frame(struct unwind_state *state)
 {
-       unsigned long *next_bp;
+       struct pt_regs *regs;
+       unsigned long *next_bp, *next_frame;
+       size_t next_len;
+       enum stack_type prev_type = state->stack_info.type;
 
        if (unwind_done(state))
                return false;
 
-       next_bp = (unsigned long *)*state->bp;
+       /* have we reached the end? */
+       if (state->regs && user_mode(state->regs))
+               goto the_end;
+
+       if (is_last_task_frame(state)) {
+               regs = task_pt_regs(state->task);
+
+               /*
+                * kthreads (other than the boot CPU's idle thread) have some
+                * partial regs at the end of their stack which were placed
+                * there by copy_thread_tls().  But the regs don't have any
+                * useful information, so we can skip them.
+                *
+                * This user_mode() check is slightly broader than a PF_KTHREAD
+                * check because it also catches the awkward situation where a
+                * newly forked kthread transitions into a user task by calling
+                * do_execve(), which eventually clears PF_KTHREAD.
+                */
+               if (!user_mode(regs))
+                       goto the_end;
+
+               /*
+                * We're almost at the end, but not quite: there's still the
+                * syscall regs frame.  Entry code doesn't encode the regs
+                * pointer for syscalls, so we have to set it manually.
+                */
+               state->regs = regs;
+               state->bp = NULL;
+               return true;
+       }
+
+       /* get the next frame pointer */
+       if (state->regs)
+               next_bp = (unsigned long *)state->regs->bp;
+       else
+               next_bp = (unsigned long *)*state->bp;
+
+       /* is the next frame pointer an encoded pointer to pt_regs? */
+       regs = decode_frame_pointer(next_bp);
+       if (regs) {
+               next_frame = (unsigned long *)regs;
+               next_len = sizeof(*regs);
+       } else {
+               next_frame = next_bp;
+               next_len = FRAME_HEADER_SIZE;
+       }
 
        /* make sure the next frame's data is accessible */
-       if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
-               return false;
+       if (!update_stack_state(state, next_frame, next_len)) {
+               /*
+                * Don't warn on bad regs->bp.  An interrupt in entry code
+                * might cause a false positive warning.
+                */
+               if (state->regs)
+                       goto the_end;
+
+               goto bad_address;
+       }
+
+       /* Make sure it only unwinds up and doesn't overlap the last frame: */
+       if (state->stack_info.type == prev_type) {
+               if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs))
+                       goto bad_address;
+
+               if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE)
+                       goto bad_address;
+       }
 
        /* move to the next frame */
-       state->bp = next_bp;
+       if (regs) {
+               state->regs = regs;
+               state->bp = NULL;
+       } else {
+               state->bp = next_bp;
+               state->regs = NULL;
+       }
+
        return true;
+
+bad_address:
+       if (state->regs) {
+               printk_deferred_once(KERN_WARNING
+                       "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
+                       state->regs, state->task->comm,
+                       state->task->pid, next_frame);
+       } else {
+               printk_deferred_once(KERN_WARNING
+                       "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
+                       state->bp, state->task->comm,
+                       state->task->pid, next_frame);
+       }
+the_end:
+       state->stack_info.type = STACK_TYPE_UNKNOWN;
+       return false;
 }
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
                    struct pt_regs *regs, unsigned long *first_frame)
 {
+       unsigned long *bp, *frame;
+       size_t len;
+
        memset(state, 0, sizeof(*state));
        state->task = task;
 
@@ -73,12 +206,22 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        }
 
        /* set up the starting stack frame */
-       state->bp = get_frame_pointer(task, regs);
+       bp = get_frame_pointer(task, regs);
+       regs = decode_frame_pointer(bp);
+       if (regs) {
+               state->regs = regs;
+               frame = (unsigned long *)regs;
+               len = sizeof(*regs);
+       } else {
+               state->bp = bp;
+               frame = bp;
+               len = FRAME_HEADER_SIZE;
+       }
 
        /* initialize stack info and make sure the frame data is accessible */
-       get_stack_info(state->bp, state->task, &state->stack_info,
+       get_stack_info(frame, state->task, &state->stack_info,
                       &state->stack_mask);
-       update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+       update_stack_state(state, frame, len);
 
        /*
         * The caller can provide the address of the first frame directly
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index dbf67f64d5ec..e79f15f108a8 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -91,10 +91,10 @@ SECTIONS
        /* Text and read-only data */
        .text :  AT(ADDR(.text) - LOAD_OFFSET) {
                _text = .;
+               _stext = .;
                /* bootstrapping code */
                HEAD_TEXT
                . = ALIGN(8);
-               _stext = .;
                TEXT_TEXT
                SCHED_TEXT
                CPUIDLE_TEXT
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index d376e4b48f88..c5959576c315 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -16,53 +16,6 @@
 #include <asm/smap.h>
 #include <asm/export.h>
 
-/* Standard copy_to_user with segment limit checking */
-ENTRY(_copy_to_user)
-       mov PER_CPU_VAR(current_task), %rax
-       movq %rdi,%rcx
-       addq %rdx,%rcx
-       jc bad_to_user
-       cmpq TASK_addr_limit(%rax),%rcx
-       ja bad_to_user
-       ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
-                     "jmp copy_user_generic_string",           \
-                     X86_FEATURE_REP_GOOD,                     \
-                     "jmp copy_user_enhanced_fast_string",     \
-                     X86_FEATURE_ERMS
-ENDPROC(_copy_to_user)
-EXPORT_SYMBOL(_copy_to_user)
-
-/* Standard copy_from_user with segment limit checking */
-ENTRY(_copy_from_user)
-       mov PER_CPU_VAR(current_task), %rax
-       movq %rsi,%rcx
-       addq %rdx,%rcx
-       jc bad_from_user
-       cmpq TASK_addr_limit(%rax),%rcx
-       ja bad_from_user
-       ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
-                     "jmp copy_user_generic_string",           \
-                     X86_FEATURE_REP_GOOD,                     \
-                     "jmp copy_user_enhanced_fast_string",     \
-                     X86_FEATURE_ERMS
-ENDPROC(_copy_from_user)
-EXPORT_SYMBOL(_copy_from_user)
-
-
-       .section .fixup,"ax"
-       /* must zero dest */
-ENTRY(bad_from_user)
-bad_from_user:
-       movl %edx,%ecx
-       xorl %eax,%eax
-       rep
-       stosb
-bad_to_user:
-       movl %edx,%eax
-       ret
-ENDPROC(bad_from_user)
-       .previous
-
 /*
  * copy_user_generic_unrolled - memory copy with exception handling.
  * This version is for CPUs like P4 that don't have efficient micro
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index b4908789484e..c074799bddae 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -34,3 +34,52 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
        return ret;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
+
+/**
+ * copy_to_user: - Copy a block of data into user space.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
+ *
+ * Copy data from kernel space to user space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+               n = __copy_to_user(to, from, n);
+       return n;
+}
+EXPORT_SYMBOL(_copy_to_user);
+
+/**
+ * copy_from_user: - Copy a block of data from user space.
+ * @to:   Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
+ *
+ * Copy data from user space to kernel space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+               n = __copy_from_user(to, from, n);
+       else
+               memset(to, 0, n);
+       return n;
+}
+EXPORT_SYMBOL(_copy_from_user);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 3bc7baf2a711..0b281217c890 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -640,52 +640,3 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
        return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
-
-/**
- * copy_to_user: - Copy a block of data into user space.
- * @to:   Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * Copy data from kernel space to user space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
-{
-       if (access_ok(VERIFY_WRITE, to, n))
-               n = __copy_to_user(to, from, n);
-       return n;
-}
-EXPORT_SYMBOL(_copy_to_user);
-
-/**
- * copy_from_user: - Copy a block of data from user space.
- * @to:   Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- *          enabled.
- *
- * Copy data from user space to kernel space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- */
-unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
-{
-       if (access_ok(VERIFY_READ, from, n))
-               n = __copy_from_user(to, from, n);
-       else
-               memset(to, 0, n);
-       return n;
-}
-EXPORT_SYMBOL(_copy_from_user);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9f72ca3b2669..17c55a536fdd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -679,8 +679,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
                printk(KERN_CONT "paging request");
 
        printk(KERN_CONT " at %p\n", (void *) address);
-       printk(KERN_ALERT "IP:");
-       printk_address(regs->ip);
+       printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
 
        dump_pagetable(address);
 }
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index cd5173a2733f..8410e7d0a5b5 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -387,8 +387,8 @@ static void uv_nmi_dump_cpu_ip_hdr(void)
 /* Dump Instruction Pointer info */
 static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
 {
-       pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm);
-       printk_address(regs->ip);
+       pr_info("UV: %4d %6d %-32.32s %pS",
+               cpu, current->pid, current->comm, (void *)regs->ip);
 }
 
 /*
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index ba70ff232917..1972565ab106 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -269,7 +269,8 @@ int main(int argc, char **argv)
                insns++;
        }
 
-       fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
+       fprintf((errors) ? stderr : stdout,
+               "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
                prog,
                (errors) ? "Failure" : "Success",
                insns,
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
index 56f04db0c9c0..ecf31e0358c8 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/test_get_len.c
@@ -167,7 +167,7 @@ int main(int argc, char **argv)
                fprintf(stderr, "Warning: decoded and checked %d"
                        " instructions with %d warnings\n", insns, warnings);
        else
-               fprintf(stderr, "Succeed: decoded and checked %d"
+               fprintf(stdout, "Success: decoded and checked %d"
                        " instructions\n", insns);
        return 0;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 706309f9ed84..17a5a8253294 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -990,13 +990,6 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
        {
-               .procname       = "kstack_depth_to_print",
-               .data           = &kstack_depth_to_print,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
                .procname       = "io_delay_type",
                .data           = &io_delay_type,
                .maxlen         = sizeof(int),
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6de9440e3ae2..61b0988bba8c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6399,8 +6399,8 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
        }
 
        if (pages && s)
-               pr_info("Freeing %s memory: %ldK (%p - %p)\n",
-                       s, pages << (PAGE_SHIFT - 10), start, end);
+               pr_info("Freeing %s memory: %ldK\n",
+                       s, pages << (PAGE_SHIFT - 10));
 
        return pages;
 }
diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh
index c332684e1b5a..5206d99ddeb8 100755
--- a/scripts/decode_stacktrace.sh
+++ b/scripts/decode_stacktrace.sh
@@ -139,7 +139,8 @@ handle_line() {
 
 while read line; do
        # Let's see if we have an address in the line
-       if [[ $line =~ \[\<([^]]+)\>\]  ]]; then
+       if [[ $line =~ \[\<([^]]+)\>\] ]] ||
+          [[ $line =~ [^+\ ]+\+0x[0-9a-f]+/0x[0-9a-f]+ ]]; then
                # Translate address to line numbers
                handle_line "$line"
        # Is it a code line?
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 450b33257339..29df825d375c 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -105,9 +105,18 @@ __faddr2line() {
        # In rare cases there might be duplicates.
        while read symbol; do
                local fields=($symbol)
-               local sym_base=0x${fields[1]}
-               local sym_size=${fields[2]}
-               local sym_type=${fields[3]}
+               local sym_base=0x${fields[0]}
+               local sym_type=${fields[1]}
+               local sym_end=0x${fields[3]}
+
+               # calculate the size
+               local sym_size=$(($sym_end - $sym_base))
+               if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then
+                       warn "bad symbol size: base: $sym_base end: $sym_end"
+                       DONE=1
+                       return
+               fi
+               sym_size=0x$(printf %x $sym_size)
 
                # calculate the address
                local addr=$(($sym_base + $offset))
@@ -116,26 +125,26 @@ __faddr2line() {
                        DONE=1
                        return
                fi
-               local hexaddr=0x$(printf %x $addr)
+               addr=0x$(printf %x $addr)
 
                # weed out non-function symbols
-               if [[ $sym_type != "FUNC" ]]; then
+               if [[ $sym_type != t ]] && [[ $sym_type != T ]]; then
                        [[ $print_warnings = 1 ]] &&
-                               echo "skipping $func address at $hexaddr due to non-function symbol"
+                               echo "skipping $func address at $addr due to non-function symbol of type '$sym_type'"
                        continue
                fi
 
                # if the user provided a size, make sure it matches the symbol's size
                if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
                        [[ $print_warnings = 1 ]] &&
-                               echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)"
+                               echo "skipping $func address at $addr due to size mismatch ($size != $sym_size)"
                        continue;
                fi
 
                # make sure the provided offset is within the symbol's range
                if [[ $offset -gt $sym_size ]]; then
                        [[ $print_warnings = 1 ]] &&
-                               echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)"
+                               echo "skipping $func address at $addr due to size mismatch ($offset > $sym_size)"
                        continue
                fi
 
@@ -143,12 +152,12 @@ __faddr2line() {
                [[ $FIRST = 0 ]] && echo
                FIRST=0
 
-               local hexsize=0x$(printf %x $sym_size)
-               echo "$func+$offset/$hexsize:"
-               addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;"
+               # pass real address to addr2line
+               echo "$func+$offset/$sym_size:"
+               addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;"
                DONE=1
 
-       done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}')
+       done < <(nm -n $objfile | awk -v fn=$func '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, $1 }')
 }
 
 [[ $# -lt 2 ]] && usage
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index a89f80a5b711..8c1cb423cfe6 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -6,7 +6,7 @@ include ../lib.mk
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
                        check_initial_reg_state sigreturn ldt_gdt iopl \
-                       protection_keys
+                       protection_keys test_vdso
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
                        vdso_restorer
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
new file mode 100644
index 000000000000..65d7a2bf7e14
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -0,0 +1,123 @@
+/*
+ * test_vdso.c - Test getcpu() via the syscall, vDSO and vsyscall paths
+ * Copyright (c) 2011-2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+int nerrs = 0;
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+void fill_function_pointers()
+{
+       void *vdso = dlopen("linux-vdso.so.1",
+                           RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso)
+               vdso = dlopen("linux-gate.so.1",
+                             RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso) {
+               printf("[WARN]\tfailed to find vDSO\n");
+               return;
+       }
+
+       vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+       if (!vdso_getcpu)
+               printf("Warning: failed to find getcpu in vDSO\n");
+}
+
+static long sys_getcpu(unsigned * cpu, unsigned * node,
+                      void* cache)
+{
+       return syscall(__NR_getcpu, cpu, node, cache);
+}
+
+static void test_getcpu(void)
+{
+       printf("[RUN]\tTesting getcpu...\n");
+
+       for (int cpu = 0; ; cpu++) {
+               cpu_set_t cpuset;
+               CPU_ZERO(&cpuset);
+               CPU_SET(cpu, &cpuset);
+               if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+                       return;
+
+               unsigned cpu_sys, cpu_vdso, cpu_vsys,
+                       node_sys, node_vdso, node_vsys;
+               long ret_sys, ret_vdso = 1, ret_vsys = 1;
+               unsigned node;
+
+               ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+               if (vdso_getcpu)
+                       ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+               if (vgetcpu)
+                       ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+               if (!ret_sys)
+                       node = node_sys;
+               else if (!ret_vdso)
+                       node = node_vdso;
+               else if (!ret_vsys)
+                       node = node_vsys;
+
+               bool ok = true;
+               if (!ret_sys && (cpu_sys != cpu || node_sys != node))
+                       ok = false;
+               if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
+                       ok = false;
+               if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
+                       ok = false;
+
+               printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
+               if (!ret_sys)
+                       printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
+               if (!ret_vdso)
+                       printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
+               if (!ret_vsys)
+                       printf(" vsyscall: cpu %u, node %u", cpu_vsys,
+                              node_vsys);
+               printf("\n");
+
+               if (!ok)
+                       nerrs++;
+       }
+}
+
+int main(int argc, char **argv)
+{
+       fill_function_pointers();
+
+       test_getcpu();
+
+       return nerrs ? 1 : 0;
+}

Reply via email to