tree:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git WIP.x86/pti
head:   05bdddd3be604144c2f2dc65fcb6e53961834d11
commit: 03b772b5933ba040f9e805f7f82554e64d6344d8 [23/30] x86/speculation: Prepare for conditional IBPB in switch_mm()
config: i386-randconfig-x002-201847 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        git checkout 03b772b5933ba040f9e805f7f82554e64d6344d8
        # save the attached .config to linux build tree
        make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   arch/x86/mm/tlb.c: In function 'mm_mangle_tif_spec_ib':
>> arch/x86/mm/tlb.c:192:51: error: 'LAST_USR_MM_IBPB' undeclared (first use in this function); did you mean 'LAST_USER_MM_IBPB'?
     unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USR_MM_IBPB;
                                                      ^~~~~~~~~~~~~~~~
                                                      LAST_USER_MM_IBPB
   arch/x86/mm/tlb.c:192:51: note: each undeclared identifier is reported only once for each function it appears in
   In file included from include/linux/init.h:5:0,
                    from arch/x86/mm/tlb.c:1:
   arch/x86/mm/tlb.c: In function 'cond_ibpb':
   arch/x86/mm/tlb.c:243:51: error: 'LAST_USR_MM_IBPB' undeclared (first use in this function); did you mean 'LAST_USER_MM_IBPB'?
      if (next_mm != prev_mm && (next_mm | prev_mm) & LAST_USR_MM_IBPB)
                                                      ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> arch/x86/mm/tlb.c:243:3: note: in expansion of macro 'if'
      if (next_mm != prev_mm && (next_mm | prev_mm) & LAST_USR_MM_IBPB)
      ^~
   In file included from arch/x86/include/asm/preempt.h:6:0,
                    from include/linux/preempt.h:81,
                    from include/linux/spinlock.h:51,
                    from include/linux/mmzone.h:8,
                    from include/linux/gfp.h:6,
                    from include/linux/mm.h:10,
                    from arch/x86/mm/tlb.c:3:
   arch/x86/mm/tlb.c: In function 'initialize_tlbstate_and_flush':
   arch/x86/mm/tlb.c:491:49: error: 'LAST_USR_MM_IBPB' undeclared (first use in this function); did you mean 'LAST_USER_MM_IBPB'?
     this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USR_MM_IBPB);
                                                    ^
   arch/x86/include/asm/percpu.h:95:16: note: in definition of macro 'percpu_to_op'
      pto_tmp__ = (val);   \
                   ^~~
   include/linux/percpu-defs.h:377:11: note: in expansion of macro 'this_cpu_write_1'
      case 1: stem##1(variable, __VA_ARGS__);break;  \
              ^~~~
   include/linux/percpu-defs.h:508:34: note: in expansion of macro '__pcpu_size_call'
    #define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, pcp, val)
                                     ^~~~~~~~~~~~~~~~
>> arch/x86/mm/tlb.c:491:2: note: in expansion of macro 'this_cpu_write'
     this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USR_MM_IBPB);
     ^~~~~~~~~~~~~~
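
All three errors point at the same identifier: the patch spells the mask LAST_USR_MM_IBPB, while the macro defined at line 36 of the excerpt below is LAST_USER_MM_IBPB, which is also the rename gcc's fix-it hint suggests. As a self-contained illustration of the mangling scheme the patch implements (and of the predicate at line 243 of the excerpt, with the spelling fixed), here is a minimal userspace sketch; it is not kernel code, and the TIF_SPEC_IB value and the fake mm "pointers" are stand-ins for this sketch only:

#include <stdio.h>

/* Stand-in values for this sketch; in the kernel, TIF_SPEC_IB is an x86
 * thread_info flag bit and LAST_USER_MM_IBPB is the bit-0 mask defined at
 * line 36 of the excerpt below. */
#define TIF_SPEC_IB		9
#define LAST_USER_MM_IBPB	0x1UL

/* Mirror mm_mangle_tif_spec_ib(): fold the task's TIF_SPEC_IB flag into
 * bit 0 of its (aligned, hence bit-0-free) mm pointer value. */
static unsigned long mangle(unsigned long mm_ptr, unsigned long tif_flags)
{
	unsigned long ibpb = (tif_flags >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;

	return mm_ptr | ibpb;
}

int main(void)
{
	unsigned long prev_mm = mangle(0x1000UL, 1UL << TIF_SPEC_IB);	/* outgoing task, IB set */
	unsigned long next_mm = mangle(0x2000UL, 0UL);			/* incoming task, IB clear */

	/* Same test as line 243 of the excerpt, with the macro name fixed:
	 * barrier only if the mms differ and at least one side set the bit. */
	if (next_mm != prev_mm && ((next_mm | prev_mm) & LAST_USER_MM_IBPB))
		printf("would issue IBPB\n");

	return 0;
}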

vim +192 arch/x86/mm/tlb.c

   > 1  #include <linux/init.h>
     2  
     3  #include <linux/mm.h>
     4  #include <linux/spinlock.h>
     5  #include <linux/smp.h>
     6  #include <linux/interrupt.h>
     7  #include <linux/export.h>
     8  #include <linux/cpu.h>
     9  #include <linux/debugfs.h>
    10  
    11  #include <asm/tlbflush.h>
    12  #include <asm/mmu_context.h>
    13  #include <asm/nospec-branch.h>
    14  #include <asm/cache.h>
    15  #include <asm/apic.h>
    16  #include <asm/uv/uv.h>
    17  
    18  /*
    19   *      TLB flushing, formerly SMP-only
    20   *              c/o Linus Torvalds.
    21   *
    22   *      These mean you can really definitely utterly forget about
    23   *      writing to user space from interrupts. (Its not allowed anyway).
    24   *
    25   *      Optimizations Manfred Spraul <manf...@colorfullife.com>
    26   *
    27   *      More scalable flush, from Andi Kleen
    28   *
    29   *      Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
    30   */
    31  
    32  /*
    33   * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
    34   * stored in cpu_tlb_state.last_user_mm_ibpb.
    35   */
    36  #define LAST_USER_MM_IBPB       0x1UL
    37  
    38  /*
    39   * We get here when we do something requiring a TLB invalidation
    40   * but could not go invalidate all of the contexts.  We do the
    41   * necessary invalidation by clearing out the 'ctx_id' which
    42   * forces a TLB flush when the context is loaded.
    43   */
    44  static void clear_asid_other(void)
    45  {
    46          u16 asid;
    47  
    48          /*
    49           * This is only expected to be set if we have disabled
    50           * kernel _PAGE_GLOBAL pages.
    51           */
    52          if (!static_cpu_has(X86_FEATURE_PTI)) {
    53                  WARN_ON_ONCE(1);
    54                  return;
    55          }
    56  
    57          for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
    58                  /* Do not need to flush the current asid */
    59                  if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
    60                          continue;
    61                  /*
    62                   * Make sure the next time we go to switch to
    63                   * this asid, we do a flush:
    64                   */
    65                  this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
    66          }
    67          this_cpu_write(cpu_tlbstate.invalidate_other, false);
    68  }
    69  
    70  atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
    71  
    72  
    73  static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
    74                              u16 *new_asid, bool *need_flush)
    75  {
    76          u16 asid;
    77  
    78          if (!static_cpu_has(X86_FEATURE_PCID)) {
    79                  *new_asid = 0;
    80                  *need_flush = true;
    81                  return;
    82          }
    83  
    84          if (this_cpu_read(cpu_tlbstate.invalidate_other))
    85                  clear_asid_other();
    86  
    87          for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
    88                  if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
    89                      next->context.ctx_id)
    90                          continue;
    91  
    92                  *new_asid = asid;
    93                  *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
    94                                 next_tlb_gen);
    95                  return;
    96          }
    97  
    98          /*
    99           * We don't currently own an ASID slot on this CPU.
   100           * Allocate a slot.
   101           */
   102          *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
   103          if (*new_asid >= TLB_NR_DYN_ASIDS) {
   104                  *new_asid = 0;
   105                  this_cpu_write(cpu_tlbstate.next_asid, 1);
   106          }
   107          *need_flush = true;
   108  }
   109  
   110  static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
   111  {
   112          unsigned long new_mm_cr3;
   113  
   114          if (need_flush) {
   115                  invalidate_user_asid(new_asid);
   116                  new_mm_cr3 = build_cr3(pgdir, new_asid);
   117          } else {
   118                  new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
   119          }
   120  
   121          /*
   122           * Caution: many callers of this function expect
   123           * that load_cr3() is serializing and orders TLB
   124           * fills with respect to the mm_cpumask writes.
   125           */
   126          write_cr3(new_mm_cr3);
   127  }
   128  
   129  void leave_mm(int cpu)
   130  {
   131          struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
   132  
   133          /*
   134           * It's plausible that we're in lazy TLB mode while our mm is init_mm.
   135           * If so, our callers still expect us to flush the TLB, but there
   136           * aren't any user TLB entries in init_mm to worry about.
   137           *
   138           * This needs to happen before any other sanity checks due to
   139           * intel_idle's shenanigans.
   140           */
   141          if (loaded_mm == &init_mm)
   142                  return;
   143  
   144          /* Warn if we're not lazy. */
   145          WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
   146  
   147          switch_mm(NULL, &init_mm, NULL);
   148  }
   149  EXPORT_SYMBOL_GPL(leave_mm);
   150  
   151  void switch_mm(struct mm_struct *prev, struct mm_struct *next,
   152                 struct task_struct *tsk)
   153  {
   154          unsigned long flags;
   155  
   156          local_irq_save(flags);
   157          switch_mm_irqs_off(prev, next, tsk);
   158          local_irq_restore(flags);
   159  }
   160  
   161  static void sync_current_stack_to_mm(struct mm_struct *mm)
   162  {
   163          unsigned long sp = current_stack_pointer;
   164          pgd_t *pgd = pgd_offset(mm, sp);
   165  
   166          if (pgtable_l5_enabled()) {
   167                  if (unlikely(pgd_none(*pgd))) {
   168                          pgd_t *pgd_ref = pgd_offset_k(sp);
   169  
   170                          set_pgd(pgd, *pgd_ref);
   171                  }
   172          } else {
   173                  /*
   174                   * "pgd" is faked.  The top level entries are "p4d"s, so sync
   175                   * the p4d.  This compiles to approximately the same code as
   176                   * the 5-level case.
   177                   */
   178                  p4d_t *p4d = p4d_offset(pgd, sp);
   179  
   180                  if (unlikely(p4d_none(*p4d))) {
   181                          pgd_t *pgd_ref = pgd_offset_k(sp);
   182                          p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
   183  
   184                          set_p4d(p4d, *p4d_ref);
   185                  }
   186          }
   187  }
   188  
   189  static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
   190  {
   191          unsigned long next_tif = task_thread_info(next)->flags;
  > 192          unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USR_MM_IBPB;
   193  
   194          return (unsigned long)next->mm | ibpb;
   195  }
   196  
   197  static void cond_ibpb(struct task_struct *next)
   198  {
   199          if (!next || !next->mm)
   200                  return;
   201  
   202          if (static_branch_likely(&switch_mm_cond_ibpb)) {
   203                  unsigned long prev_mm, next_mm;
   204  
   205                  /*
   206                   * This is a bit more complex than the always mode because
   207                   * it has to handle two cases:
   208                   *
   209                   * 1) Switch from a user space task (potential attacker)
   210                   *    which has TIF_SPEC_IB set to a user space task
   211                   *    (potential victim) which has TIF_SPEC_IB not set.
   212                   *
   213                   * 2) Switch from a user space task (potential attacker)
   214                   *    which has TIF_SPEC_IB not set to a user space task
   215                   *    (potential victim) which has TIF_SPEC_IB set.
   216                   *
   217                   * This could be done by unconditionally issuing IBPB when
   218                   * a task which has TIF_SPEC_IB set is either scheduled in
   219                   * or out. Though that results in two flushes when:
   220                   *
   221                   * - the same user space task is scheduled out and later
   222                   *   scheduled in again and only a kernel thread ran in
   223                   *   between.
   224                   *
   225                   * - a user space task belonging to the same process is
   226                   *   scheduled in after a kernel thread ran in between
   227                   *
   228                   * - a user space task belonging to the same process is
   229                   *   scheduled in immediately.
   230                   *
   231                   * Optimize this with reasonably small overhead for the
   232                   * above cases. Mangle the TIF_SPEC_IB bit into the mm
   233                   * pointer of the incoming task which is stored in
   234                   * cpu_tlbstate.last_user_mm_ibpb for comparison.
   235                   */
   236                  next_mm = mm_mangle_tif_spec_ib(next);
   237                  prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
   238  
   239                  /*
   240                   * Issue IBPB only if the mm's are different and one or
   241                   * both have the IBPB bit set.
   242                   */
  > 243                  if (next_mm != prev_mm && (next_mm | prev_mm) & LAST_USR_MM_IBPB)
   244                          indirect_branch_prediction_barrier();
   245  
   246                  this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
   247          }
   248  
   249          if (static_branch_unlikely(&switch_mm_always_ibpb)) {
   250                  /*
   251                   * Only flush when switching to a user space task with a
   252                   * different context than the user space task which ran
   253                   * last on this CPU.
   254                   */
   255                  if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
   256                          indirect_branch_prediction_barrier();
   257                          this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
   258                  }
   259          }
   260  }
   261  

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

Attachment: .config.gz
