Re: [PATCH 4/5] LoongArch: Add paravirt interface for guest kernel

2024-01-03 Thread Juergen Gross

On 03.01.24 09:00, maobibo wrote:



On 2024/1/3 3:40 PM, Jürgen Groß wrote:

On 03.01.24 08:16, Bibo Mao wrote:

The patch adds a paravirt interface for the guest kernel. It checks whether
the system runs in VM mode; if so, it detects the hypervisor type and
returns true if it is the KVM hypervisor, else false. Currently only the
KVM hypervisor is supported, so there is only hypervisor detection for
KVM.


I guess you are talking of pv_guest_init() here? Or do you mean
kvm_para_available()?

Yes, it is pv_guest_init. It would be better if all hypervisor detection
were done in pv_guest_init(). Currently there is only the KVM hypervisor,
so kvm_para_available() is hard-coded in pv_guest_init() here.
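
Just for illustration, a rough standalone sketch of the detection flow
described above (all helper names are stand-ins, not the actual LoongArch
patch):

/*
 * Illustrative only: pv_guest_init() first checks for VM mode, then
 * probes for a specific hypervisor. Only KVM is handled, so
 * kvm_para_available() is the single probe. All helpers are stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

static bool running_in_vm_mode(void) { return true; }  /* stand-in for the VM mode check */
static bool kvm_para_available(void) { return true; }  /* stand-in for the KVM probe */

static bool pv_guest_init(void)
{
	if (!running_in_vm_mode())
		return false;          /* bare metal: nothing to do */

	if (!kvm_para_available())
		return false;          /* some other hypervisor: not supported yet */

	/* KVM detected: paravirt features such as pv_ipi would be set up here. */
	return true;
}

int main(void)
{
	printf("paravirt guest: %s\n", pv_guest_init() ? "yes" : "no");
	return 0;
}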


I think this is no problem as long as no additional hypervisors are
supported.



I can split paravirt.c into paravirt.c and kvm.c, keep the hypervisor
detection in paravirt.c, and move the code related to pv_ipi into kvm.c.


I wouldn't do that right now.

Just be a little bit more specific in the commit message (use the related
function name instead of "it").


Juergen




[PATCH v6 5/5] x86/paravirt: remove no longer needed paravirt patching code

2023-12-09 Thread Juergen Gross
Now that paravirt is using the alternatives patching infrastructure,
remove the paravirt patching code.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
 arch/x86/include/asm/paravirt.h   | 13 --
 arch/x86/include/asm/paravirt_types.h | 38 ---
 arch/x86/include/asm/text-patching.h  | 12 -
 arch/x86/kernel/alternative.c | 67 +--
 arch/x86/kernel/paravirt.c| 30 
 arch/x86/kernel/vmlinux.lds.S | 13 --
 arch/x86/tools/relocs.c   |  2 +-
 7 files changed, 3 insertions(+), 172 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 973c2ac2d25c..8bcf7584e7dd 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -725,23 +725,10 @@ void native_pv_lock_init(void) __init;
 
 #else  /* __ASSEMBLY__ */
 
-#define _PVSITE(ptype, ops)\
-771:;  \
-   ops;\
-772:;  \
-   .pushsection .parainstructions,"a"; \
-.long 771b-.;  \
-.byte ptype;   \
-.byte 772b-771b;   \
-   .popsection
-
-
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_PARAVIRT_XXL
 #ifdef CONFIG_DEBUG_ENTRY
 
-#define PARA_PATCH(off)((off) / 8)
-#define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops)
 #define PARA_INDIRECT(addr)*addr(%rip)
 
 .macro PARA_IRQ_save_fl
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9cad536fc08d..d8e85d2cf8d5 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -2,15 +2,6 @@
 #ifndef _ASM_X86_PARAVIRT_TYPES_H
 #define _ASM_X86_PARAVIRT_TYPES_H
 
-#ifndef __ASSEMBLY__
-/* These all sit in the .parainstructions section to tell us what to patch. */
-struct paravirt_patch_site {
-   s32 instr_offset;   /* original instructions */
-   u8 type;/* type of this instruction */
-   u8 len; /* length of original instruction */
-} __packed;
-#endif
-
 #ifdef CONFIG_PARAVIRT
 
 #ifndef __ASSEMBLY__
@@ -250,32 +241,6 @@ struct paravirt_patch_template {
 extern struct pv_info pv_info;
 extern struct paravirt_patch_template pv_ops;
 
-#define PARAVIRT_PATCH(x)  \
-   (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
-
-#define paravirt_type(op)  \
-   [paravirt_typenum] "i" (PARAVIRT_PATCH(op)),\
-   [paravirt_opptr] "m" (pv_ops.op)
-/*
- * Generate some code, and mark it as patchable by the
- * apply_paravirt() alternate instruction patcher.
- */
-#define _paravirt_alt(insn_string, type)   \
-   "771:\n\t" insn_string "\n" "772:\n"\
-   ".pushsection .parainstructions,\"a\"\n"\
-   "  .long 771b-.\n"  \
-   "  .byte " type "\n"\
-   "  .byte 772b-771b\n"   \
-   ".popsection\n"
-
-/* Generate patchable code, with the default asm parameters. */
-#define paravirt_alt(insn_string)  \
-   _paravirt_alt(insn_string, "%c[paravirt_typenum]")
-
-/* Simple instruction patching code. */
-#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
-
-unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, 
unsigned int len);
 #define paravirt_ptr(op)   [paravirt_opptr] "m" (pv_ops.op)
 
 int paravirt_disable_iospace(void);
@@ -555,9 +520,6 @@ unsigned long pv_native_read_cr2(void);
 
 #define paravirt_nop   ((void *)nop_func)
 
-extern struct paravirt_patch_site __parainstructions[],
-   __parainstructions_end[];
-
 #endif /* __ASSEMBLY__ */
 
 #define ALT_NOT_XEN	ALT_NOT(X86_FEATURE_XENPV)
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 29832c338cdc..0b70653a98c1 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -6,18 +6,6 @@
 #include 
 #include 
 
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
-   struct paravirt_patch_site *end);
-#else
-static inline void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end)
-{}
-#define __parainstructions NULL
-#define __parainstructions_end NULL
-#endif
-
 /*
  * Currently, the max observed size in the kernel code is
  * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
diff --git a/

[PATCH v6 4/5] x86/paravirt: switch mixed paravirt/alternative calls to alternative_2

2023-12-09 Thread Juergen Gross
Instead of stacking alternative and paravirt patching, use the new
ALT_FLAG_CALL flag to switch those mixed calls to pure alternative
handling.

This eliminates the need to be careful regarding the sequence of
alternative and paravirt patching.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V5:
- remove no longer needed extern declarations from alternative.c
  (Boris Petkov)
- add comment about ALTERNATIVE[_2]() macro usage (Boris Petkov)
- rebase to tip/master (Boris Petkov)
V6:
- fix SAVE_FLAGS macro (Boris Petkov)
---
 arch/x86/include/asm/alternative.h|  5 ++--
 arch/x86/include/asm/paravirt.h   | 12 
 arch/x86/include/asm/paravirt_types.h | 40 +++
 arch/x86/kernel/alternative.c |  1 -
 arch/x86/kernel/callthunks.c  | 17 ++--
 arch/x86/kernel/module.c  | 20 --
 6 files changed, 44 insertions(+), 51 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 472334eed6f3..fcd20c6dc7f9 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -89,6 +89,8 @@ struct alt_instr {
u8  replacementlen; /* length of new instruction */
 } __packed;
 
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
 /*
  * Debug flag that can be tested to see whether alternative
  * instructions were patched in already:
@@ -104,11 +106,10 @@ extern void apply_fineibt(s32 *start_retpoline, s32 
*end_retpoine,
  s32 *start_cfi, s32 *end_cfi);
 
 struct module;
-struct paravirt_patch_site;
 
 struct callthunk_sites {
s32 *call_start, *call_end;
-   struct paravirt_patch_site  *pv_start, *pv_end;
+   struct alt_instr*alt_start, *alt_end;
 };
 
 #ifdef CONFIG_CALL_THUNKS
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index f18bfa7f3070..973c2ac2d25c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -738,20 +738,20 @@ void native_pv_lock_init(void) __init;
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_PARAVIRT_XXL
+#ifdef CONFIG_DEBUG_ENTRY
 
 #define PARA_PATCH(off)((off) / 8)
 #define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops)
 #define PARA_INDIRECT(addr)*addr(%rip)
 
-#ifdef CONFIG_DEBUG_ENTRY
 .macro PARA_IRQ_save_fl
-   PARA_SITE(PARA_PATCH(PV_IRQ_save_fl),
- ANNOTATE_RETPOLINE_SAFE;
- call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);)
+   ANNOTATE_RETPOLINE_SAFE;
+   call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);
 .endm
 
-#define SAVE_FLAGS ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \
-   ALT_NOT_XEN
+#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;",  \
+"ALT_CALL_INSTR;", ALT_CALL_ALWAYS,\
+"pushf; pop %rax;", ALT_NOT_XEN
 #endif
 #endif /* CONFIG_PARAVIRT_XXL */
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 166e9618158f..9cad536fc08d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -276,15 +276,11 @@ extern struct paravirt_patch_template pv_ops;
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
 
 unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, 
unsigned int len);
+#define paravirt_ptr(op)   [paravirt_opptr] "m" (pv_ops.op)
 
 int paravirt_disable_iospace(void);
 
-/*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
- */
+/* This generates an indirect call based on the operation type number. */
 #define PARAVIRT_CALL  \
ANNOTATE_RETPOLINE_SAFE \
"call *%[paravirt_opptr];"
@@ -317,12 +313,6 @@ int paravirt_disable_iospace(void);
  * However, x86_64 also has to clobber all caller saved registers, which
  * unfortunately, are quite a bit (r8 - r11)
  *
- * The call instruction itself is marked by placing its start address
- * and size into the .parainstructions section, so that
- * apply_paravirt() in arch/i386/kernel/alternative.c can do the
- * appropriate patching under the control of the backend pv_init_ops
- * implementation.
- *
  * Unfortunately there's no way to get gcc to generate the args setup
  * for the call, and then allow the call itself to be generated by an
  * inline asm.  Because of this, we must do the complete arg setup and
@@ -421,14 +411,27 @@ int paravirt_dis

[PATCH v6 2/5] x86/paravirt: move some functions and defines to alternative

2023-12-09 Thread Juergen Gross
As a preparation for replacing paravirt patching completely by
alternative patching, move some backend functions and #defines to
alternative code and header.

Signed-off-by: Juergen Gross 
---
V4:
- rename x86_nop() to nop_func() and x86_BUG() to BUG_func() (Boris
  Petkov)
---
 arch/x86/include/asm/alternative.h| 16 
 arch/x86/include/asm/paravirt.h   | 12 -
 arch/x86/include/asm/paravirt_types.h |  4 +--
 arch/x86/include/asm/qspinlock_paravirt.h |  4 +--
 arch/x86/kernel/alternative.c | 10 
 arch/x86/kernel/kvm.c |  4 +--
 arch/x86/kernel/paravirt.c| 30 +++
 arch/x86/xen/irq.c|  2 +-
 8 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 65f79092c9d9..ce788ab4e77c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -330,6 +330,22 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
  */
 #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
 
+/* Macro for creating assembler functions avoiding any C magic. */
+#define DEFINE_ASM_FUNC(func, instr, sec)  \
+   asm (".pushsection " #sec ", \"ax\"\n"  \
+".global " #func "\n\t"\
+".type " #func ", @function\n\t"   \
+ASM_FUNC_ALIGN "\n"\
+#func ":\n\t"  \
+ASM_ENDBR  \
+instr "\n\t"   \
+ASM_RET\
+".size " #func ", . - " #func "\n\t"   \
+".popsection")
+
+void BUG_func(void);
+void nop_func(void);
+
 #else /* __ASSEMBLY__ */
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index aa76ac7c806c..f18bfa7f3070 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -720,18 +720,6 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 #undef PVOP_VCALL4
 #undef PVOP_CALL4
 
-#define DEFINE_PARAVIRT_ASM(func, instr, sec)  \
-   asm (".pushsection " #sec ", \"ax\"\n"  \
-".global " #func "\n\t"\
-".type " #func ", @function\n\t"   \
-ASM_FUNC_ALIGN "\n"\
-#func ":\n\t"  \
-ASM_ENDBR  \
-instr "\n\t"   \
-ASM_RET\
-".size " #func ", . - " #func "\n\t"   \
-".popsection")
-
 extern void default_banner(void);
 void native_pv_lock_init(void) __init;
 
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 483e19e5ca7a..166e9618158f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -540,8 +540,6 @@ int paravirt_disable_iospace(void);
__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),\
 PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
 
-void _paravirt_nop(void);
-void paravirt_BUG(void);
 unsigned long paravirt_ret0(void);
 #ifdef CONFIG_PARAVIRT_XXL
 u64 _paravirt_ident_64(u64);
@@ -551,7 +549,7 @@ void pv_native_irq_enable(void);
 unsigned long pv_native_read_cr2(void);
 #endif
 
-#define paravirt_nop   ((void *)_paravirt_nop)
+#define paravirt_nop   ((void *)nop_func)
 
 extern struct paravirt_patch_site __parainstructions[],
__parainstructions_end[];
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 85b6e3609cb9..ef9697f20129 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -56,8 +56,8 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, 
".spinlock.text");
"pop%rdx\n\t"   \
FRAME_END
 
-DEFINE_PARAVIRT_ASM(__raw_callee_save___pv_queued_spin_unlock,
-   PV_UNLOCK_ASM, .spinlock.text);
+DEFINE_ASM_FUNC(__raw_callee_save___pv_queued_spin_unlock,
+   PV_UNLOCK_ASM, .spinlock.text);
 
 #else /* CONFIG_64BIT */
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index be35c8ccf826..ca25dd280b8c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -385,6 +385,16 @@ apply_reloc

[PATCH v6 1/5] x86/paravirt: introduce ALT_NOT_XEN

2023-12-09 Thread Juergen Gross
Introduce the macro ALT_NOT_XEN as a short form of
ALT_NOT(X86_FEATURE_XENPV).

Suggested-by: Peter Zijlstra (Intel) 
Signed-off-by: Juergen Gross 
---
V3:
- split off from next patch
V5:
- move patch to the start of the series (Boris Petkov)
---
 arch/x86/include/asm/paravirt.h   | 42 ---
 arch/x86/include/asm/paravirt_types.h |  3 ++
 2 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 693c61dbdd9c..aa76ac7c806c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -142,8 +142,7 @@ static inline void write_cr0(unsigned long x)
 static __always_inline unsigned long read_cr2(void)
 {
return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
-   "mov %%cr2, %%rax;",
-   ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%cr2, %%rax;", ALT_NOT_XEN);
 }
 
 static __always_inline void write_cr2(unsigned long x)
@@ -154,13 +153,12 @@ static __always_inline void write_cr2(unsigned long x)
 static inline unsigned long __read_cr3(void)
 {
return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
- "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%cr3, %%rax;", ALT_NOT_XEN);
 }
 
 static inline void write_cr3(unsigned long x)
 {
-   PVOP_ALT_VCALL1(mmu.write_cr3, x,
-   "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
+   PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN);
 }
 
 static inline void __write_cr4(unsigned long x)
@@ -182,7 +180,7 @@ extern noinstr void pv_native_wbinvd(void);
 
 static __always_inline void wbinvd(void)
 {
-   PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
+   PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN);
 }
 
 static inline u64 paravirt_read_msr(unsigned msr)
@@ -390,27 +388,25 @@ static inline void paravirt_release_p4d(unsigned long pfn)
 static inline pte_t __pte(pteval_t val)
 {
return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
 }
 
 static inline pteval_t pte_val(pte_t pte)
 {
return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
-   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%rdi, %%rax", ALT_NOT_XEN);
 }
 
 static inline pgd_t __pgd(pgdval_t val)
 {
return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
 }
 
 static inline pgdval_t pgd_val(pgd_t pgd)
 {
return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
-   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%rdi, %%rax", ALT_NOT_XEN);
 }
 
 #define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -444,14 +440,13 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 static inline pmd_t __pmd(pmdval_t val)
 {
return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
 }
 
 static inline pmdval_t pmd_val(pmd_t pmd)
 {
return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd,
-   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%rdi, %%rax", ALT_NOT_XEN);
 }
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -464,7 +459,7 @@ static inline pud_t __pud(pudval_t val)
pudval_t ret;
 
ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val,
-  "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+  "mov %%rdi, %%rax", ALT_NOT_XEN);
 
return (pud_t) { ret };
 }
@@ -472,7 +467,7 @@ static inline pud_t __pud(pudval_t val)
 static inline pudval_t pud_val(pud_t pud)
 {
return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud,
-   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%rdi, %%rax", ALT_NOT_XEN);
 }
 
 static inline void pud_clear(pud_t *pudp)
@@ -492,8 +487,7 @@ static inline void set_p4d(p4d_

[PATCH v6 0/5] x86/paravirt: Get rid of paravirt patching

2023-12-09 Thread Juergen Gross
This is a small series getting rid of paravirt patching by switching
completely to alternative patching for the same functionality.

The basic idea is to add the capability to switch from indirect to
direct calls via a special alternative patching option.
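
As a rough standalone illustration of that idea (plain userspace C, nothing
taken from the real alt_instr machinery): each patch site carries a feature
bit and a replacement that is copied over the original bytes when the bit is
set, which is how an indirect call site can end up as a direct call.

#include <stdio.h>
#include <string.h>

/* Toy "alternatives" table, illustrative only. */
struct toy_alt {
	char *site;              /* bytes to patch */
	unsigned int feature;    /* patch only if this feature bit is set */
	const char *repl;        /* replacement bytes */
	size_t len;              /* size of the patch site */
};

static void toy_apply_alternatives(struct toy_alt *a, int n, unsigned int feats)
{
	for (int i = 0; i < n; i++)
		if (feats & a[i].feature)
			memcpy(a[i].site, a[i].repl, a[i].len);
}

int main(void)
{
	char site[] = "call *pv_ops+OFF";                  /* stands in for the indirect call */
	struct toy_alt table[] = {
		{ site, 1u << 0, "call native_func", 16 }, /* direct call if feature 0 is set */
	};

	toy_apply_alternatives(table, 1, 1u << 0);
	puts(site);                                        /* now "patched" to the direct call */
	return 0;
}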

This removes _some_ of the paravirt macro maze, but most of it needs
to stay due to the need of hiding the call instructions from the
compiler in order to avoid needless register save/restore.

What is going away is the nasty stacking of alternative and paravirt
patching and (of course) the special .parainstructions linker section.

I have tested the series on bare metal and as a Xen PV domain; it still
works in both cases.

Note that objtool might need some changes to cope with the new
indirect call patching mechanism. Additionally some paravirt handling
can probably be removed from it.

Changes in V6:
- addressed Boris' comments

Changes in V5:
- addressed Boris' comments
- rebased on top of the tip/master branch

Changes in V4:
- addressed Boris' comments in patch 1
- fixed bugs found by kernel test robot (patch 2)

Changes in V3:
- split v2 patch 3 into 2 patches as requested by Peter and Ingo

Changes in V2:
- split last patch into 2
- rebase of patch 2 as suggested by Peter
- addressed Peter's comments for patch 3

Juergen Gross (5):
  x86/paravirt: introduce ALT_NOT_XEN
  x86/paravirt: move some functions and defines to alternative
  x86/alternative: add indirect call patching
  x86/paravirt: switch mixed paravirt/alternative calls to alternative_2
  x86/paravirt: remove no longer needed paravirt patching code

 arch/x86/include/asm/alternative.h|  30 -
 arch/x86/include/asm/paravirt.h   |  77 
 arch/x86/include/asm/paravirt_types.h |  85 +-
 arch/x86/include/asm/qspinlock_paravirt.h |   4 +-
 arch/x86/include/asm/text-patching.h  |  12 --
 arch/x86/kernel/alternative.c | 136 +++---
 arch/x86/kernel/callthunks.c  |  17 ++-
 arch/x86/kernel/kvm.c |   4 +-
 arch/x86/kernel/module.c  |  20 +---
 arch/x86/kernel/paravirt.c|  54 +
 arch/x86/kernel/vmlinux.lds.S |  13 ---
 arch/x86/tools/relocs.c   |   2 +-
 arch/x86/xen/irq.c|   2 +-
 13 files changed, 169 insertions(+), 287 deletions(-)

-- 
2.35.3




Re: [PATCH v5 4/5] x86/paravirt: switch mixed paravirt/alternative calls to alternative_2

2023-12-08 Thread Juergen Gross

On 08.12.23 13:57, Borislav Petkov wrote:

On Fri, Dec 08, 2023 at 12:53:47PM +0100, Juergen Gross wrote:

Took me a while to find it. Patch 5 was repairing the issue again


Can't have that. Any patch in any set must build and boot successfully
for bisection reasons.



Of course.

The problem will be fixed in V6.


Juergen




Re: [PATCH v5 4/5] x86/paravirt: switch mixed paravirt/alternative calls to alternative_2

2023-12-08 Thread Juergen Gross

On 06.12.23 12:08, Borislav Petkov wrote:

On Wed, Nov 29, 2023 at 02:33:31PM +0100, Juergen Gross wrote:

Instead of stacking alternative and paravirt patching, use the new
ALT_FLAG_CALL flag to switch those mixed calls to pure alternative
handling.

This eliminates the need to be careful regarding the sequence of
alternative and paravirt patching.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V5:
- remove no longer needed extern declarations from alternative.c
   (Boris Petkov)
- add comment about ALTERNATIVE[_2]() macro usage (Boris Petkov)
- rebase to tip/master (Boris Petkov)
---
  arch/x86/include/asm/alternative.h|  5 ++--
  arch/x86/include/asm/paravirt.h   |  9 --
  arch/x86/include/asm/paravirt_types.h | 40 +++
  arch/x86/kernel/alternative.c |  1 -
  arch/x86/kernel/callthunks.c  | 17 ++--
  arch/x86/kernel/module.c  | 20 --
  6 files changed, 44 insertions(+), 48 deletions(-)


After this one: (.config is attached).

...

Ouch.

Took me a while to find it. Patch 5 was repairing the issue again, and I tested
more thoroughly only with all 5 patches applied.


Juergen





[PATCH v5 1/5] x86/paravirt: introduce ALT_NOT_XEN

2023-11-29 Thread Juergen Gross
Introduce the macro ALT_NOT_XEN as a short form of
ALT_NOT(X86_FEATURE_XENPV).

Suggested-by: Peter Zijlstra (Intel) 
Signed-off-by: Juergen Gross 
---
V3:
- split off from next patch
V5:
- move patch to the start of the series (Boris Petkov)
---
 arch/x86/include/asm/paravirt.h   | 42 ---
 arch/x86/include/asm/paravirt_types.h |  3 ++
 2 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 693c61dbdd9c..aa76ac7c806c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -142,8 +142,7 @@ static inline void write_cr0(unsigned long x)
 static __always_inline unsigned long read_cr2(void)
 {
return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
-   "mov %%cr2, %%rax;",
-   ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%cr2, %%rax;", ALT_NOT_XEN);
 }
 
 static __always_inline void write_cr2(unsigned long x)
@@ -154,13 +153,12 @@ static __always_inline void write_cr2(unsigned long x)
 static inline unsigned long __read_cr3(void)
 {
return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
- "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%cr3, %%rax;", ALT_NOT_XEN);
 }
 
 static inline void write_cr3(unsigned long x)
 {
-   PVOP_ALT_VCALL1(mmu.write_cr3, x,
-   "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
+   PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN);
 }
 
 static inline void __write_cr4(unsigned long x)
@@ -182,7 +180,7 @@ extern noinstr void pv_native_wbinvd(void);
 
 static __always_inline void wbinvd(void)
 {
-   PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
+   PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN);
 }
 
 static inline u64 paravirt_read_msr(unsigned msr)
@@ -390,27 +388,25 @@ static inline void paravirt_release_p4d(unsigned long pfn)
 static inline pte_t __pte(pteval_t val)
 {
return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
 }
 
 static inline pteval_t pte_val(pte_t pte)
 {
return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
-   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%rdi, %%rax", ALT_NOT_XEN);
 }
 
 static inline pgd_t __pgd(pgdval_t val)
 {
return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
 }
 
 static inline pgdval_t pgd_val(pgd_t pgd)
 {
return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
-   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%rdi, %%rax", ALT_NOT_XEN);
 }
 
 #define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -444,14 +440,13 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 static inline pmd_t __pmd(pmdval_t val)
 {
return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
 }
 
 static inline pmdval_t pmd_val(pmd_t pmd)
 {
return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd,
-   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%rdi, %%rax", ALT_NOT_XEN);
 }
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -464,7 +459,7 @@ static inline pud_t __pud(pudval_t val)
pudval_t ret;
 
ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val,
-  "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+  "mov %%rdi, %%rax", ALT_NOT_XEN);
 
return (pud_t) { ret };
 }
@@ -472,7 +467,7 @@ static inline pud_t __pud(pudval_t val)
 static inline pudval_t pud_val(pud_t pud)
 {
return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud,
-   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+   "mov %%rdi, %%rax", ALT_NOT_XEN);
 }
 
 static inline void pud_clear(pud_t *pudp)
@@ -492,8 +487,7 @@ static inline void set_p4d(p4d_

Re: [PATCH v4 4/5] x86/paravirt: switch mixed paravirt/alternative calls to alternative_2

2023-11-29 Thread Juergen Gross

On 21.11.23 19:45, Borislav Petkov wrote:

On Mon, Oct 30, 2023 at 03:25:07PM +0100, Juergen Gross wrote:

Instead of stacking alternative and paravirt patching, use the new
ALT_FLAG_CALL flag to switch those mixed calls to pure alternative
handling.

This eliminates the need to be careful regarding the sequence of
alternative and paravirt patching.

For call depth tracking callthunks_setup() needs to be adapted to patch
calls at alternative patching sites instead of paravirt calls.


Why is this important so that it is called out explicitly in the commit
message? Is callthunks_setup() special somehow?


IMHO it is a non-obvious change, so I spelled it out explicitly. I can drop
that paragraph if you want.





Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
  arch/x86/include/asm/alternative.h|  5 +++--
  arch/x86/include/asm/paravirt.h   |  9 ++---
  arch/x86/include/asm/paravirt_types.h | 26 +-
  arch/x86/kernel/callthunks.c  | 17 -
  arch/x86/kernel/module.c  | 20 +---
  5 files changed, 31 insertions(+), 46 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 2a74a94bd569..07b17bc615a0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -89,6 +89,8 @@ struct alt_instr {
u8  replacementlen; /* length of new instruction */
  } __packed;
  
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];

+


arch/x86/include/asm/alternative.h:92:extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
arch/x86/kernel/alternative.c:163:extern struct alt_instr __alt_instructions[], __alt_instructions_end[];

Zap the declaration from the .c file pls.


Okay.




  /*
   * Debug flag that can be tested to see whether alternative
   * instructions were patched in already:
@@ -104,11 +106,10 @@ extern void apply_fineibt(s32 *start_retpoline, s32 
*end_retpoine,
  s32 *start_cfi, s32 *end_cfi);
  
  struct module;

-struct paravirt_patch_site;
  
  struct callthunk_sites {

s32 *call_start, *call_end;
-   struct paravirt_patch_site  *pv_start, *pv_end;
+   struct alt_instr*alt_start, *alt_end;
  };
  
  #ifdef CONFIG_CALL_THUNKS

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 3749311d51c3..9c6c5cfa9fe2 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -740,20 +740,23 @@ void native_pv_lock_init(void) __init;
  
  #ifdef CONFIG_X86_64

  #ifdef CONFIG_PARAVIRT_XXL
+#ifdef CONFIG_DEBUG_ENTRY
  
  #define PARA_PATCH(off)		((off) / 8)

  #define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8)
  #define PARA_INDIRECT(addr)   *addr(%rip)
  
-#ifdef CONFIG_DEBUG_ENTRY

  .macro PARA_IRQ_save_fl
PARA_SITE(PARA_PATCH(PV_IRQ_save_fl),
  ANNOTATE_RETPOLINE_SAFE;
  call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);)
+   ANNOTATE_RETPOLINE_SAFE;
+   call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);
  .endm
  
-#define SAVE_FLAGS	ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \

-   ALT_NOT_XEN
+#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;",\
+ALT_CALL_INSTR, ALT_CALL_ALWAYS,   \
+"pushf; pop %rax;", ALT_NOT_XEN


How is that supposed to work?

At build time for a PARAVIRT_XXL build it'll have that PARA_IRQ_save_fl
macro in there which issues a .parainstructions section and an indirect
call to

call *pv_ops+240(%rip);

then it'll always patch in "call BUG_func" due to X86_FEATURE_ALWAYS.

I guess this is your way of saying "this should always be patched, one
way or the other, depending on X86_FEATURE_XENPV, and this is a way to
catch unpatched locations...

Then on a pv build which doesn't set X86_FEATURE_XENPV during boot,
it'll replace the "call BUG_func" thing with the pushf;pop.

And if it does set X86_FEATURE_XENPV, it'll patch in the direct call to
 /me greps tree ... pv_native_save_fl I guess.

If anything, how those ALT_CALL_ALWAYS things are supposed to work,
should be documented there, over the macro definition and what the
intent is.


I can do that, but OTOH the existing comments are quite clear:

 * If CPU has feature2, newinstr2 is used.
 * Otherwise, if CPU has feature1, newinstr1 is used.
 * Otherwise, oldinstr is used.
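
For the SAVE_FLAGS site above the three cases map out like this (toy
userspace C, purely illustrative, not the patcher itself; the string
contents just name the instructions ending up at the site):

#include <stdio.h>

enum { FT_NOT_XENPV = 1 << 0, FT_ALWAYS = 1 << 1 };

/* Selection order from the quoted comment: feature2 first, then feature1,
 * then the original instruction. */
static const char *pick_insn(unsigned int feats)
{
	if (feats & FT_NOT_XENPV)
		return "pushf; pop %rax";                    /* newinstr2: native flavour */
	if (feats & FT_ALWAYS)
		return "direct call to the pv_ops target";   /* newinstr1: ALT_CALL_INSTR patched */
	return "call *pv_ops+PV_IRQ_save_fl";                /* oldinstr: never survives, as FT_ALWAYS is always set */
}

int main(void)
{
	printf("bare metal: %s\n", pick_insn(FT_NOT_XENPV | FT_ALWAYS));
	printf("Xen PV:     %s\n", pick_insn(FT_ALWAYS));
	return 0;
}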



Because otherwise we'll have to swap in the whole machinery back into
our L1s each time we need to touch it.

And btw, this whole patching stuff becomes insanely non-trivial slowly.


Not worse than today. It just replaces the paravirt patching with an
alternative patching.



:-\


diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/ca

Re: [PATCH v4 3/5] x86/paravirt: introduce ALT_NOT_XEN

2023-11-14 Thread Juergen Gross

On 14.11.23 16:09, Borislav Petkov wrote:

On Mon, Oct 30, 2023 at 03:25:06PM +0100, Juergen Gross wrote:

Introduce the macro ALT_NOT_XEN as a short form of
ALT_NOT(X86_FEATURE_XENPV).


Not crazy about adding yet another macro indirection - at least with the
X86_FEATURE_ it is clear what this is. But ok, whatever.

Anyway, this patch can be the first one in the series.



Okay.


Juergen




Re: [PATCH 2/3] x86/xen: move paravirt lazy code

2023-09-13 Thread Juergen Gross

On 13.09.23 15:26, Steven Rostedt wrote:

On Wed, 13 Sep 2023 13:38:27 +0200
Juergen Gross  wrote:


diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h
index 44a3f565264d..0577f0cdd231 100644
--- a/include/trace/events/xen.h
+++ b/include/trace/events/xen.h
@@ -6,26 +6,26 @@
  #define _TRACE_XEN_H
  
  #include 

-#include 
+#include 
  #include 
  
  struct multicall_entry;
  
  /* Multicalls */

  DECLARE_EVENT_CLASS(xen_mc__batch,
-   TP_PROTO(enum paravirt_lazy_mode mode),
+   TP_PROTO(enum xen_lazy_mode mode),
TP_ARGS(mode),
TP_STRUCT__entry(
-   __field(enum paravirt_lazy_mode, mode)
+   __field(enum xen_lazy_mode, mode)
),
TP_fast_assign(__entry->mode = mode),
TP_printk("start batch LAZY_%s",
- (__entry->mode == PARAVIRT_LAZY_MMU) ? "MMU" :
- (__entry->mode == PARAVIRT_LAZY_CPU) ? "CPU" : "NONE")
+ (__entry->mode == XEN_LAZY_MMU) ? "MMU" :
+ (__entry->mode == XEN_LAZY_CPU) ? "CPU" : "NONE")


There are helper functions that make the above easier to implement, and they
also export the symbols so that user space can parse this better:

TRACE_DEFINE_ENUM(XEN_LAZY_NONE);
TRACE_DEFINE_ENUM(XEN_LAZY_MMU);
TRACE_DEFINE_ENUM(XEN_LAZY_CPU);

[..]

TP_printk("start batch LAZY_%s",
  __print_symbolic(mode,
   { XEN_LAZY_NONE, "NONE" },
   { XEN_LAZY_MMU,  "MMU"  },
   { XEN_LAZY_CPU,  "CPU"  }))

Then user space parsers that read the raw data can convert these events
into something humans can read.


Thanks. I'll add that to another patch I'm just writing for cleaning up
include/trace/events/xen.h (some defined trace events are no longer used).


Juergen




[PATCH 2/3] x86/xen: move paravirt lazy code

2023-09-13 Thread Juergen Gross
Only Xen is using the paravirt lazy mode code, so it can be moved to
Xen specific sources.

This allows to make some of the functions static or to merge them into
their only call sites.

While at it do a rename from "paravirt" to "xen" for all moved
specifiers.

No functional change.

Signed-off-by: Juergen Gross 
---
 arch/x86/include/asm/paravirt_types.h | 15 --
 arch/x86/include/asm/xen/hypervisor.h | 26 +++
 arch/x86/kernel/paravirt.c| 67 ---
 arch/x86/xen/enlighten_pv.c   | 39 +---
 arch/x86/xen/mmu_pv.c | 55 ++
 arch/x86/xen/multicalls.h |  4 +-
 include/trace/events/xen.h| 12 ++---
 7 files changed, 102 insertions(+), 116 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 4acbcc29..772d03487520 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -9,13 +9,6 @@ struct paravirt_patch_site {
u8 type;/* type of this instruction */
u8 len; /* length of original instruction */
 };
-
-/* Lazy mode for batching updates / context switch */
-enum paravirt_lazy_mode {
-   PARAVIRT_LAZY_NONE,
-   PARAVIRT_LAZY_MMU,
-   PARAVIRT_LAZY_CPU,
-};
 #endif
 
 #ifdef CONFIG_PARAVIRT
@@ -549,14 +542,6 @@ int paravirt_disable_iospace(void);
__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),\
 PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
 
-enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_start_context_switch(struct task_struct *prev);
-void paravirt_end_context_switch(struct task_struct *next);
-
-void paravirt_enter_lazy_mmu(void);
-void paravirt_leave_lazy_mmu(void);
-void paravirt_flush_lazy_mmu(void);
-
 void _paravirt_nop(void);
 void paravirt_BUG(void);
 unsigned long paravirt_ret0(void);
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 5fc35f889cd1..ed05ce3df5c7 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -36,6 +36,7 @@
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
 
+#include 
 #include 
 
 #define XEN_SIGNATURE "XenVMMXenVMM"
@@ -63,4 +64,29 @@ void __init xen_pvh_init(struct boot_params *boot_params);
 void __init mem_map_via_hcall(struct boot_params *boot_params_p);
 #endif
 
+/* Lazy mode for batching updates / context switch */
+enum xen_lazy_mode {
+   XEN_LAZY_NONE,
+   XEN_LAZY_MMU,
+   XEN_LAZY_CPU,
+};
+
+DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode);
+
+static inline void enter_lazy(enum xen_lazy_mode mode)
+{
+   BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE);
+
+   this_cpu_write(xen_lazy_mode, mode);
+}
+
+static inline void leave_lazy(enum xen_lazy_mode mode)
+{
+   BUG_ON(this_cpu_read(xen_lazy_mode) != mode);
+
+   this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE);
+}
+
+enum xen_lazy_mode xen_get_lazy_mode(void);
+
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 975f98d5eee5..97f1436c1a20 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -143,66 +143,7 @@ int paravirt_disable_iospace(void)
	return request_resource(&ioport_resource, &reserved_ioports);
 }
 
-static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = 
PARAVIRT_LAZY_NONE;
-
-static inline void enter_lazy(enum paravirt_lazy_mode mode)
-{
-   BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
-
-   this_cpu_write(paravirt_lazy_mode, mode);
-}
-
-static void leave_lazy(enum paravirt_lazy_mode mode)
-{
-   BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);
-
-   this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
-}
-
-void paravirt_enter_lazy_mmu(void)
-{
-   enter_lazy(PARAVIRT_LAZY_MMU);
-}
-
-void paravirt_leave_lazy_mmu(void)
-{
-   leave_lazy(PARAVIRT_LAZY_MMU);
-}
-
-void paravirt_flush_lazy_mmu(void)
-{
-   preempt_disable();
-
-   if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-   arch_leave_lazy_mmu_mode();
-   arch_enter_lazy_mmu_mode();
-   }
-
-   preempt_enable();
-}
-
 #ifdef CONFIG_PARAVIRT_XXL
-void paravirt_start_context_switch(struct task_struct *prev)
-{
-   BUG_ON(preemptible());
-
-   if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
-   arch_leave_lazy_mmu_mode();
-   set_ti_thread_flag(task_thread_info(prev), 
TIF_LAZY_MMU_UPDATES);
-   }
-   enter_lazy(PARAVIRT_LAZY_CPU);
-}
-
-void paravirt_end_context_switch(struct task_struct *next)
-{
-   BUG_ON(preemptible());
-
-   leave_lazy(PARAVIRT_LAZY_CPU);
-
-   if (test_and_clear_ti_thread_flag(task_thread_info(next), 
TIF_LAZY_MMU_UPDATES))
- 

[PATCH 0/3] xen: cleanup and fix lazy mode handling

2023-09-13 Thread Juergen Gross
This small series is cleaning up Xen lazy mode handling by removing
unused stuff and moving purely Xen-specific code away from general
kernel code.

The last patch is fixing a regression which was introduced in the
6.6 merge window.

Juergen Gross (3):
  arm/xen: remove lazy mode related definitions
  x86/xen: move paravirt lazy code
  x86/xen: allow nesting of same lazy mode

 arch/x86/include/asm/paravirt_types.h | 15 --
 arch/x86/include/asm/xen/hypervisor.h | 37 +++
 arch/x86/kernel/paravirt.c| 67 ---
 arch/x86/xen/enlighten_pv.c   | 40 +---
 arch/x86/xen/mmu_pv.c | 55 ++
 arch/x86/xen/multicalls.h |  4 +-
 include/trace/events/xen.h| 12 ++---
 include/xen/arm/hypervisor.h  | 12 -
 8 files changed, 114 insertions(+), 128 deletions(-)

-- 
2.35.3



Re: [PATCH net v3] net: sched: fix packet stuck problem for lockless qdisc

2021-04-13 Thread Juergen Gross

On 13.04.21 11:03, Yunsheng Lin wrote:

On 2021/4/13 16:33, Hillf Danton wrote:

On Tue, 13 Apr 2021 15:57:29  Yunsheng Lin wrote:

On 2021/4/13 15:12, Hillf Danton wrote:

On Tue, 13 Apr 2021 11:34:27 Yunsheng Lin wrote:

On 2021/4/13 11:26, Hillf Danton wrote:

On Tue, 13 Apr 2021 10:56:42 Yunsheng Lin wrote:

On 2021/4/13 10:21, Hillf Danton wrote:

On Mon, 12 Apr 2021 20:00:43  Yunsheng Lin wrote:


Yes, the below patch seems to fix the data race described in
the commit log.
Then what is the difference between my patch and your patch below:)


Hehe, this is one of the tough questions over a bunch of weeks.

If a seqcount can detect the race between skb enqueue and dequeue then we
can't see any excuse for not rolling back to the point without NOLOCK.


I am not sure I understood what you meant above.

As I understand it, the below patch is essentially the same as
your previous patch; the only difference I see is that it uses qdisc->pad
instead of __QDISC_STATE_NEED_RESCHEDULE.

So instead of proposing another patch, it would be better if you
comment on my patch, and make improvement upon that.


Happy to do that after you show how it helps revert NOLOCK.


Actually I am not going to revert NOLOCK, but add optimization
to it if the patch fixes the packet stuck problem.


Fix is not optimization, right?


For this patch, it is a fix.
In case you missed it, I do have a couple of ideas to optimize the
lockless qdisc:

1. RFC patch to add lockless qdisc bypass optimization:

https://patchwork.kernel.org/project/netdevbpf/patch/1616404156-11772-1-git-send-email-linyunsh...@huawei.com/

2. implement lockless enqueuing for lockless qdisc using the idea
   from Jason and Toke. And it has a noticeable performance increase with
   1-4 threads running using the below prototype based on ptr_ring.

static inline int __ptr_ring_multi_produce(struct ptr_ring *r, void *ptr)
{

int producer, next_producer;


do {
producer = READ_ONCE(r->producer);
if (unlikely(!r->size) || r->queue[producer])
return -ENOSPC;
next_producer = producer + 1;
if (unlikely(next_producer >= r->size))
next_producer = 0;
	} while (cmpxchg_relaxed(&r->producer, producer, next_producer) != producer);

/* Make sure the pointer we are storing points to a valid data. */
/* Pairs with the dependency ordering in __ptr_ring_consume. */
smp_wmb();

WRITE_ONCE(r->queue[producer], ptr);
return 0;
}

3. Maybe it is possible to remove the netif_tx_lock for lockless qdisc
   too, because dev_hard_start_xmit is also in the protection of
   qdisc_run_begin()/qdisc_run_end()(if there is only one qdisc using
   a netdev queue, which is true for pfifo_fast, I believe).

4. Remove the qdisc->running seqcount operation for lockless qdisc, which
   is mainly used to do heuristic locking on q->busylock for locked qdisc.



Sounds good. They can stand two months, can't they?



Is there any reason why you want to revert it?


I think you know Jiri's plan and it would be nice to wait a couple of
months for it to complete.


I am not sure I am aware of Jiri's plan.
Is there any link referring to the plan?


https://lore.kernel.org/lkml/eaff25bc-9b64-037e-b9bc-c06fc4a5a...@huawei.com/


I think there is some misunderstanding here.

As I understand it, Jiri and Juergen are from the same team (using
the suse.com mail server).


Yes, we are.


what Jiri said about "I am still planning to have Yunsheng Lin's
(CCing) fix [1] tested in the coming days." is that Juergen has
done the test and provided a "Tested-by" tag.


Correct. And I did this after Jiri asking me to do so.


So if this patch fixes the packet stuck problem, Jiri is ok with
NOLOCK qdisc too.


I think so, yes. Otherwise I don't see why he asked me to test the
patch. :-)


Or do I misunderstand it again? Perhaps Jiri and Juergen can help to
clarify this?


I hope I did. :-)


Juergen




Re: [PATCH 2/7] xen/gntdev,x86: Remove apply_to_page_range() use from module

2021-04-12 Thread Juergen Gross

On 12.04.21 10:26, Christoph Hellwig wrote:

On Mon, Apr 12, 2021 at 10:00:14AM +0200, Peter Zijlstra wrote:

Instead of relying on apply_to_page_range() being available to
modules, move its use into core kernel code and export it's
application.


This doesn't exactly look great, but at least it contains the damage..



NOTE: ideally we do: use_ptemod = !auto_translate_physmap &&
gnttab_map_avail_bits and remove this hack.


Given how much pain the !auto_translate_physmap case causes all over
the kernel I wonder what a realistic timeline might be for dropping
support for this case might be..


Think in the order of years.

It is basically the Xen PV guest support you are speaking of here, and
the planned replacement PVH especially for dom0 is still lacking some
functionality and it has performance issues.


Juergen




Re: [PATCH net v3] net: sched: fix packet stuck problem for lockless qdisc

2021-04-12 Thread Juergen Gross

On 12.04.21 03:04, Yunsheng Lin wrote:

On 2021/4/9 13:31, Juergen Gross wrote:

On 25.03.21 04:13, Yunsheng Lin wrote:

Lockless qdisc has below concurrent problem:
  cpu0 cpu1
   . .
q->enqueue .
   . .
qdisc_run_begin()  .
   . .
dequeue_skb()  .
   . .
sch_direct_xmit()  .
   . .
   .q->enqueue
   . qdisc_run_begin()
   .return and do nothing
   . .
qdisc_run_end().

cpu1 enqueue a skb without calling __qdisc_run() because cpu0
has not released the lock yet and spin_trylock() return false
for cpu1 in qdisc_run_begin(), and cpu0 do not see the skb
enqueued by cpu1 when calling dequeue_skb() because cpu1 may
enqueue the skb after cpu0 calling dequeue_skb() and before
cpu0 calling qdisc_run_end().

Lockless qdisc has below another concurrent problem when
tx_action is involved:

cpu0(serving tx_action) cpu1 cpu2
.   ..
.  q->enqueue.
.qdisc_run_begin()   .
.  dequeue_skb() .
.   .q->enqueue
.   ..
. sch_direct_xmit()  .
.   . qdisc_run_begin()
.   .   return and do nothing
.   ..
   clear __QDISC_STATE_SCHED..
   qdisc_run_begin()..
   return and do nothing..
.   ..
.qdisc_run_end() .

This patch fixes the above data race by:
1. Get the flag before doing spin_trylock().
2. If the first spin_trylock() return false and the flag is not
 set before the first spin_trylock(), Set the flag and retry
 another spin_trylock() in case other CPU may not see the new
 flag after it releases the lock.
3. reschedule if the flags is set after the lock is released
 at the end of qdisc_run_end().

For tx_action case, the flags is also set when cpu1 is at the
end if qdisc_run_end(), so tx_action will be rescheduled
again to dequeue the skb enqueued by cpu2.

Only clear the flag before retrying a dequeuing when dequeuing
returns NULL in order to reduce the overhead of the above double
spin_trylock() and __netif_schedule() calling.

The performance impact of this patch, tested using pktgen and
dummy netdev with pfifo_fast qdisc attached:

   threads  without+this_patch   with+this_patch  delta
  12.61Mpps2.60Mpps   -0.3%
  23.97Mpps3.82Mpps   -3.7%
  45.62Mpps5.59Mpps   -0.5%
  82.78Mpps2.77Mpps   -0.3%
 162.22Mpps2.22Mpps   -0.0%

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Signed-off-by: Yunsheng Lin 


I have a setup which is able to reproduce the issue quite reliably:

In a Xen guest I'm mounting 8 NFS shares and run sysbench fileio on
each of them. The average latency reported by sysbench is well below
1 msec, but at least once per hour I get latencies in the minute
range.

With this patch I don't see these high latencies any longer (test
is running for more than 20 hours now).

So you can add my:

Tested-by: Juergen Gross 


Hi, Juergen

Thanks for the testing.

With the simulated test case suggested by Michal, I still has some
potential issue to debug, hopefully will send out new version in
this week.

Also, is it possible to run your testcase any longer? I think "72 hours"
would be enough to testify that it fixes the problem completely:)


This should be possible, yes.

I'm using the setup to catch another bug which is showing up every few
days. I don't see a reason why I shouldn't be able to add your patch
and verify it in parallel.


Juergen




[GIT PULL] xen: branch for v5.12-rc7

2021-04-09 Thread Juergen Gross
Linus,

Please git pull the following tag:

 git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git for-linus-5.12b-rc7-tag

xen: branch for v5.12-rc7

It contains a single fix of a 5.12 patch for the rather uncommon problem of
running as a Xen guest with a real time kernel config.


Thanks.

Juergen

 drivers/xen/events/events_base.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

Luca Fancellu (1):
  xen/evtchn: Change irq_info lock to raw_spinlock_t


Re: [PATCH net v3] net: sched: fix packet stuck problem for lockless qdisc

2021-04-08 Thread Juergen Gross

On 25.03.21 04:13, Yunsheng Lin wrote:

Lockless qdisc has below concurrent problem:
 cpu0 cpu1
  . .
q->enqueue .
  . .
qdisc_run_begin()  .
  . .
dequeue_skb()  .
  . .
sch_direct_xmit()  .
  . .
  .q->enqueue
  . qdisc_run_begin()
  .return and do nothing
  . .
qdisc_run_end().

cpu1 enqueue a skb without calling __qdisc_run() because cpu0
has not released the lock yet and spin_trylock() return false
for cpu1 in qdisc_run_begin(), and cpu0 do not see the skb
enqueued by cpu1 when calling dequeue_skb() because cpu1 may
enqueue the skb after cpu0 calling dequeue_skb() and before
cpu0 calling qdisc_run_end().

Lockless qdisc has below another concurrent problem when
tx_action is involved:

cpu0(serving tx_action) cpu1 cpu2
   .   ..
   .  q->enqueue.
   .qdisc_run_begin()   .
   .  dequeue_skb() .
   .   .q->enqueue
   .   ..
   . sch_direct_xmit()  .
   .   . qdisc_run_begin()
   .   .   return and do nothing
   .   ..
  clear __QDISC_STATE_SCHED..
  qdisc_run_begin()..
  return and do nothing..
   .   ..
   .qdisc_run_end() .

This patch fixes the above data race by:
1. Get the flag before doing spin_trylock().
2. If the first spin_trylock() return false and the flag is not
set before the first spin_trylock(), Set the flag and retry
another spin_trylock() in case other CPU may not see the new
flag after it releases the lock.
3. reschedule if the flags is set after the lock is released
at the end of qdisc_run_end().

For tx_action case, the flags is also set when cpu1 is at the
end if qdisc_run_end(), so tx_action will be rescheduled
again to dequeue the skb enqueued by cpu2.

Only clear the flag before retrying a dequeuing when dequeuing
returns NULL in order to reduce the overhead of the above double
spin_trylock() and __netif_schedule() calling.

The performance impact of this patch, tested using pktgen and
dummy netdev with pfifo_fast qdisc attached:

  threads  without+this_patch   with+this_patch  delta
 12.61Mpps2.60Mpps   -0.3%
 23.97Mpps3.82Mpps   -3.7%
 45.62Mpps5.59Mpps   -0.5%
 82.78Mpps2.77Mpps   -0.3%
162.22Mpps2.22Mpps   -0.0%

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Signed-off-by: Yunsheng Lin 


I have a setup which is able to reproduce the issue quite reliably:

In a Xen guest I'm mounting 8 NFS shares and run sysbench fileio on
each of them. The average latency reported by sysbench is well below
1 msec, but at least once per hour I get latencies in the minute
range.

With this patch I don't see these high latencies any longer (test
is running for more than 20 hours now).

So you can add my:

Tested-by: Juergen Gross 


Juergen




Re: Packet gets stuck in NOLOCK pfifo_fast qdisc

2021-04-06 Thread Juergen Gross

On 06.04.21 09:06, Michal Kubecek wrote:

On Tue, Apr 06, 2021 at 08:55:41AM +0800, Yunsheng Lin wrote:


Hi, Jiri
Do you have a reproducer that can be shared here?
With reproducer, I can debug and test it myself too.


I'm afraid we are not aware of a simple reproducer. As mentioned in the
original discussion, the race window is extremely small and the other
thread has to do quite a lot in the meantime which is probably why, as
far as I know, this was never observed on real hardware, only in
virtualization environments. NFS may also be important as, IIUC, it can
often issue an RPC request from a different CPU right after a data
transfer. Perhaps you could cheat a bit and insert a random delay
between the empty queue check and releasing q->seqlock to make it more
likely to happen.

Other than that, it's rather just "run this complex software in a xen VM
and wait".


Being the one who has managed to reproduce the issue I can share my
setup, maybe you can setup something similar (we have seen the issue
with this kind of setup on two different machines).

I'm using a physical machine with 72 cpus and 48 GB of memory. It is
running Xen as virtualization platform.

Xen dom0 is limited to 40 vcpus and 32 GB of memory, the dom0 vcpus are
limited to run on the first 40 physical cpus (no idea whether that
matters, though).

In a guest with 16 vcpu and 8GB of memory I'm running 8 parallel
sysbench instances in a loop, those instances are prepared via

sysbench --file-test-mode=rndrd --test=fileio prepare

and then started in a do while loop via:

sysbench --test=fileio --file-test-mode=rndrw --rand-seed=0 --max-time=300 --max-requests=0 run


Each instance is using a dedicated NFS mount to run on. The NFS
server for the 8 mounts is running in dom0 of the same server, the
data of the NFS shares is located in a RAM disk (size is a little bit
above 16GB). The shares are mounted in the guest with:

mount -t nfs -o rw,proto=tcp,nolock,nfsvers=3,rsize=65536,wsize=65536,nosharetransport dom0:/ramdisk/share[1-8] /mnt[1-8]


The guests vcpus are limited to run on physical cpus 40-55, on the same
physical cpus I have 16 small guests running eating up cpu time, each of
those guests is pinned to one of the physical cpus 40-55.

That's basically it. All you need to do is to watch out for sysbench
reporting maximum latencies above one second or so (in my setup there
are latencies of several minutes at least once each hour of testing).

In case you'd like to have some more details about the setup don't
hesitate to contact me directly. I can provide you with some scripts
and config runes if you want.


Juergen




Re: [PATCH v3 3/4] kernel/smp: add more data to CSD lock debugging

2021-04-05 Thread Juergen Gross

On 02.04.21 18:11, Paul E. McKenney wrote:

On Fri, Apr 02, 2021 at 05:46:52PM +0200, Juergen Gross wrote:

On 30.03.21 19:33, Paul E. McKenney wrote:

On Wed, Mar 24, 2021 at 11:18:03AM +0100, Jürgen Groß wrote:

On 02.03.21 07:28, Juergen Gross wrote:

In order to help identifying problems with IPI handling and remote
function execution add some more data to IPI debugging code.

There have been multiple reports of cpus looping long times (many
seconds) in smp_call_function_many() waiting for another cpu executing
a function like tlb flushing. Most of these reports have been for
cases where the kernel was running as a guest on top of KVM or Xen
(there are rumours of that happening under VMWare, too, and even on
bare metal).

Finding the root cause hasn't been successful yet, even after more than
2 years of chasing this bug by different developers.

Commit 35feb60474bf4f7 ("kernel/smp: Provide CSD lock timeout
diagnostics") tried to address this by adding some debug code and by
issuing another IPI when a hang was detected. This helped mitigating
the problem (the repeated IPI unlocks the hang), but the root cause is
still unknown.

Current available data suggests that either an IPI wasn't sent when it
should have been, or that the IPI didn't result in the target cpu
executing the queued function (due to the IPI not reaching the cpu,
the IPI handler not being called, or the handler not seeing the queued
request).

Try to add more diagnostic data by introducing a global atomic counter
which is being incremented when doing critical operations (before and
after queueing a new request, when sending an IPI, and when dequeueing
a request). The counter value is stored in percpu variables which can
be printed out when a hang is detected.

The data of the last event (consisting of sequence counter, source
cpu, target cpu, and event type) is stored in a global variable. When
a new event is to be traced, the data of the last event is stored in
the event related percpu location and the global data is updated with
the new event's data. This allows to track two events in one data
location: one by the value of the event data (the event before the
current one), and one by the location itself (the current event).

A typical printout with a detected hang will look like this:

csd: Detected non-responsive CSD lock (#1) on CPU#1, waiting 53 ns for 
CPU#06 scf_handler_1+0x0/0x50(0xa2a881bb1410).
csd: CSD lock (#1) handling prior 
scf_handler_1+0x0/0x50(0xa2a8813823c0) request.
   csd: cnt(8cc): -> dequeue (src cpu 0 == empty)
   csd: cnt(8cd): ->0006 idle
   csd: cnt(0003668): 0001->0006 queue
   csd: cnt(0003669): 0001->0006 ipi
   csd: cnt(0003e0f): 0007->000a queue
   csd: cnt(0003e10): 0001-> ping
   csd: cnt(0003e71): 0003-> ping
   csd: cnt(0003e72): ->0006 gotipi
   csd: cnt(0003e73): ->0006 handle
   csd: cnt(0003e74): ->0006 dequeue (src cpu 0 == empty)
   csd: cnt(0003e7f): 0004->0006 ping
   csd: cnt(0003e80): 0001-> pinged
   csd: cnt(0003eb2): 0005->0001 noipi
   csd: cnt(0003eb3): 0001->0006 queue
   csd: cnt(0003eb4): 0001->0006 noipi
   csd: cnt now: 0003f00

This example (being an artificial one, produced with a previous version
of this patch without the "hdlend" event), shows that cpu#6 started to
handle an IPI (cnt 3e72-3e74), but didn't start to handle another IPI
(sent by cpu#4, cnt 3e7f). The next request from cpu#1 for cpu#6 was
queued (3eb3), but no IPI was needed (cnt 3eb4, there was the event
from cpu#4 in the queue already).

The idea is to print only relevant entries. Those are all events which
are associated with the hang (so sender side events for the source cpu
of the hanging request, and receiver side events for the target cpu),
and the related events just before those (for adding data needed to
identify a possible race). Printing all available data would be
possible, but it would produce large amounts of output on larger
configurations.

Signed-off-by: Juergen Gross 
Tested-by: Paul E. McKenney 
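
A rough illustration of the counter/event scheme described above (all
names in this sketch are illustrative, not the symbols the patch
actually adds; kernel headers assumed):

  #include <linux/atomic.h>
  #include <linux/percpu.h>
  #include <linux/types.h>

  struct csd_dbg_ev {
          u64 cnt;        /* global sequence number                   */
          int src;        /* sending cpu                              */
          int dst;        /* target cpu                               */
          int type;       /* queue/ipi/ping/gotipi/handle/dequeue/... */
  };

  static atomic64_t csd_dbg_cnt;                 /* bumped on every traced operation */
  static struct csd_dbg_ev csd_dbg_last;         /* most recent event (global)       */
  static DEFINE_PER_CPU(struct csd_dbg_ev, csd_dbg_prev);

  static void csd_dbg_record(int src, int dst, int type)
  {
          struct csd_dbg_ev ev = {
                  .cnt  = atomic64_inc_return(&csd_dbg_cnt),
                  .src  = src,
                  .dst  = dst,
                  .type = type,
          };

          /*
           * Keep the previous event in a percpu slot and the current one
           * in the global variable, so each location effectively tracks
           * two events: the one stored in it and the one that overwrote
           * it. This is a debugging aid, so the two stores are not
           * synchronized.
           */
          per_cpu(csd_dbg_prev, src) = csd_dbg_last;
          csd_dbg_last = ev;
  }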


Just an update regarding current status with debugging the underlying
issue:

On a customer's machine with a backport of this patch applied we've
seen another case of the hang. In the logs we've found:

smp: csd: Detected non-responsive CSD lock (#1) on CPU#18, waiting
500046 ns for CPU#06 do_flush_tlb_all+0x0/0x30(  (null)).
smp:csd: CSD lock (#1) unresponsive.
smp:csd: cnt(000): -> queue
smp:csd: cnt(001): ->0006 idle
smp:csd: cnt(0025dba): 0012->0006 queue
smp:csd: cnt(0025dbb): 0012->0006 noipi
smp:csd: cnt(01d1333): 001a->0006 pinged
smp:csd: cnt(01d1334): ->0006 gotipi
smp:csd: cnt(01d1335): ->0006 handle
smp:csd: cnt(01d1336): ->

Re: [PATCH v3 3/4] kernel/smp: add more data to CSD lock debugging

2021-04-02 Thread Juergen Gross

On 30.03.21 19:33, Paul E. McKenney wrote:

On Wed, Mar 24, 2021 at 11:18:03AM +0100, Jürgen Groß wrote:

On 02.03.21 07:28, Juergen Gross wrote:

In order to help identify problems with IPI handling and remote
function execution, add some more data to the IPI debugging code.

There have been multiple reports of cpus looping for long times (many
seconds) in smp_call_function_many(), waiting for another cpu to execute
a function like tlb flushing. Most of these reports have been for
cases where the kernel was running as a guest on top of KVM or Xen
(there are rumours of that happening under VMWare, too, and even on
bare metal).

Finding the root cause hasn't been successful yet, even after more than
2 years of chasing this bug by different developers.

Commit 35feb60474bf4f7 ("kernel/smp: Provide CSD lock timeout
diagnostics") tried to address this by adding some debug code and by
issuing another IPI when a hang was detected. This helped mitigate
the problem (the repeated IPI unlocks the hang), but the root cause is
still unknown.

Currently available data suggests that either an IPI wasn't sent when it
should have been, or that the IPI didn't result in the target cpu
executing the queued function (due to the IPI not reaching the cpu,
the IPI handler not being called, or the handler not seeing the queued
request).

Try to add more diagnostic data by introducing a global atomic counter
which is being incremented when doing critical operations (before and
after queueing a new request, when sending an IPI, and when dequeueing
a request). The counter value is stored in percpu variables which can
be printed out when a hang is detected.

The data of the last event (consisting of sequence counter, source
cpu, target cpu, and event type) is stored in a global variable. When
a new event is to be traced, the data of the last event is stored in
the event-related percpu location and the global data is updated with
the new event's data. This allows tracking two events in one data
location: one by the value of the event data (the event before the
current one), and one by the location itself (the current event).

A typical printout with a detected hang will look like this:

csd: Detected non-responsive CSD lock (#1) on CPU#1, waiting 53 ns for 
CPU#06 scf_handler_1+0x0/0x50(0xa2a881bb1410).
csd: CSD lock (#1) handling prior 
scf_handler_1+0x0/0x50(0xa2a8813823c0) request.
  csd: cnt(8cc): -> dequeue (src cpu 0 == empty)
  csd: cnt(8cd): ->0006 idle
  csd: cnt(0003668): 0001->0006 queue
  csd: cnt(0003669): 0001->0006 ipi
  csd: cnt(0003e0f): 0007->000a queue
  csd: cnt(0003e10): 0001-> ping
  csd: cnt(0003e71): 0003-> ping
  csd: cnt(0003e72): ->0006 gotipi
  csd: cnt(0003e73): ->0006 handle
  csd: cnt(0003e74): ->0006 dequeue (src cpu 0 == empty)
  csd: cnt(0003e7f): 0004->0006 ping
  csd: cnt(0003e80): 0001-> pinged
  csd: cnt(0003eb2): 0005->0001 noipi
  csd: cnt(0003eb3): 0001->0006 queue
  csd: cnt(0003eb4): 0001->0006 noipi
  csd: cnt now: 0003f00

This example (being an artificial one, produced with a previous version
of this patch without the "hdlend" event), shows that cpu#6 started to
handle an IPI (cnt 3e72-3e74), but didn't start to handle another IPI
(sent by cpu#4, cnt 3e7f). The next request from cpu#1 for cpu#6 was
queued (3eb3), but no IPI was needed (cnt 3eb4, there was the event
from cpu#4 in the queue already).

The idea is to print only relevant entries. Those are all events which
are associated with the hang (so sender side events for the source cpu
of the hanging request, and receiver side events for the target cpu),
and the related events just before those (for adding data needed to
identify a possible race). Printing all available data would be
possible, but it would produce large amounts of output on larger
configurations.

Signed-off-by: Juergen Gross 
Tested-by: Paul E. McKenney 


Just an update regarding current status with debugging the underlying
issue:

On a customer's machine with a backport of this patch applied we've
seen another case of the hang. In the logs we've found:

smp: csd: Detected non-responsive CSD lock (#1) on CPU#18, waiting
500046 ns for CPU#06 do_flush_tlb_all+0x0/0x30(  (null)).
smp:csd: CSD lock (#1) unresponsive.
smp:csd: cnt(000): -> queue
smp:csd: cnt(001): ->0006 idle
smp:csd: cnt(0025dba): 0012->0006 queue
smp:csd: cnt(0025dbb): 0012->0006 noipi
smp:csd: cnt(01d1333): 001a->0006 pinged
smp:csd: cnt(01d1334): ->0006 gotipi
smp:csd: cnt(01d1335): ->0006 handle
smp:csd: cnt(01d1336): ->0006 dequeue (src cpu 0 == empty)
smp:csd: cnt(01d1337): ->0006 hdlend (src cpu 0 == early)
smp:csd: cnt(0

[GIT PULL] xen: branch for v5.12-rc6

2021-03-30 Thread Juergen Gross
Linus,

Please git pull the following tag:

 git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git 
for-linus-5.12b-rc6-tag

xen: branch for v5.12-rc6

It contains one Xen related security fix (XSA-371).

Thanks.

Juergen

 drivers/block/xen-blkback/blkback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Jan Beulich (1):
  xen-blkback: don't leak persistent grants from xen_blkbk_map()


Re: [PATCH] xen/pciback: Fix incorrect type warnings

2021-03-29 Thread Juergen Gross

On 26.03.21 19:14, Muhammad Usama Anjum wrote:

The enum value pci_channel_io_normal should be used instead of the bare
integer value 1.

Fix the following smatch warnings:
drivers/xen/xen-pciback/pci_stub.c:805:40: warning: incorrect type in argument 2 (different base types)
drivers/xen/xen-pciback/pci_stub.c:805:40:    expected restricted pci_channel_state_t [usertype] state
drivers/xen/xen-pciback/pci_stub.c:805:40:    got int
drivers/xen/xen-pciback/pci_stub.c:862:40: warning: incorrect type in argument 2 (different base types)
drivers/xen/xen-pciback/pci_stub.c:862:40:    expected restricted pci_channel_state_t [usertype] state
drivers/xen/xen-pciback/pci_stub.c:862:40:    got int
drivers/xen/xen-pciback/pci_stub.c:973:31: warning: incorrect type in argument 2 (different base types)
drivers/xen/xen-pciback/pci_stub.c:973:31:    expected restricted pci_channel_state_t [usertype] state
drivers/xen/xen-pciback/pci_stub.c:973:31:    got int
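
A hypothetical illustration of the change implied by these warnings (the
patch body itself is not part of this archive excerpt):

  /* Pass the named enum value where a pci_channel_state_t is expected. */
  pci_channel_state_t state = pci_channel_io_normal;   /* rather than: int state = 1; */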

Signed-off-by: Muhammad Usama Anjum 


Reviewed-by: Juergen Gross 


Juergen




[GIT PULL] xen: branch for v5.12-rc5

2021-03-26 Thread Juergen Gross
Linus,

Please git pull the following tag:

 git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git 
for-linus-5.12b-rc5-tag

xen: branch for v5.12-rc5

It contains a small series with a more elegant fix for a problem which
was originally fixed in rc2.

Thanks.

Juergen

 arch/x86/include/asm/xen/page.h | 12 
 arch/x86/xen/p2m.c  |  7 ++-
 arch/x86/xen/setup.c| 16 ++--
 drivers/xen/Kconfig |  4 ++--
 4 files changed, 18 insertions(+), 21 deletions(-)

Roger Pau Monne (2):
  xen/x86: make XEN_BALLOON_MEMORY_HOTPLUG_LIMIT depend on MEMORY_HOTPLUG
  Revert "xen: fix p2m size in dom0 for disabled memory hotplug case"


[tip: irq/core] irq: Simplify condition in irq_matrix_reserve()

2021-03-17 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the irq/core branch of tip:

Commit-ID: 2c6b02185cc608c19a22691fadc6ca2cd114c286
Gitweb:
https://git.kernel.org/tip/2c6b02185cc608c19a22691fadc6ca2cd114c286
Author:Juergen Gross 
AuthorDate:Thu, 11 Feb 2021 08:09:53 +01:00
Committer: Thomas Gleixner 
CommitterDate: Wed, 17 Mar 2021 21:44:01 +01:00

irq: Simplify condition in irq_matrix_reserve()

The if condition in irq_matrix_reserve() can be much simpler.

While at it, fix a typo in the comment.
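
For reference, and barring counter overflow, the simplification in the
hunk below relies on the following equivalence for the unsigned counters
involved:

  /*
   * reserved <= available && reserved + 1 > available
   *   <=>  reserved <= available && reserved >= available
   *   <=>  reserved == available
   */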

Signed-off-by: Juergen Gross 
Signed-off-by: Thomas Gleixner 
Link: https://lore.kernel.org/r/20210211070953.5914-1-jgr...@suse.com

---
 kernel/irq/matrix.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 7a9465f..6f8b1d1 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -337,15 +337,14 @@ void irq_matrix_assign(struct irq_matrix *m, unsigned int 
bit)
  * irq_matrix_reserve - Reserve interrupts
  * @m: Matrix pointer
  *
- * This is merily a book keeping call. It increments the number of globally
+ * This is merely a book keeping call. It increments the number of globally
  * reserved interrupt bits w/o actually allocating them. This allows to
  * setup interrupt descriptors w/o assigning low level resources to it.
  * The actual allocation happens when the interrupt gets activated.
  */
 void irq_matrix_reserve(struct irq_matrix *m)
 {
-   if (m->global_reserved <= m->global_available &&
-   m->global_reserved + 1 > m->global_available)
+   if (m->global_reserved == m->global_available)
pr_warn("Interrupt reservation exceeds available resources\n");
 
m->global_reserved++;


[tip: x86/alternatives] x86/alternative: Support not-feature

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: dda7bb76484978316bb412a353789ebc5901de36
Gitweb:
https://git.kernel.org/tip/dda7bb76484978316bb412a353789ebc5901de36
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:10 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 16:44:01 +01:00

x86/alternative: Support not-feature

Add support for alternative patching for the case a feature is not
present on the current CPU. For users of ALTERNATIVE() and friends, an
inverted feature is specified by applying the ALT_NOT() macro to it,
e.g.:

  ALTERNATIVE(old, new, ALT_NOT(feature));

Committer note:

The decision to encode the NOT-bit in the feature bit itself was made
so that a future change which makes objtool generate such alternative
calls can keep the code in objtool itself fairly simple.

Also, this allows for the alternative macros to support the NOT feature
without having to change them.

Finally, the u16 cpuid member encoding the X86_FEATURE_ flags is not an
ABI so if more bits are needed, cpuid itself can be enlarged or a flags
field can be added to struct alt_instr after having considered the size
growth in either case.
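
A contrived, hypothetical usage sketch (kernel context with
<asm/alternative.h> assumed; not a hunk from this series): keep a NOP by
default and patch in the real instruction only when the kernel is *not*
running as a Xen PV guest:

  asm volatile(ALTERNATIVE("nop", "wbinvd",
                           ALT_NOT(X86_FEATURE_XENPV)) ::: "memory");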

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20210311142319.4723-6-jgr...@suse.com
---
 arch/x86/include/asm/alternative.h |  3 +++
 arch/x86/kernel/alternative.c  | 20 +++-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 53f295f..649e56f 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -6,6 +6,9 @@
 #include 
 #include 
 
+#define ALTINSTR_FLAG_INV  (1 << 15)
+#define ALT_NOT(feat)  ((feat) | ALTINSTR_FLAG_INV)
+
 #ifndef __ASSEMBLY__
 
 #include 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 8d778e4..133b549 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -388,21 +388,31 @@ void __init_or_module noinline apply_alternatives(struct 
alt_instr *start,
 */
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
+   /* Mask away "NOT" flag bit for feature to test. */
+   u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;
 
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
-   BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
-   if (!boot_cpu_has(a->cpuid)) {
+   BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);
+
+   /*
+* Patch if either:
+* - feature is present
+* - feature not present but ALTINSTR_FLAG_INV is set to mean,
+*   patch if feature is *NOT* present.
+*/
+   if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
if (a->padlen > 1)
optimize_nops(a, instr);
 
continue;
}
 
-   DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, 
len: %d), pad: %d",
-   a->cpuid >> 5,
-   a->cpuid & 0x1f,
+   DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: 
(%px, len: %d), pad: %d",
+   (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
+   feature >> 5,
+   feature & 0x1f,
instr, instr, a->instrlen,
replacement, a->replacementlen, a->padlen);
 


[tip: x86/alternatives] x86/alternative: Merge include files

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: 5e21a3ecad1500e35b46701e7f3f232e15d78e69
Gitweb:
https://git.kernel.org/tip/5e21a3ecad1500e35b46701e7f3f232e15d78e69
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:06 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 15:58:02 +01:00

x86/alternative: Merge include files

Merge arch/x86/include/asm/alternative-asm.h into
arch/x86/include/asm/alternative.h in order to make it easier to use
common definitions later.

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20210311142319.4723-2-jgr...@suse.com
---
 arch/x86/entry/entry_32.S|   2 +-
 arch/x86/entry/vdso/vdso32/system_call.S |   2 +-
 arch/x86/include/asm/alternative-asm.h   | 114 +--
 arch/x86/include/asm/alternative.h   | 112 +-
 arch/x86/include/asm/nospec-branch.h |   1 +-
 arch/x86/include/asm/smap.h  |   5 +-
 arch/x86/lib/atomic64_386_32.S   |   2 +-
 arch/x86/lib/atomic64_cx8_32.S   |   2 +-
 arch/x86/lib/copy_page_64.S  |   2 +-
 arch/x86/lib/copy_user_64.S  |   2 +-
 arch/x86/lib/memcpy_64.S |   2 +-
 arch/x86/lib/memmove_64.S|   2 +-
 arch/x86/lib/memset_64.S |   2 +-
 arch/x86/lib/retpoline.S |   2 +-
 14 files changed, 120 insertions(+), 132 deletions(-)
 delete mode 100644 arch/x86/include/asm/alternative-asm.h

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017..4e079f2 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S 
b/arch/x86/entry/vdso/vdso32/system_call.S
index de1fff7..d6a6080 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -6,7 +6,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 
.text
.globl __kernel_vsyscall
diff --git a/arch/x86/include/asm/alternative-asm.h 
b/arch/x86/include/asm/alternative-asm.h
deleted file mode 100644
index 464034d..000
--- a/arch/x86/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_ALTERNATIVE_ASM_H
-#define _ASM_X86_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-#include 
-
-#ifdef CONFIG_SMP
-   .macro LOCK_PREFIX
-672:   lock
-   .pushsection .smp_locks,"a"
-   .balign 4
-   .long 672b - .
-   .popsection
-   .endm
-#else
-   .macro LOCK_PREFIX
-   .endm
-#endif
-
-/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-.macro ANNOTATE_IGNORE_ALTERNATIVE
-   .Lannotate_\@:
-   .pushsection .discard.ignore_alts
-   .long .Lannotate_\@ - .
-   .popsection
-.endm
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
-   .long \orig - .
-   .long \alt - .
-   .word \feature
-   .byte \orig_len
-   .byte \alt_len
-   .byte \pad_len
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
-140:
-   \oldinstr
-141:
-   .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
-   .pushsection .altinstructions,"a"
-   altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
-   .popsection
-
-   .pushsection .altinstr_replacement,"ax"
-143:
-   \newinstr
-144:
-   .popsection
-.endm
-
-#define old_len141b-140b
-#define new_len1   144f-143f
-#define new_len2   145f-144f
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b)((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-
-
-/*
- * Same as ALTERNATIVE macro above but for two alternatives. If CPU
- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
- * @feature2, it replaces @oldinstr with @feature2.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
-140:
-   \oldinstr
-14

[tip: x86/alternatives] static_call: Add function to query current function

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: 6ea312d95e0226b306bb4b8ee3a0727d880378cb
Gitweb:
https://git.kernel.org/tip/6ea312d95e0226b306bb4b8ee3a0727d880378cb
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:08 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 16:12:33 +01:00

static_call: Add function to query current function

Some users of paravirtualized functions need to query which function
has been specified in a pv_ops vector element. In order to be able to
switch such paravirtualized functions to static_calls instead, there
needs to be a function to query the function which will be called via
static_call().
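
A minimal, hypothetical sketch of how the new query interface can be
used (all names are illustrative; <linux/static_call.h> assumed):

  static int my_default_func(int x)   { return x; }
  static int my_optimized_func(int x) { return x * 2; }

  DEFINE_STATIC_CALL(my_call, my_default_func);

  static void maybe_switch(void)
  {
          /* Only replace the target if it is still the default one. */
          if (static_call_query(my_call) == (void *)my_default_func)
                  static_call_update(my_call, my_optimized_func);
  }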

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-4-jgr...@suse.com
---
 include/linux/static_call.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 76b8812..e01b61a 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -20,6 +20,7 @@
  *   static_call(name)(args...);
  *   static_call_cond(name)(args...);
  *   static_call_update(name, func);
+ *   static_call_query(name);
  *
  * Usage example:
  *
@@ -91,6 +92,10 @@
  *
  *   which will include the required value tests to avoid NULL-pointer
  *   dereferences.
+ *
+ *   To query which function is currently set to be called, use:
+ *
+ *   func = static_call_query(name);
  */
 
 #include 
@@ -118,6 +123,8 @@ extern void arch_static_call_transform(void *site, void 
*tramp, void *func, bool
 STATIC_CALL_TRAMP_ADDR(name), func);   \
 })
 
+#define static_call_query(name) (READ_ONCE(STATIC_CALL_KEY(name).func))
+
 #ifdef CONFIG_HAVE_STATIC_CALL_INLINE
 
 extern int __init static_call_init(void);
@@ -191,6 +198,7 @@ static inline int static_call_init(void) { return 0; }
};  \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
 
+
 #define static_call_cond(name) (void)__static_call(name)
 
 static inline


[tip: x86/alternatives] static_call: Move struct static_call_key definition to static_call_types.h

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: b046664872dd78a8bebe3d5f3bb9da9baa93f5ca
Gitweb:
https://git.kernel.org/tip/b046664872dd78a8bebe3d5f3bb9da9baa93f5ca
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:07 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 16:04:39 +01:00

static_call: Move struct static_call_key definition to static_call_types.h

Having the definition of static_call() in static_call_types.h makes
no sense as long as struct static_call_key isn't defined there, as the
generic implementation of static_call() references this structure.

So move the definition of struct static_call_key to static_call_types.h.

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-3-jgr...@suse.com
---
 include/linux/static_call.h | 18 --
 include/linux/static_call_types.h   | 18 ++
 tools/include/linux/static_call_types.h | 18 ++
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 85ecc78..76b8812 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -128,16 +128,6 @@ struct static_call_mod {
struct static_call_site *sites;
 };
 
-struct static_call_key {
-   void *func;
-   union {
-   /* bit 0: 0 = mods, 1 = sites */
-   unsigned long type;
-   struct static_call_mod *mods;
-   struct static_call_site *sites;
-   };
-};
-
 /* For finding the key associated with a trampoline */
 struct static_call_tramp_key {
s32 tramp;
@@ -187,10 +177,6 @@ extern long __static_call_return0(void);
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-   void *func;
-};
-
 #define __DEFINE_STATIC_CALL(name, _func, _func_init)  \
DECLARE_STATIC_CALL(name, _func);   \
struct static_call_key STATIC_CALL_KEY(name) = {\
@@ -243,10 +229,6 @@ static inline long __static_call_return0(void)
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-   void *func;
-};
-
 static inline long __static_call_return0(void)
 {
return 0;
diff --git a/include/linux/static_call_types.h 
b/include/linux/static_call_types.h
index ae5662d..5a00b8b 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
__raw_static_call(name);\
 })
 
+struct static_call_key {
+   void *func;
+   union {
+   /* bit 0: 0 = mods, 1 = sites */
+   unsigned long type;
+   struct static_call_mod *mods;
+   struct static_call_site *sites;
+   };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)__raw_static_call(name)
 
+struct static_call_key {
+   void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+   void *func;
+};
+
 #define static_call(name)  \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 
diff --git a/tools/include/linux/static_call_types.h 
b/tools/include/linux/static_call_types.h
index ae5662d..5a00b8b 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
__raw_static_call(name);\
 })
 
+struct static_call_key {
+   void *func;
+   union {
+   /* bit 0: 0 = mods, 1 = sites */
+   unsigned long type;
+   struct static_call_mod *mods;
+   struct static_call_site *sites;
+   };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)__raw_static_call(name)
 
+struct static_call_key {
+   void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+   void *func;
+};
+
 #define static_call(name)  \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 


[tip: x86/alternatives] x86/alternative: Support ALTERNATIVE_TERNARY

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: e208b3c4a9748b2c17aa09ba663b5096ccf82dce
Gitweb:
https://git.kernel.org/tip/e208b3c4a9748b2c17aa09ba663b5096ccf82dce
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:11 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 16:57:31 +01:00

x86/alternative: Support ALTERNATIVE_TERNARY

Add ALTERNATIVE_TERNARY support for replacing an initial instruction
with either of two instructions depending on a feature:

  ALTERNATIVE_TERNARY "default_instr", FEATURE_NR,
  "feature_on_instr", "feature_off_instr"

which will start with "default_instr" and at patch time will,
depending on FEATURE_NR being set or not, patch that with either
"feature_on_instr" or "feature_off_instr".

 [ bp: Add comment ontop. ]

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-7-jgr...@suse.com
---
 arch/x86/include/asm/alternative.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 649e56f..17b3609 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -179,6 +179,11 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_REPLACEMENT(newinstr2, 2)  \
".popsection\n"
 
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+   ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS,\
+ newinstr_yes, feature)
+
 #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, 
feat3) \
OLDINSTR_3(oldinsn, 1, 2, 3)
\
".pushsection .altinstructions,\"a\"\n" 
\
@@ -210,6 +215,9 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, 
newinstr2, feature2) ::: "memory")
 
+#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \
+   asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, 
newinstr_yes, newinstr_no) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *
@@ -380,6 +388,11 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
.popsection
 .endm
 
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+   ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS,\
+   newinstr_yes, feature
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_ALTERNATIVE_H */


[tip: x86/alternatives] x86/paravirt: Switch time pvops functions to use static_call()

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: a0e2bf7cb7006b5a58ee81f4da4fe575875f2781
Gitweb:
https://git.kernel.org/tip/a0e2bf7cb7006b5a58ee81f4da4fe575875f2781
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:09 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 16:17:52 +01:00

x86/paravirt: Switch time pvops functions to use static_call()

The time pvops functions are the only ones left which might be
used in 32-bit mode and which return a 64-bit value.

Switch them to use the static_call() mechanism instead of pvops, as
this allows a substantial simplification of the pvops implementation.
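
A rough, hypothetical sketch of what a hypervisor-specific setup path
looks like after this change (hv_steal_clock and hv_guest_time_init are
illustrative names):

  static u64 hv_steal_clock(int cpu)
  {
          return 0;       /* a real implementation would ask the hypervisor */
  }

  static void __init hv_guest_time_init(void)
  {
          /* Update the static call instead of writing pv_ops.time.steal_clock. */
          static_call_update(pv_steal_clock, hv_steal_clock);
  }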

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-5-jgr...@suse.com
---
 arch/arm/include/asm/paravirt.h   | 14 +-
 arch/arm/kernel/paravirt.c|  9 +++--
 arch/arm64/include/asm/paravirt.h | 14 +-
 arch/arm64/kernel/paravirt.c  | 13 +
 arch/x86/Kconfig  |  1 +-
 arch/x86/include/asm/mshyperv.h   |  2 +-
 arch/x86/include/asm/paravirt.h   | 15 ---
 arch/x86/include/asm/paravirt_types.h |  6 +--
 arch/x86/kernel/cpu/vmware.c  |  5 +++--
 arch/x86/kernel/kvm.c |  2 +-
 arch/x86/kernel/kvmclock.c|  2 +-
 arch/x86/kernel/paravirt.c| 13 +
 arch/x86/kernel/tsc.c |  3 ++-
 arch/x86/xen/time.c   | 26 +-
 drivers/xen/time.c|  3 ++-
 15 files changed, 71 insertions(+), 57 deletions(-)

diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
index cdbf02d..95d5b0d 100644
--- a/arch/arm/include/asm/paravirt.h
+++ b/arch/arm/include/asm/paravirt.h
@@ -3,23 +3,19 @@
 #define _ASM_ARM_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include 
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-   unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-   struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-   return pv_ops.time.steal_clock(cpu);
+   return static_call(pv_steal_clock)(cpu);
 }
 #endif
 
diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c
index 4cfed91..7dd9806 100644
--- a/arch/arm/kernel/paravirt.c
+++ b/arch/arm/kernel/paravirt.c
@@ -9,10 +9,15 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
diff --git a/arch/arm64/include/asm/paravirt.h 
b/arch/arm64/include/asm/paravirt.h
index cf3a0fd..9aa193e 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -3,23 +3,19 @@
 #define _ASM_ARM64_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include 
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-   unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-   struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-   return pv_ops.time.steal_clock(cpu);
+   return static_call(pv_steal_clock)(cpu);
 }
 
 int __init pv_time_init(void);
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index c07d7a0..75fed44 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -26,8 +27,12 @@
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
struct pvclock_vcpu_stolen_time *kaddr;
@@ -45,7 +50,7 @@ static int __init parse_no_stealacc(char *arg)
 early_param("no-steal-acc", parse_no_stealacc);
 
 /* return stolen time in ns by asking the hypervisor */
-static u64 pv_steal_clock(int cpu)
+static u64 para_steal_clock(int cpu)
 {
struct pv_time_stolen_time_region *reg;
 
@@ -150,7 +155,7 @@ int __init pv_time_init(void)
 

[tip: x86/alternatives] x86/paravirt: Remove no longer needed 32-bit pvops cruft

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: 33634e42e38be61f320183dfc264b9caba292d4e
Gitweb:
https://git.kernel.org/tip/33634e42e38be61f320183dfc264b9caba292d4e
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:14 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 19:51:55 +01:00

x86/paravirt: Remove no longer needed 32-bit pvops cruft

PVOP_VCALL4() is only used for Xen PV, while PVOP_CALL4() isn't used
at all. Keep PVOP_CALL4() for 64 bits due to symmetry reasons.

This allows removing the 32-bit definitions of those macros, leading
to a substantial simplification of the paravirt macros, as those were
the only ones needing non-empty "pre" and "post" parameters.

PVOP_CALLEE2() and PVOP_VCALLEE2() are used nowhere, so remove them.

Another no longer needed case is special handling of return types
larger than unsigned long. Replace that with a BUILD_BUG_ON().

DISABLE_INTERRUPTS() is used in 32-bit code only, so it can just be
replaced by cli.

INTERRUPT_RETURN in 32-bit code can be replaced by iret.

ENABLE_INTERRUPTS is used nowhere, so it can be removed.

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-10-jgr...@suse.com
---
 arch/x86/entry/entry_32.S |   4 +-
 arch/x86/include/asm/irqflags.h   |   5 +-
 arch/x86/include/asm/paravirt.h   |  35 +
 arch/x86/include/asm/paravirt_types.h | 112 +++--
 arch/x86/kernel/asm-offsets.c |   2 +-
 5 files changed, 35 insertions(+), 123 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4e079f2..96f0848 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -430,7 +430,7 @@
 * will soon execute iret and the tracer was already set to
 * the irqstate after the IRET:
 */
-   DISABLE_INTERRUPTS(CLBR_ANY)
+   cli
lss (%esp), %esp/* switch to espfix segment */
 .Lend_\@:
 #endif /* CONFIG_X86_ESPFIX32 */
@@ -1077,7 +1077,7 @@ restore_all_switch_stack:
 * when returning from IPI handler and when returning from
 * scheduler to user-space.
 */
-   INTERRUPT_RETURN
+   iret
 
 .section .fixup, "ax"
 SYM_CODE_START(asm_iret_error)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 144d70e..a0efbcd 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -109,9 +109,6 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 }
 #else
 
-#define ENABLE_INTERRUPTS(x)   sti
-#define DISABLE_INTERRUPTS(x)  cli
-
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(x)  pushfq; popq %rax
@@ -119,8 +116,6 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 
 #define INTERRUPT_RETURN   jmp native_iret
 
-#else
-#define INTERRUPT_RETURN   iret
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index def450f..a780509 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -719,6 +719,7 @@ extern void default_banner(void);
.if ((~(set)) & mask); pop %reg; .endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT_XXL
 
 #define PV_SAVE_REGS(set)  \
COND_PUSH(set, CLBR_RAX, rax);  \
@@ -744,46 +745,12 @@ extern void default_banner(void);
 #define PARA_PATCH(off)((off) / 8)
 #define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .quad, 8)
 #define PARA_INDIRECT(addr)*addr(%rip)
-#else
-#define PV_SAVE_REGS(set)  \
-   COND_PUSH(set, CLBR_EAX, eax);  \
-   COND_PUSH(set, CLBR_EDI, edi);  \
-   COND_PUSH(set, CLBR_ECX, ecx);  \
-   COND_PUSH(set, CLBR_EDX, edx)
-#define PV_RESTORE_REGS(set)   \
-   COND_POP(set, CLBR_EDX, edx);   \
-   COND_POP(set, CLBR_ECX, ecx);   \
-   COND_POP(set, CLBR_EDI, edi);   \
-   COND_POP(set, CLBR_EAX, eax)
-
-#define PARA_PATCH(off)((off) / 4)
-#define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .long, 4)
-#define PARA_INDIRECT(addr)*%cs:addr
-#endif
 
-#ifdef CONFIG_PARAVIRT_XXL
 #define INTERRUPT_RETURN   \
PARA_SITE(PARA_PATCH(PV_CPU_iret),  \
  ANNOTATE_RETPOLINE_SAFE;  \
  jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
 
-#define DISABLE_INTERRUPTS(clobbers)   \
-   PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable),   \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);\
-  

[tip: x86/alternatives] x86/alternative: Use ALTERNATIVE_TERNARY() in _static_cpu_has()

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: 2fe2a2c7a97c9bc32acc79154b75e754280f7867
Gitweb:
https://git.kernel.org/tip/2fe2a2c7a97c9bc32acc79154b75e754280f7867
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:12 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 19:33:43 +01:00

x86/alternative: Use ALTERNATIVE_TERNARY() in _static_cpu_has()

_static_cpu_has() contains a completely open-coded version of
ALTERNATIVE_TERNARY(). Replace that with the macro instead.

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20210311142319.4723-8-jgr...@suse.com
---
 arch/x86/include/asm/cpufeature.h | 41 ++
 1 file changed, 9 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h 
b/arch/x86/include/asm/cpufeature.h
index 1728d4c..16a51e7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 enum cpuid_leafs
 {
@@ -175,39 +176,15 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned 
int bit);
  */
 static __always_inline bool _static_cpu_has(u16 bit)
 {
-   asm_volatile_goto("1: jmp 6f\n"
-"2:\n"
-".skip -(((5f-4f) - (2b-1b)) > 0) * "
-"((5f-4f) - (2b-1b)),0x90\n"
-"3:\n"
-".section .altinstructions,\"a\"\n"
-" .long 1b - .\n"  /* src offset */
-" .long 4f - .\n"  /* repl offset */
-" .word %P[always]\n"  /* always replace */
-" .byte 3b - 1b\n" /* src len */
-" .byte 5f - 4f\n" /* repl len */
-" .byte 3b - 2b\n" /* pad len */
-".previous\n"
-".section .altinstr_replacement,\"ax\"\n"
-"4: jmp %l[t_no]\n"
-"5:\n"
-".previous\n"
-".section .altinstructions,\"a\"\n"
-" .long 1b - .\n"  /* src offset */
-" .long 0\n"   /* no replacement */
-" .word %P[feature]\n" /* feature bit */
-" .byte 3b - 1b\n" /* src len */
-" .byte 0\n"   /* repl len */
-" .byte 0\n"   /* pad len */
-".previous\n"
-".section .altinstr_aux,\"ax\"\n"
-"6:\n"
-" testb %[bitnum],%[cap_byte]\n"
-" jnz %l[t_yes]\n"
-" jmp %l[t_no]\n"
-".previous\n"
+   asm_volatile_goto(
+   ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
+   ".section .altinstr_aux,\"ax\"\n"
+   "6:\n"
+   " testb %[bitnum],%[cap_byte]\n"
+   " jnz %l[t_yes]\n"
+   " jmp %l[t_no]\n"
+   ".previous\n"
 : : [feature]  "i" (bit),
-[always]   "i" (X86_FEATURE_ALWAYS),
 [bitnum]   "i" (1 << (bit & 7)),
 [cap_byte] "m" (((const char 
*)boot_cpu_data.x86_capability)[bit >> 3])
 : : t_yes, t_no);


[tip: x86/alternatives] x86/paravirt: Add new features for paravirt patching

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: 4e6292114c741221479046515b1aa8145cf1e3f6
Gitweb:
https://git.kernel.org/tip/4e6292114c741221479046515b1aa8145cf1e3f6
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:13 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 19:51:49 +01:00

x86/paravirt: Add new features for paravirt patching

To be able to switch paravirt patching from special-cased custom
code sequences to ALTERNATIVE handling, some new X86_FEATURE_* flags
are needed. This makes it possible to have the standard indirect pv call
as the default code and to patch that with the non-Xen custom code
sequence via ALTERNATIVE patching later.

Make sure paravirt patching is performed before alternatives patching.
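
A plausible sketch of what such a capability-setting step can look like
(illustrative only; the corresponding paravirt-spinlocks.c hunk is cut
off at the end of this archive entry):

  void __init paravirt_set_cap(void)
  {
          if (!pv_is_native_spin_unlock())
                  setup_force_cpu_cap(X86_FEATURE_PVUNLOCK);

          if (!pv_is_native_vcpu_is_preempted())
                  setup_force_cpu_cap(X86_FEATURE_VCPUPREEMPT);
  }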

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-9-jgr...@suse.com
---
 arch/x86/include/asm/cpufeatures.h   |  2 ++-
 arch/x86/include/asm/paravirt.h  | 10 +-
 arch/x86/kernel/alternative.c| 30 +--
 arch/x86/kernel/paravirt-spinlocks.c |  9 -
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index cc96e26..b440c95 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -236,6 +236,8 @@
 #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table 
access-dirty bit */
 #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports 
the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL( 8*32+19) /* "" VMware prefers 
VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK   ( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT( 8*32+21) /* "" PV 
vcpu_is_preempted function */
 
 /* Intel-defined CPU features, CPUID level 0x0007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE   ( 9*32+ 0) /* RDFSBASE, WRFSBASE, 
RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6408fd0..def450f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -45,6 +45,10 @@ static inline u64 paravirt_steal_clock(int cpu)
return static_call(pv_steal_clock)(cpu);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init paravirt_set_cap(void);
+#endif
+
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void)
 {
@@ -809,5 +813,11 @@ static inline void paravirt_arch_exit_mmap(struct 
mm_struct *mm)
 {
 }
 #endif
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void paravirt_set_cap(void)
+{
+}
+#endif
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 133b549..76ad4ce 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 int __read_mostly alternatives_patched;
 
@@ -733,6 +734,33 @@ void __init alternative_instructions(void)
 * patching.
 */
 
+   /*
+* Paravirt patching and alternative patching can be combined to
+* replace a function call with a short direct code sequence (e.g.
+* by setting a constant return value instead of doing that in an
+* external function).
+* In order to make this work the following sequence is required:
+* 1. set (artificial) features depending on used paravirt
+*functions which can later influence alternative patching
+* 2. apply paravirt patching (generally replacing an indirect
+*function call with a direct one)
+* 3. apply alternative patching (e.g. replacing a direct function
+*call with a custom code sequence)
+* Doing paravirt patching after alternative patching would clobber
+* the optimization of the custom code with a function call again.
+*/
+   paravirt_set_cap();
+
+   /*
+* First patch paravirt functions, such that we overwrite the indirect
+* call with the direct call.
+*/
+   apply_paravirt(__parainstructions, __parainstructions_end);
+
+   /*
+* Then patch alternatives, such that those paravirt calls that are in
+* alternatives can be overwritten by their immediate fragments.
+*/
apply_alternatives(__alt_instructions, __alt_instructions_end);
 
 #ifdef CONFIG_SMP
@@ -751,8 +779,6 @@ void __init alternative_instructions(void)
}
 #endif
 
-   apply_paravirt(__parainstructions, __parainstructions_end);
-
restart_nmi();
alternatives_patched = 1;
 }
diff --git a/arch/x86/kernel/paravirt-spinlocks.c 
b/arch/x86/kernel/paravirt-s

[tip: x86/alternatives] x86/paravirt: Add new PVOP_ALT* macros to support pvops in ALTERNATIVEs

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: 00aa3193ab7a04b25bb8c68e377815696eb5bf56
Gitweb:
https://git.kernel.org/tip/00aa3193ab7a04b25bb8c68e377815696eb5bf56
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:17 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 20:05:44 +01:00

x86/paravirt: Add new PVOP_ALT* macros to support pvops in ALTERNATIVEs

Instead of using paravirt patching for custom code sequences, add
support for using ALTERNATIVE handling combined with paravirt call
patching.
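
A hypothetical usage sketch (my_read_cr2 is an illustrative wrapper; the
real conversions follow in a later patch of this series): default to the
indirect pv call and let ALTERNATIVE patch in a short inline sequence
when not running as a Xen PV guest:

  static inline unsigned long my_read_cr2(void)
  {
          return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
                                  "mov %%cr2, %%rax;",
                                  ALT_NOT(X86_FEATURE_XENPV));
  }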

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-13-jgr...@suse.com
---
 arch/x86/include/asm/paravirt_types.h | 49 +-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 0afdac8..0ed9762 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -477,44 +477,91 @@ int paravirt_disable_iospace(void);
ret;\
})
 
+#define PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr, \
+ extra_clbr, ...)  \
+   ({  \
+   PVOP_CALL_ARGS; \
+   PVOP_TEST_NULL(op); \
+   asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),   \
+alt, cond) \
+: call_clbr, ASM_CALL_CONSTRAINT   \
+: paravirt_type(op),   \
+  paravirt_clobber(clbr),  \
+  ##__VA_ARGS__\
+: "memory", "cc" extra_clbr);  \
+   ret;\
+   })
+
 #define __PVOP_CALL(rettype, op, ...)  \
PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY,   \
  PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
 
+#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...)   \
+   PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS,   \
+ ##__VA_ARGS__)
+
 #define __PVOP_CALLEESAVE(rettype, op, ...)\
PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG,  \
  PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
 
+#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \
+   PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \
+ CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+
 #define __PVOP_VCALL(op, ...)  \
(void)PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS,\
   VEXTRA_CLOBBERS, ##__VA_ARGS__)
 
+#define __PVOP_ALT_VCALL(op, alt, cond, ...)   \
+   (void)PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY,  \
+   PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS,   \
+   ##__VA_ARGS__)
+
 #define __PVOP_VCALLEESAVE(op, ...)\
(void)PVOP_CALL(, op.func, CLBR_RET_REG,\
- PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
+   PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
+#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \
+   (void)PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG, \
+   PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
 
 #define PVOP_CALL0(rettype, op)
\
__PVOP_CALL(rettype, op)
 #define PVOP_VCALL0(op)
\
__PVOP_VCALL(op)
+#define PVOP_ALT_CALL0(rettype, op, alt, cond) \
+   __PVOP_ALT_CALL(rettype, op, alt, cond)
+#define PVOP_ALT_VCALL0(op, alt, cond) \
+   __PVOP_ALT_VCALL(op, alt, cond)
 
 #define PVOP_CALLEE0(rettype, op)  \
__PVOP_CALLEESAVE(rettype, op)
 #define PVOP_VCALLEE0(op)  \
__PVOP_VCALLEESAVE(op)
+#define PVOP_ALT_CALLEE0(rettype, op, alt, cond)   \
+   __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond)
+#define PVOP_ALT_VC

[tip: x86/alternatives] x86/paravirt: Switch iret pvops to ALTERNATIVE

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: ae755b5a45482b5de4d96d6f35823076af77445e
Gitweb:
https://git.kernel.org/tip/ae755b5a45482b5de4d96d6f35823076af77445e
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:16 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 19:58:54 +01:00

x86/paravirt: Switch iret pvops to ALTERNATIVE

The iret paravirt op is rather special as it uses a jmp instead
of a call instruction. Switch it to ALTERNATIVE.

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-12-jgr...@suse.com
---
 arch/x86/include/asm/paravirt.h   |  6 +++---
 arch/x86/include/asm/paravirt_types.h |  5 +
 arch/x86/kernel/asm-offsets.c |  5 +-
 arch/x86/kernel/paravirt.c| 26 ++
 arch/x86/xen/enlighten_pv.c   |  3 +--
 5 files changed, 7 insertions(+), 38 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a780509..913acf7 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -747,9 +747,9 @@ extern void default_banner(void);
 #define PARA_INDIRECT(addr)*addr(%rip)
 
 #define INTERRUPT_RETURN   \
-   PARA_SITE(PARA_PATCH(PV_CPU_iret),  \
- ANNOTATE_RETPOLINE_SAFE;  \
- jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
+   ANNOTATE_RETPOLINE_SAFE;\
+   ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);",\
+   X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;")
 
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(clobbers)\
diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 45bd216..0afdac8 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -151,10 +151,6 @@ struct pv_cpu_ops {
 
u64 (*read_pmc)(int counter);
 
-   /* Normal iret.  Jump to this with the standard iret stack
-  frame set up. */
-   void (*iret)(void);
-
void (*start_context_switch)(struct task_struct *prev);
void (*end_context_switch)(struct task_struct *next);
 #endif
@@ -294,6 +290,7 @@ struct paravirt_patch_template {
 
 extern struct pv_info pv_info;
 extern struct paravirt_patch_template pv_ops;
+extern void (*paravirt_iret)(void);
 
 #define PARAVIRT_PATCH(x)  \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 7365080..ecd3fd6 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -61,11 +61,6 @@ static void __used common(void)
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
 #endif
 
-#ifdef CONFIG_PARAVIRT_XXL
-   BLANK();
-   OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
-#endif
-
 #ifdef CONFIG_XEN
BLANK();
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a688edf..9b0f568 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -86,25 +86,6 @@ u64 notrace _paravirt_ident_64(u64 x)
 {
return x;
 }
-
-static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
-  unsigned long addr, unsigned len)
-{
-   struct branch *b = insn_buff;
-   unsigned long delta = (unsigned long)target - (addr+5);
-
-   if (len < 5) {
-#ifdef CONFIG_RETPOLINE
-   WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void 
*)addr);
-#endif
-   return len; /* call too long for patch site */
-   }
-
-   b->opcode = 0xe9;   /* jmp */
-   b->delta = delta;
-
-   return 5;
-}
 #endif
 
 DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
@@ -136,9 +117,6 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insn_buff, len);
 
-   else if (type == PARAVIRT_PATCH(cpu.iret))
-   /* If operation requires a jmp, then jmp */
-   ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
 #endif
else
/* Otherwise call the function. */
@@ -313,8 +291,6 @@ struct paravirt_patch_template pv_ops = {
 
.cpu.load_sp0   = native_load_sp0,
 
-   .cpu.iret   = native_iret,
-
 #ifdef CONFIG_X86_IOPL_IOPERM
.cpu.invalidate_io_bitmap   = native_tss_invalidate_io_bitmap,
.cpu.update_io_bitmap 

[tip: x86/alternatives] x86/paravirt: Simplify paravirt macros

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: 0b8d366a942fd48a83dfa728e9f8a8d8b20e735f
Gitweb:
https://git.kernel.org/tip/0b8d366a942fd48a83dfa728e9f8a8d8b20e735f
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:15 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 19:52:52 +01:00

x86/paravirt: Simplify paravirt macros

The central pvops call macros PVOP_CALL() and PVOP_VCALL() now
look very similar.

The main differences are using PVOP_VCALL_ARGS or PVOP_CALL_ARGS, which
are identical, and the return value handling.

So drop PVOP_VCALL_ARGS and instead of PVOP_VCALL() just use
(void)PVOP_CALL(long, ...).

Note that it isn't easily possible to just redefine PVOP_VCALL()
to use PVOP_CALL() instead, as this would require further hiding of
commas in macro parameters.
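
A plausible sketch of the resulting definition (the corresponding hunk
is cut off in this archive excerpt, so treat this as illustrative):

  #define __PVOP_VCALL(op, ...)                                           \
          (void)PVOP_CALL(long, op, CLBR_ANY, PVOP_VCALL_CLOBBERS,        \
                          VEXTRA_CLOBBERS, ##__VA_ARGS__)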

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-11-jgr...@suse.com
---
 arch/x86/include/asm/paravirt_types.h | 41 +++---
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 42f9eef..45bd216 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -408,11 +408,9 @@ int paravirt_disable_iospace(void);
  * makes sure the incoming and outgoing types are always correct.
  */
 #ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS
\
+#define PVOP_CALL_ARGS \
unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
 
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)  "a" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)  "d" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)  "c" ((unsigned long)(x))
@@ -428,12 +426,10 @@ int paravirt_disable_iospace(void);
 #define VEXTRA_CLOBBERS
 #else  /* CONFIG_X86_64 */
 /* [re]ax isn't an arg, but the return val */
-#define PVOP_VCALL_ARGS\
+#define PVOP_CALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
__edx = __edx, __ecx = __ecx, __eax = __eax;
 
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)  "D" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)  "S" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)  "d" ((unsigned long)(x))
@@ -458,59 +454,46 @@ int paravirt_disable_iospace(void);
 #define PVOP_TEST_NULL(op) ((void)pv_ops.op)
 #endif
 
-#define PVOP_RETMASK(rettype)  \
+#define PVOP_RETVAL(rettype)   \
({  unsigned long __mask = ~0UL;\
+   BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long));  \
switch (sizeof(rettype)) {  \
case 1: __mask =   0xffUL; break;   \
case 2: __mask = 0xUL; break;   \
case 4: __mask = 0xUL; break;   \
default: break; \
}   \
-   __mask; \
+   __mask & __eax; \
})
 
 
-#define PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, ...)   \
+#define PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...)   \
({  \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
-   BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long));  \
asm volatile(paravirt_alt(PARAVIRT_CALL)\
 : call_clbr, ASM_CALL_CONSTRAINT   \
 : paravirt_type(op),   \
   paravirt_clobber(clbr),  \
   ##__VA_ARGS__\
 : "memory", "cc" extra_clbr);  \
-   (rettype)(__eax & PVOP_RETMASK(rettype));   \
+   ret;\
})
 
 #define __PVOP_CALL(rettype, op, ...)  \
-   PVOP_CALL(rettype, 

[tip: x86/alternatives] x86/paravirt: Switch functions with custom code to ALTERNATIVE

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: fafe5e74229fd3f425e3cbfc68b90e615aa6d62f
Gitweb:
https://git.kernel.org/tip/fafe5e74229fd3f425e3cbfc68b90e615aa6d62f
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:18 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 20:07:01 +01:00

x86/paravirt: Switch functions with custom code to ALTERNATIVE

Instead of using paravirt patching for custom code sequences, use
ALTERNATIVE for the functions with custom code replacements.

Instead of patching a ud2 instruction for unpopulated vector entries
into the caller site, use a simple function just calling BUG() as a
replacement.

Simplify the register defines for assembler paravirt calling, as there
isn't much usage left.
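
A minimal sketch of the kind of replacement function described above
(the name is illustrative):

  noinstr void paravirt_BUG(void)
  {
          BUG();
  }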

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-14-jgr...@suse.com
---
 arch/x86/entry/entry_64.S |   2 +-
 arch/x86/include/asm/irqflags.h   |   2 +-
 arch/x86/include/asm/paravirt.h   | 101 -
 arch/x86/include/asm/paravirt_types.h |   6 +-
 arch/x86/kernel/paravirt.c|  16 +---
 arch/x86/kernel/paravirt_patch.c  |  88 +--
 6 files changed, 58 insertions(+), 157 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 400908d..12e2e3c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -305,7 +305,7 @@ SYM_CODE_END(ret_from_fork)
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
pushq %rax
-   SAVE_FLAGS(CLBR_RAX)
+   SAVE_FLAGS
testl $X86_EFLAGS_IF, %eax
jz .Lokay_\@
ud2
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index a0efbcd..c5ce984 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -111,7 +111,7 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(x)  pushfq; popq %rax
+#define SAVE_FLAGS pushfq; popq %rax
 #endif
 
 #define INTERRUPT_RETURN   jmp native_iret
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 913acf7..43992e5 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -135,7 +135,9 @@ static inline void write_cr0(unsigned long x)
 
 static inline unsigned long read_cr2(void)
 {
-   return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
+   return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
+   "mov %%cr2, %%rax;",
+   ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void write_cr2(unsigned long x)
@@ -145,12 +147,14 @@ static inline void write_cr2(unsigned long x)
 
 static inline unsigned long __read_cr3(void)
 {
-   return PVOP_CALL0(unsigned long, mmu.read_cr3);
+   return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
+ "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void write_cr3(unsigned long x)
 {
-   PVOP_VCALL1(mmu.write_cr3, x);
+   PVOP_ALT_VCALL1(mmu.write_cr3, x,
+   "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void __write_cr4(unsigned long x)
@@ -170,7 +174,7 @@ static inline void halt(void)
 
 static inline void wbinvd(void)
 {
-   PVOP_VCALL0(cpu.wbinvd);
+   PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline u64 paravirt_read_msr(unsigned msr)
@@ -384,22 +388,28 @@ static inline void paravirt_release_p4d(unsigned long pfn)
 
 static inline pte_t __pte(pteval_t val)
 {
-   return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) };
+   return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
 }
 
 static inline pteval_t pte_val(pte_t pte)
 {
-   return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
+   return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
+   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline pgd_t __pgd(pgdval_t val)
 {
-   return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) };
+   return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
 }
 
 static inline pgdval_t pgd_val(pgd_t pgd)
 {
-   return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
+   return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
+   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XEN

[tip: x86/alternatives] x86/paravirt: Have only one paravirt patch function

2021-03-12 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: 054ac8ad5ebe4a69e1f0e842483821ddbe560121
Gitweb:
https://git.kernel.org/tip/054ac8ad5ebe4a69e1f0e842483821ddbe560121
Author:Juergen Gross 
AuthorDate:Thu, 11 Mar 2021 15:23:19 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 11 Mar 2021 20:11:09 +01:00

x86/paravirt: Have only one paravirt patch function

There is no need any longer to have different paravirt patch functions
for native and Xen. Eliminate native_patch() and rename
paravirt_patch_default() to paravirt_patch().

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210311142319.4723-15-jgr...@suse.com
---
 arch/x86/include/asm/paravirt_types.h | 19 +--
 arch/x86/kernel/Makefile  |  3 +--
 arch/x86/kernel/alternative.c |  2 +-
 arch/x86/kernel/paravirt.c| 20 ++--
 arch/x86/kernel/paravirt_patch.c  | 11 ---
 arch/x86/xen/enlighten_pv.c   |  1 -
 6 files changed, 5 insertions(+), 51 deletions(-)
 delete mode 100644 arch/x86/kernel/paravirt_patch.c

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 588ff14..9d1ddb7 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -68,19 +68,6 @@ struct pv_info {
const char *name;
 };
 
-struct pv_init_ops {
-   /*
-* Patch may replace one of the defined code sequences with
-* arbitrary code, subject to the same register constraints.
-* This generally means the code is not free to clobber any
-* registers other than EAX.  The patch function should return
-* the number of bytes of code generated, as we nop pad the
-* rest in generic code.
-*/
-   unsigned (*patch)(u8 type, void *insn_buff,
- unsigned long addr, unsigned len);
-} __no_randomize_layout;
-
 #ifdef CONFIG_PARAVIRT_XXL
 struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
@@ -276,7 +263,6 @@ struct pv_lock_ops {
  * number for each function using the offset which we use to indicate
  * what to patch. */
 struct paravirt_patch_template {
-   struct pv_init_ops  init;
struct pv_cpu_ops   cpu;
struct pv_irq_ops   irq;
struct pv_mmu_ops   mmu;
@@ -317,10 +303,7 @@ extern void (*paravirt_iret)(void);
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
 
-unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, 
unsigned len);
-unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char 
*start, const char *end);
-
-unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned 
len);
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, 
unsigned int len);
 
 int paravirt_disable_iospace(void);
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2ddf083..0704c2a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,7 +35,6 @@ KASAN_SANITIZE_sev-es.o   
:= n
 KCSAN_SANITIZE := n
 
 OBJECT_FILES_NON_STANDARD_test_nx.o:= y
-OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
 
 ifdef CONFIG_FRAME_POINTER
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -121,7 +120,7 @@ obj-$(CONFIG_AMD_NB)+= amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)+= kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 76ad4ce..f810e6f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -616,7 +616,7 @@ void __init_or_module apply_paravirt(struct 
paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
memcpy(insn_buff, p->instr, p->len);
-   used = pv_ops.init.patch(p->type, insn_buff, (unsigned 
long)p->instr, p->len);
+   used = paravirt_patch(p->type, insn_buff, (unsigned 
long)p->instr, p->len);
 
BUG_ON(used > p->len);
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 855ae08..d073026 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -99,8 +99,8 @@ void __init native_pv_lock_init(void)
  

[GIT PULL] xen: branch for v5.12-rc3

2021-03-12 Thread Juergen Gross
Linus,

Please git pull the following tag:

 git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git 
for-linus-5.12b-rc3-tag

xen: branch for v5.12-rc3

It contains two patch series and a single patch:

- a small cleanup patch to remove unneeded symbol exports
 - a series to clean up Xen grant handling (avoiding allocations in some
  cases, and using common defines for "invalid" values)
- a series to address a race issue in Xen event channel handling


Thanks.

Juergen

 arch/arm/xen/p2m.c   |   5 +-
 arch/x86/xen/p2m.c   |   6 +-
 drivers/pci/xen-pcifront.c   |   4 +-
 drivers/xen/events/events_2l.c   |  22 --
 drivers/xen/events/events_base.c | 130 +++
 drivers/xen/events/events_fifo.c |   7 --
 drivers/xen/events/events_internal.h |  14 ++--
 drivers/xen/gntdev.c |  54 +--
 include/xen/grant_table.h|   7 ++
 include/xen/xenbus.h |   1 -
 10 files changed, 169 insertions(+), 81 deletions(-)

Jan Beulich (4):
  Xen: drop exports of {set,clear}_foreign_p2m_mapping()
  Xen/gntdev: don't needlessly allocate k{,un}map_ops[]
  Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF}
  Xen/gntdev: don't needlessly use kvcalloc()

Juergen Gross (3):
  xen/events: reset affinity of 2-level event when tearing it down
  xen/events: don't unmask an event channel when an eoi is pending
  xen/events: avoid handling the same event on two cpus at the same time


[PATCH v7 06/14] x86/alternative: support ALTERNATIVE_TERNARY

2021-03-11 Thread Juergen Gross
Add ALTERNATIVE_TERNARY support for replacing an initial instruction
with either of two instructions depending on a feature:

  ALTERNATIVE_TERNARY "default_instr", FEATURE_NR,
  "feature_on_instr", "feature_off_instr"

which will start with "default_instr" and at patch time will, depending
on FEATURE_NR being set or not, patch that with either
"feature_on_instr" or "feature_off_instr".

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- new patch
V4:
- use X86_FEATURE_ALWAYS instead of negated feature (Boris Petkov)
- unfortunately this isn't enough to get rid of the "not feature"
  support, as this is needed in the patch "x86/paravirt: switch
  functions with custom code to ALTERNATIVE", too
V5:
- carve out the "not feature" part
V7:
- rename parameter names (Boris Petkov)
---
 arch/x86/include/asm/alternative.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 649e56f70889..a044e59cbdf5 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -179,6 +179,10 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_REPLACEMENT(newinstr2, 2)  \
".popsection\n"
 
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+   ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS,\
+ newinstr_yes, feature)
+
 #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, 
feat3) \
OLDINSTR_3(oldinsn, 1, 2, 3)
\
".pushsection .altinstructions,\"a\"\n" 
\
@@ -210,6 +214,9 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, 
newinstr2, feature2) ::: "memory")
 
+#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \
+   asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, 
newinstr_yes, newinstr_no) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *
@@ -380,6 +387,10 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
.popsection
 .endm
 
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+   ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS,\
+   newinstr_yes, feature
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_ALTERNATIVE_H */
-- 
2.26.2



[PATCH v7 05/14] x86/alternative: support not-feature

2021-03-11 Thread Juergen Gross
Add support for alternative patching for the case that a feature is not
present on the current CPU.

For users of ALTERNATIVE() and friends an inverted feature is specified
by applying the ALT_NOT() macro to it, e.g.:

ALTERNATIVE(old, new, ALT_NOT(feature))
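
For illustration, later in the series this is combined with the pvops call
macros so that the indirect pv call stays the default and the simple native
instruction is patched in whenever the guest is *not* Xen PV, e.g.:

  static inline unsigned long read_cr2(void)
  {
          return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
                                  "mov %%cr2, %%rax;",
                                  ALT_NOT(X86_FEATURE_XENPV));
  }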

Signed-off-by: Juergen Gross 
---
V5:
- split off from next patch
- reworked to use flag byte (Boris Petkov)
V6:
- rework again to not use flag byte (Boris Petkov)
V7:
- minor tweaks: move defines up, comment wording (Boris Petkov)
---
 arch/x86/include/asm/alternative.h |  3 +++
 arch/x86/kernel/alternative.c  | 20 +++-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 53f295f41c34..649e56f70889 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -6,6 +6,9 @@
 #include 
 #include 
 
+#define ALTINSTR_FLAG_INV  (1 << 15)
+#define ALT_NOT(feat)  ((feat) | ALTINSTR_FLAG_INV)
+
 #ifndef __ASSEMBLY__
 
 #include 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 8d778e46725d..133b549dc091 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -388,21 +388,31 @@ void __init_or_module noinline apply_alternatives(struct 
alt_instr *start,
 */
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
+   /* Mask away "NOT" flag bit for feature to test. */
+   u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;
 
		instr = (u8 *)&a->instr_offset + a->instr_offset;
		replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
-   BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
-   if (!boot_cpu_has(a->cpuid)) {
+   BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);
+
+   /*
+* Patch if either:
+* - feature is present
+* - feature not present but ALTINSTR_FLAG_INV is set to mean,
+*   patch if feature is *NOT* present.
+*/
+   if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
if (a->padlen > 1)
optimize_nops(a, instr);
 
continue;
}
 
-   DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, 
len: %d), pad: %d",
-   a->cpuid >> 5,
-   a->cpuid & 0x1f,
+   DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: 
(%px, len: %d), pad: %d",
+   (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
+   feature >> 5,
+   feature & 0x1f,
instr, instr, a->instrlen,
replacement, a->replacementlen, a->padlen);
 
-- 
2.26.2



[PATCH v7 00/14] x86: major paravirt cleanup

2021-03-11 Thread Juergen Gross
This is a major cleanup of the paravirt infrastructure aiming at
eliminating all custom code patching via paravirt patching.

This is achieved by using ALTERNATIVE instead, leading to the ability
to give objtool access to the patched in instructions.

In order to remove most of the 32-bit special handling from pvops, the
time related operations are switched to use static_call() instead.

At the end of this series all paravirt patching has to do is to
replace indirect calls with direct ones. In a further step this could
be switched to static_call(), too.
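
Purely as an illustration (not part of the patches), a pvops call site then
boils down to an indirect call which patching merely turns into a direct
one; the slot offset below is just a placeholder name:

  # default, as emitted at build time
  call *pv_ops+SLOT_OFFSET(%rip)

  # after paravirt patching, with the native function registered in pv_ops
  call native_write_cr3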

Changes in V7:
- dropped patch 3, as already applied on tip tree
- new patch 3 (patches 1 and 7 have been added to V6 late)
- addressed comments by Boris

Changes in V6:
- switched back to "not" bit in feature value for "not feature"
- other minor comments addressed

Changes in V5:
- patches 1-5 of V4 dropped, as already applied
- new patches 1+3
- fixed patch 2
- split V4 patch 8 into patches 4+5
- use flag byte instead of negative feature bit for "not feature"

Changes in V4:
- fixed several build failures
- removed objtool patch, as objtool patches are in tip now
- added patch 1 for making usage of static_call easier
- even more cleanup

Changes in V3:
- added patches 7 and 12
- addressed all comments

Changes in V2:
- added patches 5-12

Juergen Gross (14):
  x86/alternative: merge include files
  static_call: move struct static_call_key definition to
static_call_types.h
  static_call: add function to query current function
  x86/paravirt: switch time pvops functions to use static_call()
  x86/alternative: support not-feature
  x86/alternative: support ALTERNATIVE_TERNARY
  x86/alternative: don't open code ALTERNATIVE_TERNARY() in
_static_cpu_has()
  x86: add new features for paravirt patching
  x86/paravirt: remove no longer needed 32-bit pvops cruft
  x86/paravirt: simplify paravirt macros
  x86/paravirt: switch iret pvops to ALTERNATIVE
  x86/paravirt: add new macros PVOP_ALT* supporting pvops in
ALTERNATIVEs
  x86/paravirt: switch functions with custom code to ALTERNATIVE
  x86/paravirt: have only one paravirt patch function

 arch/arm/include/asm/paravirt.h  |  14 +-
 arch/arm/kernel/paravirt.c   |   9 +-
 arch/arm64/include/asm/paravirt.h|  14 +-
 arch/arm64/kernel/paravirt.c |  13 +-
 arch/x86/Kconfig |   1 +
 arch/x86/entry/entry_32.S|   6 +-
 arch/x86/entry/entry_64.S|   2 +-
 arch/x86/entry/vdso/vdso32/system_call.S |   2 +-
 arch/x86/include/asm/alternative-asm.h   | 114 
 arch/x86/include/asm/alternative.h   | 126 +-
 arch/x86/include/asm/cpufeature.h|  41 +
 arch/x86/include/asm/cpufeatures.h   |   2 +
 arch/x86/include/asm/irqflags.h  |   7 +-
 arch/x86/include/asm/mshyperv.h  |   2 +-
 arch/x86/include/asm/nospec-branch.h |   1 -
 arch/x86/include/asm/paravirt.h  | 167 --
 arch/x86/include/asm/paravirt_types.h| 210 +--
 arch/x86/include/asm/smap.h  |   5 +-
 arch/x86/kernel/Makefile |   3 +-
 arch/x86/kernel/alternative.c|  52 +-
 arch/x86/kernel/asm-offsets.c|   7 -
 arch/x86/kernel/cpu/vmware.c |   5 +-
 arch/x86/kernel/kvm.c|   2 +-
 arch/x86/kernel/kvmclock.c   |   2 +-
 arch/x86/kernel/paravirt-spinlocks.c |   9 +
 arch/x86/kernel/paravirt.c   |  75 ++--
 arch/x86/kernel/paravirt_patch.c |  99 ---
 arch/x86/kernel/tsc.c|   3 +-
 arch/x86/lib/atomic64_386_32.S   |   2 +-
 arch/x86/lib/atomic64_cx8_32.S   |   2 +-
 arch/x86/lib/copy_page_64.S  |   2 +-
 arch/x86/lib/copy_user_64.S  |   2 +-
 arch/x86/lib/memcpy_64.S |   2 +-
 arch/x86/lib/memmove_64.S|   2 +-
 arch/x86/lib/memset_64.S |   2 +-
 arch/x86/lib/retpoline.S |   2 +-
 arch/x86/xen/enlighten_pv.c  |   4 +-
 arch/x86/xen/time.c  |  26 +--
 drivers/xen/time.c   |   3 +-
 include/linux/static_call.h  |  26 +--
 include/linux/static_call_types.h|  18 ++
 tools/include/linux/static_call_types.h  |  18 ++
 42 files changed, 473 insertions(+), 631 deletions(-)
 delete mode 100644 arch/x86/include/asm/alternative-asm.h
 delete mode 100644 arch/x86/kernel/paravirt_patch.c

-- 
2.26.2



[PATCH v7 04/14] x86/paravirt: switch time pvops functions to use static_call()

2021-03-11 Thread Juergen Gross
The time pvops functions are the only ones left which might be
used in 32-bit mode and which return a 64-bit value.

Switch them to use the static_call() mechanism instead of pvops, as
this allows quite some simplification of the pvops implementation.
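
The resulting pattern is the standard static_call one; condensed from the
diff below:

  static u64 native_steal_clock(int cpu)
  {
          return 0;
  }

  DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);

  /* a hypervisor overrides the default at init time ... */
  static_call_update(pv_steal_clock, para_steal_clock);

  /* ... and callers simply do */
  static inline u64 paravirt_steal_clock(int cpu)
  {
          return static_call(pv_steal_clock)(cpu);
  }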

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V4:
- drop paravirt_time.h again
- don't move Hyper-V code (Michael Kelley)
V5:
- drop no longer needed Hyper-V modification (Michael Kelley)
- switch Arm and Arm64 to static_call(), too (kernel test robot)
V6:
- factor out common parts in Xen pv/pvh initialization (Boris Petkov)
V7:
- use new function static_call_query()
---
 arch/arm/include/asm/paravirt.h   | 14 +-
 arch/arm/kernel/paravirt.c|  9 +++--
 arch/arm64/include/asm/paravirt.h | 14 +-
 arch/arm64/kernel/paravirt.c  | 13 +
 arch/x86/Kconfig  |  1 +
 arch/x86/include/asm/mshyperv.h   |  2 +-
 arch/x86/include/asm/paravirt.h   | 15 ---
 arch/x86/include/asm/paravirt_types.h |  6 --
 arch/x86/kernel/cpu/vmware.c  |  5 +++--
 arch/x86/kernel/kvm.c |  2 +-
 arch/x86/kernel/kvmclock.c|  2 +-
 arch/x86/kernel/paravirt.c| 13 +
 arch/x86/kernel/tsc.c |  3 ++-
 arch/x86/xen/time.c   | 26 +-
 drivers/xen/time.c|  3 ++-
 15 files changed, 71 insertions(+), 57 deletions(-)

diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
index cdbf02d9c1d4..95d5b0d625cd 100644
--- a/arch/arm/include/asm/paravirt.h
+++ b/arch/arm/include/asm/paravirt.h
@@ -3,23 +3,19 @@
 #define _ASM_ARM_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include 
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-   unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-   struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-   return pv_ops.time.steal_clock(cpu);
+   return static_call(pv_steal_clock)(cpu);
 }
 #endif
 
diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c
index 4cfed91fe256..7dd9806369fb 100644
--- a/arch/arm/kernel/paravirt.c
+++ b/arch/arm/kernel/paravirt.c
@@ -9,10 +9,15 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
diff --git a/arch/arm64/include/asm/paravirt.h 
b/arch/arm64/include/asm/paravirt.h
index cf3a0fd7c1a7..9aa193e0e8f2 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -3,23 +3,19 @@
 #define _ASM_ARM64_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include 
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-   unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-   struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-   return pv_ops.time.steal_clock(cpu);
+   return static_call(pv_steal_clock)(cpu);
 }
 
 int __init pv_time_init(void);
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index c07d7a034941..75fed4460407 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -26,8 +27,12 @@
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
struct pvclock_vcpu_stolen_time *kaddr;
@@ -45,7 +50,7 @@ static int __init parse_no_stealacc(char *arg)
 early_param("no-steal-acc", parse_no_stealacc);
 
 /* return stolen time in ns by asking the hypervisor */
-static u64 pv_steal_clock(int cpu)
+static u64 para_steal_clock(int cpu)
 {
struct pv_time_stolen_time_region *reg;
 
@@ -150,7 +155,7 @@ int __init pv_time_init(void)
if (ret)
return ret;
 
-   pv_ops.time.steal_clock = pv_steal_clock;
+   static_call_update(pv_steal_clock, para_steal_clock);
 
static_key_slow_inc(_ste

[PATCH v7 14/14] x86/paravirt: have only one paravirt patch function

2021-03-11 Thread Juergen Gross
There is no need any longer to have different paravirt patch functions
for native and Xen. Eliminate native_patch() and rename
paravirt_patch_default() to paravirt_patch().

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- remove paravirt_patch_insns() (kernel test robot)
---
 arch/x86/include/asm/paravirt_types.h | 19 +--
 arch/x86/kernel/Makefile  |  3 +--
 arch/x86/kernel/alternative.c |  2 +-
 arch/x86/kernel/paravirt.c| 20 ++--
 arch/x86/kernel/paravirt_patch.c  | 11 ---
 arch/x86/xen/enlighten_pv.c   |  1 -
 6 files changed, 5 insertions(+), 51 deletions(-)
 delete mode 100644 arch/x86/kernel/paravirt_patch.c

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 588ff14ce969..9d1ddb7b4350 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -68,19 +68,6 @@ struct pv_info {
const char *name;
 };
 
-struct pv_init_ops {
-   /*
-* Patch may replace one of the defined code sequences with
-* arbitrary code, subject to the same register constraints.
-* This generally means the code is not free to clobber any
-* registers other than EAX.  The patch function should return
-* the number of bytes of code generated, as we nop pad the
-* rest in generic code.
-*/
-   unsigned (*patch)(u8 type, void *insn_buff,
- unsigned long addr, unsigned len);
-} __no_randomize_layout;
-
 #ifdef CONFIG_PARAVIRT_XXL
 struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
@@ -276,7 +263,6 @@ struct pv_lock_ops {
  * number for each function using the offset which we use to indicate
  * what to patch. */
 struct paravirt_patch_template {
-   struct pv_init_ops  init;
struct pv_cpu_ops   cpu;
struct pv_irq_ops   irq;
struct pv_mmu_ops   mmu;
@@ -317,10 +303,7 @@ extern void (*paravirt_iret)(void);
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
 
-unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, 
unsigned len);
-unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char 
*start, const char *end);
-
-unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned 
len);
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, 
unsigned int len);
 
 int paravirt_disable_iospace(void);
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2ddf08351f0b..0704c2a94272 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,7 +35,6 @@ KASAN_SANITIZE_sev-es.o   
:= n
 KCSAN_SANITIZE := n
 
 OBJECT_FILES_NON_STANDARD_test_nx.o:= y
-OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
 
 ifdef CONFIG_FRAME_POINTER
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -121,7 +120,7 @@ obj-$(CONFIG_AMD_NB)+= amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)+= kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 76ad4ce454c0..f810e6fececd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -616,7 +616,7 @@ void __init_or_module apply_paravirt(struct 
paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
memcpy(insn_buff, p->instr, p->len);
-   used = pv_ops.init.patch(p->type, insn_buff, (unsigned 
long)p->instr, p->len);
+   used = paravirt_patch(p->type, insn_buff, (unsigned 
long)p->instr, p->len);
 
BUG_ON(used > p->len);
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 855ae08a05a1..d0730264786b 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -99,8 +99,8 @@ void __init native_pv_lock_init(void)
	static_branch_disable(&virt_spin_lock_key);
 }
 
-unsigned paravirt_patch_default(u8 type, void *insn_buff,
-   unsigned long addr, unsigned len)
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
+   unsigned int len)
 {
/*
 * Neat trick to map patch type back to the call within the
@@ -121,19 +121,6 @@ unsigned paravirt_patch_default(

[PATCH v7 13/14] x86/paravirt: switch functions with custom code to ALTERNATIVE

2021-03-11 Thread Juergen Gross
Instead of using paravirt patching for custom code sequences use
ALTERNATIVE for the functions with custom code replacements.

Instead of patching an ud2 instruction for unpopulated vector entries
into the caller site, use a simple function just calling BUG() as a
replacement.

Simplify the register defines for assembler paravirt calling, as there
isn't much usage left.
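
The replacement for the former ud2 patching mentioned above is nothing more
than a trivial function calling BUG(); a sketch (the function name here is
only for illustration, the diff excerpt below is truncated before it):

  static void paravirt_BUG(void)
  {
          BUG();
  }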

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V4:
- fixed SAVE_FLAGS() (kernel test robot)
- added assembler paravirt cleanup
---
 arch/x86/entry/entry_64.S |   2 +-
 arch/x86/include/asm/irqflags.h   |   2 +-
 arch/x86/include/asm/paravirt.h   | 101 +-
 arch/x86/include/asm/paravirt_types.h |   6 --
 arch/x86/kernel/paravirt.c|  16 ++--
 arch/x86/kernel/paravirt_patch.c  |  88 --
 6 files changed, 58 insertions(+), 157 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 400908dff42e..12e2e3cd58be 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -305,7 +305,7 @@ SYM_CODE_END(ret_from_fork)
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
pushq %rax
-   SAVE_FLAGS(CLBR_RAX)
+   SAVE_FLAGS
testl $X86_EFLAGS_IF, %eax
jz .Lokay_\@
ud2
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index a0efbcd24b86..c5ce9845c999 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -111,7 +111,7 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(x)  pushfq; popq %rax
+#define SAVE_FLAGS pushfq; popq %rax
 #endif
 
 #define INTERRUPT_RETURN   jmp native_iret
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 913acf7a0ebf..43992e5c52c2 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -135,7 +135,9 @@ static inline void write_cr0(unsigned long x)
 
 static inline unsigned long read_cr2(void)
 {
-   return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
+   return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
+   "mov %%cr2, %%rax;",
+   ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void write_cr2(unsigned long x)
@@ -145,12 +147,14 @@ static inline void write_cr2(unsigned long x)
 
 static inline unsigned long __read_cr3(void)
 {
-   return PVOP_CALL0(unsigned long, mmu.read_cr3);
+   return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
+ "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void write_cr3(unsigned long x)
 {
-   PVOP_VCALL1(mmu.write_cr3, x);
+   PVOP_ALT_VCALL1(mmu.write_cr3, x,
+   "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void __write_cr4(unsigned long x)
@@ -170,7 +174,7 @@ static inline void halt(void)
 
 static inline void wbinvd(void)
 {
-   PVOP_VCALL0(cpu.wbinvd);
+   PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline u64 paravirt_read_msr(unsigned msr)
@@ -384,22 +388,28 @@ static inline void paravirt_release_p4d(unsigned long pfn)
 
 static inline pte_t __pte(pteval_t val)
 {
-   return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) };
+   return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
 }
 
 static inline pteval_t pte_val(pte_t pte)
 {
-   return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
+   return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
+   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline pgd_t __pgd(pgdval_t val)
 {
-   return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) };
+   return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
 }
 
 static inline pgdval_t pgd_val(pgd_t pgd)
 {
-   return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
+   return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
+   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 #define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -432,12 +442,15 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 
 static inline pmd_t __pmd(pmdval_t val)
 {
-   return (pmd_t) { PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val) };
+   return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
+ "mov %%rdi, %%rax",
+

[PATCH v7 10/14] x86/paravirt: simplify paravirt macros

2021-03-11 Thread Juergen Gross
The central pvops call macros PVOP_CALL() and PVOP_VCALL() now look
very similar.

The main differences are using PVOP_VCALL_ARGS or PVOP_CALL_ARGS, which
are identical, and the return value handling.

So drop PVOP_VCALL_ARGS and instead of PVOP_VCALL() just use
(void)PVOP_CALL(long, ...).

Note that it isn't easily possible to just redefine PVOP_VCALL()
to use PVOP_CALL() instead, as this would require further hiding of
commas in macro parameters.
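
Condensed, the resulting macro shape (visible as context in a later patch of
this series) is:

  #define __PVOP_VCALL(op, ...)                                   \
          (void)PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS,    \
                          VEXTRA_CLOBBERS, ##__VA_ARGS__)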

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- new patch
V4:
- fix build warnings with clang (kernel test robot)
---
 arch/x86/include/asm/paravirt_types.h | 41 ---
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 42f9eef84131..45bd21647dd8 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -408,11 +408,9 @@ int paravirt_disable_iospace(void);
  * makes sure the incoming and outgoing types are always correct.
  */
 #ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS
\
+#define PVOP_CALL_ARGS \
unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
 
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)  "a" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)  "d" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)  "c" ((unsigned long)(x))
@@ -428,12 +426,10 @@ int paravirt_disable_iospace(void);
 #define VEXTRA_CLOBBERS
 #else  /* CONFIG_X86_64 */
 /* [re]ax isn't an arg, but the return val */
-#define PVOP_VCALL_ARGS\
+#define PVOP_CALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
__edx = __edx, __ecx = __ecx, __eax = __eax;
 
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)  "D" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)  "S" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)  "d" ((unsigned long)(x))
@@ -458,59 +454,46 @@ int paravirt_disable_iospace(void);
 #define PVOP_TEST_NULL(op) ((void)pv_ops.op)
 #endif
 
-#define PVOP_RETMASK(rettype)  \
+#define PVOP_RETVAL(rettype)   \
({  unsigned long __mask = ~0UL;\
+   BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long));  \
switch (sizeof(rettype)) {  \
case 1: __mask =   0xffUL; break;   \
case 2: __mask = 0xUL; break;   \
case 4: __mask = 0xUL; break;   \
default: break; \
}   \
-   __mask; \
+   __mask & __eax; \
})
 
 
-#define PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, ...)   \
+#define PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...)   \
({  \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
-   BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long));  \
asm volatile(paravirt_alt(PARAVIRT_CALL)\
 : call_clbr, ASM_CALL_CONSTRAINT   \
 : paravirt_type(op),   \
   paravirt_clobber(clbr),  \
   ##__VA_ARGS__\
 : "memory", "cc" extra_clbr);  \
-   (rettype)(__eax & PVOP_RETMASK(rettype));   \
+   ret;\
})
 
 #define __PVOP_CALL(rettype, op, ...)  \
-   PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS,\
- EXTRA_CLOBBERS, ##__VA_ARGS__)
+   PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY,   \
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
 
 #define __PVOP_CALLEESAVE(rettype, op, ...)\
-   PVOP_CALL(rettype, op.func, CLBR_RET_REG,   \
+   PVOP_CALL(PVOP_RETVAL(rettype), op.func, 

[PATCH v7 09/14] x86/paravirt: remove no longer needed 32-bit pvops cruft

2021-03-11 Thread Juergen Gross
PVOP_VCALL4() is only used for Xen PV, while PVOP_CALL4() isn't used
at all. Keep PVOP_CALL4() for 64 bits due to symmetry reasons.

This allows removing the 32-bit definitions of those macros, leading
to a substantial simplification of the paravirt macros, as those were
the only ones needing non-empty "pre" and "post" parameters.

PVOP_CALLEE2() and PVOP_VCALLEE2() are used nowhere, so remove them.

Another no longer needed case is special handling of return types
larger than unsigned long. Replace that with a BUILD_BUG_ON().

DISABLE_INTERRUPTS() is used in 32-bit code only, so it can just be
replaced by cli.

INTERRUPT_RETURN in 32-bit code can be replaced by iret.

ENABLE_INTERRUPTS is used nowhere, so it can be removed.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
 arch/x86/entry/entry_32.S |   4 +-
 arch/x86/include/asm/irqflags.h   |   5 --
 arch/x86/include/asm/paravirt.h   |  35 +---
 arch/x86/include/asm/paravirt_types.h | 112 --
 arch/x86/kernel/asm-offsets.c |   2 -
 5 files changed, 35 insertions(+), 123 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4e079f250962..96f084868ec7 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -430,7 +430,7 @@
 * will soon execute iret and the tracer was already set to
 * the irqstate after the IRET:
 */
-   DISABLE_INTERRUPTS(CLBR_ANY)
+   cli
lss (%esp), %esp/* switch to espfix segment */
 .Lend_\@:
 #endif /* CONFIG_X86_ESPFIX32 */
@@ -1077,7 +1077,7 @@ restore_all_switch_stack:
 * when returning from IPI handler and when returning from
 * scheduler to user-space.
 */
-   INTERRUPT_RETURN
+   iret
 
 .section .fixup, "ax"
 SYM_CODE_START(asm_iret_error)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 144d70ea4393..a0efbcd24b86 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -109,9 +109,6 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 }
 #else
 
-#define ENABLE_INTERRUPTS(x)   sti
-#define DISABLE_INTERRUPTS(x)  cli
-
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(x)  pushfq; popq %rax
@@ -119,8 +116,6 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 
 #define INTERRUPT_RETURN   jmp native_iret
 
-#else
-#define INTERRUPT_RETURN   iret
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index def450f46097..a780509186bd 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -719,6 +719,7 @@ extern void default_banner(void);
.if ((~(set)) & mask); pop %reg; .endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT_XXL
 
 #define PV_SAVE_REGS(set)  \
COND_PUSH(set, CLBR_RAX, rax);  \
@@ -744,46 +745,12 @@ extern void default_banner(void);
 #define PARA_PATCH(off)((off) / 8)
 #define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .quad, 8)
 #define PARA_INDIRECT(addr)*addr(%rip)
-#else
-#define PV_SAVE_REGS(set)  \
-   COND_PUSH(set, CLBR_EAX, eax);  \
-   COND_PUSH(set, CLBR_EDI, edi);  \
-   COND_PUSH(set, CLBR_ECX, ecx);  \
-   COND_PUSH(set, CLBR_EDX, edx)
-#define PV_RESTORE_REGS(set)   \
-   COND_POP(set, CLBR_EDX, edx);   \
-   COND_POP(set, CLBR_ECX, ecx);   \
-   COND_POP(set, CLBR_EDI, edi);   \
-   COND_POP(set, CLBR_EAX, eax)
-
-#define PARA_PATCH(off)((off) / 4)
-#define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .long, 4)
-#define PARA_INDIRECT(addr)*%cs:addr
-#endif
 
-#ifdef CONFIG_PARAVIRT_XXL
 #define INTERRUPT_RETURN   \
PARA_SITE(PARA_PATCH(PV_CPU_iret),  \
  ANNOTATE_RETPOLINE_SAFE;  \
  jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
 
-#define DISABLE_INTERRUPTS(clobbers)   \
-   PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable),   \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);\
- ANNOTATE_RETPOLINE_SAFE;  \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable);\
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-
-#define ENABLE_INTERRUPTS(clobbers)\
-   PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable),\
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);\
- ANNOTATE_RETPOLINE_SAFE;  \
- call PARA

[PATCH v7 12/14] x86/paravirt: add new macros PVOP_ALT* supporting pvops in ALTERNATIVEs

2021-03-11 Thread Juergen Gross
Instead of using paravirt patching for custom code sequences, add
support for using ALTERNATIVE handling combined with paravirt call
patching.
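
The next patch in the series then uses these macros like e.g.:

  static inline void wbinvd(void)
  {
          PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
  }

i.e. the default stays the indirect pv_ops call, and everything but Xen PV
gets the single instruction patched in via ALTERNATIVE.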

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- drop PVOP_ALT_VCALL() macro
---
 arch/x86/include/asm/paravirt_types.h | 49 ++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 0afdac83f926..0ed976286d49 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -477,44 +477,91 @@ int paravirt_disable_iospace(void);
ret;\
})
 
+#define PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr, \
+ extra_clbr, ...)  \
+   ({  \
+   PVOP_CALL_ARGS; \
+   PVOP_TEST_NULL(op); \
+   asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),   \
+alt, cond) \
+: call_clbr, ASM_CALL_CONSTRAINT   \
+: paravirt_type(op),   \
+  paravirt_clobber(clbr),  \
+  ##__VA_ARGS__\
+: "memory", "cc" extra_clbr);  \
+   ret;\
+   })
+
 #define __PVOP_CALL(rettype, op, ...)  \
PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY,   \
  PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
 
+#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...)   \
+   PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS,   \
+ ##__VA_ARGS__)
+
 #define __PVOP_CALLEESAVE(rettype, op, ...)\
PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG,  \
  PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
 
+#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \
+   PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \
+ CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+
 #define __PVOP_VCALL(op, ...)  \
(void)PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS,\
   VEXTRA_CLOBBERS, ##__VA_ARGS__)
 
+#define __PVOP_ALT_VCALL(op, alt, cond, ...)   \
+   (void)PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY,  \
+   PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS,   \
+   ##__VA_ARGS__)
+
 #define __PVOP_VCALLEESAVE(op, ...)\
(void)PVOP_CALL(, op.func, CLBR_RET_REG,\
- PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
+   PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
+#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \
+   (void)PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG, \
+   PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
 
 #define PVOP_CALL0(rettype, op)
\
__PVOP_CALL(rettype, op)
 #define PVOP_VCALL0(op)
\
__PVOP_VCALL(op)
+#define PVOP_ALT_CALL0(rettype, op, alt, cond) \
+   __PVOP_ALT_CALL(rettype, op, alt, cond)
+#define PVOP_ALT_VCALL0(op, alt, cond) \
+   __PVOP_ALT_VCALL(op, alt, cond)
 
 #define PVOP_CALLEE0(rettype, op)  \
__PVOP_CALLEESAVE(rettype, op)
 #define PVOP_VCALLEE0(op)  \
__PVOP_VCALLEESAVE(op)
+#define PVOP_ALT_CALLEE0(rettype, op, alt, cond)   \
+   __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond)
+#define PVOP_ALT_VCALLEE0(op, alt, cond)   \
+   __PVOP_ALT_VCALLEESAVE(op, alt, cond)
 
 
 #define PVOP_CALL1(rettype, op, arg1)  \
__PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1))
 #define PVOP_VCALL1(op, arg1)  \
__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALL1(op, arg1, alt, cond)   \
+   __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1))
 
 #define PVOP_CALLE

[PATCH v7 11/14] x86/paravirt: switch iret pvops to ALTERNATIVE

2021-03-11 Thread Juergen Gross
The iret paravirt op is rather special as it is using a jmp instead
of a call instruction. Switch it to ALTERNATIVE.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- use ALTERNATIVE_TERNARY
---
 arch/x86/include/asm/paravirt.h   |  6 +++---
 arch/x86/include/asm/paravirt_types.h |  5 +
 arch/x86/kernel/asm-offsets.c |  5 -
 arch/x86/kernel/paravirt.c| 26 ++
 arch/x86/xen/enlighten_pv.c   |  3 +--
 5 files changed, 7 insertions(+), 38 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a780509186bd..913acf7a0ebf 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -747,9 +747,9 @@ extern void default_banner(void);
 #define PARA_INDIRECT(addr)*addr(%rip)
 
 #define INTERRUPT_RETURN   \
-   PARA_SITE(PARA_PATCH(PV_CPU_iret),  \
- ANNOTATE_RETPOLINE_SAFE;  \
- jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
+   ANNOTATE_RETPOLINE_SAFE;\
+   ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);",\
+   X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;")
 
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(clobbers)\
diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 45bd21647dd8..0afdac83f926 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -151,10 +151,6 @@ struct pv_cpu_ops {
 
u64 (*read_pmc)(int counter);
 
-   /* Normal iret.  Jump to this with the standard iret stack
-  frame set up. */
-   void (*iret)(void);
-
void (*start_context_switch)(struct task_struct *prev);
void (*end_context_switch)(struct task_struct *next);
 #endif
@@ -294,6 +290,7 @@ struct paravirt_patch_template {
 
 extern struct pv_info pv_info;
 extern struct paravirt_patch_template pv_ops;
+extern void (*paravirt_iret)(void);
 
 #define PARAVIRT_PATCH(x)  \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 736508004b30..ecd3fd6993d1 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -61,11 +61,6 @@ static void __used common(void)
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
 #endif
 
-#ifdef CONFIG_PARAVIRT_XXL
-   BLANK();
-   OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
-#endif
-
 #ifdef CONFIG_XEN
BLANK();
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a688edf35e31..9b0f568b0200 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -86,25 +86,6 @@ u64 notrace _paravirt_ident_64(u64 x)
 {
return x;
 }
-
-static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
-  unsigned long addr, unsigned len)
-{
-   struct branch *b = insn_buff;
-   unsigned long delta = (unsigned long)target - (addr+5);
-
-   if (len < 5) {
-#ifdef CONFIG_RETPOLINE
-   WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void 
*)addr);
-#endif
-   return len; /* call too long for patch site */
-   }
-
-   b->opcode = 0xe9;   /* jmp */
-   b->delta = delta;
-
-   return 5;
-}
 #endif
 
 DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
@@ -136,9 +117,6 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insn_buff, len);
 
-   else if (type == PARAVIRT_PATCH(cpu.iret))
-   /* If operation requires a jmp, then jmp */
-   ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
 #endif
else
/* Otherwise call the function. */
@@ -313,8 +291,6 @@ struct paravirt_patch_template pv_ops = {
 
.cpu.load_sp0   = native_load_sp0,
 
-   .cpu.iret   = native_iret,
-
 #ifdef CONFIG_X86_IOPL_IOPERM
.cpu.invalidate_io_bitmap   = native_tss_invalidate_io_bitmap,
.cpu.update_io_bitmap   = native_tss_update_io_bitmap,
@@ -419,6 +395,8 @@ struct paravirt_patch_template pv_ops = {
 NOKPROBE_SYMBOL(native_get_debugreg);
 NOKPROBE_SYMBOL(native_set_debugreg);
 NOKPROBE_SYMBOL(native_load_idt);
+
+void (*paravirt_iret)(void) = native_iret;
 #endif
 
 EXPORT_SYMBOL(pv_ops);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index dc0a337f985b..08dca7bebb30 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlight

[PATCH v7 07/14] x86/alternative: don't open code ALTERNATIVE_TERNARY() in _static_cpu_has()

2021-03-11 Thread Juergen Gross
_static_cpu_has() contains a completely open coded version of
ALTERNATIVE_TERNARY(). Replace that with the macro instead.

Signed-off-by: Juergen Gross 
---
V6:
- new patch
V7:
- moved patch earlier in series (Boris Petkov)
---
 arch/x86/include/asm/cpufeature.h | 41 +++
 1 file changed, 9 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h 
b/arch/x86/include/asm/cpufeature.h
index 1728d4ce5730..16a51e7288d5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 enum cpuid_leafs
 {
@@ -175,39 +176,15 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned 
int bit);
  */
 static __always_inline bool _static_cpu_has(u16 bit)
 {
-   asm_volatile_goto("1: jmp 6f\n"
-"2:\n"
-".skip -(((5f-4f) - (2b-1b)) > 0) * "
-"((5f-4f) - (2b-1b)),0x90\n"
-"3:\n"
-".section .altinstructions,\"a\"\n"
-" .long 1b - .\n"  /* src offset */
-" .long 4f - .\n"  /* repl offset */
-" .word %P[always]\n"  /* always replace */
-" .byte 3b - 1b\n" /* src len */
-" .byte 5f - 4f\n" /* repl len */
-" .byte 3b - 2b\n" /* pad len */
-".previous\n"
-".section .altinstr_replacement,\"ax\"\n"
-"4: jmp %l[t_no]\n"
-"5:\n"
-".previous\n"
-".section .altinstructions,\"a\"\n"
-" .long 1b - .\n"  /* src offset */
-" .long 0\n"   /* no replacement */
-" .word %P[feature]\n" /* feature bit */
-" .byte 3b - 1b\n" /* src len */
-" .byte 0\n"   /* repl len */
-" .byte 0\n"   /* pad len */
-".previous\n"
-".section .altinstr_aux,\"ax\"\n"
-"6:\n"
-" testb %[bitnum],%[cap_byte]\n"
-" jnz %l[t_yes]\n"
-" jmp %l[t_no]\n"
-".previous\n"
+   asm_volatile_goto(
+   ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
+   ".section .altinstr_aux,\"ax\"\n"
+   "6:\n"
+   " testb %[bitnum],%[cap_byte]\n"
+   " jnz %l[t_yes]\n"
+   " jmp %l[t_no]\n"
+   ".previous\n"
 : : [feature]  "i" (bit),
-[always]   "i" (X86_FEATURE_ALWAYS),
 [bitnum]   "i" (1 << (bit & 7)),
 [cap_byte] "m" (((const char 
*)boot_cpu_data.x86_capability)[bit >> 3])
 : : t_yes, t_no);
-- 
2.26.2



[PATCH v7 03/14] static_call: add function to query current function

2021-03-11 Thread Juergen Gross
Some users of paravirtualized functions need to query which function
has been specified in a pv_ops vector element. In order to be able to
switch such paravirtualized functions to static_calls instead, there
needs to be a function to query the function which will be called via
static_call().
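
A minimal usage sketch, assuming the pv_steal_clock static call introduced
by the time pvops conversion earlier in this series (native_steal_clock is
the default registered there):

  /* which function would static_call(pv_steal_clock)() end up calling? */
  if (static_call_query(pv_steal_clock) != native_steal_clock)
          pr_info("steal clock is paravirtualized\n");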

Signed-off-by: Juergen Gross 
---
V7:
- new patch
---
 include/linux/static_call.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 76b881259144..e01b61ab86b1 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -20,6 +20,7 @@
  *   static_call(name)(args...);
  *   static_call_cond(name)(args...);
  *   static_call_update(name, func);
+ *   static_call_query(name);
  *
  * Usage example:
  *
@@ -91,6 +92,10 @@
  *
  *   which will include the required value tests to avoid NULL-pointer
  *   dereferences.
+ *
+ *   To query which function is currently set to be called, use:
+ *
+ *   func = static_call_query(name);
  */
 
 #include 
@@ -118,6 +123,8 @@ extern void arch_static_call_transform(void *site, void 
*tramp, void *func, bool
 STATIC_CALL_TRAMP_ADDR(name), func);   \
 })
 
+#define static_call_query(name) (READ_ONCE(STATIC_CALL_KEY(name).func))
+
 #ifdef CONFIG_HAVE_STATIC_CALL_INLINE
 
 extern int __init static_call_init(void);
@@ -191,6 +198,7 @@ static inline int static_call_init(void) { return 0; }
};  \
ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
 
+
 #define static_call_cond(name) (void)__static_call(name)
 
 static inline
-- 
2.26.2



[PATCH v7 01/14] x86/alternative: merge include files

2021-03-11 Thread Juergen Gross
Merge arch/x86/include/asm/alternative-asm.h into
arch/x86/include/asm/alternative.h in order to make it easier to use
common definitions later.

Signed-off-by: Juergen Gross 
---
V6:
- new patch
V7:
- moved to begin of series (Boris Petkov)
---
 arch/x86/entry/entry_32.S|   2 +-
 arch/x86/entry/vdso/vdso32/system_call.S |   2 +-
 arch/x86/include/asm/alternative-asm.h   | 114 ---
 arch/x86/include/asm/alternative.h   | 112 +-
 arch/x86/include/asm/nospec-branch.h |   1 -
 arch/x86/include/asm/smap.h  |   5 +-
 arch/x86/lib/atomic64_386_32.S   |   2 +-
 arch/x86/lib/atomic64_cx8_32.S   |   2 +-
 arch/x86/lib/copy_page_64.S  |   2 +-
 arch/x86/lib/copy_user_64.S  |   2 +-
 arch/x86/lib/memcpy_64.S |   2 +-
 arch/x86/lib/memmove_64.S|   2 +-
 arch/x86/lib/memset_64.S |   2 +-
 arch/x86/lib/retpoline.S |   2 +-
 14 files changed, 120 insertions(+), 132 deletions(-)
 delete mode 100644 arch/x86/include/asm/alternative-asm.h

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017e6161..4e079f250962 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S 
b/arch/x86/entry/vdso/vdso32/system_call.S
index de1fff7188aa..d6a6080bade0 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -6,7 +6,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 
.text
.globl __kernel_vsyscall
diff --git a/arch/x86/include/asm/alternative-asm.h 
b/arch/x86/include/asm/alternative-asm.h
deleted file mode 100644
index 464034db299f..
--- a/arch/x86/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_ALTERNATIVE_ASM_H
-#define _ASM_X86_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-#include 
-
-#ifdef CONFIG_SMP
-   .macro LOCK_PREFIX
-672:   lock
-   .pushsection .smp_locks,"a"
-   .balign 4
-   .long 672b - .
-   .popsection
-   .endm
-#else
-   .macro LOCK_PREFIX
-   .endm
-#endif
-
-/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-.macro ANNOTATE_IGNORE_ALTERNATIVE
-   .Lannotate_\@:
-   .pushsection .discard.ignore_alts
-   .long .Lannotate_\@ - .
-   .popsection
-.endm
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
-   .long \orig - .
-   .long \alt - .
-   .word \feature
-   .byte \orig_len
-   .byte \alt_len
-   .byte \pad_len
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
-140:
-   \oldinstr
-141:
-   .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
-   .pushsection .altinstructions,"a"
-   altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
-   .popsection
-
-   .pushsection .altinstr_replacement,"ax"
-143:
-   \newinstr
-144:
-   .popsection
-.endm
-
-#define old_len141b-140b
-#define new_len1   144f-143f
-#define new_len2   145f-144f
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b)((a) ^ (((a) ^ (b)) & -(-((a) < (b)
-
-
-/*
- * Same as ALTERNATIVE macro above but for two alternatives. If CPU
- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
- * @feature2, it replaces @oldinstr with @feature2.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
-140:
-   \oldinstr
-141:
-   .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
-   (alt_max_short(new_len1, new_len2) - (old_len)),0x90
-142:
-
-   .pushsection .altinstructions,"a"
-   altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
-   altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
-   .popsection
-
-   .pushsectio

[PATCH v7 08/14] x86: add new features for paravirt patching

2021-03-11 Thread Juergen Gross
To be able to switch paravirt patching from special-cased custom
code sequences to ALTERNATIVE handling, some X86_FEATURE_* bits are
needed as new (artificial) features. This enables having the standard
indirect pv call as the default code and patching that with the non-Xen
custom code sequence via ALTERNATIVE patching later.

Make sure paravirt patching is performed before alternative patching.
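
paravirt_set_cap() (its diff is truncated in this excerpt) essentially just
forces the new synthetic feature bits whenever the corresponding pv op is
not the native one; a sketch under that assumption, using the existing
pv_is_native_*() helpers:

  void __init paravirt_set_cap(void)
  {
          if (!pv_is_native_spin_unlock())
                  setup_force_cpu_cap(X86_FEATURE_PVUNLOCK);

          if (!pv_is_native_vcpu_is_preempted())
                  setup_force_cpu_cap(X86_FEATURE_VCPUPREEMPT);
  }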

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- add comment (Boris Petkov)
- no negative features (Boris Petkov)
V4:
- move paravirt_set_cap() to paravirt-spinlocks.c
---
 arch/x86/include/asm/cpufeatures.h   |  2 ++
 arch/x86/include/asm/paravirt.h  | 10 ++
 arch/x86/kernel/alternative.c| 30 ++--
 arch/x86/kernel/paravirt-spinlocks.c |  9 +
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index cc96e26d69f7..b440c950246d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -236,6 +236,8 @@
 #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table 
access-dirty bit */
 #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports 
the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL( 8*32+19) /* "" VMware prefers 
VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK   ( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT( 8*32+21) /* "" PV 
vcpu_is_preempted function */
 
 /* Intel-defined CPU features, CPUID level 0x0007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE   ( 9*32+ 0) /* RDFSBASE, WRFSBASE, 
RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6408fd0f55ab..def450f46097 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -45,6 +45,10 @@ static inline u64 paravirt_steal_clock(int cpu)
return static_call(pv_steal_clock)(cpu);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init paravirt_set_cap(void);
+#endif
+
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void)
 {
@@ -809,5 +813,11 @@ static inline void paravirt_arch_exit_mmap(struct 
mm_struct *mm)
 {
 }
 #endif
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void paravirt_set_cap(void)
+{
+}
+#endif
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 133b549dc091..76ad4ce454c0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include <asm/paravirt.h>
 
 int __read_mostly alternatives_patched;
 
@@ -733,6 +734,33 @@ void __init alternative_instructions(void)
 * patching.
 */
 
+   /*
+* Paravirt patching and alternative patching can be combined to
+* replace a function call with a short direct code sequence (e.g.
+* by setting a constant return value instead of doing that in an
+* external function).
+* In order to make this work the following sequence is required:
+* 1. set (artificial) features depending on used paravirt
+*functions which can later influence alternative patching
+* 2. apply paravirt patching (generally replacing an indirect
+*function call with a direct one)
+* 3. apply alternative patching (e.g. replacing a direct function
+*call with a custom code sequence)
+* Doing paravirt patching after alternative patching would clobber
+* the optimization of the custom code with a function call again.
+*/
+   paravirt_set_cap();
+
+   /*
+* First patch paravirt functions, such that we overwrite the indirect
+* call with the direct call.
+*/
+   apply_paravirt(__parainstructions, __parainstructions_end);
+
+   /*
+* Then patch alternatives, such that those paravirt calls that are in
+* alternatives can be overwritten by their immediate fragments.
+*/
apply_alternatives(__alt_instructions, __alt_instructions_end);
 
 #ifdef CONFIG_SMP
@@ -751,8 +779,6 @@ void __init alternative_instructions(void)
}
 #endif
 
-   apply_paravirt(__parainstructions, __parainstructions_end);
-
restart_nmi();
alternatives_patched = 1;
 }
diff --git a/arch/x86/kernel/paravirt-spinlocks.c 
b/arch/x86/kernel/paravirt-spinlocks.c
index 4f75d0cf6305..9e1ea99ad9df 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -32,3 +32,12 @@ bool pv_is_native_vcpu_is_preempted(void)
return pv_ops.lock.vcpu_is_preempted.func ==
__raw_callee_save___native_vcpu_is_preempted;
 }
+
+void __init paravirt_set_cap(void)

[PATCH v7 02/14] static_call: move struct static_call_key definition to static_call_types.h

2021-03-11 Thread Juergen Gross
Having the definition of static_call() in static_call_types.h makes
no sense as long as struct static_call_key isn't defined there, since
the generic implementation of static_call() references this structure.

So move the definition of struct static_call_key to static_call_types.h.
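
The dependency is easiest to see in the generic fallback case, where
static_call() dereferences the key directly (as in the hunks below):

  struct static_call_key {
          void *func;
  };

  #define static_call(name)  \
          ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))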

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V5:
- new patch
---
 include/linux/static_call.h | 18 --
 include/linux/static_call_types.h   | 18 ++
 tools/include/linux/static_call_types.h | 18 ++
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 85ecc789f4ff..76b881259144 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -128,16 +128,6 @@ struct static_call_mod {
struct static_call_site *sites;
 };
 
-struct static_call_key {
-   void *func;
-   union {
-   /* bit 0: 0 = mods, 1 = sites */
-   unsigned long type;
-   struct static_call_mod *mods;
-   struct static_call_site *sites;
-   };
-};
-
 /* For finding the key associated with a trampoline */
 struct static_call_tramp_key {
s32 tramp;
@@ -187,10 +177,6 @@ extern long __static_call_return0(void);
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-   void *func;
-};
-
 #define __DEFINE_STATIC_CALL(name, _func, _func_init)  \
DECLARE_STATIC_CALL(name, _func);   \
struct static_call_key STATIC_CALL_KEY(name) = {\
@@ -243,10 +229,6 @@ static inline long __static_call_return0(void)
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-   void *func;
-};
-
 static inline long __static_call_return0(void)
 {
return 0;
diff --git a/include/linux/static_call_types.h 
b/include/linux/static_call_types.h
index ae5662d368b9..5a00b8b2cf9f 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
__raw_static_call(name);\
 })
 
+struct static_call_key {
+   void *func;
+   union {
+   /* bit 0: 0 = mods, 1 = sites */
+   unsigned long type;
+   struct static_call_mod *mods;
+   struct static_call_site *sites;
+   };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)__raw_static_call(name)
 
+struct static_call_key {
+   void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+   void *func;
+};
+
 #define static_call(name)  \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 
diff --git a/tools/include/linux/static_call_types.h 
b/tools/include/linux/static_call_types.h
index ae5662d368b9..5a00b8b2cf9f 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
__raw_static_call(name);\
 })
 
+struct static_call_key {
+   void *func;
+   union {
+   /* bit 0: 0 = mods, 1 = sites */
+   unsigned long type;
+   struct static_call_mod *mods;
+   struct static_call_site *sites;
+   };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)__raw_static_call(name)
 
+struct static_call_key {
+   void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+   void *func;
+};
+
 #define static_call(name)  \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 
-- 
2.26.2



[tip: x86/alternatives] x86/alternative: Drop unused feature parameter from ALTINSTR_REPLACEMENT()

2021-03-09 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the x86/alternatives branch of tip:

Commit-ID: db16e07269c2b4346e4332e43f04e447ef14fd2f
Gitweb:
https://git.kernel.org/tip/db16e07269c2b4346e4332e43f04e447ef14fd2f
Author:Juergen Gross 
AuthorDate:Tue, 09 Mar 2021 14:48:04 +01:00
Committer: Borislav Petkov 
CommitterDate: Tue, 09 Mar 2021 20:08:28 +01:00

x86/alternative: Drop unused feature parameter from ALTINSTR_REPLACEMENT()

The macro ALTINSTR_REPLACEMENT() doesn't make use of the feature
parameter, so drop it.

Signed-off-by: Juergen Gross 
Signed-off-by: Borislav Petkov 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210309134813.23912-4-jgr...@suse.com
---
 arch/x86/include/asm/alternative.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 13adca3..5753fb2 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
" .byte " alt_rlen(num) "\n"/* replacement len */ \
" .byte " alt_pad_len "\n"  /* pad len */
 
-#define ALTINSTR_REPLACEMENT(newinstr, feature, num)   /* replacement */   
\
+#define ALTINSTR_REPLACEMENT(newinstr, num)/* replacement */   
\
"# ALT: replacement " #num "\n" 
\
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
 
@@ -161,7 +161,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_ENTRY(feature, 1)  \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n"  \
-   ALTINSTR_REPLACEMENT(newinstr, feature, 1)  \
+   ALTINSTR_REPLACEMENT(newinstr, 1)   \
".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
@@ -171,8 +171,8 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_ENTRY(feature2, 2) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n"  \
-   ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)\
-   ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)\
+   ALTINSTR_REPLACEMENT(newinstr1, 1)  \
+   ALTINSTR_REPLACEMENT(newinstr2, 2)  \
".popsection\n"
 
 #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, 
feat3) \
@@ -183,9 +183,9 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_ENTRY(feat3, 3)
\
".popsection\n" 
\
".pushsection .altinstr_replacement, \"ax\"\n"  
\
-   ALTINSTR_REPLACEMENT(newinsn1, feat1, 1)
\
-   ALTINSTR_REPLACEMENT(newinsn2, feat2, 2)
\
-   ALTINSTR_REPLACEMENT(newinsn3, feat3, 3)
\
+   ALTINSTR_REPLACEMENT(newinsn1, 1)   
\
+   ALTINSTR_REPLACEMENT(newinsn2, 2)   
\
+   ALTINSTR_REPLACEMENT(newinsn3, 3)   
\
".popsection\n"
 
 /*


[PATCH v6 13/12] x86/alternative: merge include files

2021-03-09 Thread Juergen Gross
Merge arch/x86/include/asm/alternative-asm.h into
arch/x86/include/asm/alternative.h in order to make it easier to use
common definitions later.

Signed-off-by: Juergen Gross 
---
 arch/x86/entry/entry_32.S|   2 +-
 arch/x86/entry/vdso/vdso32/system_call.S |   2 +-
 arch/x86/include/asm/alternative-asm.h   | 121 ---
 arch/x86/include/asm/alternative.h   | 121 +--
 arch/x86/include/asm/nospec-branch.h |   1 -
 arch/x86/include/asm/smap.h  |   5 +-
 arch/x86/lib/atomic64_386_32.S   |   2 +-
 arch/x86/lib/atomic64_cx8_32.S   |   2 +-
 arch/x86/lib/copy_page_64.S  |   2 +-
 arch/x86/lib/copy_user_64.S  |   2 +-
 arch/x86/lib/memcpy_64.S |   2 +-
 arch/x86/lib/memmove_64.S|   2 +-
 arch/x86/lib/memset_64.S |   2 +-
 arch/x86/lib/retpoline.S |   2 +-
 14 files changed, 126 insertions(+), 142 deletions(-)
 delete mode 100644 arch/x86/include/asm/alternative-asm.h

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 765487e57d6e..96f084868ec7 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
 #include 
 #include 
 #include 
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include 
 #include 
 #include 
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S 
b/arch/x86/entry/vdso/vdso32/system_call.S
index de1fff7188aa..d6a6080bade0 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -6,7 +6,7 @@
 #include 
 #include 
 #include 
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 
.text
.globl __kernel_vsyscall
diff --git a/arch/x86/include/asm/alternative-asm.h 
b/arch/x86/include/asm/alternative-asm.h
deleted file mode 100644
index 80bc6b533358..
--- a/arch/x86/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_ALTERNATIVE_ASM_H
-#define _ASM_X86_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-#include 
-
-#define ALTINSTR_FLAG_INV  (1 << 15)
-#define ALT_NOT(feat)  ((feat) | ALTINSTR_FLAG_INV)
-
-#ifdef CONFIG_SMP
-   .macro LOCK_PREFIX
-672:   lock
-   .pushsection .smp_locks,"a"
-   .balign 4
-   .long 672b - .
-   .popsection
-   .endm
-#else
-   .macro LOCK_PREFIX
-   .endm
-#endif
-
-/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-.macro ANNOTATE_IGNORE_ALTERNATIVE
-   .Lannotate_\@:
-   .pushsection .discard.ignore_alts
-   .long .Lannotate_\@ - .
-   .popsection
-.endm
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
-   .long \orig - .
-   .long \alt - .
-   .word \feature
-   .byte \orig_len
-   .byte \alt_len
-   .byte \pad_len
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
-140:
-   \oldinstr
-141:
-   .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
-   .pushsection .altinstructions,"a"
-   altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
-   .popsection
-
-   .pushsection .altinstr_replacement,"ax"
-143:
-   \newinstr
-144:
-   .popsection
-.endm
-
-#define old_len141b-140b
-#define new_len1   144f-143f
-#define new_len2   145f-144f
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b)((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-
-
-/*
- * Same as ALTERNATIVE macro above but for two alternatives. If CPU
- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
- * @feature2, it replaces @oldinstr with @feature2.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
-140:
-   \oldinstr
-141:
-   .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
-   (alt_max_short(new_len1, new_len2) - (old_len)),0x90
-142:
-
-   .pushsection .altinstructions,"a"
-   altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
-   altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
-   .p

[PATCH v6 14/12] x86/alternative: don't open code ALTERNATIVE_TERNARY() in _static_cpu_has()

2021-03-09 Thread Juergen Gross
_static_cpu_has() contains a completely open coded version of
ALTERNATIVE_TERNARY(). Replace that with the macro instead.

Signed-off-by: Juergen Gross 
---
 arch/x86/include/asm/cpufeature.h | 41 +++
 1 file changed, 9 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h 
b/arch/x86/include/asm/cpufeature.h
index 1728d4ce5730..16a51e7288d5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include <asm/alternative.h>
 
 enum cpuid_leafs
 {
@@ -175,39 +176,15 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned 
int bit);
  */
 static __always_inline bool _static_cpu_has(u16 bit)
 {
-   asm_volatile_goto("1: jmp 6f\n"
-"2:\n"
-".skip -(((5f-4f) - (2b-1b)) > 0) * "
-"((5f-4f) - (2b-1b)),0x90\n"
-"3:\n"
-".section .altinstructions,\"a\"\n"
-" .long 1b - .\n"  /* src offset */
-" .long 4f - .\n"  /* repl offset */
-" .word %P[always]\n"  /* always replace */
-" .byte 3b - 1b\n" /* src len */
-" .byte 5f - 4f\n" /* repl len */
-" .byte 3b - 2b\n" /* pad len */
-".previous\n"
-".section .altinstr_replacement,\"ax\"\n"
-"4: jmp %l[t_no]\n"
-"5:\n"
-".previous\n"
-".section .altinstructions,\"a\"\n"
-" .long 1b - .\n"  /* src offset */
-" .long 0\n"   /* no replacement */
-" .word %P[feature]\n" /* feature bit */
-" .byte 3b - 1b\n" /* src len */
-" .byte 0\n"   /* repl len */
-" .byte 0\n"   /* pad len */
-".previous\n"
-".section .altinstr_aux,\"ax\"\n"
-"6:\n"
-" testb %[bitnum],%[cap_byte]\n"
-" jnz %l[t_yes]\n"
-" jmp %l[t_no]\n"
-".previous\n"
+   asm_volatile_goto(
+   ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
+   ".section .altinstr_aux,\"ax\"\n"
+   "6:\n"
+   " testb %[bitnum],%[cap_byte]\n"
+   " jnz %l[t_yes]\n"
+   " jmp %l[t_no]\n"
+   ".previous\n"
 : : [feature]  "i" (bit),
-[always]   "i" (X86_FEATURE_ALWAYS),
 [bitnum]   "i" (1 << (bit & 7)),
 [cap_byte] "m" (((const char 
*)boot_cpu_data.x86_capability)[bit >> 3])
 : : t_yes, t_no);
-- 
2.26.2



[PATCH v6 11/12] x86/paravirt: switch functions with custom code to ALTERNATIVE

2021-03-09 Thread Juergen Gross
Instead of using paravirt patching for custom code sequences use
ALTERNATIVE for the functions with custom code replacements.

Instead of patching a ud2 instruction for unpopulated vector entries
into the caller site, use a simple function just calling BUG() as a
replacement.

Simplify the register defines for assembler paravirt calling, as there
isn't much usage left.
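
The replacement for the unpopulated entries can be as simple as the
sketch below (the function name is only an assumption here; the idea
is what matters):

  static void paravirt_BUG(void)
  {
          BUG();
  }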

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V4:
- fixed SAVE_FLAGS() (kernel test robot)
- added assembler paravirt cleanup
---
 arch/x86/entry/entry_64.S |   2 +-
 arch/x86/include/asm/irqflags.h   |   2 +-
 arch/x86/include/asm/paravirt.h   | 101 +-
 arch/x86/include/asm/paravirt_types.h |   6 --
 arch/x86/kernel/paravirt.c|  16 ++--
 arch/x86/kernel/paravirt_patch.c  |  88 --
 6 files changed, 58 insertions(+), 157 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 400908dff42e..12e2e3cd58be 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -305,7 +305,7 @@ SYM_CODE_END(ret_from_fork)
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
pushq %rax
-   SAVE_FLAGS(CLBR_RAX)
+   SAVE_FLAGS
testl $X86_EFLAGS_IF, %eax
jz .Lokay_\@
ud2
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index a0efbcd24b86..c5ce9845c999 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -111,7 +111,7 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(x)  pushfq; popq %rax
+#define SAVE_FLAGS pushfq; popq %rax
 #endif
 
 #define INTERRUPT_RETURN   jmp native_iret
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 36cd71fa097f..b32b408958e8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -137,7 +137,9 @@ static inline void write_cr0(unsigned long x)
 
 static inline unsigned long read_cr2(void)
 {
-   return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
+   return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
+   "mov %%cr2, %%rax;",
+   ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void write_cr2(unsigned long x)
@@ -147,12 +149,14 @@ static inline void write_cr2(unsigned long x)
 
 static inline unsigned long __read_cr3(void)
 {
-   return PVOP_CALL0(unsigned long, mmu.read_cr3);
+   return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
+ "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void write_cr3(unsigned long x)
 {
-   PVOP_VCALL1(mmu.write_cr3, x);
+   PVOP_ALT_VCALL1(mmu.write_cr3, x,
+   "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline void __write_cr4(unsigned long x)
@@ -172,7 +176,7 @@ static inline void halt(void)
 
 static inline void wbinvd(void)
 {
-   PVOP_VCALL0(cpu.wbinvd);
+   PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline u64 paravirt_read_msr(unsigned msr)
@@ -386,22 +390,28 @@ static inline void paravirt_release_p4d(unsigned long pfn)
 
 static inline pte_t __pte(pteval_t val)
 {
-   return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) };
+   return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
 }
 
 static inline pteval_t pte_val(pte_t pte)
 {
-   return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
+   return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
+   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 static inline pgd_t __pgd(pgdval_t val)
 {
-   return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) };
+   return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
 }
 
 static inline pgdval_t pgd_val(pgd_t pgd)
 {
-   return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
+   return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
+   "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
 }
 
 #define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -434,12 +444,15 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 
 static inline pmd_t __pmd(pmdval_t val)
 {
-   return (pmd_t) { PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val) };
+   return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
+ "mov %%rdi, %%rax",
+

[PATCH v6 10/12] x86/paravirt: add new macros PVOP_ALT* supporting pvops in ALTERNATIVEs

2021-03-09 Thread Juergen Gross
Instead of using paravirt patching for custom code sequences add
support for using ALTERNATIVE handling combined with paravirt call
patching.
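
As a usage sketch, a pvop call site converted by a later patch of this
series then looks like this (taken from the read_cr2() conversion):

  static inline unsigned long read_cr2(void)
  {
          return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
                                  "mov %%cr2, %%rax;",
                                  ALT_NOT(X86_FEATURE_XENPV));
  }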

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- drop PVOP_ALT_VCALL() macro
---
 arch/x86/include/asm/paravirt_types.h | 49 ++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 0afdac83f926..0ed976286d49 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -477,44 +477,91 @@ int paravirt_disable_iospace(void);
ret;\
})
 
+#define PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr, \
+ extra_clbr, ...)  \
+   ({  \
+   PVOP_CALL_ARGS; \
+   PVOP_TEST_NULL(op); \
+   asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),   \
+alt, cond) \
+: call_clbr, ASM_CALL_CONSTRAINT   \
+: paravirt_type(op),   \
+  paravirt_clobber(clbr),  \
+  ##__VA_ARGS__\
+: "memory", "cc" extra_clbr);  \
+   ret;\
+   })
+
 #define __PVOP_CALL(rettype, op, ...)  \
PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY,   \
  PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
 
+#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...)   \
+   PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS,   \
+ ##__VA_ARGS__)
+
 #define __PVOP_CALLEESAVE(rettype, op, ...)\
PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG,  \
  PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
 
+#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \
+   PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \
+ CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+
 #define __PVOP_VCALL(op, ...)  \
(void)PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS,\
   VEXTRA_CLOBBERS, ##__VA_ARGS__)
 
+#define __PVOP_ALT_VCALL(op, alt, cond, ...)   \
+   (void)PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY,  \
+   PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS,   \
+   ##__VA_ARGS__)
+
 #define __PVOP_VCALLEESAVE(op, ...)\
(void)PVOP_CALL(, op.func, CLBR_RET_REG,\
- PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
+   PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
+#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \
+   (void)PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG, \
+   PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
 
 #define PVOP_CALL0(rettype, op)
\
__PVOP_CALL(rettype, op)
 #define PVOP_VCALL0(op)
\
__PVOP_VCALL(op)
+#define PVOP_ALT_CALL0(rettype, op, alt, cond) \
+   __PVOP_ALT_CALL(rettype, op, alt, cond)
+#define PVOP_ALT_VCALL0(op, alt, cond) \
+   __PVOP_ALT_VCALL(op, alt, cond)
 
 #define PVOP_CALLEE0(rettype, op)  \
__PVOP_CALLEESAVE(rettype, op)
 #define PVOP_VCALLEE0(op)  \
__PVOP_VCALLEESAVE(op)
+#define PVOP_ALT_CALLEE0(rettype, op, alt, cond)   \
+   __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond)
+#define PVOP_ALT_VCALLEE0(op, alt, cond)   \
+   __PVOP_ALT_VCALLEESAVE(op, alt, cond)
 
 
 #define PVOP_CALL1(rettype, op, arg1)  \
__PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1))
 #define PVOP_VCALL1(op, arg1)  \
__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALL1(op, arg1, alt, cond)   \
+   __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1))
 
 #define PVOP_CALLE

[PATCH v6 05/12] x86/alternative: support ALTERNATIVE_TERNARY

2021-03-09 Thread Juergen Gross
Add ALTERNATIVE_TERNARY support for replacing an initial instruction
with either of two instructions depending on a feature:

  ALTERNATIVE_TERNARY "default_instr", FEATURE_NR,
  "feature_on_instr", "feature_off_instr"

which will start with "default_instr" and at patch time will, depending
on FEATURE_NR being set or not, patch that with either
"feature_on_instr" or "feature_off_instr".

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- new patch
V4:
- use X86_FEATURE_ALWAYS instead of negated feature (Boris Petkov)
- unfortunately this isn't enough to get rid of the "not feature"
  support, as this is needed in the patch "x86/paravirt: switch
  functions with custom code to ALTERNATIVE", too
V5:
- carve out the "not feature" part
---
 arch/x86/include/asm/alternative-asm.h | 4 
 arch/x86/include/asm/alternative.h | 6 ++
 2 files changed, 10 insertions(+)

diff --git a/arch/x86/include/asm/alternative-asm.h 
b/arch/x86/include/asm/alternative-asm.h
index 3965daf0460e..80bc6b533358 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -112,6 +112,10 @@
.popsection
 .endm
 
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr1, newinstr2)   \
+   ALTERNATIVE_2 oldinstr, newinstr2, X86_FEATURE_ALWAYS,  \
+   newinstr1, feature
+
 #endif  /*  __ASSEMBLY__  */
 
 #endif /* _ASM_X86_ALTERNATIVE_ASM_H */
diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 89889618ae01..4fb844e29d26 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -178,6 +178,9 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_REPLACEMENT(newinstr2, 2)  \
".popsection\n"
 
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr1, newinstr2)   \
+   ALTERNATIVE_2(oldinstr, newinstr2, X86_FEATURE_ALWAYS, newinstr1, 
feature)
+
 #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, 
feat3) \
OLDINSTR_3(oldinsn, 1, 2, 3)
\
".pushsection .altinstructions,\"a\"\n" 
\
@@ -209,6 +212,9 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, 
newinstr2, feature2) ::: "memory")
 
+#define alternative_ternary(oldinstr, feature, newinstr1, newinstr2)   \
+   asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr1, 
newinstr2) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *
-- 
2.26.2



[PATCH v6 07/12] x86/paravirt: remove no longer needed 32-bit pvops cruft

2021-03-09 Thread Juergen Gross
PVOP_VCALL4() is only used for Xen PV, while PVOP_CALL4() isn't used
at all. Keep PVOP_CALL4() for 64 bits due to symmetry reasons.

This allows removing the 32-bit definitions of those macros, leading
to a substantial simplification of the paravirt macros, as those were
the only ones needing non-empty "pre" and "post" parameters.

PVOP_CALLEE2() and PVOP_VCALLEE2() are used nowhere, so remove them.

Another no longer needed case is special handling of return types
larger than unsigned long. Replace that with a BUILD_BUG_ON().

DISABLE_INTERRUPTS() is used in 32-bit code only, so it can just be
replaced by cli.

INTERRUPT_RETURN in 32-bit code can be replaced by iret.

ENABLE_INTERRUPTS is used nowhere, so it can be removed.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
 arch/x86/entry/entry_32.S |   4 +-
 arch/x86/include/asm/irqflags.h   |   5 --
 arch/x86/include/asm/paravirt.h   |  35 +---
 arch/x86/include/asm/paravirt_types.h | 112 --
 arch/x86/kernel/asm-offsets.c |   2 -
 5 files changed, 35 insertions(+), 123 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017e6161..765487e57d6e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -430,7 +430,7 @@
 * will soon execute iret and the tracer was already set to
 * the irqstate after the IRET:
 */
-   DISABLE_INTERRUPTS(CLBR_ANY)
+   cli
lss (%esp), %esp/* switch to espfix segment */
 .Lend_\@:
 #endif /* CONFIG_X86_ESPFIX32 */
@@ -1077,7 +1077,7 @@ restore_all_switch_stack:
 * when returning from IPI handler and when returning from
 * scheduler to user-space.
 */
-   INTERRUPT_RETURN
+   iret
 
 .section .fixup, "ax"
 SYM_CODE_START(asm_iret_error)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 144d70ea4393..a0efbcd24b86 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -109,9 +109,6 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 }
 #else
 
-#define ENABLE_INTERRUPTS(x)   sti
-#define DISABLE_INTERRUPTS(x)  cli
-
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(x)  pushfq; popq %rax
@@ -119,8 +116,6 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 
 #define INTERRUPT_RETURN   jmp native_iret
 
-#else
-#define INTERRUPT_RETURN   iret
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8c354099d9c3..c6496a82fad1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -721,6 +721,7 @@ extern void default_banner(void);
.if ((~(set)) & mask); pop %reg; .endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT_XXL
 
 #define PV_SAVE_REGS(set)  \
COND_PUSH(set, CLBR_RAX, rax);  \
@@ -746,46 +747,12 @@ extern void default_banner(void);
 #define PARA_PATCH(off)((off) / 8)
 #define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .quad, 8)
 #define PARA_INDIRECT(addr)*addr(%rip)
-#else
-#define PV_SAVE_REGS(set)  \
-   COND_PUSH(set, CLBR_EAX, eax);  \
-   COND_PUSH(set, CLBR_EDI, edi);  \
-   COND_PUSH(set, CLBR_ECX, ecx);  \
-   COND_PUSH(set, CLBR_EDX, edx)
-#define PV_RESTORE_REGS(set)   \
-   COND_POP(set, CLBR_EDX, edx);   \
-   COND_POP(set, CLBR_ECX, ecx);   \
-   COND_POP(set, CLBR_EDI, edi);   \
-   COND_POP(set, CLBR_EAX, eax)
-
-#define PARA_PATCH(off)((off) / 4)
-#define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .long, 4)
-#define PARA_INDIRECT(addr)*%cs:addr
-#endif
 
-#ifdef CONFIG_PARAVIRT_XXL
 #define INTERRUPT_RETURN   \
PARA_SITE(PARA_PATCH(PV_CPU_iret),  \
  ANNOTATE_RETPOLINE_SAFE;  \
  jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
 
-#define DISABLE_INTERRUPTS(clobbers)   \
-   PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable),   \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);\
- ANNOTATE_RETPOLINE_SAFE;  \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable);\
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-
-#define ENABLE_INTERRUPTS(clobbers)\
-   PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable),\
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);\
- ANNOTATE_RETPOLINE_SAFE;  \
- call PARA

[PATCH v6 00/12] x86: major paravirt cleanup

2021-03-09 Thread Juergen Gross
This is a major cleanup of the paravirt infrastructure aiming at
eliminating all custom code patching via paravirt patching.

This is achieved by using ALTERNATIVE instead, which also gives
objtool access to the patched-in instructions.

In order to remove most of the 32-bit special handling from pvops, the
time-related operations are switched to use static_call() instead.

At the end of this series all paravirt patching has to do is to
replace indirect calls with direct ones. In a further step this could
be switched to static_call(), too.

Changes in V6:
- switched back to "not" bit in feature value for "not feature"
- other minor comments addressed

Changes in V5:
- patches 1-5 of V4 dropped, as already applied
- new patches 1+3
- fixed patch 2
- split V4 patch 8 into patches 4+5
- use flag byte instead of negative feature bit for "not feature"

Changes in V4:
- fixed several build failures
- removed objtool patch, as objtool patches are in tip now
- added patch 1 for making usage of static_call easier
- even more cleanup

Changes in V3:
- added patches 7 and 12
- addressed all comments

Changes in V2:
- added patches 5-12

Juergen Gross (12):
  static_call: move struct static_call_key definition to
static_call_types.h
  x86/paravirt: switch time pvops functions to use static_call()
  x86/alternative: drop feature parameter from ALTINSTR_REPLACEMENT()
  x86/alternative: support not-feature
  x86/alternative: support ALTERNATIVE_TERNARY
  x86: add new features for paravirt patching
  x86/paravirt: remove no longer needed 32-bit pvops cruft
  x86/paravirt: simplify paravirt macros
  x86/paravirt: switch iret pvops to ALTERNATIVE
  x86/paravirt: add new macros PVOP_ALT* supporting pvops in
ALTERNATIVEs
  x86/paravirt: switch functions with custom code to ALTERNATIVE
  x86/paravirt: have only one paravirt patch function

 arch/arm/include/asm/paravirt.h |  14 +-
 arch/arm/kernel/paravirt.c  |   9 +-
 arch/arm64/include/asm/paravirt.h   |  14 +-
 arch/arm64/kernel/paravirt.c|  13 +-
 arch/x86/Kconfig|   1 +
 arch/x86/entry/entry_32.S   |   4 +-
 arch/x86/entry/entry_64.S   |   2 +-
 arch/x86/include/asm/alternative-asm.h  |   7 +
 arch/x86/include/asm/alternative.h  |  23 ++-
 arch/x86/include/asm/cpufeatures.h  |   2 +
 arch/x86/include/asm/irqflags.h |   7 +-
 arch/x86/include/asm/mshyperv.h |   2 +-
 arch/x86/include/asm/paravirt.h | 169 +--
 arch/x86/include/asm/paravirt_types.h   | 210 +---
 arch/x86/kernel/Makefile|   3 +-
 arch/x86/kernel/alternative.c   |  51 +-
 arch/x86/kernel/asm-offsets.c   |   7 -
 arch/x86/kernel/cpu/vmware.c|   5 +-
 arch/x86/kernel/kvm.c   |   2 +-
 arch/x86/kernel/kvmclock.c  |   2 +-
 arch/x86/kernel/paravirt-spinlocks.c|   9 +
 arch/x86/kernel/paravirt.c  |  78 +++--
 arch/x86/kernel/paravirt_patch.c|  99 ---
 arch/x86/kernel/tsc.c   |   2 +-
 arch/x86/xen/enlighten_pv.c |   4 +-
 arch/x86/xen/time.c |  26 +--
 drivers/xen/time.c  |   3 +-
 include/linux/static_call.h |  18 --
 include/linux/static_call_types.h   |  18 ++
 tools/include/linux/static_call_types.h |  18 ++
 30 files changed, 348 insertions(+), 474 deletions(-)
 delete mode 100644 arch/x86/kernel/paravirt_patch.c

-- 
2.26.2



[PATCH v6 09/12] x86/paravirt: switch iret pvops to ALTERNATIVE

2021-03-09 Thread Juergen Gross
The iret paravirt op is rather special as it is using a jmp instead
of a call instruction. Switch it to ALTERNATIVE.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- use ALTERNATIVE_TERNARY
---
 arch/x86/include/asm/paravirt.h   |  6 +++---
 arch/x86/include/asm/paravirt_types.h |  5 +
 arch/x86/kernel/asm-offsets.c |  5 -
 arch/x86/kernel/paravirt.c| 26 ++
 arch/x86/xen/enlighten_pv.c   |  3 +--
 5 files changed, 7 insertions(+), 38 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c6496a82fad1..36cd71fa097f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -749,9 +749,9 @@ extern void default_banner(void);
 #define PARA_INDIRECT(addr)*addr(%rip)
 
 #define INTERRUPT_RETURN   \
-   PARA_SITE(PARA_PATCH(PV_CPU_iret),  \
- ANNOTATE_RETPOLINE_SAFE;  \
- jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
+   ANNOTATE_RETPOLINE_SAFE;\
+   ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);",\
+   X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;")
 
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(clobbers)\
diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 45bd21647dd8..0afdac83f926 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -151,10 +151,6 @@ struct pv_cpu_ops {
 
u64 (*read_pmc)(int counter);
 
-   /* Normal iret.  Jump to this with the standard iret stack
-  frame set up. */
-   void (*iret)(void);
-
void (*start_context_switch)(struct task_struct *prev);
void (*end_context_switch)(struct task_struct *next);
 #endif
@@ -294,6 +290,7 @@ struct paravirt_patch_template {
 
 extern struct pv_info pv_info;
 extern struct paravirt_patch_template pv_ops;
+extern void (*paravirt_iret)(void);
 
 #define PARAVIRT_PATCH(x)  \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 736508004b30..ecd3fd6993d1 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -61,11 +61,6 @@ static void __used common(void)
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
 #endif
 
-#ifdef CONFIG_PARAVIRT_XXL
-   BLANK();
-   OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
-#endif
-
 #ifdef CONFIG_XEN
BLANK();
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 44e5b0fe28cb..0553a339d850 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -86,25 +86,6 @@ u64 notrace _paravirt_ident_64(u64 x)
 {
return x;
 }
-
-static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
-  unsigned long addr, unsigned len)
-{
-   struct branch *b = insn_buff;
-   unsigned long delta = (unsigned long)target - (addr+5);
-
-   if (len < 5) {
-#ifdef CONFIG_RETPOLINE
-   WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void 
*)addr);
-#endif
-   return len; /* call too long for patch site */
-   }
-
-   b->opcode = 0xe9;   /* jmp */
-   b->delta = delta;
-
-   return 5;
-}
 #endif
 
 DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
@@ -136,9 +117,6 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insn_buff, len);
 
-   else if (type == PARAVIRT_PATCH(cpu.iret))
-   /* If operation requires a jmp, then jmp */
-   ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
 #endif
else
/* Otherwise call the function. */
@@ -316,8 +294,6 @@ struct paravirt_patch_template pv_ops = {
 
.cpu.load_sp0   = native_load_sp0,
 
-   .cpu.iret   = native_iret,
-
 #ifdef CONFIG_X86_IOPL_IOPERM
.cpu.invalidate_io_bitmap   = native_tss_invalidate_io_bitmap,
.cpu.update_io_bitmap   = native_tss_update_io_bitmap,
@@ -422,6 +398,8 @@ struct paravirt_patch_template pv_ops = {
 NOKPROBE_SYMBOL(native_get_debugreg);
 NOKPROBE_SYMBOL(native_set_debugreg);
 NOKPROBE_SYMBOL(native_load_idt);
+
+void (*paravirt_iret)(void) = native_iret;
 #endif
 
 EXPORT_SYMBOL(pv_ops);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index dc0a337f985b..08dca7bebb30 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlight

[PATCH v6 06/12] x86: add new features for paravirt patching

2021-03-09 Thread Juergen Gross
To be able to switch paravirt patching from special-cased custom
code sequences to ALTERNATIVE handling, some new X86_FEATURE_* flags
are needed. This makes it possible to have the standard indirect pv
call as the default code and to patch that with the non-Xen custom
code sequence via ALTERNATIVE patching later.

Make sure paravirt patching is performed before alternative patching.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- add comment (Boris Petkov)
- no negative features (Boris Petkov)
V4:
- move paravirt_set_cap() to paravirt-spinlocks.c
---
 arch/x86/include/asm/cpufeatures.h   |  2 ++
 arch/x86/include/asm/paravirt.h  | 10 ++
 arch/x86/kernel/alternative.c| 30 ++--
 arch/x86/kernel/paravirt-spinlocks.c |  9 +
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index cc96e26d69f7..b440c950246d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -236,6 +236,8 @@
 #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table 
access-dirty bit */
 #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports 
the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL( 8*32+19) /* "" VMware prefers 
VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK   ( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT( 8*32+21) /* "" PV 
vcpu_is_preempted function */
 
 /* Intel-defined CPU features, CPUID level 0x0007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE   ( 9*32+ 0) /* RDFSBASE, WRFSBASE, 
RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 1e45b46fae84..8c354099d9c3 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -47,6 +47,10 @@ static inline u64 paravirt_steal_clock(int cpu)
return static_call(pv_steal_clock)(cpu);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init paravirt_set_cap(void);
+#endif
+
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void)
 {
@@ -811,5 +815,11 @@ static inline void paravirt_arch_exit_mmap(struct 
mm_struct *mm)
 {
 }
 #endif
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void paravirt_set_cap(void)
+{
+}
+#endif
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d8e669a1546f..1f12901e75f2 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include <asm/paravirt.h>
 
 int __read_mostly alternatives_patched;
 
@@ -732,6 +733,33 @@ void __init alternative_instructions(void)
 * patching.
 */
 
+   /*
+* Paravirt patching and alternative patching can be combined to
+* replace a function call with a short direct code sequence (e.g.
+* by setting a constant return value instead of doing that in an
+* external function).
+* In order to make this work the following sequence is required:
+* 1. set (artificial) features depending on used paravirt
+*functions which can later influence alternative patching
+* 2. apply paravirt patching (generally replacing an indirect
+*function call with a direct one)
+* 3. apply alternative patching (e.g. replacing a direct function
+*call with a custom code sequence)
+* Doing paravirt patching after alternative patching would clobber
+* the optimization of the custom code with a function call again.
+*/
+   paravirt_set_cap();
+
+   /*
+* First patch paravirt functions, such that we overwrite the indirect
+* call with the direct call.
+*/
+   apply_paravirt(__parainstructions, __parainstructions_end);
+
+   /*
+* Then patch alternatives, such that those paravirt calls that are in
+* alternatives can be overwritten by their immediate fragments.
+*/
apply_alternatives(__alt_instructions, __alt_instructions_end);
 
 #ifdef CONFIG_SMP
@@ -750,8 +778,6 @@ void __init alternative_instructions(void)
}
 #endif
 
-   apply_paravirt(__parainstructions, __parainstructions_end);
-
restart_nmi();
alternatives_patched = 1;
 }
diff --git a/arch/x86/kernel/paravirt-spinlocks.c 
b/arch/x86/kernel/paravirt-spinlocks.c
index 4f75d0cf6305..9e1ea99ad9df 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -32,3 +32,12 @@ bool pv_is_native_vcpu_is_preempted(void)
return pv_ops.lock.vcpu_is_preempted.func ==
__raw_callee_save___native_vcpu_is_preempted;
 }
+
+void __init paravirt_set_cap(void)

[PATCH v6 12/12] x86/paravirt: have only one paravirt patch function

2021-03-09 Thread Juergen Gross
There is no need any longer to have different paravirt patch functions
for native and Xen. Eliminate native_patch() and rename
paravirt_patch_default() to paravirt_patch().

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- remove paravirt_patch_insns() (kernel test robot)
---
 arch/x86/include/asm/paravirt_types.h | 19 +--
 arch/x86/kernel/Makefile  |  3 +--
 arch/x86/kernel/alternative.c |  2 +-
 arch/x86/kernel/paravirt.c| 20 ++--
 arch/x86/kernel/paravirt_patch.c  | 11 ---
 arch/x86/xen/enlighten_pv.c   |  1 -
 6 files changed, 5 insertions(+), 51 deletions(-)
 delete mode 100644 arch/x86/kernel/paravirt_patch.c

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 588ff14ce969..9d1ddb7b4350 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -68,19 +68,6 @@ struct pv_info {
const char *name;
 };
 
-struct pv_init_ops {
-   /*
-* Patch may replace one of the defined code sequences with
-* arbitrary code, subject to the same register constraints.
-* This generally means the code is not free to clobber any
-* registers other than EAX.  The patch function should return
-* the number of bytes of code generated, as we nop pad the
-* rest in generic code.
-*/
-   unsigned (*patch)(u8 type, void *insn_buff,
- unsigned long addr, unsigned len);
-} __no_randomize_layout;
-
 #ifdef CONFIG_PARAVIRT_XXL
 struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
@@ -276,7 +263,6 @@ struct pv_lock_ops {
  * number for each function using the offset which we use to indicate
  * what to patch. */
 struct paravirt_patch_template {
-   struct pv_init_ops  init;
struct pv_cpu_ops   cpu;
struct pv_irq_ops   irq;
struct pv_mmu_ops   mmu;
@@ -317,10 +303,7 @@ extern void (*paravirt_iret)(void);
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
 
-unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, 
unsigned len);
-unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char 
*start, const char *end);
-
-unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned 
len);
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, 
unsigned int len);
 
 int paravirt_disable_iospace(void);
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2ddf08351f0b..0704c2a94272 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,7 +35,6 @@ KASAN_SANITIZE_sev-es.o   
:= n
 KCSAN_SANITIZE := n
 
 OBJECT_FILES_NON_STANDARD_test_nx.o:= y
-OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
 
 ifdef CONFIG_FRAME_POINTER
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -121,7 +120,7 @@ obj-$(CONFIG_AMD_NB)+= amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)+= kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 1f12901e75f2..cb3eb8c2f50d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -615,7 +615,7 @@ void __init_or_module apply_paravirt(struct 
paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
memcpy(insn_buff, p->instr, p->len);
-   used = pv_ops.init.patch(p->type, insn_buff, (unsigned 
long)p->instr, p->len);
+   used = paravirt_patch(p->type, insn_buff, (unsigned 
long)p->instr, p->len);
 
BUG_ON(used > p->len);
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 082954930809..3d7b989ed6be 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -99,8 +99,8 @@ void __init native_pv_lock_init(void)
	static_branch_disable(&virt_spin_lock_key);
 }
 
-unsigned paravirt_patch_default(u8 type, void *insn_buff,
-   unsigned long addr, unsigned len)
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
+   unsigned int len)
 {
/*
 * Neat trick to map patch type back to the call within the
@@ -121,19 +121,6 @@ unsigned paravirt_patch_default(

[PATCH v6 04/12] x86/alternative: support not-feature

2021-03-09 Thread Juergen Gross
Add support for alternative patching for the case where a feature is
not present on the current CPU.

For users of ALTERNATIVE() and friends an inverted feature is specified
by applying the ALT_NOT() macro to it, e.g.:

ALTERNATIVE(old, new, ALT_NOT(feature))
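
A concrete user shows up later in the series, e.g. patching in wbinvd
directly unless running as a Xen PV guest:

  static inline void wbinvd(void)
  {
          PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
  }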

Signed-off-by: Juergen Gross 
---
V5:
- split off from next patch
- reworked to use flag byte (Boris Petkov)
V6:
- rework again to not use flag byte (Boris Petkov)
---
 arch/x86/include/asm/alternative-asm.h |  3 +++
 arch/x86/include/asm/alternative.h |  3 +++
 arch/x86/kernel/alternative.c  | 19 ++-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/alternative-asm.h 
b/arch/x86/include/asm/alternative-asm.h
index 464034db299f..3965daf0460e 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -6,6 +6,9 @@
 
 #include 
 
+#define ALTINSTR_FLAG_INV  (1 << 15)
+#define ALT_NOT(feat)  ((feat) | ALTINSTR_FLAG_INV)
+
 #ifdef CONFIG_SMP
.macro LOCK_PREFIX
 672:   lock
diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 5753fb2ac489..89889618ae01 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -59,11 +59,14 @@ struct alt_instr {
s32 instr_offset;   /* original instruction */
s32 repl_offset;/* offset to replacement instruction */
u16 cpuid;  /* cpuid bit set for replacement */
+#define ALTINSTR_FLAG_INV (1 << 15)
u8  instrlen;   /* length of original instruction */
u8  replacementlen; /* length of new instruction */
u8  padlen; /* length of build-time padding */
 } __packed;
 
+#define ALT_NOT(feat)  ((feat) | ALTINSTR_FLAG_INV)
+
 /*
  * Debug flag that can be tested to see whether alternative
  * instructions were patched in already:
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 8d778e46725d..d8e669a1546f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -388,21 +388,30 @@ void __init_or_module noinline apply_alternatives(struct 
alt_instr *start,
 */
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
+   /* Mask away "NOT" flag bit for feature to test. */
+   u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;
 
		instr = (u8 *)&a->instr_offset + a->instr_offset;
		replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
-   BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
-   if (!boot_cpu_has(a->cpuid)) {
+   BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);
+
+   /*
+* Drop out if either:
+* - feature not available, but required, or
+* - feature available, but NOT required
+*/
+   if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
if (a->padlen > 1)
optimize_nops(a, instr);
 
continue;
}
 
-   DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, 
len: %d), pad: %d",
-   a->cpuid >> 5,
-   a->cpuid & 0x1f,
+   DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: 
(%px, len: %d), pad: %d",
+   (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
+   feature >> 5,
+   feature & 0x1f,
instr, instr, a->instrlen,
replacement, a->replacementlen, a->padlen);
 
-- 
2.26.2



[PATCH v6 02/12] x86/paravirt: switch time pvops functions to use static_call()

2021-03-09 Thread Juergen Gross
The time pvops functions are the only ones left which might be
used in 32-bit mode and which return a 64-bit value.

Switch them to use the static_call() mechanism instead of pvops, as
this allows quite some simplification of the pvops implementation.
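
The conversion pattern is the same for all of the time functions:
declare the call with a dummy prototype in the header, define it with
a native default, and let hypervisor specific code update it at boot
(sketch using the steal_clock names from the hunks below):

  /* header */
  u64 dummy_steal_clock(int cpu);
  DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);

  static inline u64 paravirt_steal_clock(int cpu)
  {
          return static_call(pv_steal_clock)(cpu);
  }

  /* implementation */
  static u64 native_steal_clock(int cpu)
  {
          return 0;
  }
  DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);

  /* hypervisor specific registration */
  static_call_update(pv_steal_clock, para_steal_clock);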

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V4:
- drop paravirt_time.h again
- don't move Hyper-V code (Michael Kelley)
V5:
- drop no longer needed Hyper-V modification (Michael Kelley)
- switch Arm and Arm64 to static_call(), too (kernel test robot)
V6:
- factor out common parts in Xen pv/pvh initialization (Boris Petkov)
---
 arch/arm/include/asm/paravirt.h   | 14 +-
 arch/arm/kernel/paravirt.c|  9 +++--
 arch/arm64/include/asm/paravirt.h | 14 +-
 arch/arm64/kernel/paravirt.c  | 13 +
 arch/x86/Kconfig  |  1 +
 arch/x86/include/asm/mshyperv.h   |  2 +-
 arch/x86/include/asm/paravirt.h   | 17 ++---
 arch/x86/include/asm/paravirt_types.h |  6 --
 arch/x86/kernel/cpu/vmware.c  |  5 +++--
 arch/x86/kernel/kvm.c |  2 +-
 arch/x86/kernel/kvmclock.c|  2 +-
 arch/x86/kernel/paravirt.c| 16 
 arch/x86/kernel/tsc.c |  2 +-
 arch/x86/xen/time.c   | 26 +-
 drivers/xen/time.c|  3 ++-
 15 files changed, 75 insertions(+), 57 deletions(-)

diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
index cdbf02d9c1d4..95d5b0d625cd 100644
--- a/arch/arm/include/asm/paravirt.h
+++ b/arch/arm/include/asm/paravirt.h
@@ -3,23 +3,19 @@
 #define _ASM_ARM_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-   unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-   struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-   return pv_ops.time.steal_clock(cpu);
+   return static_call(pv_steal_clock)(cpu);
 }
 #endif
 
diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c
index 4cfed91fe256..7dd9806369fb 100644
--- a/arch/arm/kernel/paravirt.c
+++ b/arch/arm/kernel/paravirt.c
@@ -9,10 +9,15 @@
 #include 
 #include 
 #include 
+#include <linux/static_call.h>
 #include 
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
diff --git a/arch/arm64/include/asm/paravirt.h 
b/arch/arm64/include/asm/paravirt.h
index cf3a0fd7c1a7..9aa193e0e8f2 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -3,23 +3,19 @@
 #define _ASM_ARM64_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-   unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-   struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-   return pv_ops.time.steal_clock(cpu);
+   return static_call(pv_steal_clock)(cpu);
 }
 
 int __init pv_time_init(void);
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index c07d7a034941..75fed4460407 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include <linux/static_call.h>
 
 #include 
 #include 
@@ -26,8 +27,12 @@
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
struct pvclock_vcpu_stolen_time *kaddr;
@@ -45,7 +50,7 @@ static int __init parse_no_stealacc(char *arg)
 early_param("no-steal-acc", parse_no_stealacc);
 
 /* return stolen time in ns by asking the hypervisor */
-static u64 pv_steal_clock(int cpu)
+static u64 para_steal_clock(int cpu)
 {
struct pv_time_stolen_time_region *reg;
 
@@ -150,7 +155,7 @@ int __init pv_time_init(void)
if (ret)
return ret;
 
-   pv_ops.time.steal_clock = pv_steal_clock;
+   static_call_update(pv_steal_clock, para_steal_clock);
 
	static_key_slow_inc(&paravirt_steal_enabled);
if (steal_acc)

[PATCH v6 08/12] x86/paravirt: simplify paravirt macros

2021-03-09 Thread Juergen Gross
The central pvops call macros PVOP_CALL() and PVOP_VCALL() look very
similar now.

The main differences are using PVOP_VCALL_ARGS or PVOP_CALL_ARGS, which
are identical, and the return value handling.

So drop PVOP_VCALL_ARGS and instead of PVOP_VCALL() just use
(void)PVOP_CALL(long, ...).

Note that it isn't easily possible to just redefine PVOP_VCALL()
to use PVOP_CALL() instead, as this would require further hiding of
commas in macro parameters.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V3:
- new patch
V4:
- fix build warnings with clang (kernel test robot)
---
 arch/x86/include/asm/paravirt_types.h | 41 ---
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 42f9eef84131..45bd21647dd8 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -408,11 +408,9 @@ int paravirt_disable_iospace(void);
  * makes sure the incoming and outgoing types are always correct.
  */
 #ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS
\
+#define PVOP_CALL_ARGS \
unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
 
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)  "a" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)  "d" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)  "c" ((unsigned long)(x))
@@ -428,12 +426,10 @@ int paravirt_disable_iospace(void);
 #define VEXTRA_CLOBBERS
 #else  /* CONFIG_X86_64 */
 /* [re]ax isn't an arg, but the return val */
-#define PVOP_VCALL_ARGS\
+#define PVOP_CALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
__edx = __edx, __ecx = __ecx, __eax = __eax;
 
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)  "D" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)  "S" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)  "d" ((unsigned long)(x))
@@ -458,59 +454,46 @@ int paravirt_disable_iospace(void);
 #define PVOP_TEST_NULL(op) ((void)pv_ops.op)
 #endif
 
-#define PVOP_RETMASK(rettype)  \
+#define PVOP_RETVAL(rettype)   \
({  unsigned long __mask = ~0UL;\
+   BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long));  \
switch (sizeof(rettype)) {  \
case 1: __mask =   0xffUL; break;   \
case 2: __mask = 0xffffUL; break;   \
case 4: __mask = 0xffffffffUL; break;   \
default: break; \
}   \
-   __mask; \
+   __mask & __eax; \
})
 
 
-#define PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, ...)   \
+#define PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...)   \
({  \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
-   BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long));  \
asm volatile(paravirt_alt(PARAVIRT_CALL)\
 : call_clbr, ASM_CALL_CONSTRAINT   \
 : paravirt_type(op),   \
   paravirt_clobber(clbr),  \
   ##__VA_ARGS__\
 : "memory", "cc" extra_clbr);  \
-   (rettype)(__eax & PVOP_RETMASK(rettype));   \
+   ret;\
})
 
 #define __PVOP_CALL(rettype, op, ...)  \
-   PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS,\
- EXTRA_CLOBBERS, ##__VA_ARGS__)
+   PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY,   \
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
 
 #define __PVOP_CALLEESAVE(rettype, op, ...)\
-   PVOP_CALL(rettype, op.func, CLBR_RET_REG,   \
+   PVOP_CALL(PVOP_RETVAL(rettype), op.func, 

[PATCH v6 03/12] x86/alternative: drop feature parameter from ALTINSTR_REPLACEMENT()

2021-03-09 Thread Juergen Gross
The macro ALTINSTR_REPLACEMENT() doesn't make use of the feature
parameter, so drop it.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V5:
- new patch
---
 arch/x86/include/asm/alternative.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 13adca37c99a..5753fb2ac489 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
" .byte " alt_rlen(num) "\n"/* replacement len */ \
" .byte " alt_pad_len "\n"  /* pad len */
 
-#define ALTINSTR_REPLACEMENT(newinstr, feature, num)   /* replacement */   
\
+#define ALTINSTR_REPLACEMENT(newinstr, num)/* replacement */   
\
"# ALT: replacement " #num "\n" 
\
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
 
@@ -161,7 +161,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_ENTRY(feature, 1)  \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n"  \
-   ALTINSTR_REPLACEMENT(newinstr, feature, 1)  \
+   ALTINSTR_REPLACEMENT(newinstr, 1)   \
".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
@@ -171,8 +171,8 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_ENTRY(feature2, 2) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n"  \
-   ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)\
-   ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)\
+   ALTINSTR_REPLACEMENT(newinstr1, 1)  \
+   ALTINSTR_REPLACEMENT(newinstr2, 2)  \
".popsection\n"
 
 #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, 
feat3) \
@@ -183,9 +183,9 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_ENTRY(feat3, 3)
\
".popsection\n" 
\
".pushsection .altinstr_replacement, \"ax\"\n"  
\
-   ALTINSTR_REPLACEMENT(newinsn1, feat1, 1)
\
-   ALTINSTR_REPLACEMENT(newinsn2, feat2, 2)
\
-   ALTINSTR_REPLACEMENT(newinsn3, feat3, 3)
\
+   ALTINSTR_REPLACEMENT(newinsn1, 1)   
\
+   ALTINSTR_REPLACEMENT(newinsn2, 2)   
\
+   ALTINSTR_REPLACEMENT(newinsn3, 3)   
\
".popsection\n"
 
 /*
-- 
2.26.2



[PATCH v6 01/12] static_call: move struct static_call_key definition to static_call_types.h

2021-03-09 Thread Juergen Gross
Having the definition of static_call() in static_call_types.h makes
no sense as long as struct static_call_key isn't defined there, because
the generic implementation of static_call() references this structure.

So move the definition of struct static_call_key to static_call_types.h.

Signed-off-by: Juergen Gross 
Acked-by: Peter Zijlstra (Intel) 
---
V5:
- new patch
---
 include/linux/static_call.h | 18 --
 include/linux/static_call_types.h   | 18 ++
 tools/include/linux/static_call_types.h | 18 ++
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 85ecc789f4ff..76b881259144 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -128,16 +128,6 @@ struct static_call_mod {
struct static_call_site *sites;
 };
 
-struct static_call_key {
-   void *func;
-   union {
-   /* bit 0: 0 = mods, 1 = sites */
-   unsigned long type;
-   struct static_call_mod *mods;
-   struct static_call_site *sites;
-   };
-};
-
 /* For finding the key associated with a trampoline */
 struct static_call_tramp_key {
s32 tramp;
@@ -187,10 +177,6 @@ extern long __static_call_return0(void);
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-   void *func;
-};
-
 #define __DEFINE_STATIC_CALL(name, _func, _func_init)  \
DECLARE_STATIC_CALL(name, _func);   \
struct static_call_key STATIC_CALL_KEY(name) = {\
@@ -243,10 +229,6 @@ static inline long __static_call_return0(void)
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-   void *func;
-};
-
 static inline long __static_call_return0(void)
 {
return 0;
diff --git a/include/linux/static_call_types.h 
b/include/linux/static_call_types.h
index ae5662d368b9..5a00b8b2cf9f 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
__raw_static_call(name);\
 })
 
+struct static_call_key {
+   void *func;
+   union {
+   /* bit 0: 0 = mods, 1 = sites */
+   unsigned long type;
+   struct static_call_mod *mods;
+   struct static_call_site *sites;
+   };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)__raw_static_call(name)
 
+struct static_call_key {
+   void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+   void *func;
+};
+
 #define static_call(name)  \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 
diff --git a/tools/include/linux/static_call_types.h 
b/tools/include/linux/static_call_types.h
index ae5662d368b9..5a00b8b2cf9f 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
__raw_static_call(name);\
 })
 
+struct static_call_key {
+   void *func;
+   union {
+   /* bit 0: 0 = mods, 1 = sites */
+   unsigned long type;
+   struct static_call_mod *mods;
+   struct static_call_site *sites;
+   };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)__raw_static_call(name)
 
+struct static_call_key {
+   void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+   void *func;
+};
+
 #define static_call(name)  \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 
-- 
2.26.2



[PATCH v5 10/12] x86/paravirt: add new macros PVOP_ALT* supporting pvops in ALTERNATIVEs

2021-03-08 Thread Juergen Gross
Instead of using paravirt patching for custom code sequences add
support for using ALTERNATIVE handling combined with paravirt call
patching.
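
For illustration, a later patch of this series converts e.g. read_cr2()
to the new macros roughly like this (sketch: the pvops call stays the
default and ALTERNATIVE patches in the native mov on CPUs without
X86_FEATURE_XENPV):

  static inline unsigned long read_cr2(void)
  {
          return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
                                  "mov %%cr2, %%rax;", ~X86_FEATURE_XENPV);
  }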

Signed-off-by: Juergen Gross 
---
V3:
- drop PVOP_ALT_VCALL() macro
---
 arch/x86/include/asm/paravirt_types.h | 49 ++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 0afdac83f926..0ed976286d49 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -477,44 +477,91 @@ int paravirt_disable_iospace(void);
ret;\
})
 
+#define PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr, \
+ extra_clbr, ...)  \
+   ({  \
+   PVOP_CALL_ARGS; \
+   PVOP_TEST_NULL(op); \
+   asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),   \
+alt, cond) \
+: call_clbr, ASM_CALL_CONSTRAINT   \
+: paravirt_type(op),   \
+  paravirt_clobber(clbr),  \
+  ##__VA_ARGS__\
+: "memory", "cc" extra_clbr);  \
+   ret;\
+   })
+
 #define __PVOP_CALL(rettype, op, ...)  \
PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY,   \
  PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
 
+#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...)   \
+   PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS,   \
+ ##__VA_ARGS__)
+
 #define __PVOP_CALLEESAVE(rettype, op, ...)\
PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG,  \
  PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
 
+#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \
+   PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \
+ CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+
 #define __PVOP_VCALL(op, ...)  \
(void)PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS,\
   VEXTRA_CLOBBERS, ##__VA_ARGS__)
 
+#define __PVOP_ALT_VCALL(op, alt, cond, ...)   \
+   (void)PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY,  \
+   PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS,   \
+   ##__VA_ARGS__)
+
 #define __PVOP_VCALLEESAVE(op, ...)\
(void)PVOP_CALL(, op.func, CLBR_RET_REG,\
- PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
+   PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
+#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \
+   (void)PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG, \
+   PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
 
 #define PVOP_CALL0(rettype, op)
\
__PVOP_CALL(rettype, op)
 #define PVOP_VCALL0(op)
\
__PVOP_VCALL(op)
+#define PVOP_ALT_CALL0(rettype, op, alt, cond) \
+   __PVOP_ALT_CALL(rettype, op, alt, cond)
+#define PVOP_ALT_VCALL0(op, alt, cond) \
+   __PVOP_ALT_VCALL(op, alt, cond)
 
 #define PVOP_CALLEE0(rettype, op)  \
__PVOP_CALLEESAVE(rettype, op)
 #define PVOP_VCALLEE0(op)  \
__PVOP_VCALLEESAVE(op)
+#define PVOP_ALT_CALLEE0(rettype, op, alt, cond)   \
+   __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond)
+#define PVOP_ALT_VCALLEE0(op, alt, cond)   \
+   __PVOP_ALT_VCALLEESAVE(op, alt, cond)
 
 
 #define PVOP_CALL1(rettype, op, arg1)  \
__PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1))
 #define PVOP_VCALL1(op, arg1)  \
__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALL1(op, arg1, alt, cond)   \
+   __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1))
 
 #define PVOP_CALLE

[PATCH v5 12/12] x86/paravirt: have only one paravirt patch function

2021-03-08 Thread Juergen Gross
There is no need any longer to have different paravirt patch functions
for native and Xen. Eliminate native_patch() and rename
paravirt_patch_default() to paravirt_patch().

Signed-off-by: Juergen Gross 
---
V3:
- remove paravirt_patch_insns() (kernel test robot)
---
 arch/x86/include/asm/paravirt_types.h | 19 +--
 arch/x86/kernel/Makefile  |  3 +--
 arch/x86/kernel/alternative.c |  2 +-
 arch/x86/kernel/paravirt.c| 20 ++--
 arch/x86/kernel/paravirt_patch.c  | 11 ---
 arch/x86/xen/enlighten_pv.c   |  1 -
 6 files changed, 5 insertions(+), 51 deletions(-)
 delete mode 100644 arch/x86/kernel/paravirt_patch.c

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 588ff14ce969..62efbf8bd8f0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -68,19 +68,6 @@ struct pv_info {
const char *name;
 };
 
-struct pv_init_ops {
-   /*
-* Patch may replace one of the defined code sequences with
-* arbitrary code, subject to the same register constraints.
-* This generally means the code is not free to clobber any
-* registers other than EAX.  The patch function should return
-* the number of bytes of code generated, as we nop pad the
-* rest in generic code.
-*/
-   unsigned (*patch)(u8 type, void *insn_buff,
- unsigned long addr, unsigned len);
-} __no_randomize_layout;
-
 #ifdef CONFIG_PARAVIRT_XXL
 struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
@@ -276,7 +263,6 @@ struct pv_lock_ops {
  * number for each function using the offset which we use to indicate
  * what to patch. */
 struct paravirt_patch_template {
-   struct pv_init_ops  init;
struct pv_cpu_ops   cpu;
struct pv_irq_ops   irq;
struct pv_mmu_ops   mmu;
@@ -317,10 +303,7 @@ extern void (*paravirt_iret)(void);
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
 
-unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, 
unsigned len);
-unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char 
*start, const char *end);
-
-unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned 
len);
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, 
unsigned int len);
 
 int paravirt_disable_iospace(void);
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2ddf08351f0b..0704c2a94272 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,7 +35,6 @@ KASAN_SANITIZE_sev-es.o   
:= n
 KCSAN_SANITIZE := n
 
 OBJECT_FILES_NON_STANDARD_test_nx.o:= y
-OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
 
 ifdef CONFIG_FRAME_POINTER
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -121,7 +120,7 @@ obj-$(CONFIG_AMD_NB)+= amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)+= kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ab9ad729fc5a..8f922b65c3c5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -607,7 +607,7 @@ void __init_or_module apply_paravirt(struct 
paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
memcpy(insn_buff, p->instr, p->len);
-   used = pv_ops.init.patch(p->type, insn_buff, (unsigned 
long)p->instr, p->len);
+   used = paravirt_patch(p->type, insn_buff, (unsigned 
long)p->instr, p->len);
 
BUG_ON(used > p->len);
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 082954930809..3d7b989ed6be 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -99,8 +99,8 @@ void __init native_pv_lock_init(void)
static_branch_disable(&virt_spin_lock_key);
 }
 
-unsigned paravirt_patch_default(u8 type, void *insn_buff,
-   unsigned long addr, unsigned len)
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
+   unsigned int len)
 {
/*
 * Neat trick to map patch type back to the call within the
@@ -121,19 +121,6 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,

[PATCH v5 11/12] x86/paravirt: switch functions with custom code to ALTERNATIVE

2021-03-08 Thread Juergen Gross
Instead of using paravirt patching for custom code sequences use
ALTERNATIVE for the functions with custom code replacements.

Instead of patching an ud2 instruction for unpopulated vector entries
into the caller site, use a simple function just calling BUG() as a
replacement.
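
The replacement is essentially just a trivial helper along the lines of
the following sketch (the exact name used by the patch is not visible in
the quoted diff):

  /* dummy pv op used instead of patching a ud2 into the caller site */
  void paravirt_BUG(void)
  {
          BUG();
  }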

Simplify the register defines for assembler paravirt calling, as there
isn't much usage left.

Signed-off-by: Juergen Gross 
---
V4:
- fixed SAVE_FLAGS() (kernel test robot)
- added assembler paravirt cleanup
---
 arch/x86/entry/entry_64.S |  2 +-
 arch/x86/include/asm/irqflags.h   |  2 +-
 arch/x86/include/asm/paravirt.h   | 99 +--
 arch/x86/include/asm/paravirt_types.h |  6 --
 arch/x86/kernel/paravirt.c| 16 ++---
 arch/x86/kernel/paravirt_patch.c  | 88 
 6 files changed, 56 insertions(+), 157 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 400908dff42e..12e2e3cd58be 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -305,7 +305,7 @@ SYM_CODE_END(ret_from_fork)
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
pushq %rax
-   SAVE_FLAGS(CLBR_RAX)
+   SAVE_FLAGS
testl $X86_EFLAGS_IF, %eax
jz .Lokay_\@
ud2
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index a0efbcd24b86..c5ce9845c999 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -111,7 +111,7 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(x)  pushfq; popq %rax
+#define SAVE_FLAGS pushfq; popq %rax
 #endif
 
 #define INTERRUPT_RETURN   jmp native_iret
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 36cd71fa097f..04b3067f31b5 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -137,7 +137,8 @@ static inline void write_cr0(unsigned long x)
 
 static inline unsigned long read_cr2(void)
 {
-   return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
+   return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
+   "mov %%cr2, %%rax;", ~X86_FEATURE_XENPV);
 }
 
 static inline void write_cr2(unsigned long x)
@@ -147,12 +148,14 @@ static inline void write_cr2(unsigned long x)
 
 static inline unsigned long __read_cr3(void)
 {
-   return PVOP_CALL0(unsigned long, mmu.read_cr3);
+   return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
+ "mov %%cr3, %%rax;", ~X86_FEATURE_XENPV);
 }
 
 static inline void write_cr3(unsigned long x)
 {
-   PVOP_VCALL1(mmu.write_cr3, x);
+   PVOP_ALT_VCALL1(mmu.write_cr3, x,
+   "mov %%rdi, %%cr3", ~X86_FEATURE_XENPV);
 }
 
 static inline void __write_cr4(unsigned long x)
@@ -172,7 +175,7 @@ static inline void halt(void)
 
 static inline void wbinvd(void)
 {
-   PVOP_VCALL0(cpu.wbinvd);
+   PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ~X86_FEATURE_XENPV);
 }
 
 static inline u64 paravirt_read_msr(unsigned msr)
@@ -386,22 +389,28 @@ static inline void paravirt_release_p4d(unsigned long pfn)
 
 static inline pte_t __pte(pteval_t val)
 {
-   return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) };
+   return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
+ "mov %%rdi, %%rax",
+ ~X86_FEATURE_XENPV) };
 }
 
 static inline pteval_t pte_val(pte_t pte)
 {
-   return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
+   return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
+   "mov %%rdi, %%rax", ~X86_FEATURE_XENPV);
 }
 
 static inline pgd_t __pgd(pgdval_t val)
 {
-   return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) };
+   return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
+ "mov %%rdi, %%rax",
+ ~X86_FEATURE_XENPV) };
 }
 
 static inline pgdval_t pgd_val(pgd_t pgd)
 {
-   return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
+   return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
+   "mov %%rdi, %%rax", ~X86_FEATURE_XENPV);
 }
 
 #define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -434,12 +443,15 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 
 static inline pmd_t __pmd(pmdval_t val)
 {
-   return (pmd_t) { PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val) };
+   return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
+ "mov %%rdi, %%rax",
+ ~X86_FEATURE_XENPV) };
 }
 
 static inline pmdval_t pmd_val(pmd_t pmd)
 {
-   return PVOP_CALLEE1(pmdva

[PATCH v5 08/12] x86/paravirt: simplify paravirt macros

2021-03-08 Thread Juergen Gross
The central pvops call macros PVOP_CALL() and PVOP_VCALL() look
very similar now.

The main differences are using PVOP_VCALL_ARGS or PVOP_CALL_ARGS, which
are identical, and the return value handling.

So drop PVOP_VCALL_ARGS and instead of PVOP_VCALL() just use
(void)PVOP_CALL(long, ...).

Note that it isn't easily possible to just redefine PVOP_VCALL()
to use PVOP_CALL() instead, as this would require further hiding of
commas in macro parameters.

Signed-off-by: Juergen Gross 
---
V3:
- new patch
V4:
- fix build warnings with clang (kernel test robot)
---
 arch/x86/include/asm/paravirt_types.h | 41 ---
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 42f9eef84131..45bd21647dd8 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -408,11 +408,9 @@ int paravirt_disable_iospace(void);
  * makes sure the incoming and outgoing types are always correct.
  */
 #ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS
\
+#define PVOP_CALL_ARGS \
unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
 
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)  "a" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)  "d" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)  "c" ((unsigned long)(x))
@@ -428,12 +426,10 @@ int paravirt_disable_iospace(void);
 #define VEXTRA_CLOBBERS
 #else  /* CONFIG_X86_64 */
 /* [re]ax isn't an arg, but the return val */
-#define PVOP_VCALL_ARGS\
+#define PVOP_CALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
__edx = __edx, __ecx = __ecx, __eax = __eax;
 
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
 #define PVOP_CALL_ARG1(x)  "D" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x)  "S" ((unsigned long)(x))
 #define PVOP_CALL_ARG3(x)  "d" ((unsigned long)(x))
@@ -458,59 +454,46 @@ int paravirt_disable_iospace(void);
 #define PVOP_TEST_NULL(op) ((void)pv_ops.op)
 #endif
 
-#define PVOP_RETMASK(rettype)  \
+#define PVOP_RETVAL(rettype)   \
({  unsigned long __mask = ~0UL;\
+   BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long));  \
switch (sizeof(rettype)) {  \
case 1: __mask =   0xffUL; break;   \
case 2: __mask = 0xffffUL; break;   \
case 4: __mask = 0xffffffffUL; break;   \
default: break; \
}   \
-   __mask; \
+   __mask & __eax; \
})
 
 
-#define PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, ...)   \
+#define PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...)   \
({  \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
-   BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long));  \
asm volatile(paravirt_alt(PARAVIRT_CALL)\
 : call_clbr, ASM_CALL_CONSTRAINT   \
 : paravirt_type(op),   \
   paravirt_clobber(clbr),  \
   ##__VA_ARGS__\
 : "memory", "cc" extra_clbr);  \
-   (rettype)(__eax & PVOP_RETMASK(rettype));   \
+   ret;\
})
 
 #define __PVOP_CALL(rettype, op, ...)  \
-   PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS,\
- EXTRA_CLOBBERS, ##__VA_ARGS__)
+   PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY,   \
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
 
 #define __PVOP_CALLEESAVE(rettype, op, ...)\
-   PVOP_CALL(rettype, op.func, CLBR_RET_REG,   \
+   PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG,  \
  

[PATCH v5 00/12] x86: major paravirt cleanup

2021-03-08 Thread Juergen Gross
This is a major cleanup of the paravirt infrastructure aiming at
eliminating all custom code patching via paravirt patching.

This is achieved by using ALTERNATIVE instead, leading to the ability
to give objtool access to the patched in instructions.

In order to remove most of the 32-bit special handling from pvops the
time related operations are switched to use static_call() instead.

At the end of this series all paravirt patching has to do is to
replace indirect calls with direct ones. In a further step this could
be switched to static_call(), too.

Changes in V5:
- patches 1-5 of V4 dropped, as already applied
- new patches 1+3
- fixed patch 2
- split V4 patch 8 into patches 4+5
- use flag byte instead of negative feature bit for "not feature"

Changes in V4:
- fixed several build failures
- removed objtool patch, as objtool patches are in tip now
- added patch 1 for making usage of static_call easier
- even more cleanup

Changes in V3:
- added patches 7 and 12
- addressed all comments

Changes in V2:
- added patches 5-12

Juergen Gross (12):
  staticcall: move struct static_call_key definition to
static_call_types.h
  x86/paravirt: switch time pvops functions to use static_call()
  x86/alternative: drop feature parameter from ALTINSTR_REPLACEMENT()
  x86/alternative: support not-feature
  x86/alternative: support ALTERNATIVE_TERNARY
  x86: add new features for paravirt patching
  x86/paravirt: remove no longer needed 32-bit pvops cruft
  x86/paravirt: simplify paravirt macros
  x86/paravirt: switch iret pvops to ALTERNATIVE
  x86/paravirt: add new macros PVOP_ALT* supporting pvops in
ALTERNATIVEs
  x86/paravirt: switch functions with custom code to ALTERNATIVE
  x86/paravirt: have only one paravirt patch function

 arch/arm/include/asm/paravirt.h   |  14 +-
 arch/arm/kernel/paravirt.c|   9 +-
 arch/arm64/include/asm/paravirt.h |  14 +-
 arch/arm64/kernel/paravirt.c  |  13 +-
 arch/x86/Kconfig  |   1 +
 arch/x86/entry/entry_32.S |   4 +-
 arch/x86/entry/entry_64.S |   2 +-
 arch/x86/include/asm/alternative-asm.h|  10 +
 arch/x86/include/asm/alternative.h|  28 ++-
 arch/x86/include/asm/cpufeature.h |   2 +
 arch/x86/include/asm/cpufeatures.h|   2 +
 arch/x86/include/asm/irqflags.h   |   7 +-
 arch/x86/include/asm/mshyperv.h   |   2 +-
 arch/x86/include/asm/paravirt.h   | 167 +++---
 arch/x86/include/asm/paravirt_types.h | 210 +++---
 arch/x86/kernel/Makefile  |   3 +-
 arch/x86/kernel/alternative.c |  37 ++-
 arch/x86/kernel/asm-offsets.c |   7 -
 arch/x86/kernel/cpu/vmware.c  |   5 +-
 arch/x86/kernel/kvm.c |   2 +-
 arch/x86/kernel/kvmclock.c|   2 +-
 arch/x86/kernel/paravirt-spinlocks.c  |   9 +
 arch/x86/kernel/paravirt.c|  78 ++-
 arch/x86/kernel/paravirt_patch.c  |  99 -
 arch/x86/kernel/tsc.c |   2 +-
 arch/x86/xen/enlighten_pv.c   |   4 +-
 arch/x86/xen/time.c   |  11 +-
 drivers/xen/time.c|   3 +-
 include/linux/static_call.h   |  18 --
 include/linux/static_call_types.h |  18 ++
 tools/include/linux/static_call_types.h   |  18 ++
 tools/objtool/arch/x86/include/arch/special.h |   6 +-
 32 files changed, 339 insertions(+), 468 deletions(-)
 delete mode 100644 arch/x86/kernel/paravirt_patch.c

-- 
2.26.2



[PATCH v5 07/12] x86/paravirt: remove no longer needed 32-bit pvops cruft

2021-03-08 Thread Juergen Gross
PVOP_VCALL4() is only used for Xen PV, while PVOP_CALL4() isn't used
at all. Keep PVOP_CALL4() for 64 bits due to symmetry reasons.

This allows removing the 32-bit definitions of those macros, leading
to a substantial simplification of the paravirt macros, as those were
the only ones needing non-empty "pre" and "post" parameters.

PVOP_CALLEE2() and PVOP_VCALLEE2() are used nowhere, so remove them.

Another no longer needed case is special handling of return types
larger than unsigned long. Replace that with a BUILD_BUG_ON().

DISABLE_INTERRUPTS() is used in 32-bit code only, so it can just be
replaced by cli.

INTERRUPT_RETURN in 32-bit code can be replaced by iret.

ENABLE_INTERRUPTS is used nowhere, so it can be removed.

Signed-off-by: Juergen Gross 
---
 arch/x86/entry/entry_32.S |   4 +-
 arch/x86/include/asm/irqflags.h   |   5 --
 arch/x86/include/asm/paravirt.h   |  35 +---
 arch/x86/include/asm/paravirt_types.h | 112 --
 arch/x86/kernel/asm-offsets.c |   2 -
 5 files changed, 35 insertions(+), 123 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017e6161..765487e57d6e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -430,7 +430,7 @@
 * will soon execute iret and the tracer was already set to
 * the irqstate after the IRET:
 */
-   DISABLE_INTERRUPTS(CLBR_ANY)
+   cli
lss (%esp), %esp/* switch to espfix segment */
 .Lend_\@:
 #endif /* CONFIG_X86_ESPFIX32 */
@@ -1077,7 +1077,7 @@ restore_all_switch_stack:
 * when returning from IPI handler and when returning from
 * scheduler to user-space.
 */
-   INTERRUPT_RETURN
+   iret
 
 .section .fixup, "ax"
 SYM_CODE_START(asm_iret_error)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 144d70ea4393..a0efbcd24b86 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -109,9 +109,6 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 }
 #else
 
-#define ENABLE_INTERRUPTS(x)   sti
-#define DISABLE_INTERRUPTS(x)  cli
-
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(x)  pushfq; popq %rax
@@ -119,8 +116,6 @@ static __always_inline unsigned long 
arch_local_irq_save(void)
 
 #define INTERRUPT_RETURN   jmp native_iret
 
-#else
-#define INTERRUPT_RETURN   iret
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8c354099d9c3..c6496a82fad1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -721,6 +721,7 @@ extern void default_banner(void);
.if ((~(set)) & mask); pop %reg; .endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT_XXL
 
 #define PV_SAVE_REGS(set)  \
COND_PUSH(set, CLBR_RAX, rax);  \
@@ -746,46 +747,12 @@ extern void default_banner(void);
 #define PARA_PATCH(off)((off) / 8)
 #define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .quad, 8)
 #define PARA_INDIRECT(addr)*addr(%rip)
-#else
-#define PV_SAVE_REGS(set)  \
-   COND_PUSH(set, CLBR_EAX, eax);  \
-   COND_PUSH(set, CLBR_EDI, edi);  \
-   COND_PUSH(set, CLBR_ECX, ecx);  \
-   COND_PUSH(set, CLBR_EDX, edx)
-#define PV_RESTORE_REGS(set)   \
-   COND_POP(set, CLBR_EDX, edx);   \
-   COND_POP(set, CLBR_ECX, ecx);   \
-   COND_POP(set, CLBR_EDI, edi);   \
-   COND_POP(set, CLBR_EAX, eax)
-
-#define PARA_PATCH(off)((off) / 4)
-#define PARA_SITE(ptype, ops)  _PVSITE(ptype, ops, .long, 4)
-#define PARA_INDIRECT(addr)*%cs:addr
-#endif
 
-#ifdef CONFIG_PARAVIRT_XXL
 #define INTERRUPT_RETURN   \
PARA_SITE(PARA_PATCH(PV_CPU_iret),  \
  ANNOTATE_RETPOLINE_SAFE;  \
  jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
 
-#define DISABLE_INTERRUPTS(clobbers)   \
-   PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable),   \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);\
- ANNOTATE_RETPOLINE_SAFE;  \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable);\
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-
-#define ENABLE_INTERRUPTS(clobbers)\
-   PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable),\
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);\
- ANNOTATE_RETPOLINE_SAFE;  \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \
-   

[PATCH v5 09/12] x86/paravirt: switch iret pvops to ALTERNATIVE

2021-03-08 Thread Juergen Gross
The iret paravirt op is rather special as it uses a jmp instead of a
call instruction. Switch it to ALTERNATIVE.

Signed-off-by: Juergen Gross 
---
V3:
- use ALTERNATIVE_TERNARY
---
 arch/x86/include/asm/paravirt.h   |  6 +++---
 arch/x86/include/asm/paravirt_types.h |  5 +
 arch/x86/kernel/asm-offsets.c |  5 -
 arch/x86/kernel/paravirt.c| 26 ++
 arch/x86/xen/enlighten_pv.c   |  3 +--
 5 files changed, 7 insertions(+), 38 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c6496a82fad1..36cd71fa097f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -749,9 +749,9 @@ extern void default_banner(void);
 #define PARA_INDIRECT(addr)*addr(%rip)
 
 #define INTERRUPT_RETURN   \
-   PARA_SITE(PARA_PATCH(PV_CPU_iret),  \
- ANNOTATE_RETPOLINE_SAFE;  \
- jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
+   ANNOTATE_RETPOLINE_SAFE;\
+   ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);",\
+   X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;")
 
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(clobbers)\
diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 45bd21647dd8..0afdac83f926 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -151,10 +151,6 @@ struct pv_cpu_ops {
 
u64 (*read_pmc)(int counter);
 
-   /* Normal iret.  Jump to this with the standard iret stack
-  frame set up. */
-   void (*iret)(void);
-
void (*start_context_switch)(struct task_struct *prev);
void (*end_context_switch)(struct task_struct *next);
 #endif
@@ -294,6 +290,7 @@ struct paravirt_patch_template {
 
 extern struct pv_info pv_info;
 extern struct paravirt_patch_template pv_ops;
+extern void (*paravirt_iret)(void);
 
 #define PARAVIRT_PATCH(x)  \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 736508004b30..ecd3fd6993d1 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -61,11 +61,6 @@ static void __used common(void)
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
 #endif
 
-#ifdef CONFIG_PARAVIRT_XXL
-   BLANK();
-   OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
-#endif
-
 #ifdef CONFIG_XEN
BLANK();
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 44e5b0fe28cb..0553a339d850 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -86,25 +86,6 @@ u64 notrace _paravirt_ident_64(u64 x)
 {
return x;
 }
-
-static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
-  unsigned long addr, unsigned len)
-{
-   struct branch *b = insn_buff;
-   unsigned long delta = (unsigned long)target - (addr+5);
-
-   if (len < 5) {
-#ifdef CONFIG_RETPOLINE
-   WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void 
*)addr);
-#endif
-   return len; /* call too long for patch site */
-   }
-
-   b->opcode = 0xe9;   /* jmp */
-   b->delta = delta;
-
-   return 5;
-}
 #endif
 
 DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
@@ -136,9 +117,6 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insn_buff, len);
 
-   else if (type == PARAVIRT_PATCH(cpu.iret))
-   /* If operation requires a jmp, then jmp */
-   ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
 #endif
else
/* Otherwise call the function. */
@@ -316,8 +294,6 @@ struct paravirt_patch_template pv_ops = {
 
.cpu.load_sp0   = native_load_sp0,
 
-   .cpu.iret   = native_iret,
-
 #ifdef CONFIG_X86_IOPL_IOPERM
.cpu.invalidate_io_bitmap   = native_tss_invalidate_io_bitmap,
.cpu.update_io_bitmap   = native_tss_update_io_bitmap,
@@ -422,6 +398,8 @@ struct paravirt_patch_template pv_ops = {
 NOKPROBE_SYMBOL(native_get_debugreg);
 NOKPROBE_SYMBOL(native_set_debugreg);
 NOKPROBE_SYMBOL(native_load_idt);
+
+void (*paravirt_iret)(void) = native_iret;
 #endif
 
 EXPORT_SYMBOL(pv_ops);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index dc0a337f985b..08dca7bebb30 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1070,8 +1070,6 @@ stati

[PATCH v5 05/12] x86/alternative: support ALTERNATIVE_TERNARY

2021-03-08 Thread Juergen Gross
Add ALTERNATIVE_TERNARY support for replacing an initial instruction
with either of two instructions depending on a feature:

  ALTERNATIVE_TERNARY "default_instr", FEATURE_NR,
  "feature_on_instr", "feature_off_instr"

which will start with "default_instr" and at patch time will, depending
on FEATURE_NR being set or not, patch that with either
"feature_on_instr" or "feature_off_instr".

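As an illustration only (not part of this patch; the real user in this
series is the INTERRUPT_RETURN definition converted later), the C
variant can be used directly inside an asm() statement with a made-up
feature-dependent sequence:

  unsigned int lo, hi;

  /* made-up example: start with the safe fallback, then patch in
   * either rdtscp or the fallback once CPU features are known */
  asm volatile(ALTERNATIVE_TERNARY("lfence; rdtsc",      /* default       */
                                   X86_FEATURE_RDTSCP,
                                   "rdtscp",             /* feature set   */
                                   "lfence; rdtsc")      /* feature unset */
               : "=a" (lo), "=d" (hi) : : "ecx");
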
Signed-off-by: Juergen Gross 
---
V3:
- new patch
V4:
- use X86_FEATURE_ALWAYS instead of negated feature (Boris Petkov)
- unfortunately this isn't enough to get rid of the "not feature"
  support, as this is needed in the patch "x86/paravirt: switch
  functions with custom code to ALTERNATIVE", too
V5:
- carve out the "not feature" part
---
 arch/x86/include/asm/alternative-asm.h | 4 
 arch/x86/include/asm/alternative.h | 6 ++
 2 files changed, 10 insertions(+)

diff --git a/arch/x86/include/asm/alternative-asm.h 
b/arch/x86/include/asm/alternative-asm.h
index 9a1763550217..ba0aad81d3bb 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -115,6 +115,10 @@
.popsection
 .endm
 
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr1, newinstr2)   \
+   ALTERNATIVE_2 oldinstr, newinstr2, X86_FEATURE_ALWAYS,  \
+   newinstr1, feature
+
 #endif  /*  __ASSEMBLY__  */
 
 #endif /* _ASM_X86_ALTERNATIVE_ASM_H */
diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index b9749cf21ada..693991f8fe89 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -183,6 +183,9 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_REPLACEMENT(newinstr2, 2)  \
".popsection\n"
 
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr1, newinstr2)   \
+   ALTERNATIVE_2(oldinstr, newinstr2, X86_FEATURE_ALWAYS, newinstr1, 
feature)
+
 #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, 
feat3) \
OLDINSTR_3(oldinsn, 1, 2, 3)
\
".pushsection .altinstructions,\"a\"\n" 
\
@@ -214,6 +217,9 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, 
newinstr2, feature2) ::: "memory")
 
+#define alternative_ternary(oldinstr, feature, newinstr1, newinstr2)   \
+   asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr1, 
newinstr2) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *
-- 
2.26.2



[PATCH v5 06/12] x86: add new features for paravirt patching

2021-03-08 Thread Juergen Gross
In order to switch paravirt patching from special-cased custom code
sequences to ALTERNATIVE handling, some new X86_FEATURE_* flags are
needed. They make it possible to use the standard indirect pv call as
the default code and to patch in the non-Xen custom code sequence via
ALTERNATIVE patching later.

Make sure paravirt patching is performed before alternative patching.
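
The new (artificial) bits are forced at boot depending on which lock
pvops are still the native ones; the quoted hunk below is truncated,
but paravirt_set_cap() amounts to roughly this sketch:

  void __init paravirt_set_cap(void)
  {
          /* a pv unlock is in use: mark it so the call site is kept */
          if (!pv_is_native_spin_unlock())
                  setup_force_cpu_cap(X86_FEATURE_PVUNLOCK);

          if (!pv_is_native_vcpu_is_preempted())
                  setup_force_cpu_cap(X86_FEATURE_VCPUPREEMPT);
  }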

Signed-off-by: Juergen Gross 
---
V3:
- add comment (Boris Petkov)
- no negative features (Boris Petkov)
V4:
- move paravirt_set_cap() to paravirt-spinlocks.c
---
 arch/x86/include/asm/cpufeatures.h   |  2 ++
 arch/x86/include/asm/paravirt.h  | 10 ++
 arch/x86/kernel/alternative.c| 30 ++--
 arch/x86/kernel/paravirt-spinlocks.c |  9 +
 4 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index cc96e26d69f7..b440c950246d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -236,6 +236,8 @@
 #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table 
access-dirty bit */
 #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports 
the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL( 8*32+19) /* "" VMware prefers 
VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK   ( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT( 8*32+21) /* "" PV 
vcpu_is_preempted function */
 
 /* Intel-defined CPU features, CPUID level 0x0007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE   ( 9*32+ 0) /* RDFSBASE, WRFSBASE, 
RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 1e45b46fae84..8c354099d9c3 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -47,6 +47,10 @@ static inline u64 paravirt_steal_clock(int cpu)
return static_call(pv_steal_clock)(cpu);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init paravirt_set_cap(void);
+#endif
+
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void)
 {
@@ -811,5 +815,11 @@ static inline void paravirt_arch_exit_mmap(struct 
mm_struct *mm)
 {
 }
 #endif
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void paravirt_set_cap(void)
+{
+}
+#endif
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 1296a90aa5b8..ab9ad729fc5a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 int __read_mostly alternatives_patched;
 
@@ -724,6 +725,33 @@ void __init alternative_instructions(void)
 * patching.
 */
 
+   /*
+* Paravirt patching and alternative patching can be combined to
+* replace a function call with a short direct code sequence (e.g.
+* by setting a constant return value instead of doing that in an
+* external function).
+* In order to make this work the following sequence is required:
+* 1. set (artificial) features depending on used paravirt
+*functions which can later influence alternative patching
+* 2. apply paravirt patching (generally replacing an indirect
+*function call with a direct one)
+* 3. apply alternative patching (e.g. replacing a direct function
+*call with a custom code sequence)
+* Doing paravirt patching after alternative patching would clobber
+* the optimization of the custom code with a function call again.
+*/
+   paravirt_set_cap();
+
+   /*
+* First patch paravirt functions, such that we overwrite the indirect
+* call with the direct call.
+*/
+   apply_paravirt(__parainstructions, __parainstructions_end);
+
+   /*
+* Then patch alternatives, such that those paravirt calls that are in
+* alternatives can be overwritten by their immediate fragments.
+*/
apply_alternatives(__alt_instructions, __alt_instructions_end);
 
 #ifdef CONFIG_SMP
@@ -742,8 +770,6 @@ void __init alternative_instructions(void)
}
 #endif
 
-   apply_paravirt(__parainstructions, __parainstructions_end);
-
restart_nmi();
alternatives_patched = 1;
 }
diff --git a/arch/x86/kernel/paravirt-spinlocks.c 
b/arch/x86/kernel/paravirt-spinlocks.c
index 4f75d0cf6305..9e1ea99ad9df 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -32,3 +32,12 @@ bool pv_is_native_vcpu_is_preempted(void)
return pv_ops.lock.vcpu_is_preempted.func ==
__raw_callee_save___native_vcpu_is_preempted;
 }
+
+void __init paravirt_set_cap(void)
+{
+   if (!pv_is_native_spin_unloc

[PATCH v5 02/12] x86/paravirt: switch time pvops functions to use static_call()

2021-03-08 Thread Juergen Gross
The time pvops functions are the only ones left which might be
used in 32-bit mode and which return a 64-bit value.

Switch them to use the static_call() mechanism instead of pvops, as
this allows quite some simplification of the pvops implementation.
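
The x86 hypervisor side (cut off in the quoted diff) follows the same
pattern as the Arm conversion below: the native default simply returns
0 and the hypervisor-specific implementation is installed at init time,
roughly:

  /* sketch, e.g. in the Xen time code */
  static_call_update(pv_steal_clock, xen_steal_clock);

Callers keep using paravirt_steal_clock(), which now expands to a
patched direct call instead of an indirect pvops call.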

Signed-off-by: Juergen Gross 
---
V4:
- drop paravirt_time.h again
- don't move Hyper-V code (Michael Kelley)
V5:
- drop no longer needed Hyper-V modification (Michael Kelley)
- switch Arm and Arm64 to static_call(), too (kernel test robot)
---
 arch/arm/include/asm/paravirt.h   | 14 +-
 arch/arm/kernel/paravirt.c|  9 +++--
 arch/arm64/include/asm/paravirt.h | 14 +-
 arch/arm64/kernel/paravirt.c  | 13 +
 arch/x86/Kconfig  |  1 +
 arch/x86/include/asm/mshyperv.h   |  2 +-
 arch/x86/include/asm/paravirt.h   | 17 ++---
 arch/x86/include/asm/paravirt_types.h |  6 --
 arch/x86/kernel/cpu/vmware.c  |  5 +++--
 arch/x86/kernel/kvm.c |  2 +-
 arch/x86/kernel/kvmclock.c|  2 +-
 arch/x86/kernel/paravirt.c| 16 
 arch/x86/kernel/tsc.c |  2 +-
 arch/x86/xen/time.c   | 11 ---
 drivers/xen/time.c|  3 ++-
 15 files changed, 66 insertions(+), 51 deletions(-)

diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
index cdbf02d9c1d4..95d5b0d625cd 100644
--- a/arch/arm/include/asm/paravirt.h
+++ b/arch/arm/include/asm/paravirt.h
@@ -3,23 +3,19 @@
 #define _ASM_ARM_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include 
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-   unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-   struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-   return pv_ops.time.steal_clock(cpu);
+   return static_call(pv_steal_clock)(cpu);
 }
 #endif
 
diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c
index 4cfed91fe256..7dd9806369fb 100644
--- a/arch/arm/kernel/paravirt.c
+++ b/arch/arm/kernel/paravirt.c
@@ -9,10 +9,15 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
diff --git a/arch/arm64/include/asm/paravirt.h 
b/arch/arm64/include/asm/paravirt.h
index cf3a0fd7c1a7..9aa193e0e8f2 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -3,23 +3,19 @@
 #define _ASM_ARM64_PARAVIRT_H
 
 #ifdef CONFIG_PARAVIRT
+#include 
+
 struct static_key;
 extern struct static_key paravirt_steal_enabled;
 extern struct static_key paravirt_steal_rq_enabled;
 
-struct pv_time_ops {
-   unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
-   struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
 
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
-   return pv_ops.time.steal_clock(cpu);
+   return static_call(pv_steal_clock)(cpu);
 }
 
 int __init pv_time_init(void);
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index c07d7a034941..75fed4460407 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -26,8 +27,12 @@
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+   return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
struct pvclock_vcpu_stolen_time *kaddr;
@@ -45,7 +50,7 @@ static int __init parse_no_stealacc(char *arg)
 early_param("no-steal-acc", parse_no_stealacc);
 
 /* return stolen time in ns by asking the hypervisor */
-static u64 pv_steal_clock(int cpu)
+static u64 para_steal_clock(int cpu)
 {
struct pv_time_stolen_time_region *reg;
 
@@ -150,7 +155,7 @@ int __init pv_time_init(void)
if (ret)
return ret;
 
-   pv_ops.time.steal_clock = pv_steal_clock;
+   static_call_update(pv_steal_clock, para_steal_clock);
 
static_key_slow_inc(&paravirt_steal_enabled);
if (steal_acc)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2792879d398e..107acc403b3b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x

[PATCH v5 04/12] x86/alternative: support not-feature

2021-03-08 Thread Juergen Gross
Add support for alternative patching for the case that a feature is
not present on the current CPU.

For this purpose add a flag byte to struct alt_instr adding the
information that the inverted feature should be used.

For users of ALTERNATIVE() and friends an inverted feature is specified
by negating it, e.g.:

ALTERNATIVE(old, new, ~feature)

This requires adapting the objtool information for struct alt_instr.
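
On the consumer side the flag byte just inverts the feature test in
apply_alternatives(); the (truncated) hunk boils down to roughly:

  /* skip the entry if it doesn't apply: feature absent for a normal
   * entry, or feature present for an inverted ("not-feature") one */
  if (!boot_cpu_has(a->cpuid) == !(a->flag & ALTINSTR_FLAG_INV))
          continue;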

Signed-off-by: Juergen Gross 
---
V5:
- split off from next patch
- reworked to use flag byte (Boris Petkov)
---
 arch/x86/include/asm/alternative-asm.h| 6 ++
 arch/x86/include/asm/alternative.h| 8 
 arch/x86/include/asm/cpufeature.h | 2 ++
 arch/x86/kernel/alternative.c | 5 +++--
 tools/objtool/arch/x86/include/arch/special.h | 6 +++---
 5 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/alternative-asm.h 
b/arch/x86/include/asm/alternative-asm.h
index 464034db299f..9a1763550217 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -39,7 +39,13 @@
 .macro altinstruction_entry orig alt feature orig_len alt_len pad_len
.long \orig - .
.long \alt - .
+   .iflt \feature
+   .word ~(\feature)
+   .byte 1
+   .else
.word \feature
+   .byte 0
+   .endif
.byte \orig_len
.byte \alt_len
.byte \pad_len
diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 5753fb2ac489..b9749cf21ada 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -59,6 +59,8 @@ struct alt_instr {
s32 instr_offset;   /* original instruction */
s32 repl_offset;/* offset to replacement instruction */
u16 cpuid;  /* cpuid bit set for replacement */
+   u8  flag;   /* flag byte */
+#define ALTINSTR_FLAG_INV  0x01
u8  instrlen;   /* length of original instruction */
u8  replacementlen; /* length of new instruction */
u8  padlen; /* length of build-time padding */
@@ -145,7 +147,13 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 #define ALTINSTR_ENTRY(feature, num) \
" .long 661b - .\n" /* label   */ \
" .long " b_replacement(num)"f - .\n"   /* new instruction */ \
+   " .iflt " __stringify(feature) "\n" /* inverted?   */ \
+   " .word ~(" __stringify(feature) ")\n"  /* feature bit */ \
+   " .byte " __stringify(ALTINSTR_FLAG_INV) "\n"   /* flag byte   */ \
+   " .else\n"\
" .word " __stringify(feature) "\n" /* feature bit */ \
+   " .byte 0\n"/* flag byte   */ \
+   " .endif\n"   \
" .byte " alt_total_slen "\n"   /* source len  */ \
" .byte " alt_rlen(num) "\n"/* replacement len */ \
" .byte " alt_pad_len "\n"  /* pad len */
diff --git a/arch/x86/include/asm/cpufeature.h 
b/arch/x86/include/asm/cpufeature.h
index 1728d4ce5730..f060d3186ee4 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -184,6 +184,7 @@ static __always_inline bool _static_cpu_has(u16 bit)
 " .long 1b - .\n"  /* src offset */
 " .long 4f - .\n"  /* repl offset */
 " .word %P[always]\n"  /* always replace */
+" .byte 0\n"   /* flag byte */
 " .byte 3b - 1b\n" /* src len */
 " .byte 5f - 4f\n" /* repl len */
 " .byte 3b - 2b\n" /* pad len */
@@ -196,6 +197,7 @@ static __always_inline bool _static_cpu_has(u16 bit)
 " .long 1b - .\n"  /* src offset */
 " .long 0\n"   /* no replacement */
 " .word %P[feature]\n" /* feature bit */
+" .byte 0\n"   /* flag byte */
 " .byte 3b - 1b\n" /* src len */
 " .byte 0\n"   /* repl len */
 " .byte 0\n"   /* pad len */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 8d778e46725d..1296a90aa5b8 100644
--- a/arc

[PATCH v5 01/12] staticcall: move struct static_call_key definition to static_call_types.h

2021-03-08 Thread Juergen Gross
Having the definition of static_call() in static_call_types.h makes
no sense as long as struct static_call_key isn't defined there, because
the generic implementation of static_call() references this structure.

So move the definition of struct static_call_key to static_call_types.h.

Signed-off-by: Juergen Gross 
---
V5:
- new patch
---
 include/linux/static_call.h | 18 --
 include/linux/static_call_types.h   | 18 ++
 tools/include/linux/static_call_types.h | 18 ++
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 85ecc789f4ff..76b881259144 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -128,16 +128,6 @@ struct static_call_mod {
struct static_call_site *sites;
 };
 
-struct static_call_key {
-   void *func;
-   union {
-   /* bit 0: 0 = mods, 1 = sites */
-   unsigned long type;
-   struct static_call_mod *mods;
-   struct static_call_site *sites;
-   };
-};
-
 /* For finding the key associated with a trampoline */
 struct static_call_tramp_key {
s32 tramp;
@@ -187,10 +177,6 @@ extern long __static_call_return0(void);
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-   void *func;
-};
-
 #define __DEFINE_STATIC_CALL(name, _func, _func_init)  \
DECLARE_STATIC_CALL(name, _func);   \
struct static_call_key STATIC_CALL_KEY(name) = {\
@@ -243,10 +229,6 @@ static inline long __static_call_return0(void)
 
 static inline int static_call_init(void) { return 0; }
 
-struct static_call_key {
-   void *func;
-};
-
 static inline long __static_call_return0(void)
 {
return 0;
diff --git a/include/linux/static_call_types.h 
b/include/linux/static_call_types.h
index ae5662d368b9..5a00b8b2cf9f 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
__raw_static_call(name);\
 })
 
+struct static_call_key {
+   void *func;
+   union {
+   /* bit 0: 0 = mods, 1 = sites */
+   unsigned long type;
+   struct static_call_mod *mods;
+   struct static_call_site *sites;
+   };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)__raw_static_call(name)
 
+struct static_call_key {
+   void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+   void *func;
+};
+
 #define static_call(name)  \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 
diff --git a/tools/include/linux/static_call_types.h 
b/tools/include/linux/static_call_types.h
index ae5662d368b9..5a00b8b2cf9f 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
__raw_static_call(name);\
 })
 
+struct static_call_key {
+   void *func;
+   union {
+   /* bit 0: 0 = mods, 1 = sites */
+   unsigned long type;
+   struct static_call_mod *mods;
+   struct static_call_site *sites;
+   };
+};
+
 #else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #define __STATIC_CALL_ADDRESSABLE(name)
 #define __static_call(name)__raw_static_call(name)
 
+struct static_call_key {
+   void *func;
+};
+
 #endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
 
 #ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
 
 #else
 
+struct static_call_key {
+   void *func;
+};
+
 #define static_call(name)  \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
 
-- 
2.26.2



[PATCH v5 03/12] x86/alternative: drop feature parameter from ALTINSTR_REPLACEMENT()

2021-03-08 Thread Juergen Gross
The macro ALTINSTR_REPLACEMENT() doesn't make use of the feature
parameter, so drop it.

Signed-off-by: Juergen Gross 
---
V5:
- new patch
---
 arch/x86/include/asm/alternative.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 13adca37c99a..5753fb2ac489 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
" .byte " alt_rlen(num) "\n"/* replacement len */ \
" .byte " alt_pad_len "\n"  /* pad len */
 
-#define ALTINSTR_REPLACEMENT(newinstr, feature, num)   /* replacement */   
\
+#define ALTINSTR_REPLACEMENT(newinstr, num)/* replacement */   
\
"# ALT: replacement " #num "\n" 
\
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
 
@@ -161,7 +161,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_ENTRY(feature, 1)  \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n"  \
-   ALTINSTR_REPLACEMENT(newinstr, feature, 1)  \
+   ALTINSTR_REPLACEMENT(newinstr, 1)   \
".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
@@ -171,8 +171,8 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_ENTRY(feature2, 2) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n"  \
-   ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)\
-   ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)\
+   ALTINSTR_REPLACEMENT(newinstr1, 1)  \
+   ALTINSTR_REPLACEMENT(newinstr2, 2)  \
".popsection\n"
 
 #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, 
feat3) \
@@ -183,9 +183,9 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
ALTINSTR_ENTRY(feat3, 3)
\
".popsection\n" 
\
".pushsection .altinstr_replacement, \"ax\"\n"  
\
-   ALTINSTR_REPLACEMENT(newinsn1, feat1, 1)
\
-   ALTINSTR_REPLACEMENT(newinsn2, feat2, 2)
\
-   ALTINSTR_REPLACEMENT(newinsn3, feat3, 3)
\
+   ALTINSTR_REPLACEMENT(newinsn1, 1)   
\
+   ALTINSTR_REPLACEMENT(newinsn2, 2)   
\
+   ALTINSTR_REPLACEMENT(newinsn3, 3)   
\
".popsection\n"
 
 /*
-- 
2.26.2



[PATCH v4 0/3] xen/events: bug fixes and some diagnostic aids

2021-03-06 Thread Juergen Gross
Those are fixes for XSA-332.

The rest of the V3 patches have been applied already. There is one
additional fix in patch 2 which addresses network outages when a guest
is doing reboot loops.

Juergen Gross (3):
  xen/events: reset affinity of 2-level event when tearing it down
  xen/events: don't unmask an event channel when an eoi is pending
  xen/events: avoid handling the same event on two cpus at the same time

 drivers/xen/events/events_2l.c   |  22 +++--
 drivers/xen/events/events_base.c | 130 ---
 drivers/xen/events/events_fifo.c |   7 --
 drivers/xen/events/events_internal.h |  14 +--
 4 files changed, 123 insertions(+), 50 deletions(-)

-- 
2.26.2



[PATCH v4 2/3] xen/events: don't unmask an event channel when an eoi is pending

2021-03-06 Thread Juergen Gross
An event channel should be kept masked when an eoi is pending for it.
When being migrated to another cpu it might be unmasked, though.

In order to avoid this, keep three different flags for each event channel
to be able to distinguish "normal" masking/unmasking from eoi related
masking/unmasking and temporary masking. The event channel should only
be able to generate an interrupt if all flags are cleared.
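For illustration (not part of the patch): a minimal user-space model of the
reference-counted masking this introduces. do_mask()/do_unmask() mirror the
helpers in the diff below; the main() sequence and the printf placeholders
are invented for the sketch.

/*
 * Illustration only: masking is reference-counted by reason bits, and the
 * channel is really unmasked only once every reason has been dropped.
 */
#include <stdio.h>

#define EVT_MASK_REASON_EXPLICIT	0x01
#define EVT_MASK_REASON_TEMPORARY	0x02
#define EVT_MASK_REASON_EOI_PENDING	0x04

static unsigned char mask_reason;

static void do_mask(unsigned char reason)
{
	if (!mask_reason)
		printf("mask_evtchn()\n");	/* first reason masks the port */
	mask_reason |= reason;
}

static void do_unmask(unsigned char reason)
{
	mask_reason &= ~reason;
	if (!mask_reason)
		printf("unmask_evtchn()\n");	/* last reason dropped: unmask */
}

int main(void)
{
	do_mask(EVT_MASK_REASON_EOI_PENDING);	/* interrupt delivered, EOI pending */
	do_mask(EVT_MASK_REASON_TEMPORARY);	/* e.g. migration masks temporarily */
	do_unmask(EVT_MASK_REASON_TEMPORARY);	/* migration done: port stays masked */
	do_unmask(EVT_MASK_REASON_EOI_PENDING);	/* EOI done: port is unmasked again */
	return 0;
}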

Cc: sta...@vger.kernel.org
Fixes: 54c9de89895e0a36047 ("xen/events: add a new late EOI evtchn framework")
Reported-by: Julien Grall 
Signed-off-by: Juergen Gross 
Reviewed-by: Julien Grall 
---
V2:
- introduce a lock around masking/unmasking
- merge patch 3 into this one (Jan Beulich)
V4:
- don't set eoi masking flag in lateeoi_mask_ack_dynirq()
---
 drivers/xen/events/events_2l.c   |   7 --
 drivers/xen/events/events_base.c | 101 +--
 drivers/xen/events/events_fifo.c |   7 --
 drivers/xen/events/events_internal.h |   6 --
 4 files changed, 80 insertions(+), 41 deletions(-)

diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index a7f413c5c190..b8f2f971c2f0 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -77,12 +77,6 @@ static bool evtchn_2l_is_pending(evtchn_port_t port)
return sync_test_bit(port, BM(&s->evtchn_pending[0]));
 }
 
-static bool evtchn_2l_test_and_set_mask(evtchn_port_t port)
-{
-   struct shared_info *s = HYPERVISOR_shared_info;
-   return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
-}
-
 static void evtchn_2l_mask(evtchn_port_t port)
 {
struct shared_info *s = HYPERVISOR_shared_info;
@@ -376,7 +370,6 @@ static const struct evtchn_ops evtchn_ops_2l = {
.clear_pending = evtchn_2l_clear_pending,
.set_pending   = evtchn_2l_set_pending,
.is_pending= evtchn_2l_is_pending,
-   .test_and_set_mask = evtchn_2l_test_and_set_mask,
.mask  = evtchn_2l_mask,
.unmask= evtchn_2l_unmask,
.handle_events = evtchn_2l_handle_events,
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 7e23808892a7..b27c012c86b5 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -98,13 +98,18 @@ struct irq_info {
short refcnt;
u8 spurious_cnt;
u8 is_accounted;
-   enum xen_irq_type type; /* type */
+   short type; /* type: IRQT_* */
+   u8 mask_reason; /* Why is event channel masked */
+#define EVT_MASK_REASON_EXPLICIT   0x01
+#define EVT_MASK_REASON_TEMPORARY  0x02
+#define EVT_MASK_REASON_EOI_PENDING0x04
unsigned irq;
evtchn_port_t evtchn;   /* event channel */
unsigned short cpu; /* cpu bound */
unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
u64 eoi_time;   /* Time in jiffies when to EOI. */
+   spinlock_t lock;
 
union {
unsigned short virq;
@@ -154,6 +159,7 @@ static DEFINE_RWLOCK(evtchn_rwlock);
  *   evtchn_rwlock
  * IRQ-desc lock
  *   percpu eoi_list_lock
+ * irq_info->lock
  */
 
 static LIST_HEAD(xen_irq_list_head);
@@ -304,6 +310,8 @@ static int xen_irq_info_common_setup(struct irq_info *info,
info->irq = irq;
info->evtchn = evtchn;
info->cpu = cpu;
+   info->mask_reason = EVT_MASK_REASON_EXPLICIT;
+   spin_lock_init(&info->lock);
 
ret = set_evtchn_to_irq(evtchn, irq);
if (ret < 0)
@@ -459,6 +467,34 @@ unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
return ret;
 }
 
+static void do_mask(struct irq_info *info, u8 reason)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(&info->lock, flags);
+
+   if (!info->mask_reason)
+   mask_evtchn(info->evtchn);
+
+   info->mask_reason |= reason;
+
+   spin_unlock_irqrestore(&info->lock, flags);
+}
+
+static void do_unmask(struct irq_info *info, u8 reason)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(&info->lock, flags);
+
+   info->mask_reason &= ~reason;
+
+   if (!info->mask_reason)
+   unmask_evtchn(info->evtchn);
+
+   spin_unlock_irqrestore(&info->lock, flags);
+}
+
 #ifdef CONFIG_X86
 static bool pirq_check_eoi_map(unsigned irq)
 {
@@ -605,7 +641,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, 
bool spurious)
}
 
info->eoi_time = 0;
-   unmask_evtchn(evtchn);
+   do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
 }
 
 static void xen_irq_lateeoi_worker(struct work_struct *work)
@@ -850,7 +886,8 @@ static unsigned int __startup_pirq(unsigned int irq)
goto err;
 
 out:
-   unmask_evtchn(evtchn);
+   do_unmask(info, EVT_MASK_REASON_EXPLICIT);
+
eoi_pirq(irq_get_irq_data(irq));
 
 

[PATCH v4 1/3] xen/events: reset affinity of 2-level event when tearing it down

2021-03-06 Thread Juergen Gross
When creating a new event channel with 2-level events the affinity
needs to be reset initially in order to avoid using an old affinity
from earlier usage of the event channel port. So when tearing an event
channel down, reset all affinity bits.

The same applies to the affinity when onlining a vcpu: all old
affinity settings for this vcpu must be reset. As percpu events get
initialized before the percpu event channel hook is called,
resetting of the affinities happens after offlining a vcpu (this is
working, as initial percpu memory is zeroed out).
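For illustration (not part of the patch): a toy model of the per-CPU binding
bitmap and why a stale bit matters. The real per-CPU masks are the
cpu_evtchn_mask bitmaps and the clearing added on teardown is
evtchn_2l_remove(); the names cpu_evtchn_bit, bind_to_cpu and remove_port
below are invented for the sketch.

/*
 * Toy model, illustration only.  If port 5 was bound to CPU 2 and torn
 * down without clearing that bit, a later user of the same port bound to
 * CPU 1 leaves two CPUs believing the port is theirs.
 */
#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS   4
#define NR_PORTS 64

static bool cpu_evtchn_bit[NR_CPUS][NR_PORTS];

static void bind_to_cpu(int port, int cpu)
{
	cpu_evtchn_bit[cpu][port] = true;
}

static void remove_port(int port, int cpu)	/* the clearing the patch adds on teardown */
{
	cpu_evtchn_bit[cpu][port] = false;
}

int main(void)
{
	bind_to_cpu(5, 2);
	/* teardown without remove_port(5, 2), port later reused: */
	bind_to_cpu(5, 1);

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_evtchn_bit[cpu][5])
			printf("CPU%d considers port 5 bound to it\n", cpu);
	return 0;
}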

Cc: sta...@vger.kernel.org
Reported-by: Julien Grall 
Signed-off-by: Juergen Gross 
Reviewed-by: Julien Grall 
---
V2:
- reset affinity when tearing down the event (Julien Grall)
---
 drivers/xen/events/events_2l.c   | 15 +++
 drivers/xen/events/events_base.c |  1 +
 drivers/xen/events/events_internal.h |  8 
 3 files changed, 24 insertions(+)

diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index da87f3a1e351..a7f413c5c190 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -47,6 +47,11 @@ static unsigned evtchn_2l_max_channels(void)
return EVTCHN_2L_NR_CHANNELS;
 }
 
+static void evtchn_2l_remove(evtchn_port_t evtchn, unsigned int cpu)
+{
+   clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
+}
+
 static void evtchn_2l_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
  unsigned int old_cpu)
 {
@@ -355,9 +360,18 @@ static void evtchn_2l_resume(void)
EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
 }
 
+static int evtchn_2l_percpu_deinit(unsigned int cpu)
+{
+   memset(per_cpu(cpu_evtchn_mask, cpu), 0, sizeof(xen_ulong_t) *
+   EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
+
+   return 0;
+}
+
 static const struct evtchn_ops evtchn_ops_2l = {
.max_channels  = evtchn_2l_max_channels,
.nr_channels   = evtchn_2l_max_channels,
+   .remove= evtchn_2l_remove,
.bind_to_cpu   = evtchn_2l_bind_to_cpu,
.clear_pending = evtchn_2l_clear_pending,
.set_pending   = evtchn_2l_set_pending,
@@ -367,6 +381,7 @@ static const struct evtchn_ops evtchn_ops_2l = {
.unmask= evtchn_2l_unmask,
.handle_events = evtchn_2l_handle_events,
.resume= evtchn_2l_resume,
+   .percpu_deinit = evtchn_2l_percpu_deinit,
 };
 
 void __init xen_evtchn_2l_init(void)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index adb7260e94b2..7e23808892a7 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -377,6 +377,7 @@ static int xen_irq_info_pirq_setup(unsigned irq,
 static void xen_irq_info_cleanup(struct irq_info *info)
 {
set_evtchn_to_irq(info->evtchn, -1);
+   xen_evtchn_port_remove(info->evtchn, info->cpu);
info->evtchn = 0;
channels_on_cpu_dec(info);
 }
diff --git a/drivers/xen/events/events_internal.h 
b/drivers/xen/events/events_internal.h
index 0a97c0549db7..18a4090d0709 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -14,6 +14,7 @@ struct evtchn_ops {
unsigned (*nr_channels)(void);
 
int (*setup)(evtchn_port_t port);
+   void (*remove)(evtchn_port_t port, unsigned int cpu);
void (*bind_to_cpu)(evtchn_port_t evtchn, unsigned int cpu,
unsigned int old_cpu);
 
@@ -54,6 +55,13 @@ static inline int xen_evtchn_port_setup(evtchn_port_t evtchn)
return 0;
 }
 
+static inline void xen_evtchn_port_remove(evtchn_port_t evtchn,
+ unsigned int cpu)
+{
+   if (evtchn_ops->remove)
+   evtchn_ops->remove(evtchn, cpu);
+}
+
 static inline void xen_evtchn_port_bind_to_cpu(evtchn_port_t evtchn,
   unsigned int cpu,
   unsigned int old_cpu)
-- 
2.26.2



[PATCH v4 3/3] xen/events: avoid handling the same event on two cpus at the same time

2021-03-06 Thread Juergen Gross
When changing the cpu affinity of an event it can happen today that
(with some unlucky timing) the same event will be handled on the old
and the new cpu at the same time.

Avoid that by adding an "event active" flag to the per-event data and
call the handler only if this flag isn't set.
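For illustration (not part of the patch): the exclusion boils down to an
acquire test-and-set on handler entry and a release store on handler exit.
A rough C11-atomics analogue of the xchg_acquire()/smp_store_release()
pairing used below; event_handler_enter() is an invented name, while
event_handler_exit() mirrors the helper in the diff.

/*
 * Illustration only; the kernel operates on info->is_active with
 * xchg_acquire()/smp_store_release(), this models the same handshake.
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_uchar is_active;

static bool event_handler_enter(void)
{
	/* Claim the event; if another CPU already runs the handler, back off. */
	return atomic_exchange_explicit(&is_active, 1, memory_order_acquire) == 0;
}

static void event_handler_exit(void)
{
	/* Make all handler effects visible before letting the next CPU in. */
	atomic_store_explicit(&is_active, 0, memory_order_release);
}

int main(void)
{
	if (event_handler_enter()) {
		/* ... handle the event ... */
		event_handler_exit();
	}
	return 0;
}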

Cc: sta...@vger.kernel.org
Reported-by: Julien Grall 
Signed-off-by: Juergen Gross 
Reviewed-by: Julien Grall 
---
V2:
- new patch
V3:
- use common helper for end of handler action (Julien Grall)
- move setting is_active to 0 for lateeoi (Boris Ostrovsky)
---
 drivers/xen/events/events_base.c | 32 +---
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index b27c012c86b5..8236e2364eeb 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -103,6 +103,7 @@ struct irq_info {
 #define EVT_MASK_REASON_EXPLICIT   0x01
 #define EVT_MASK_REASON_TEMPORARY  0x02
 #define EVT_MASK_REASON_EOI_PENDING0x04
+   u8 is_active;   /* Is event just being handled? */
unsigned irq;
evtchn_port_t evtchn;   /* event channel */
unsigned short cpu; /* cpu bound */
@@ -810,6 +811,12 @@ static void xen_evtchn_close(evtchn_port_t port)
BUG();
 }
 
+static void event_handler_exit(struct irq_info *info)
+{
+   smp_store_release(&info->is_active, 0);
+   clear_evtchn(info->evtchn);
+}
+
 static void pirq_query_unmask(int irq)
 {
struct physdev_irq_status_query irq_status;
@@ -828,14 +835,15 @@ static void pirq_query_unmask(int irq)
 
 static void eoi_pirq(struct irq_data *data)
 {
-   evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+   struct irq_info *info = info_for_irq(data->irq);
+   evtchn_port_t evtchn = info ? info->evtchn : 0;
struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
int rc = 0;
 
if (!VALID_EVTCHN(evtchn))
return;
 
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
 
if (pirq_needs_eoi(data->irq)) {
rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
@@ -1666,6 +1674,8 @@ void handle_irq_for_port(evtchn_port_t port, struct 
evtchn_loop_ctrl *ctrl)
}
 
info = info_for_irq(irq);
+   if (xchg_acquire(&info->is_active, 1))
+   return;
 
dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
if (dev)
@@ -1853,12 +1863,11 @@ static void disable_dynirq(struct irq_data *data)
 
 static void ack_dynirq(struct irq_data *data)
 {
-   evtchn_port_t evtchn = evtchn_from_irq(data->irq);
-
-   if (!VALID_EVTCHN(evtchn))
-   return;
+   struct irq_info *info = info_for_irq(data->irq);
+   evtchn_port_t evtchn = info ? info->evtchn : 0;
 
-   clear_evtchn(evtchn);
+   if (VALID_EVTCHN(evtchn))
+   event_handler_exit(info);
 }
 
 static void mask_ack_dynirq(struct irq_data *data)
@@ -1874,7 +1883,7 @@ static void lateeoi_ack_dynirq(struct irq_data *data)
 
if (VALID_EVTCHN(evtchn)) {
do_mask(info, EVT_MASK_REASON_EOI_PENDING);
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
}
 }
 
@@ -1885,7 +1894,7 @@ static void lateeoi_mask_ack_dynirq(struct irq_data *data)
 
if (VALID_EVTCHN(evtchn)) {
do_mask(info, EVT_MASK_REASON_EXPLICIT);
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
}
 }
 
@@ -1998,10 +2007,11 @@ static void restore_cpu_ipis(unsigned int cpu)
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq)
 {
-   evtchn_port_t evtchn = evtchn_from_irq(irq);
+   struct irq_info *info = info_for_irq(irq);
+   evtchn_port_t evtchn = info ? info->evtchn : 0;
 
if (VALID_EVTCHN(evtchn))
-   clear_evtchn(evtchn);
+   event_handler_exit(info);
 }
 EXPORT_SYMBOL(xen_clear_irq_pending);
 void xen_set_irq_pending(int irq)
-- 
2.26.2



[tip: locking/core] locking/csd_lock: Prepare more CSD lock debugging

2021-03-06 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the locking/core branch of tip:

Commit-ID: de7b09ef658d637eed0584eaba30884e409aef31
Gitweb:
https://git.kernel.org/tip/de7b09ef658d637eed0584eaba30884e409aef31
Author:Juergen Gross 
AuthorDate:Mon, 01 Mar 2021 11:13:35 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:49:48 +01:00

locking/csd_lock: Prepare more CSD lock debugging

In order to be able to easily add more CSD lock debugging data to
struct call_function_data->csd move the call_single_data_t element
into a sub-structure.

Signed-off-by: Juergen Gross 
Signed-off-by: Ingo Molnar 
Link: https://lore.kernel.org/r/20210301101336.7797-3-jgr...@suse.com
---
 kernel/smp.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index d5f0b21..6d7e6db 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -31,8 +31,12 @@
 
 #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
 
+struct cfd_percpu {
+   call_single_data_t  csd;
+};
+
 struct call_function_data {
-   call_single_data_t  __percpu *csd;
+   struct cfd_percpu   __percpu *pcpu;
cpumask_var_t   cpumask;
cpumask_var_t   cpumask_ipi;
 };
@@ -55,8 +59,8 @@ int smpcfd_prepare_cpu(unsigned int cpu)
free_cpumask_var(cfd->cpumask);
return -ENOMEM;
}
-   cfd->csd = alloc_percpu(call_single_data_t);
-   if (!cfd->csd) {
+   cfd->pcpu = alloc_percpu(struct cfd_percpu);
+   if (!cfd->pcpu) {
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
return -ENOMEM;
@@ -71,7 +75,7 @@ int smpcfd_dead_cpu(unsigned int cpu)
 
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
-   free_percpu(cfd->csd);
+   free_percpu(cfd->pcpu);
return 0;
 }
 
@@ -694,7 +698,7 @@ static void smp_call_function_many_cond(const struct 
cpumask *mask,
 
cpumask_clear(cfd->cpumask_ipi);
for_each_cpu(cpu, cfd->cpumask) {
-   call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
+   call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
 
if (cond_func && !cond_func(cpu, info))
continue;
@@ -719,7 +723,7 @@ static void smp_call_function_many_cond(const struct 
cpumask *mask,
for_each_cpu(cpu, cfd->cpumask) {
call_single_data_t *csd;
 
-   csd = per_cpu_ptr(cfd->csd, cpu);
+   csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
csd_lock_wait(csd);
}
}


[tip: locking/core] locking/csd_lock: Add more data to CSD lock debugging

2021-03-06 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the locking/core branch of tip:

Commit-ID: a5aabace5fb8abf2adcfcf0fe54c089b20d71755
Gitweb:
https://git.kernel.org/tip/a5aabace5fb8abf2adcfcf0fe54c089b20d71755
Author:Juergen Gross 
AuthorDate:Mon, 01 Mar 2021 11:13:36 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:49:48 +01:00

locking/csd_lock: Add more data to CSD lock debugging

In order to help identifying problems with IPI handling and remote
function execution add some more data to IPI debugging code.

There have been multiple reports of CPUs looping long times (many
seconds) in smp_call_function_many() waiting for another CPU executing
a function like tlb flushing. Most of these reports have been for
cases where the kernel was running as a guest on top of KVM or Xen
(there are rumours of that happening under VMWare, too, and even on
bare metal).

Finding the root cause hasn't been successful yet, even after more than
2 years of chasing this bug by different developers.

Commit:

  35feb60474bf4f7 ("kernel/smp: Provide CSD lock timeout diagnostics")

tried to address this by adding some debug code and by issuing another
IPI when a hang was detected. This helped mitigating the problem
(the repeated IPI unlocks the hang), but the root cause is still unknown.

Current available data suggests that either an IPI wasn't sent when it
should have been, or that the IPI didn't result in the target CPU
executing the queued function (due to the IPI not reaching the CPU,
the IPI handler not being called, or the handler not seeing the queued
request).

Try to add more diagnostic data by introducing a global atomic counter
which is being incremented when doing critical operations (before and
after queueing a new request, when sending an IPI, and when dequeueing
a request). The counter value is stored in percpu variables which can
be printed out when a hang is detected.

The data of the last event (consisting of sequence counter, source
CPU, target CPU, and event type) is stored in a global variable. When
a new event is to be traced, the data of the last event is stored in
the event related percpu location and the global data is updated with
the new event's data. This allows to track two events in one data
location: one by the value of the event data (the event before the
current one), and one by the location itself (the current event).

A typical printout with a detected hang will look like this:

csd: Detected non-responsive CSD lock (#1) on CPU#1, waiting 53 ns for 
CPU#06 scf_handler_1+0x0/0x50(0xa2a881bb1410).
csd: CSD lock (#1) handling prior 
scf_handler_1+0x0/0x50(0xa2a8813823c0) request.
csd: cnt(8cc): -> dequeue (src cpu 0 == empty)
csd: cnt(8cd): ->0006 idle
csd: cnt(0003668): 0001->0006 queue
csd: cnt(0003669): 0001->0006 ipi
csd: cnt(0003e0f): 0007->000a queue
csd: cnt(0003e10): 0001-> ping
csd: cnt(0003e71): 0003-> ping
csd: cnt(0003e72): ->0006 gotipi
csd: cnt(0003e73): ->0006 handle
csd: cnt(0003e74): ->0006 dequeue (src cpu 0 == empty)
csd: cnt(0003e7f): 0004->0006 ping
csd: cnt(0003e80): 0001-> pinged
csd: cnt(0003eb2): 0005->0001 noipi
csd: cnt(0003eb3): 0001->0006 queue
csd: cnt(0003eb4): 0001->0006 noipi
csd: cnt now: 0003f00

The idea is to print only relevant entries. Those are all events which
are associated with the hang (so sender side events for the source CPU
of the hanging request, and receiver side events for the target CPU),
and the related events just before those (for adding data needed to
identify a possible race). Printing all available data would be
possible, but this would add large amounts of data printed on larger
configurations.
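For illustration (the kernel/smp.c hunk is truncated in this archive copy):
a simplified sketch of the recording scheme described above. The names
csd_trace, csd_prev and NR_CPUS are invented for the sketch; the per-event
slots, packed encoding and serialization of the real code are omitted.

/*
 * Simplified sketch: a global sequence counter, a global "last event",
 * and per-CPU shadow slots, so each location effectively records two
 * events -- the previous one by value, the current one by being the
 * slot written last.
 */
#include <stdatomic.h>
#include <stdint.h>

#define NR_CPUS 8

enum csd_ev { CSD_QUEUE, CSD_IPI, CSD_PING, CSD_PINGED, CSD_GOTIPI,
	      CSD_HANDLE, CSD_DEQUEUE, CSD_IDLE };

struct csd_event {
	uint64_t cnt;	/* global sequence number at the time of the event */
	int src, dst;	/* source and target CPU of the request */
	enum csd_ev type;
};

static _Atomic uint64_t csd_seq;
static struct csd_event csd_last;		/* most recent event, globally */
static struct csd_event csd_prev[NR_CPUS];	/* event preceding the one last traced here */

static void csd_trace(int this_cpu, int src, int dst, enum csd_ev type)
{
	uint64_t cnt = atomic_fetch_add(&csd_seq, 1) + 1;

	csd_prev[this_cpu] = csd_last;	/* keep the previous event for the printout */
	csd_last = (struct csd_event){ .cnt = cnt, .src = src, .dst = dst, .type = type };
}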

Signed-off-by: Juergen Gross 
[ Minor readability edits. Breaks col80 but is far more readable. ]
Signed-off-by: Ingo Molnar 
Tested-by: Paul E. McKenney 
Link: https://lore.kernel.org/r/20210301101336.7797-4-jgr...@suse.com
---
 Documentation/admin-guide/kernel-parameters.txt |   4 +-
 kernel/smp.c| 226 ++-
 2 files changed, 226 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 98dbffa..1fe9d38 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -789,6 +789,10 @@
printed to the console in case a hanging CPU is
detected, and that CPU is pinged again in order to try
to resolve the hang situation.
+   0: disable csdlock debugging (default)
+   1: enable basic csdlock debugging (minor impact)
+   

[tip: locking/core] locking/csd_lock: Add boot parameter for controlling CSD lock debugging

2021-03-06 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the locking/core branch of tip:

Commit-ID: 8d0968cc6b8ffd8496c2ebffdfdc801f949a85e5
Gitweb:
https://git.kernel.org/tip/8d0968cc6b8ffd8496c2ebffdfdc801f949a85e5
Author:Juergen Gross 
AuthorDate:Mon, 01 Mar 2021 11:13:34 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:49:48 +01:00

locking/csd_lock: Add boot parameter for controlling CSD lock debugging

Currently CSD lock debugging can be switched on and off via a kernel
config option only. Unfortunately there is at least one problem with
CSD lock handling pending for about 2 years now, which has been seen
in different environments (mostly when running virtualized under KVM
or Xen, at least once on bare metal). Multiple attempts to catch this
issue have finally led to introduction of CSD lock debug code, but
this code is not in use in most distros as it has some impact on
performance.

In order to be able to ship kernels with CONFIG_CSD_LOCK_WAIT_DEBUG
enabled even for production use, add a boot parameter for switching
the debug functionality on. This will reduce any performance impact
of the debug coding to a bare minimum when not being used.

Signed-off-by: Juergen Gross 
[ Minor edits. ]
Signed-off-by: Ingo Molnar 
Link: https://lore.kernel.org/r/20210301101336.7797-2-jgr...@suse.com
---
 Documentation/admin-guide/kernel-parameters.txt |  6 +++-
 kernel/smp.c| 38 ++--
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 0454572..98dbffa 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -784,6 +784,12 @@
cs89x0_media=   [HW,NET]
Format: { rj45 | aui | bnc }
 
+   csdlock_debug=  [KNL] Enable debug add-ons of cross-CPU function call
+   handling. When switched on, additional debug data is
+   printed to the console in case a hanging CPU is
+   detected, and that CPU is pinged again in order to try
+   to resolve the hang situation.
+
dasd=   [HW,NET]
See header of drivers/s390/block/dasd_devmap.c.
 
diff --git a/kernel/smp.c b/kernel/smp.c
index aeb0adf..d5f0b21 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "smpboot.h"
 #include "sched/smp.h"
@@ -102,6 +103,20 @@ void __init call_function_init(void)
 
 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
 
+static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled);
+
+static int __init csdlock_debug(char *str)
+{
+   unsigned int val = 0;
+
+   get_option(&str, &val);
+   if (val)
+   static_branch_enable(&csdlock_debug_enabled);
+
+   return 0;
+}
+early_param("csdlock_debug", csdlock_debug);
+
 static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
 static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
 static DEFINE_PER_CPU(void *, cur_csd_info);
@@ -110,7 +125,7 @@ static DEFINE_PER_CPU(void *, cur_csd_info);
 static atomic_t csd_bug_count = ATOMIC_INIT(0);
 
 /* Record current CSD work for current CPU, NULL to erase. */
-static void csd_lock_record(call_single_data_t *csd)
+static void __csd_lock_record(call_single_data_t *csd)
 {
if (!csd) {
smp_mb(); /* NULL cur_csd after unlock. */
@@ -125,7 +140,13 @@ static void csd_lock_record(call_single_data_t *csd)
  /* Or before unlock, as the case may be. */
 }
 
-static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+static __always_inline void csd_lock_record(call_single_data_t *csd)
+{
+   if (static_branch_unlikely(&csdlock_debug_enabled))
+   __csd_lock_record(csd);
+}
+
+static int csd_lock_wait_getcpu(call_single_data_t *csd)
 {
unsigned int csd_type;
 
@@ -140,7 +161,7 @@ static __always_inline int 
csd_lock_wait_getcpu(call_single_data_t *csd)
  * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
  * so waiting on other types gets much less information.
  */
-static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 
ts0, u64 *ts1, int *bug_id)
+static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, 
int *bug_id)
 {
int cpu = -1;
int cpux;
@@ -204,7 +225,7 @@ static __always_inline bool 
csd_lock_wait_toolong(call_single_data_t *csd, u64 t
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
-static __always_inline void csd_lock_wait(call_single_data_t *csd)
+static void __csd_lock_wait(call_single_data_t *csd)
 {
int bug_id = 0;
u64 ts0, ts1;
@@ -218,6 +239,15 @@ static __always_inline void 
csd_lock_wait(call_single_data_t *csd)
 

[GIT PULL] xen: branch for v5.12-rc2

2021-03-04 Thread Juergen Gross
Linus,

Please git pull the following tag:

 git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git 
for-linus-5.12b-rc2-tag

xen: branch for v5.12-rc2

It contains fixes for 2 security issues (XSA-367 and XSA-369).


Thanks.

Juergen

 arch/arm/xen/p2m.c| 35 ++---
 arch/x86/include/asm/xen/page.h   | 12 +
 arch/x86/xen/p2m.c| 54 ++-
 arch/x86/xen/setup.c  | 25 +++---
 drivers/net/xen-netback/netback.c | 12 -
 5 files changed, 104 insertions(+), 34 deletions(-)

Jan Beulich (2):
  Xen/gnttab: handle p2m update errors on a per-slot basis
  xen-netback: respect gnttab_map_refs()'s return value

Juergen Gross (1):
  xen: fix p2m size in dom0 for disabled memory hotplug case


[PATCH v3 3/4] kernel/smp: add more data to CSD lock debugging

2021-03-02 Thread Juergen Gross
In order to help identifying problems with IPI handling and remote
function execution add some more data to IPI debugging code.

There have been multiple reports of cpus looping long times (many
seconds) in smp_call_function_many() waiting for another cpu executing
a function like tlb flushing. Most of these reports have been for
cases where the kernel was running as a guest on top of KVM or Xen
(there are rumours of that happening under VMWare, too, and even on
bare metal).

Finding the root cause hasn't been successful yet, even after more than
2 years of chasing this bug by different developers.

Commit 35feb60474bf4f7 ("kernel/smp: Provide CSD lock timeout
diagnostics") tried to address this by adding some debug code and by
issuing another IPI when a hang was detected. This helped mitigating
the problem (the repeated IPI unlocks the hang), but the root cause is
still unknown.

Current available data suggests that either an IPI wasn't sent when it
should have been, or that the IPI didn't result in the target cpu
executing the queued function (due to the IPI not reaching the cpu,
the IPI handler not being called, or the handler not seeing the queued
request).

Try to add more diagnostic data by introducing a global atomic counter
which is being incremented when doing critical operations (before and
after queueing a new request, when sending an IPI, and when dequeueing
a request). The counter value is stored in percpu variables which can
be printed out when a hang is detected.

The data of the last event (consisting of sequence counter, source
cpu, target cpu, and event type) is stored in a global variable. When
a new event is to be traced, the data of the last event is stored in
the event related percpu location and the global data is updated with
the new event's data. This allows to track two events in one data
location: one by the value of the event data (the event before the
current one), and one by the location itself (the current event).

A typical printout with a detected hang will look like this:

csd: Detected non-responsive CSD lock (#1) on CPU#1, waiting 53 ns for 
CPU#06 scf_handler_1+0x0/0x50(0xa2a881bb1410).
csd: CSD lock (#1) handling prior 
scf_handler_1+0x0/0x50(0xa2a8813823c0) request.
csd: cnt(8cc): -> dequeue (src cpu 0 == empty)
csd: cnt(8cd): ->0006 idle
csd: cnt(0003668): 0001->0006 queue
csd: cnt(0003669): 0001->0006 ipi
csd: cnt(0003e0f): 0007->000a queue
csd: cnt(0003e10): 0001-> ping
csd: cnt(0003e71): 0003-> ping
csd: cnt(0003e72): ->0006 gotipi
csd: cnt(0003e73): ->0006 handle
csd: cnt(0003e74): ->0006 dequeue (src cpu 0 == empty)
csd: cnt(0003e7f): 0004->0006 ping
csd: cnt(0003e80): 0001-> pinged
csd: cnt(0003eb2): 0005->0001 noipi
csd: cnt(0003eb3): 0001->0006 queue
csd: cnt(0003eb4): 0001->0006 noipi
csd: cnt now: 0003f00

This example (being an artificial one, produced with a previous version
of this patch without the "hdlend" event), shows that cpu#6 started to
handle an IPI (cnt 3e72-3e74), but didn't start to handle another IPI
(sent by cpu#4, cnt 3e7f). The next request from cpu#1 for cpu#6 was
queued (3eb3), but no IPI was needed (cnt 3eb4, there was the event
from cpu#4 in the queue already).

The idea is to print only relevant entries. Those are all events which
are associated with the hang (so sender side events for the source cpu
of the hanging request, and receiver side events for the target cpu),
and the related events just before those (for adding data needed to
identify a possible race). Printing all available data would be
possible, but this would add large amounts of data printed on larger
configurations.

Signed-off-by: Juergen Gross 
Tested-by: Paul E. McKenney 
---
V2:
- add automatic data deciphering and sorting of entries
- add new trace point for leaving flush_smp_call_function_queue()
- add information when finding an empty call_single_queue
V3:
- move new code to generic_exec_single() (Peter Zijlstra)
---
 .../admin-guide/kernel-parameters.txt |   4 +
 kernel/smp.c  | 228 +-
 2 files changed, 228 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 31dbf7b2f0e8..80c72f8e780d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -789,6 +789,10 @@
printed to the console in case a hanging cpu is
detected and that cpu is pinged again in order to try
to resolve the hang situation.
+   0: disable csdlock debugging (default)
+   1: enable basic csdlock debugging (minor impact)

[PATCH v3 0/4] kernel/smp.c: add more CSD lock debugging

2021-03-02 Thread Juergen Gross
This patch series was created to help catching a rather long standing
problem with smp_call_function_any() and friends.

Very rarely a remote cpu seems not to execute a queued function and
the cpu queueing that function request will wait forever for the
CSD lock to be released by the remote cpu.

This problem has been observed primarily when running as a guest on
top of KVM or Xen, but there are reports of the same pattern for the
bare metal case, too. It seems to exist since about 2 years now, and
there is not much data available.

What is known up to now is that resending an IPI to the remote cpu is
helping.

The patches are adding more debug data being printed in a hang
situation using a kernel with CONFIG_CSD_LOCK_WAIT_DEBUG configured.
Additionally the debug coding can be controlled via a new parameter
in order to make it easier to use such a kernel in a production
environment without too much negative performance impact. Per default
the debugging additions will be switched off and they can be activated
via the new boot parameter:

csdlock_debug=1 will switch on the basic debugging and IPI resend
csdlock_debug=ext will add additional data printed out in a hang
  situation, but this option will have a larger impact on performance.

I hope that the "ext" setting will help to find the root cause of the
problem.

Juergen Gross (4):
  kernel/smp: add boot parameter for controlling CSD lock debugging
  kernel/smp: prepare more CSD lock debugging
  kernel/smp: add more data to CSD lock debugging
  kernel/smp: fix flush_smp_call_function_queue() cpu offline detection

 .../admin-guide/kernel-parameters.txt |  10 +
 kernel/smp.c  | 280 +-
 2 files changed, 277 insertions(+), 13 deletions(-)

-- 
2.26.2



[PATCH v3 2/4] kernel/smp: prepare more CSD lock debugging

2021-03-02 Thread Juergen Gross
In order to be able to easily add more CSD lock debugging data to
struct call_function_data->csd move the call_single_data_t element
into a sub-structure.

Signed-off-by: Juergen Gross 
---
 kernel/smp.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index d5f0b21ab55e..6d7e6dbe33dc 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -31,8 +31,12 @@
 
 #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
 
+struct cfd_percpu {
+   call_single_data_t  csd;
+};
+
 struct call_function_data {
-   call_single_data_t  __percpu *csd;
+   struct cfd_percpu   __percpu *pcpu;
cpumask_var_t   cpumask;
cpumask_var_t   cpumask_ipi;
 };
@@ -55,8 +59,8 @@ int smpcfd_prepare_cpu(unsigned int cpu)
free_cpumask_var(cfd->cpumask);
return -ENOMEM;
}
-   cfd->csd = alloc_percpu(call_single_data_t);
-   if (!cfd->csd) {
+   cfd->pcpu = alloc_percpu(struct cfd_percpu);
+   if (!cfd->pcpu) {
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
return -ENOMEM;
@@ -71,7 +75,7 @@ int smpcfd_dead_cpu(unsigned int cpu)
 
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
-   free_percpu(cfd->csd);
+   free_percpu(cfd->pcpu);
return 0;
 }
 
@@ -694,7 +698,7 @@ static void smp_call_function_many_cond(const struct 
cpumask *mask,
 
cpumask_clear(cfd->cpumask_ipi);
for_each_cpu(cpu, cfd->cpumask) {
-   call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
+   call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
 
if (cond_func && !cond_func(cpu, info))
continue;
@@ -719,7 +723,7 @@ static void smp_call_function_many_cond(const struct 
cpumask *mask,
for_each_cpu(cpu, cfd->cpumask) {
call_single_data_t *csd;
 
-   csd = per_cpu_ptr(cfd->csd, cpu);
+   csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
csd_lock_wait(csd);
}
}
-- 
2.26.2



[PATCH v3 1/4] kernel/smp: add boot parameter for controlling CSD lock debugging

2021-03-02 Thread Juergen Gross
Currently CSD lock debugging can be switched on and off via a kernel
config option only. Unfortunately there is at least one problem with
CSD lock handling pending for about 2 years now, which has been seen
in different environments (mostly when running virtualized under KVM
or Xen, at least once on bare metal). Multiple attempts to catch this
issue have finally led to introduction of CSD lock debug code, but
this code is not in use in most distros as it has some impact on
performance.

In order to be able to ship kernels with CONFIG_CSD_LOCK_WAIT_DEBUG
enabled even for production use, add a boot parameter for switching
the debug functionality on. This will reduce any performance impact
of the debug coding to a bare minimum when not being used.

Signed-off-by: Juergen Gross 
---
 .../admin-guide/kernel-parameters.txt |  6 +++
 kernel/smp.c  | 38 +--
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 04545725f187..31dbf7b2f0e8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -784,6 +784,12 @@
cs89x0_media=   [HW,NET]
Format: { rj45 | aui | bnc }
 
+   csdlock_debug=  [KNL] Enable debug add-ons of cross-cpu function call
+   handling. When switched on additional debug data is
+   printed to the console in case a hanging cpu is
+   detected and that cpu is pinged again in order to try
+   to resolve the hang situation.
+
dasd=   [HW,NET]
See header of drivers/s390/block/dasd_devmap.c.
 
diff --git a/kernel/smp.c b/kernel/smp.c
index aeb0adfa0606..d5f0b21ab55e 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "smpboot.h"
 #include "sched/smp.h"
@@ -102,6 +103,20 @@ void __init call_function_init(void)
 
 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
 
+static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled);
+
+static int __init csdlock_debug(char *str)
+{
+   unsigned int val = 0;
+
+   get_option(&str, &val);
+   if (val)
+   static_branch_enable(&csdlock_debug_enabled);
+
+   return 0;
+}
+early_param("csdlock_debug", csdlock_debug);
+
 static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
 static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
 static DEFINE_PER_CPU(void *, cur_csd_info);
@@ -110,7 +125,7 @@ static DEFINE_PER_CPU(void *, cur_csd_info);
 static atomic_t csd_bug_count = ATOMIC_INIT(0);
 
 /* Record current CSD work for current CPU, NULL to erase. */
-static void csd_lock_record(call_single_data_t *csd)
+static void __csd_lock_record(call_single_data_t *csd)
 {
if (!csd) {
smp_mb(); /* NULL cur_csd after unlock. */
@@ -125,7 +140,13 @@ static void csd_lock_record(call_single_data_t *csd)
  /* Or before unlock, as the case may be. */
 }
 
-static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+static __always_inline void csd_lock_record(call_single_data_t *csd)
+{
+   if (static_branch_unlikely(&csdlock_debug_enabled))
+   __csd_lock_record(csd);
+}
+
+static int csd_lock_wait_getcpu(call_single_data_t *csd)
 {
unsigned int csd_type;
 
@@ -140,7 +161,7 @@ static __always_inline int 
csd_lock_wait_getcpu(call_single_data_t *csd)
  * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
  * so waiting on other types gets much less information.
  */
-static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 
ts0, u64 *ts1, int *bug_id)
+static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, 
int *bug_id)
 {
int cpu = -1;
int cpux;
@@ -204,7 +225,7 @@ static __always_inline bool 
csd_lock_wait_toolong(call_single_data_t *csd, u64 t
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
-static __always_inline void csd_lock_wait(call_single_data_t *csd)
+static void __csd_lock_wait(call_single_data_t *csd)
 {
int bug_id = 0;
u64 ts0, ts1;
@@ -218,6 +239,15 @@ static __always_inline void 
csd_lock_wait(call_single_data_t *csd)
smp_acquire__after_ctrl_dep();
 }
 
+static __always_inline void csd_lock_wait(call_single_data_t *csd)
+{
+   if (static_branch_unlikely(&csdlock_debug_enabled)) {
+   __csd_lock_wait(csd);
+   return;
+   }
+
+   smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
+}
 #else
 static void csd_lock_record(call_single_data_t *csd)
 {
-- 
2.26.2



[PATCH v3 4/4] kernel/smp: fix flush_smp_call_function_queue() cpu offline detection

2021-03-02 Thread Juergen Gross
The warnings for flushing a logically offline cpu's call_single_queue
are gated by a wrong if statement. It should trigger when there have
been new requests before dequeueing them, not afterwards: at that point
the queue has already been emptied into 'entry', so testing the list
head afterwards only catches requests arriving after the dequeue.

Signed-off-by: Juergen Gross 
---
V3:
- new patch
---
 kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index 1a96691dbf7f..b3077c327b0a 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -581,7 +581,7 @@ static void flush_smp_call_function_queue(bool 
warn_cpu_offline)
 
/* There shouldn't be any pending callbacks on an offline CPU. */
if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
-!warned && !llist_empty(head))) {
+!warned && entry)) {
warned = true;
WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
 
-- 
2.26.2



[tip: locking/core] locking/csd_lock: Add boot parameter for controlling CSD lock debugging

2021-03-01 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the locking/core branch of tip:

Commit-ID: 4b816578c16b92b68fb9842dcec0bc2fdc2b36d8
Gitweb:
https://git.kernel.org/tip/4b816578c16b92b68fb9842dcec0bc2fdc2b36d8
Author:Juergen Gross 
AuthorDate:Mon, 01 Mar 2021 11:13:34 +01:00
Committer: Ingo Molnar 
CommitterDate: Mon, 01 Mar 2021 14:27:58 +01:00

locking/csd_lock: Add boot parameter for controlling CSD lock debugging

Currently CSD lock debugging can be switched on and off via a kernel
config option only. Unfortunately there is at least one problem with
CSD lock handling pending for about 2 years now, which has been seen
in different environments (mostly when running virtualized under KVM
or Xen, at least once on bare metal). Multiple attempts to catch this
issue have finally led to introduction of CSD lock debug code, but
this code is not in use in most distros as it has some impact on
performance.

In order to be able to ship kernels with CONFIG_CSD_LOCK_WAIT_DEBUG
enabled even for production use, add a boot parameter for switching
the debug functionality on. This will reduce any performance impact
of the debug coding to a bare minimum when not being used.

Signed-off-by: Juergen Gross 
[ Minor edits. ]
Signed-off-by: Ingo Molnar 
Link: https://lore.kernel.org/r/20210301101336.7797-2-jgr...@suse.com
---
 Documentation/admin-guide/kernel-parameters.txt |  6 +++-
 kernel/smp.c| 38 ++--
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 0454572..98dbffa 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -784,6 +784,12 @@
cs89x0_media=   [HW,NET]
Format: { rj45 | aui | bnc }
 
+   csdlock_debug=  [KNL] Enable debug add-ons of cross-CPU function call
+   handling. When switched on, additional debug data is
+   printed to the console in case a hanging CPU is
+   detected, and that CPU is pinged again in order to try
+   to resolve the hang situation.
+
dasd=   [HW,NET]
See header of drivers/s390/block/dasd_devmap.c.
 
diff --git a/kernel/smp.c b/kernel/smp.c
index aeb0adf..d5f0b21 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "smpboot.h"
 #include "sched/smp.h"
@@ -102,6 +103,20 @@ void __init call_function_init(void)
 
 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
 
+static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled);
+
+static int __init csdlock_debug(char *str)
+{
+   unsigned int val = 0;
+
+   get_option(&str, &val);
+   if (val)
+   static_branch_enable(&csdlock_debug_enabled);
+
+   return 0;
+}
+early_param("csdlock_debug", csdlock_debug);
+
 static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
 static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
 static DEFINE_PER_CPU(void *, cur_csd_info);
@@ -110,7 +125,7 @@ static DEFINE_PER_CPU(void *, cur_csd_info);
 static atomic_t csd_bug_count = ATOMIC_INIT(0);
 
 /* Record current CSD work for current CPU, NULL to erase. */
-static void csd_lock_record(call_single_data_t *csd)
+static void __csd_lock_record(call_single_data_t *csd)
 {
if (!csd) {
smp_mb(); /* NULL cur_csd after unlock. */
@@ -125,7 +140,13 @@ static void csd_lock_record(call_single_data_t *csd)
  /* Or before unlock, as the case may be. */
 }
 
-static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+static __always_inline void csd_lock_record(call_single_data_t *csd)
+{
+   if (static_branch_unlikely(&csdlock_debug_enabled))
+   __csd_lock_record(csd);
+}
+
+static int csd_lock_wait_getcpu(call_single_data_t *csd)
 {
unsigned int csd_type;
 
@@ -140,7 +161,7 @@ static __always_inline int 
csd_lock_wait_getcpu(call_single_data_t *csd)
  * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
  * so waiting on other types gets much less information.
  */
-static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 
ts0, u64 *ts1, int *bug_id)
+static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, 
int *bug_id)
 {
int cpu = -1;
int cpux;
@@ -204,7 +225,7 @@ static __always_inline bool 
csd_lock_wait_toolong(call_single_data_t *csd, u64 t
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
-static __always_inline void csd_lock_wait(call_single_data_t *csd)
+static void __csd_lock_wait(call_single_data_t *csd)
 {
int bug_id = 0;
u64 ts0, ts1;
@@ -218,6 +239,15 @@ static __always_inline void 
csd_lock_wait(call_single_data_t *csd)
 

[tip: locking/core] locking/csd_lock: Add more data to CSD lock debugging

2021-03-01 Thread tip-bot2 for Juergen Gross
The following commit has been merged into the locking/core branch of tip:

Commit-ID: 6bf3195fdbab92b57f3167101a0b651b93dbeae7
Gitweb:
https://git.kernel.org/tip/6bf3195fdbab92b57f3167101a0b651b93dbeae7
Author:Juergen Gross 
AuthorDate:Mon, 01 Mar 2021 11:13:36 +01:00
Committer: Ingo Molnar 
CommitterDate: Mon, 01 Mar 2021 14:27:59 +01:00

locking/csd_lock: Add more data to CSD lock debugging

In order to help identifying problems with IPI handling and remote
function execution add some more data to IPI debugging code.

There have been multiple reports of CPUs looping long times (many
seconds) in smp_call_function_many() waiting for another CPU executing
a function like tlb flushing. Most of these reports have been for
cases where the kernel was running as a guest on top of KVM or Xen
(there are rumours of that happening under VMWare, too, and even on
bare metal).

Finding the root cause hasn't been successful yet, even after more than
2 years of chasing this bug by different developers.

Commit:

  35feb60474bf4f7 ("kernel/smp: Provide CSD lock timeout diagnostics")

tried to address this by adding some debug code and by issuing another
IPI when a hang was detected. This helped mitigating the problem
(the repeated IPI unlocks the hang), but the root cause is still unknown.

Current available data suggests that either an IPI wasn't sent when it
should have been, or that the IPI didn't result in the target CPU
executing the queued function (due to the IPI not reaching the CPU,
the IPI handler not being called, or the handler not seeing the queued
request).

Try to add more diagnostic data by introducing a global atomic counter
which is being incremented when doing critical operations (before and
after queueing a new request, when sending an IPI, and when dequeueing
a request). The counter value is stored in percpu variables which can
be printed out when a hang is detected.

The data of the last event (consisting of sequence counter, source
CPU, target CPU, and event type) is stored in a global variable. When
a new event is to be traced, the data of the last event is stored in
the event related percpu location and the global data is updated with
the new event's data. This allows to track two events in one data
location: one by the value of the event data (the event before the
current one), and one by the location itself (the current event).

A typical printout with a detected hang will look like this:

csd: Detected non-responsive CSD lock (#1) on CPU#1, waiting 53 ns for 
CPU#06 scf_handler_1+0x0/0x50(0xa2a881bb1410).
csd: CSD lock (#1) handling prior 
scf_handler_1+0x0/0x50(0xa2a8813823c0) request.
csd: cnt(8cc): -> dequeue (src cpu 0 == empty)
csd: cnt(8cd): ->0006 idle
csd: cnt(0003668): 0001->0006 queue
csd: cnt(0003669): 0001->0006 ipi
csd: cnt(0003e0f): 0007->000a queue
csd: cnt(0003e10): 0001-> ping
csd: cnt(0003e71): 0003-> ping
csd: cnt(0003e72): ->0006 gotipi
csd: cnt(0003e73): ->0006 handle
csd: cnt(0003e74): ->0006 dequeue (src cpu 0 == empty)
csd: cnt(0003e7f): 0004->0006 ping
csd: cnt(0003e80): 0001-> pinged
csd: cnt(0003eb2): 0005->0001 noipi
csd: cnt(0003eb3): 0001->0006 queue
csd: cnt(0003eb4): 0001->0006 noipi
csd: cnt now: 0003f00

The idea is to print only relevant entries. Those are all events which
are associated with the hang (so sender side events for the source CPU
of the hanging request, and receiver side events for the target CPU),
and the related events just before those (for adding data needed to
identify a possible race). Printing all available data would be
possible, but this would add large amounts of data printed on larger
configurations.

Signed-off-by: Juergen Gross 
[ Minor readability edits. Breaks col80 but is far more readable. ]
Signed-off-by: Ingo Molnar 
Tested-by: Paul E. McKenney 
Link: https://lore.kernel.org/r/20210301101336.7797-4-jgr...@suse.com
---
 Documentation/admin-guide/kernel-parameters.txt |   4 +-
 kernel/smp.c| 226 ++-
 2 files changed, 226 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 98dbffa..1fe9d38 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -789,6 +789,10 @@
printed to the console in case a hanging CPU is
detected, and that CPU is pinged again in order to try
to resolve the hang situation.
+   0: disable csdlock debugging (default)
+   1: enable basic csdlock debugging (minor impact)
+   
