[Xen-devel] [PATCH v2 9/7] x86/XPTI: reduce .text.entry
This exposes less code pieces and at the same time reduces the range covered from slightly above 3 pages to a little below 2 of them. The code being moved is entirely unchanged, except for the removal of trailing blanks and a pointless q suffix from "retq". A few more small pieces could be moved, but it seems better to me to leave them where they are to not make it overly hard to follow code paths. Signed-off-by: Jan Beulich--- a/xen/arch/x86/x86_64/compat/entry.S +++ b/xen/arch/x86/x86_64/compat/entry.S @@ -13,8 +13,6 @@ #include #include -.section .text.entry, "ax", @progbits - ENTRY(entry_int82) ASM_CLAC pushq $0 @@ -199,6 +197,8 @@ ENTRY(compat_post_handle_exception) movb $0,TRAPBOUNCE_flags(%rdx) jmp compat_test_all_events +.section .text.entry, "ax", @progbits + /* See lstar_enter for entry register state. */ ENTRY(cstar_enter) ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI @@ -256,6 +256,8 @@ UNLIKELY_END(compat_syscall_gpf) movb %cl,TRAPBOUNCE_flags(%rdx) jmp .Lcompat_bounce_exception +.text + ENTRY(compat_sysenter) CR4_PV32_RESTORE movq VCPU_trap_ctxt(%rbx),%rcx @@ -275,9 +277,6 @@ ENTRY(compat_int80_direct_trap) call compat_create_bounce_frame jmp compat_test_all_events -/* compat_create_bounce_frame & helpers don't need to be in .text.entry */ -.text - /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:*/ /* {[ERRCODE,] EIP, CS, EFLAGS, [ESP, SS]} */ /* %rdx: trap_bounce, %rbx: struct vcpu */ --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -14,8 +14,6 @@ #include #include -.section .text.entry, "ax", @progbits - /* %rbx: struct vcpu */ ENTRY(switch_to_kernel) leaq VCPU_trap_bounce(%rbx),%rdx @@ -34,8 +32,107 @@ ENTRY(switch_to_kernel) movb %cl,TRAPBOUNCE_flags(%rdx) call create_bounce_frame andl $~X86_EFLAGS_DF,UREGS_eflags(%rsp) +/* %rbx: struct vcpu */ +test_all_events: +ASSERT_NOT_IN_ATOMIC +cli # tests must not race interrupts +/*test_softirqs:*/ +movl VCPU_processor(%rbx),%eax +shll $IRQSTAT_shift,%eax +leaq 
irq_stat+IRQSTAT_softirq_pending(%rip),%rcx +cmpl $0,(%rcx,%rax,1) +jne process_softirqs +testb $1,VCPU_mce_pending(%rbx) +jnz process_mce +.Ltest_guest_nmi: +testb $1,VCPU_nmi_pending(%rbx) +jnz process_nmi +test_guest_events: +movq VCPU_vcpu_info(%rbx),%rax +movzwl VCPUINFO_upcall_pending(%rax),%eax +decl %eax +cmpl $0xfe,%eax +jarestore_all_guest +/*process_guest_events:*/ +sti +leaq VCPU_trap_bounce(%rbx),%rdx +movq VCPU_event_addr(%rbx),%rax +movq %rax,TRAPBOUNCE_eip(%rdx) +movb $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) +call create_bounce_frame jmp test_all_events +ALIGN +/* %rbx: struct vcpu */ +process_softirqs: +sti +call do_softirq +jmp test_all_events + +ALIGN +/* %rbx: struct vcpu */ +process_mce: +testb $1 << VCPU_TRAP_MCE,VCPU_async_exception_mask(%rbx) +jnz .Ltest_guest_nmi +sti +movb $0,VCPU_mce_pending(%rbx) +call set_guest_machinecheck_trapbounce +test %eax,%eax +jz test_all_events +movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the +movb %dl,VCPU_mce_old_mask(%rbx)# iret hypercall +orl $1 << VCPU_TRAP_MCE,%edx +movb %dl,VCPU_async_exception_mask(%rbx) +jmp process_trap + +ALIGN +/* %rbx: struct vcpu */ +process_nmi: +testb $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%rbx) +jnz test_guest_events +sti +movb $0,VCPU_nmi_pending(%rbx) +call set_guest_nmi_trapbounce +test %eax,%eax +jz test_all_events +movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the +movb %dl,VCPU_nmi_old_mask(%rbx)# iret hypercall +orl $1 << VCPU_TRAP_NMI,%edx +movb %dl,VCPU_async_exception_mask(%rbx) +/* FALLTHROUGH */ +process_trap: +leaq VCPU_trap_bounce(%rbx),%rdx +call create_bounce_frame +jmp test_all_events + +/* No special register assumptions. */ +ENTRY(ret_from_intr) +GET_CURRENT(bx) +testb $3,UREGS_cs(%rsp) +jzrestore_all_xen +movq VCPU_domain(%rbx),%rax +testb $1,DOMAIN_is_32bit_pv(%rax) +jztest_all_events +jmp compat_test_all_events + +/* Enable NMIs. No special register assumptions. Only %rax is not preserved. 
*/ +ENTRY(enable_nmis) +movq %rsp, %rax /* Grab RSP before pushing */ + +/* Set up stack frame */
[Xen-devel] [PATCH v2 8/7] x86/XPTI: use %r12 to write zero into xen_cr3
Now that we zero all registers early on all entry paths, use that to avoid a couple of immediates here. Signed-off-by: Jan Beulich--- We may want to consider eliminating a few more $0 this way. But especially for byte ones I'm not sure it's worth it, due to the REX prefix the use of %r12 would incur. --- a/xen/arch/x86/x86_64/compat/entry.S +++ b/xen/arch/x86/x86_64/compat/entry.S @@ -223,7 +223,7 @@ ENTRY(cstar_enter) mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) neg %rcx mov %rcx, %cr3 -movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) +mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) .Lcstar_cr3_okay: sti .Lcstar_cr3_end: --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -185,7 +185,7 @@ ENTRY(lstar_enter) mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) neg %rcx mov %rcx, %cr3 -movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) +mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) .Llstar_cr3_okay: sti .Llstar_cr3_end: @@ -295,7 +295,7 @@ GLOBAL(sysenter_eflags_saved) mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) neg %rcx mov %rcx, %cr3 -movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) +mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) .Lsyse_cr3_okay: sti .Lsyse_cr3_end: @@ -348,7 +348,7 @@ ENTRY(int80_direct_trap) mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) neg %rcx mov %rcx, %cr3 -movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) +mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) .Lint80_cr3_okay: sti .Lint80_cr3_end: @@ -538,10 +538,9 @@ ENTRY(common_interrupt) neg %rcx .Lintr_cr3_load: mov %rcx, %cr3 -xor %ecx, %ecx -mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) +mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) testb $3, UREGS_cs(%rsp) -cmovnz %rcx, %r15 +cmovnz %r12, %r15 .Lintr_cr3_okay: CR4_PV32_RESTORE @@ -586,10 +585,9 @@ GLOBAL(handle_exception) neg %rcx .Lxcpt_cr3_load: mov %rcx, %cr3 -xor %ecx, %ecx -mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14) +mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) testb $3, UREGS_cs(%rsp) -cmovnz %rcx, %r15 +cmovnz %r12, %r15 .Lxcpt_cr3_okay: 
handle_exception_saved: @@ -828,7 +826,7 @@ handle_ist_exception: neg %rcx .List_cr3_load: mov %rcx, %cr3 -movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14) +mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) .List_cr3_okay: CR4_PV32_RESTORE ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v2 3/6] xen/x86: support per-domain flag for xpti
On 08/03/18 11:17, Jan Beulich wrote: On 02.03.18 at 09:14,wrote: >> --- a/xen/arch/x86/mm.c >> +++ b/xen/arch/x86/mm.c >> @@ -510,15 +510,19 @@ void make_cr3(struct vcpu *v, mfn_t mfn) >> >> void write_ptbase(struct vcpu *v) >> { >> -if ( this_cpu(root_pgt) && is_pv_vcpu(v) && !is_pv_32bit_vcpu(v) ) >> +if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti ) >> { >> get_cpu_info()->root_pgt_changed = true; >> +get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt)); >> asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" ); >> } >> else >> { >> get_cpu_info()->root_pgt_changed = false; >> +/* Make sure to clear xen_cr3 before pv_cr3; write_cr3() >> serializes. */ >> +get_cpu_info()->xen_cr3 = 0; >> write_cr3(v->arch.cr3); >> +get_cpu_info()->pv_cr3 = 0; >> } >> } > > I think you want to latch the return value of get_cpu_info() into a > local variable now. Yes. > >> @@ -707,6 +708,9 @@ int __init dom0_construct_pv(struct domain *d, >> cpu = p->processor; >> } >> >> +if ( !is_pv_32bit_domain(d) ) >> +xpti_domain_init(d); > > Perhaps better to omit the conditional here? Or otherwise use the > "compat32" local variable? I'll drop the conditional. > >> +static int parse_xpti(const char *s) > > __init Aah, of course. > >> +{ >> +int rc = 0; >> + >> +switch ( parse_bool(s, NULL) ) >> +{ >> +case 0: >> +opt_xpti = XPTI_OFF; >> +break; >> +case 1: >> +opt_xpti = XPTI_ON; >> +break; >> +default: >> +if ( !strcmp(s, "default") ) > > This wants to also be mentioned in the command line doc. Uuh, this was a copy-and-paste result from my alternative XPTI approach. I'll just drop that value. > >> +opt_xpti = XPTI_DEFAULT; >> +else if ( !strcmp(s, "nodom0") ) >> +opt_xpti = XPTI_NODOM0; >> +else >> +rc = -EINVAL; >> +break; >> +} >> + >> +return rc; >> +} >> + >> +custom_param("xpti", parse_xpti); > > Please omit the blank line above here. Okay. > >> +void xpti_init(void) > > __init Yes. 
> >> +void xpti_domain_init(struct domain *d) >> +{ >> +if ( !is_pv_domain(d) || is_pv_32bit_domain(d) ) >> +return; >> + >> +switch ( opt_xpti ) >> +{ >> +case XPTI_OFF: >> +d->arch.pv_domain.xpti = false; >> +break; >> +case XPTI_ON: >> +d->arch.pv_domain.xpti = true; >> +break; >> +case XPTI_NODOM0: >> +d->arch.pv_domain.xpti = d->domain_id != 0 && >> + d->domain_id != hardware_domid; >> +break; >> +default: >> +ASSERT_UNREACHABLE(); >> +break; >> +} >> + >> +if ( d->arch.pv_domain.xpti ) >> +printk("Enabling Xen Pagetable protection (XPTI) for Domain %d\n", >> + d->domain_id); > > Please don't, even less so without XENLOG_G_*. And if you really, > really want this at, say, XENLOG_G_DEBUG, then Dom%d please. Okay, I'll drop that message. > >> --- a/xen/include/asm-x86/domain.h >> +++ b/xen/include/asm-x86/domain.h >> @@ -257,6 +257,9 @@ struct pv_domain >> struct mapcache_domain mapcache; >> >> struct cpuidmasks *cpuidmasks; >> + >> +/* XPTI active? */ >> +bool xpti; >> }; > > Is there really no 1 byte slot available elsewhere in the structure? > Like between nr_l4_pages and mapcache? I'll have a look. Juergen ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v2] x86: improve MSR_SHADOW_GS accesses
On 08/03/18 11:15, Jan Beulich wrote: > Instead of using RDMSR/WRMSR, on fsgsbase-capable systems use a double > SWAPGS combined with RDGSBASE/WRGSBASE. This halves execution time for > a shadow GS update alone on my Haswell (and we have indications of > good performance improvements by this on Skylake too), while the win is > even higher when e.g. updating more than one base (as may and commonly > will happen in load_segments()). > > Signed-off-by: Jan Beulich Reviewed-by: Andrew Cooper ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v2] x86: improve MSR_SHADOW_GS accesses
Instead of using RDMSR/WRMSR, on fsgsbase-capable systems use a double SWAPGS combined with RDGSBASE/WRGSBASE. This halves execution time for a shadow GS update alone on my Haswell (and we have indications of good performance improvements by this on Skylake too), while the win is even higher when e.g. updating more than one base (as may and commonly will happen in load_segments()). Signed-off-by: Jan Beulich--- v2: Drop use of alternatives for now. --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1338,9 +1338,12 @@ static void load_segments(struct vcpu *n if ( n->arch.pv_vcpu.fs_base | (dirty_segment_mask & DIRTY_FS_BASE) ) wrfsbase(n->arch.pv_vcpu.fs_base); -/* Most kernels have non-zero GS base, so don't bother testing. */ -/* (This is also a serialising instruction, avoiding AMD erratum #88.) */ -wrmsrl(MSR_SHADOW_GS_BASE, n->arch.pv_vcpu.gs_base_kernel); +/* + * Most kernels have non-zero GS base, so don't bother testing. + * (For old AMD hardware this is also a serialising instruction, + * avoiding erratum #88.) + */ +wrgsshadow(n->arch.pv_vcpu.gs_base_kernel); /* This can only be non-zero if selector is NULL. */ if ( n->arch.pv_vcpu.gs_base_user | --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -503,12 +503,12 @@ static void vmx_save_guest_msrs(struct v * We cannot cache SHADOW_GS_BASE while the VCPU runs, as it can * be updated at any time via SWAPGS, which we cannot trap. 
*/ -rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs); +v->arch.hvm_vmx.shadow_gs = rdgsshadow(); } static void vmx_restore_guest_msrs(struct vcpu *v) { -wrmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs); +wrgsshadow(v->arch.hvm_vmx.shadow_gs); wrmsrl(MSR_STAR, v->arch.hvm_vmx.star); wrmsrl(MSR_LSTAR, v->arch.hvm_vmx.lstar); wrmsrl(MSR_SYSCALL_MASK, v->arch.hvm_vmx.sfmask); @@ -2846,7 +2846,7 @@ static int long_mode_do_msr_read(unsigne break; case MSR_SHADOW_GS_BASE: -rdmsrl(MSR_SHADOW_GS_BASE, *msr_content); +*msr_content = rdgsshadow(); break; case MSR_STAR: @@ -3065,7 +3065,7 @@ static int long_mode_do_msr_write(unsign else if ( msr == MSR_GS_BASE ) __vmwrite(GUEST_GS_BASE, msr_content); else -wrmsrl(MSR_SHADOW_GS_BASE, msr_content); +wrgsshadow(msr_content); break; --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -1032,7 +1032,7 @@ static int write_msr(unsigned int reg, u case MSR_SHADOW_GS_BASE: if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) ) break; -wrmsrl(MSR_SHADOW_GS_BASE, val); +wrgsshadow(val); curr->arch.pv_vcpu.gs_base_user = val; return X86EMUL_OKAY; --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -1034,7 +1034,7 @@ long do_set_segment_base(unsigned int wh case SEGBASE_GS_USER: if ( is_canonical_address(base) ) { -wrmsrl(MSR_SHADOW_GS_BASE, base); +wrgsshadow(base); v->arch.pv_vcpu.gs_base_user = base; } else --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -49,7 +49,7 @@ static void read_registers(struct cpu_us regs->gs = read_sreg(gs); crs[5] = rdfsbase(); crs[6] = rdgsbase(); -rdmsrl(MSR_SHADOW_GS_BASE, crs[7]); +crs[7] = rdgsshadow(); } static void _show_registers( --- a/xen/include/asm-x86/msr.h +++ b/xen/include/asm-x86/msr.h @@ -170,6 +170,22 @@ static inline unsigned long rdgsbase(voi return base; } +static inline unsigned long rdgsshadow(void) +{ +unsigned long base; + +if ( cpu_has_fsgsbase ) +{ +asm volatile ( "swapgs" ); +base = __rdgsbase(); +asm 
volatile ( "swapgs" ); +} +else +rdmsrl(MSR_SHADOW_GS_BASE, base); + +return base; +} + static inline void wrfsbase(unsigned long base) { if ( cpu_has_fsgsbase ) @@ -194,6 +210,25 @@ static inline void wrgsbase(unsigned lon wrmsrl(MSR_GS_BASE, base); } +static inline void wrgsshadow(unsigned long base) +{ +if ( cpu_has_fsgsbase ) +{ +asm volatile ( "swapgs\n\t" +#ifdef HAVE_AS_FSGSBASE + "wrgsbase %0\n\t" + "swapgs" + :: "r" (base) ); +#else + ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8\n\t" + "swapgs" + :: "a" (base) ); +#endif +} +else +wrmsrl(MSR_SHADOW_GS_BASE, base); +} + DECLARE_PER_CPU(uint64_t, efer); static inline uint64_t read_efer(void) { ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH] sched/rt: Fix build after c/s c3715dd8fb766
Travis reports: sched_rt.c:241:30: error: unused function 'rt_dom' [-Werror,-Wunused-function] static inline struct rt_dom *rt_dom(const struct domain *dom) ^ 1 error generated. when compiling with Clang. Drop the function. Signed-off-by: Andrew Cooper --- CC: George Dunlap CC: Dario Faggioli CC: Meng Xu --- xen/common/sched_rt.c | 5 - 1 file changed, 5 deletions(-) diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c index 1c72ea8..ac79f15 100644 --- a/xen/common/sched_rt.c +++ b/xen/common/sched_rt.c @@ -238,11 +238,6 @@ static inline struct rt_vcpu *rt_vcpu(const struct vcpu *vcpu) return vcpu->sched_priv; } -static inline struct rt_dom *rt_dom(const struct domain *dom) -{ -return dom->sched_priv; -} - static inline struct list_head *rt_runq(const struct scheduler *ops) { return _priv(ops)->runq; -- 2.1.4 ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 0/7] unsafe big.LITTLE support
Hello, On 08/03/18 06:15, Peng Fan wrote: Hi Stefano, On Fri, Mar 02, 2018 at 11:05:54AM -0800, Stefano Stabellini wrote: Hi all, This series changes the initialization of two virtual registers to make sure they match the value of the underlying physical cpu. It also disables cpus different from the boot cpu, unless a newly introduced command line option is specified. In that case, it explains how to setup the system to avoid corruptions, which involves manually specifying the cpu affinity of all domains, because the scheduler still lacks big.LITTLE support. In the uncommon case of a system where the cacheline sizes are different across cores, it disables all cores that have a different dcache line size from the boot cpu. In fact, it is not sufficient to use the dcache line size of the current cpu, it would be necessary to use the minimum across all dcache line sizes of all cores. Given that it is actually uncommon even in big.LITTLE systems, just disable cpus for now. The first patch in the series is a fix for the way we read the dcache line size. I am trying the patchset, but I meet issue that Guest Big/Little with vcpu not working properly. As my current hardware has an issue which has fix in Kernel, https://source.codeaurora.org/external/imx/linux-imx/commit/?h=imx_4.9.51_imx8_beta2&id=917cc3a8db2f3609ef8e2f59e7bcd31aa2cd4e59 Can you describe what you mean by not working properly? Also what is your setup? Did you pin the different vCPUs as requested by the documentation? I am not sure whether this issue cause DomU big/Little not work. Well, I would recommend asking NXP whether this erratum affects TLB flush for Hypervisor Page-Table or Stage-2 Page-Table. So wonder has this patchset been tested on Big/Little Hardware? This series only adds the facility to report the correct MIDR to the guest. If your platform requires more, then it would be necessary to send a patch for Xen. 
Cheers, -- Julien Grall ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [PATCH v2] xen/acpi: upload _PSD info for non Dom0 CPUs too
All uploaded PM data from non-dom0 CPUs takes the info from vCPU 0, changing only the acpi_id. For processors whose P-state coordination type is HW_ALL (0xFD) it is OK to upload bogus P-state dependency information (_PSD), because Xen will ignore any cpufreq domains created for past CPUs. Albeit for platforms which expose coordination types as SW_ANY or SW_ALL, this will have some unintended side effects. Effectively, it will look at the P-state domain existence and *if it already exists* it will skip the acpi-cpufreq initialization and thus inherit the policy from the first CPU in the cpufreq domain. This will finally lead to the original cpu not changing target freq to P0 other than the first in the domain. This in turn prevents turbo boost from being enabled (e.g. for 'performance' governor) for all cpus. This patch fixes that, by also evaluating _PSD when enumerating all ACPI processors and always uploading the correct info to Xen. We export acpi_processor_get_psd() to help with this purpose, but change signature to not assume the existence of a struct acpi_processor given that ACPI doesn't create an acpi_processor for non-dom0 CPUs. Signed-off-by: Joao Martins --- Changes since v1: (Comments from Boris) * Refactor acpi_processor_get_psd(), export and use that instead. 
* s/offline/non-dom0/ --- drivers/acpi/processor_perflib.c | 11 +-- drivers/xen/xen-acpi-processor.c | 37 + include/acpi/processor.h | 2 ++ 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index c7cf48ad5cb9..a651ab3490d8 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -533,7 +533,7 @@ int acpi_processor_notify_smm(struct module *calling_module) EXPORT_SYMBOL(acpi_processor_notify_smm); -static int acpi_processor_get_psd(struct acpi_processor*pr) +int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain) { int result = 0; acpi_status status = AE_OK; @@ -541,9 +541,8 @@ static int acpi_processor_get_psd(struct acpi_processor *pr) struct acpi_buffer format = {sizeof("N"), "N"}; struct acpi_buffer state = {0, NULL}; union acpi_object *psd = NULL; - struct acpi_psd_package *pdomain; - status = acpi_evaluate_object(pr->handle, "_PSD", NULL, ); + status = acpi_evaluate_object(handle, "_PSD", NULL, ); if (ACPI_FAILURE(status)) { return -ENODEV; } @@ -561,8 +560,6 @@ static int acpi_processor_get_psd(struct acpi_processor *pr) goto end; } - pdomain = &(pr->performance->domain_info); - state.length = sizeof(struct acpi_psd_package); state.pointer = pdomain; @@ -597,6 +594,7 @@ static int acpi_processor_get_psd(struct acpi_processor *pr) kfree(buffer.pointer); return result; } +EXPORT_SYMBOL(acpi_processor_get_psd); int acpi_processor_preregister_performance( struct acpi_processor_performance __percpu *performance) @@ -645,7 +643,8 @@ int acpi_processor_preregister_performance( pr->performance = per_cpu_ptr(performance, i); cpumask_set_cpu(i, pr->performance->shared_cpu_map); - if (acpi_processor_get_psd(pr)) { + pdomain = &(pr->performance->domain_info); + if (acpi_processor_get_psd(pr->handle, pdomain)) { retval = -EINVAL; continue; } diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 
23e391d3ec01..e4f49d23a593 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -53,6 +53,10 @@ static unsigned long *acpi_ids_done; static unsigned long *acpi_id_present; /* And if there is an _CST definition (or a PBLK) for the ACPI IDs */ static unsigned long *acpi_id_cst_present; +/* And if there is an _PSD definition for the ACPI IDs */ +static unsigned long *acpi_id_psd_present; +/* Which ACPI P-State dependencies for a enumerated processor */ +static struct acpi_psd_package *acpi_psd; static int push_cxx_to_hypervisor(struct acpi_processor *_pr) { @@ -372,6 +376,15 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk); + /* It has P-state dependencies */ + if (!acpi_processor_get_psd(handle, _psd[acpi_id])) { + __set_bit(acpi_id, acpi_id_psd_present); + + pr_debug("ACPI CPU%u w/ PST:coord_type = %llu domain = %llu\n", +acpi_id, acpi_psd[acpi_id].coord_type, +acpi_psd[acpi_id].domain); + } + status = acpi_evaluate_object(handle, "_CST", NULL, ); if (ACPI_FAILURE(status)) { if (!pblk)
Re: [Xen-devel] [PATCH v2 3/6] xen/x86: support per-domain flag for xpti
>>> On 02.03.18 at 09:14,wrote: > --- a/xen/arch/x86/mm.c > +++ b/xen/arch/x86/mm.c > @@ -510,15 +510,19 @@ void make_cr3(struct vcpu *v, mfn_t mfn) > > void write_ptbase(struct vcpu *v) > { > -if ( this_cpu(root_pgt) && is_pv_vcpu(v) && !is_pv_32bit_vcpu(v) ) > +if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti ) > { > get_cpu_info()->root_pgt_changed = true; > +get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt)); > asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" ); > } > else > { > get_cpu_info()->root_pgt_changed = false; > +/* Make sure to clear xen_cr3 before pv_cr3; write_cr3() serializes. > */ > +get_cpu_info()->xen_cr3 = 0; > write_cr3(v->arch.cr3); > +get_cpu_info()->pv_cr3 = 0; > } > } I think you want to latch the return value of get_cpu_info() into a local variable now. > @@ -707,6 +708,9 @@ int __init dom0_construct_pv(struct domain *d, > cpu = p->processor; > } > > +if ( !is_pv_32bit_domain(d) ) > +xpti_domain_init(d); Perhaps better to omit the conditional here? Or otherwise use the "compat32" local variable? > +static int parse_xpti(const char *s) __init > +{ > +int rc = 0; > + > +switch ( parse_bool(s, NULL) ) > +{ > +case 0: > +opt_xpti = XPTI_OFF; > +break; > +case 1: > +opt_xpti = XPTI_ON; > +break; > +default: > +if ( !strcmp(s, "default") ) This wants to also be mentioned in the command line doc. > +opt_xpti = XPTI_DEFAULT; > +else if ( !strcmp(s, "nodom0") ) > +opt_xpti = XPTI_NODOM0; > +else > +rc = -EINVAL; > +break; > +} > + > +return rc; > +} > + > +custom_param("xpti", parse_xpti); Please omit the blank line above here. 
> +void xpti_init(void) __init > +void xpti_domain_init(struct domain *d) > +{ > +if ( !is_pv_domain(d) || is_pv_32bit_domain(d) ) > +return; > + > +switch ( opt_xpti ) > +{ > +case XPTI_OFF: > +d->arch.pv_domain.xpti = false; > +break; > +case XPTI_ON: > +d->arch.pv_domain.xpti = true; > +break; > +case XPTI_NODOM0: > +d->arch.pv_domain.xpti = d->domain_id != 0 && > + d->domain_id != hardware_domid; > +break; > +default: > +ASSERT_UNREACHABLE(); > +break; > +} > + > +if ( d->arch.pv_domain.xpti ) > +printk("Enabling Xen Pagetable protection (XPTI) for Domain %d\n", > + d->domain_id); Please don't, even less so without XENLOG_G_*. And if you really, really want this at, say, XENLOG_G_DEBUG, then Dom%d please. > --- a/xen/include/asm-x86/domain.h > +++ b/xen/include/asm-x86/domain.h > @@ -257,6 +257,9 @@ struct pv_domain > struct mapcache_domain mapcache; > > struct cpuidmasks *cpuidmasks; > + > +/* XPTI active? */ > +bool xpti; > }; Is there really no 1 byte slot available elsewhere in the structure? Like between nr_l4_pages and mapcache? Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
[Xen-devel] [xen-unstable-smoke test] 120342: regressions - FAIL
flight 120342 xen-unstable-smoke real [real] http://logs.test-lab.xenproject.org/osstest/logs/120342/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-arm64-xsm 6 xen-buildfail REGR. vs. 120304 build-armhf 6 xen-buildfail REGR. vs. 120304 Tests which did not succeed, but are not blocking: test-arm64-arm64-xl-xsm 1 build-check(1) blocked n/a test-armhf-armhf-xl 1 build-check(1) blocked n/a test-amd64-amd64-libvirt 13 migrate-support-checkfail never pass version targeted for testing: xen c9bd8a73656d7435b1055ee8825823aee995993e baseline version: xen 8b022d0005d5b941cd078f640cae04711f5536c1 Last test of basis 120304 2018-03-07 13:01:17 Z0 days Testing same since 120313 2018-03-07 17:08:30 Z0 days6 attempts People who touched revisions under test: Andrew CooperJuergen Gross Meng Xu Wei Liu jobs: build-arm64-xsm fail build-amd64 pass build-armhf fail build-amd64-libvirt pass test-armhf-armhf-xl blocked test-arm64-arm64-xl-xsm blocked test-amd64-amd64-xl-qemuu-debianhvm-i386 pass test-amd64-amd64-libvirt pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Not pushing. commit c9bd8a73656d7435b1055ee8825823aee995993e Author: Juergen Gross Date: Wed Mar 7 12:03:18 2018 +0100 tools/xenstore: add libdl dependency to libxenstore Commit 448c03b3cbe14873ee63 ("tools/xenstore: try to get minimum thread stack size for watch thread") added a dependency to libdl to libxenstore. Add the needed flags to LDLIBS_libxenstore and the pkg-config file of libxenstore. 
Fixes: 448c03b3cbe14873ee63 Signed-off-by: Juergen Gross Acked-by: Wei Liu commit c77ec3478ac0e117c814789bdd4caab60b4732c5 Author: Andrew Cooper Date: Wed Feb 28 11:43:25 2018 + xen/domain: Call sched_destroy_domain() in the domain_create() error path If domain_create() fails, complete_domain_destroy() doesn't get called, meaning that sched_destroy_domain() is missed. In practice, this can only fail because of exceptional late_hwdom_init() issues at the moment. Make sched_destroy_domain() idempotent, and call it in the fail path. Signed-off-by: Andrew Cooper Reviewed-by: George Dunlap Reviewed-by: Dario Faggioli commit c3715dd8fb766eb6f90d9cd81ef892f917845238 Author: Andrew Cooper Date: Tue Feb 27 16:48:19 2018 + xen/sched: Remove {init,destroy}_domain() interfaces These hooks have one single caller (sched_{init,destroy}_domain() respectively) and are all identical (when implemented). Previous changes have ensured that only real domains reach these functions, so ASSERT() that system domains are not seen. Call sched_{alloc,free}_domdata() directly, and handle d->sched_priv directly. The net diffstat is: add/remove: 0/8 grow/shrink: 1/7 up/down: 7/-335 (-328) function old new delta sched_destroy_domain 130 137 +7 sched_init_domain138 137 -1 rt_dom_destroy 6 - -6 null_dom_destroy 6 - -6 csched_dom_destroy 9 - -9 csched2_dom_destroy9 - -9 sched_rtds_def 264 248 -16 sched_null_def 264 248 -16
Re: [Xen-devel] [PATCH] common/sched: Fix ARM build following c/s 340edc3902
On 03/07/2018 07:41 PM, Andrew Cooper wrote: > The OSSTest smoke tests reports: > > sched_credit2.c: In function 'csched2_alloc_domdata': > sched_credit2.c:3015:9: error: implicit declaration of function 'ERR_PTR' > [-Werror=implicit-function-declaration] >return ERR_PTR(-ENOMEM); >^ > sched_credit2.c:3015:9: error: nested extern declaration of 'ERR_PTR' > [-Werror=nested-externs] > > As the ERR infrastructure is part of the main scheduler interface now, > include it from xen/sched-if.h > > Signed-off-by: Andrew Cooper Acked-by: George Dunlap ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH] common/sched: Fix ARM build following c/s 340edc3902
On Wed, 2018-03-07 at 12:04 -0800, Stefano Stabellini wrote: > On Wed, 7 Mar 2018, Andrew Cooper wrote: > > The OSSTest smoke tests reports: > > > > sched_credit2.c: In function 'csched2_alloc_domdata': > > sched_credit2.c:3015:9: error: implicit declaration of function > 'ERR_PTR' [-Werror=implicit-function-declaration] > >return ERR_PTR(-ENOMEM); > >^ > > sched_credit2.c:3015:9: error: nested extern declaration of > 'ERR_PTR' [-Werror=nested-externs] > > > > As the ERR infrastructure is part of the main scheduler interface > now, include it from xen/sched-if.h > > > > Signed-off-by: Andrew Cooper> > Reviewed-by: Stefano Stabellini > Reviewed-by: Dario Faggioli Regards, Dario -- <> (Raistlin Majere) - Dario Faggioli, Ph.D, http://about.me/dario.faggioli Software Engineer @ SUSE https://www.suse.com/ signature.asc Description: This is a digitally signed message part ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v4 08/20] x86emul: abstract out XCRn accesses
>>> On 05.03.18 at 16:56,wrote: > On 28/02/18 13:03, Jan Beulich wrote: >> @@ -5178,18 +5202,33 @@ x86_emulate( >> _regs.eflags |= X86_EFLAGS_AC; >> break; >> >> -#ifdef __XEN__ >> -case 0xd1: /* xsetbv */ >> +case 0xd0: /* xgetbv */ >> generate_exception_if(vex.pfx, EXC_UD); >> -if ( !ops->read_cr || ops->read_cr(4, , ctxt) != >> X86EMUL_OKAY ) >> +if ( !ops->read_cr || !ops->read_xcr || >> + ops->read_cr(4, , ctxt) != X86EMUL_OKAY ) >> cr4 = 0; >> generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD); >> -generate_exception_if(!mode_ring0() || >> - handle_xsetbv(_regs.ecx, >> -_regs.eax | (_regs.rdx << >> 32)), >> +generate_exception_if(_regs.ecx > (vcpu_has_xgetbv1() ? 1 : 0), >>EXC_GP, 0); > > I'm still opposed to this change. It is inconsistent with all other > handling in the emulator, because we do not do input register validation > for any of the CR/DR/MSR hooks. > > The {read,write}_xcr() hooks should be required to deal with any > arbitrary register, just like the {read,write}_{cr,dr,msr}() hooks are > currently expected to do. And I continue to not follow you here: None of the %crN's existence is controlled by any CPUID flags, hence a check like the above one would not be possible there. If anything, I could see the core emulator filtering out %cr1, %cr5-%cr7, and from %cr9 upwards (and similarly for %drN, at which point we could also centralize the [non-]aliasing of %dr4/%dr5 onto %dr6/%dr7). The fundamental idea behind the check above (and such a possible %crN/%drN related change) being to keep in a single place all checks which are mandated by the architecture. MSRs (already by their spelled out name, even if that has become a misnomer quite quickly after their introduction) are a bit different, and hence I'm less convinced core RDMSR/WRMSR emulation should go through such hoops when the callbacks will have to normally have a big switch() statement anyway. 
But yes, if the only way forward here is to move the checks into the individual callbacks, I'll have no choice. Please clarify. Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] tools/libacpi printf output to logging instead of console/stdout ?
>>> On 07.03.18 at 21:52,wrote: > When starting a guest with the 'xl create' command (non-verbose) i get > this extra output on PVH guest types only: > > S3 disabled > S4 disabled > CONV disabled > > > It seems libacpi/* only contains normal printf's, so for the other guest > types i probably just never triggered one of them. > > Shouldn't these printf's go to logging instead of console/stdout ? I think it's the responsibility of the executable linking to that library to suitably set up / redirect stdout. There not being anything like "stdlog", I'm also not sure where you would think libacpi should send them (if it was to control this itself) - surely not stderr. Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [Xen-devel] [PATCH v2 6/6] xen/domain: Added debug safety in the domain_create() failure path
>>> On 07.03.18 at 20:12,wrote: > --- a/xen/common/domain.c > +++ b/xen/common/domain.c > @@ -399,6 +399,9 @@ struct domain *domain_create(domid_t domid, unsigned int > domcr_flags, > return d; > > fail: > +ASSERT(err < 0); /* Sanity check paths leading here. */ > +err = err ?: -EINVAL; /* Release build safety. */ Fundamentally I'm fine with this, but could you use a much less frequently used (and hence easier to identify) error code here? Like EILSEQ, ECHILD, or ENOTEMPTY (and I'm open to the use of other obscure ones)? We don't know what the cause of the error was, so what exact error code to report is pretty meaningless to the caller anyway. With that Reviewed-by: Jan Beulich Jan ___ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel