[Xen-devel] [PATCH] x86: xen: mmu: Remove unused function
Remove the function set_pte_mfn() that is not used anywhere. This was partially found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist rickard_strandqv...@spectrumdigital.se --- arch/x86/xen/mmu.c |9 - arch/x86/xen/mmu.h |2 -- 2 files changed, 11 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a8a1a3d..6959550 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -281,15 +281,6 @@ static void xen_set_pmd(pmd_t *ptr, pmd_t val) xen_set_pmd_hyper(ptr, val); } -/* - * Associate a virtual page frame with a given physical page frame - * and protection flags for that frame. - */ -void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) -{ - set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); -} - static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) { struct mmu_update u; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 73809bb..f2dcf79 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -13,8 +13,6 @@ enum pt_level { bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); -void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] evtchn: simplify sending of notifications
The trivial wrapper evtchn_set_pending() is pretty pointless, as it only serves to invoke another wrapper evtchn_port_set_pending(). In turn, the latter is kind of inconsistent with its siblings in that is takes a struct vcpu * rather than a struct domain * - adjusting this allows for more efficient code in the majority of cases. Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -95,8 +95,6 @@ static uint8_t get_xen_consumer(xen_even /* Get the notification function for a given Xen-bound event channel. */ #define xen_notification_fn(e) (xen_consumers[(e)-xen_consumer-1]) -static void evtchn_set_pending(struct vcpu *v, int port); - static int virq_is_global(uint32_t virq) { int rc; @@ -287,7 +285,7 @@ static long evtchn_bind_interdomain(evtc * We may have lost notifications on the remote unbound port. Fix that up * here by conservatively always setting a notification on the local port. */ -evtchn_set_pending(ld-vcpu[lchn-notify_vcpu_id], lport); +evtchn_port_set_pending(ld, lchn-notify_vcpu_id, lchn); bind-local_port = lport; @@ -599,11 +597,10 @@ static long evtchn_close(evtchn_close_t return __evtchn_close(current-domain, close-port); } -int evtchn_send(struct domain *d, unsigned int lport) +int evtchn_send(struct domain *ld, unsigned int lport) { struct evtchn *lchn, *rchn; -struct domain *ld = d, *rd; -struct vcpu *rvcpu; +struct domain *rd; intrport, ret = 0; spin_lock(ld-event_lock); @@ -633,14 +630,13 @@ int evtchn_send(struct domain *d, unsign rd= lchn-u.interdomain.remote_dom; rport = lchn-u.interdomain.remote_port; rchn = evtchn_from_port(rd, rport); -rvcpu = rd-vcpu[rchn-notify_vcpu_id]; if ( consumer_is_xen(rchn) ) -(*xen_notification_fn(rchn))(rvcpu, rport); +xen_notification_fn(rchn)(rd-vcpu[rchn-notify_vcpu_id], rport); else -evtchn_set_pending(rvcpu, rport); +evtchn_port_set_pending(rd, rchn-notify_vcpu_id, rchn); break; case ECS_IPI: -evtchn_set_pending(ld-vcpu[lchn-notify_vcpu_id], lport); +evtchn_port_set_pending(ld, lchn-notify_vcpu_id, lchn); break; case ECS_UNBOUND: /* silently drop the notification */ @@ -655,11 +651,6 @@ out: return ret; } -static void evtchn_set_pending(struct vcpu *v, int port) -{ -evtchn_port_set_pending(v, evtchn_from_port(v-domain, port)); -} - int guest_enabled_event(struct vcpu *v, uint32_t virq) { return ((v != NULL) (v-virq_to_evtchn[virq] != 0)); @@ -669,6 +660,7 @@ void send_guest_vcpu_virq(struct vcpu *v { unsigned long flags; int port; +struct domain *d; ASSERT(!virq_is_global(virq)); @@ -678,7 +670,8 @@ void send_guest_vcpu_virq(struct vcpu *v if ( unlikely(port == 0) ) goto out; -evtchn_set_pending(v, port); +d = v-domain; +evtchn_port_set_pending(d, v-vcpu_id, evtchn_from_port(d, port)); out: spin_unlock_irqrestore(v-virq_lock, flags); @@ -707,7 +700,7 @@ static void send_guest_global_virq(struc goto out; chn = evtchn_from_port(d, port); -evtchn_set_pending(d-vcpu[chn-notify_vcpu_id], port); +evtchn_port_set_pending(d, chn-notify_vcpu_id, chn); out: spin_unlock_irqrestore(v-virq_lock, flags); @@ -731,7 +724,7 @@ void send_guest_pirq(struct domain *d, c } chn = evtchn_from_port(d, port); -evtchn_set_pending(d-vcpu[chn-notify_vcpu_id], port); +evtchn_port_set_pending(d, chn-notify_vcpu_id, chn); } static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly; @@ -1202,7 +1195,6 @@ void notify_via_xen_event_channel(struct { struct evtchn *lchn, *rchn; struct domain *rd; -intrport; spin_lock(ld-event_lock); @@ -1219,9 +1211,8 @@ void notify_via_xen_event_channel(struct if ( likely(lchn-state == ECS_INTERDOMAIN) ) { rd= lchn-u.interdomain.remote_dom; -rport = lchn-u.interdomain.remote_port; -rchn = evtchn_from_port(rd, rport); -evtchn_set_pending(rd-vcpu[rchn-notify_vcpu_id], rport); +rchn = evtchn_from_port(rd, lchn-u.interdomain.remote_port); +evtchn_port_set_pending(rd, rchn-notify_vcpu_id, rchn); } spin_unlock(ld-event_lock); --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru d-evtchn_port_ops-init(d, evtchn); } -static inline void evtchn_port_set_pending(struct vcpu *v, +static inline void evtchn_port_set_pending(struct domain *d, + unsigned int vcpu_id, struct evtchn *evtchn) { -v-domain-evtchn_port_ops-set_pending(v, evtchn); +d-evtchn_port_ops-set_pending(d-vcpu[vcpu_id], evtchn); } static inline
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On 12.01.15 at 10:41, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 5:33 PM On 12.01.15 at 09:46, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Friday, January 09, 2015 6:35 PM On 09.01.15 at 11:10, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] The question isn't about migrating with devices assigned, but about assigning devices after migration (consider a dual vif + SR-IOV NIC guest setup where the SR-IOV NIC gets hot-removed before migration and a new one hot-plugged afterwards). Furthermore any tying of the guest memory layout to the host's where the guest first boots is awkward, as post-migration there's not going to be any reliable correlation between the guest layout and the new host's. how can you solve this? like above example, a NIC on node-A leaves a reserved region in guest e820. now it's hot-removed and then migrated to node-b. there's no way to update e820 again since it's only boot structure. then user will still see such awkward regions. since it's not avoidable, report-all in the summary mail looks not causing a new problem. The solution to this are reserved regions specified in the guest config, independent of host characteristics. I don't think how reserved regions are specified matter here. My point is that when a region is reserved in e820 at boot time, there's no way to erase that knowledge in the guest even when devices causing that reservation are hot removed later. I don't think anyone ever indicated that such erasure would be needed/wanted - I'm not sure how you ended up there. I ended here to indicate that report-all which gives user more reserved regions than necessary is not a weird case since above scenario can also create such fact. User shouldn't set expectation about reserved region layout. and this argument is necessary to support our proposal of using report-all. :-) The fact that ranges can't be removed from a guest's memory map is irrelevant - there's simply no question that this is that way. The main counter argument against report-all remains: It may result in unnecessarily little low memory in guests not needing all of the host regions to be reserved for them. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] arm64/EFI: minor corrections
- don't bail when using the last slot of bootinfo.mem.bank[] (due to premature incrementing of the array index) - GUIDs should be static const (and placed into .init.* whenever possible) - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly append one to the message passed to the function Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/arch/arm/efi/efi-boot.h +++ b/xen/arch/arm/efi/efi-boot.h @@ -104,7 +104,7 @@ static int __init fdt_set_reg(void *fdt, static void __init *lookup_fdt_config_table(EFI_SYSTEM_TABLE *sys_table) { -const EFI_GUID fdt_guid = DEVICE_TREE_GUID; +static const EFI_GUID __initconst fdt_guid = DEVICE_TREE_GUID; EFI_CONFIGURATION_TABLE *tables; void *fdt = NULL; int i; @@ -135,15 +135,15 @@ static EFI_STATUS __init efi_process_mem || desc_ptr-Type == EfiBootServicesCode || desc_ptr-Type == EfiBootServicesData ) { -bootinfo.mem.bank[i].start = desc_ptr-PhysicalStart; -bootinfo.mem.bank[i].size = desc_ptr-NumberOfPages * EFI_PAGE_SIZE; -if ( ++i = NR_MEM_BANKS ) +if ( i = NR_MEM_BANKS ) { -PrintStr(LWarning: All ); -DisplayUint(NR_MEM_BANKS, -1); -PrintStr(L bootinfo mem banks exhausted.\r\n); +PrintStr(LWarning: All __stringify(NR_MEM_BANKS) + bootinfo mem banks exhausted.\r\n); break; } +bootinfo.mem.bank[i].start = desc_ptr-PhysicalStart; +bootinfo.mem.bank[i].size = desc_ptr-NumberOfPages * EFI_PAGE_SIZE; +++i; } desc_ptr = NextMemoryDescriptor(desc_ptr, desc_size); } @@ -334,7 +334,7 @@ static void __init efi_arch_process_memo status = fdt_add_uefi_nodes(SystemTable, fdt, map, map_size, desc_size, desc_ver); if ( EFI_ERROR(status) ) -PrintErrMesg(LUpdating FDT failed\r\n, status); +PrintErrMesg(LUpdating FDT failed, status); } static void __init efi_arch_pre_exit_boot(void) @@ -408,7 +408,7 @@ static void __init efi_arch_handle_cmdli status = efi_bs-AllocatePool(EfiBootServicesData, EFI_PAGE_SIZE, (void **)buf); if ( EFI_ERROR(status) ) -PrintErrMesg(LUnable to allocate string buffer\r\n, status); +PrintErrMesg(LUnable to allocate string buffer, status); if ( image_name ) { arm64/EFI: minor corrections - don't bail when using the last slot of bootinfo.mem.bank[] (due to premature incrementing of the array index) - GUIDs should be static const (and placed into .init.* whenever possible) - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly append one to the message passed to the function Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/arch/arm/efi/efi-boot.h +++ b/xen/arch/arm/efi/efi-boot.h @@ -104,7 +104,7 @@ static int __init fdt_set_reg(void *fdt, static void __init *lookup_fdt_config_table(EFI_SYSTEM_TABLE *sys_table) { -const EFI_GUID fdt_guid = DEVICE_TREE_GUID; +static const EFI_GUID __initconst fdt_guid = DEVICE_TREE_GUID; EFI_CONFIGURATION_TABLE *tables; void *fdt = NULL; int i; @@ -135,15 +135,15 @@ static EFI_STATUS __init efi_process_mem || desc_ptr-Type == EfiBootServicesCode || desc_ptr-Type == EfiBootServicesData ) { -bootinfo.mem.bank[i].start = desc_ptr-PhysicalStart; -bootinfo.mem.bank[i].size = desc_ptr-NumberOfPages * EFI_PAGE_SIZE; -if ( ++i = NR_MEM_BANKS ) +if ( i = NR_MEM_BANKS ) { -PrintStr(LWarning: All ); -DisplayUint(NR_MEM_BANKS, -1); -PrintStr(L bootinfo mem banks exhausted.\r\n); +PrintStr(LWarning: All __stringify(NR_MEM_BANKS) + bootinfo mem banks exhausted.\r\n); break; } +bootinfo.mem.bank[i].start = desc_ptr-PhysicalStart; +bootinfo.mem.bank[i].size = desc_ptr-NumberOfPages * EFI_PAGE_SIZE; +++i; } desc_ptr = NextMemoryDescriptor(desc_ptr, desc_size); } @@ -334,7 +334,7 @@ static void __init efi_arch_process_memo status = fdt_add_uefi_nodes(SystemTable, fdt, map, map_size, desc_size, desc_ver); if ( EFI_ERROR(status) ) -PrintErrMesg(LUpdating FDT failed\r\n, status); +PrintErrMesg(LUpdating FDT failed, status); } static void __init efi_arch_pre_exit_boot(void) @@ -408,7 +408,7 @@ static void __init efi_arch_handle_cmdli status = efi_bs-AllocatePool(EfiBootServicesData, EFI_PAGE_SIZE, (void **)buf); if ( EFI_ERROR(status) ) -PrintErrMesg(LUnable to allocate string buffer\r\n, status); +PrintErrMesg(LUnable to allocate string buffer, status); if ( image_name ) {
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On 12.01.15 at 09:46, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Friday, January 09, 2015 6:35 PM On 09.01.15 at 11:10, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] The question isn't about migrating with devices assigned, but about assigning devices after migration (consider a dual vif + SR-IOV NIC guest setup where the SR-IOV NIC gets hot-removed before migration and a new one hot-plugged afterwards). Furthermore any tying of the guest memory layout to the host's where the guest first boots is awkward, as post-migration there's not going to be any reliable correlation between the guest layout and the new host's. how can you solve this? like above example, a NIC on node-A leaves a reserved region in guest e820. now it's hot-removed and then migrated to node-b. there's no way to update e820 again since it's only boot structure. then user will still see such awkward regions. since it's not avoidable, report-all in the summary mail looks not causing a new problem. The solution to this are reserved regions specified in the guest config, independent of host characteristics. I don't think how reserved regions are specified matter here. My point is that when a region is reserved in e820 at boot time, there's no way to erase that knowledge in the guest even when devices causing that reservation are hot removed later. I don't think anyone ever indicated that such erasure would be needed/wanted - I'm not sure how you ended up there. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: In the meantime, I saw this: http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html Based on the post above seems that phy will have important risk of data loss if I understand good, from Ian Campbell post: xl also uses qdisk for raw disk images instead of loop+blkback which xend used, because there are concerns that the loop driver can lead to data loss (by not implementing direct i/o the underlying device, and/or not handling flushes correct, my memory is a bit fuzzy). Stefano already corrected me on this in this very thread. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] x86emul: tighten CLFLUSH emulation
While for us it's not as bad as it was for Linux, their commit 13e457e0ee (KVM: x86: Emulator does not decode clflush well, by Nadav Amit na...@cs.technion.ac.il) nevertheless points out two shortcomings in our code: opcode 0F AE /7 is clflush only when it uses a memory mode (otherwise it's SFENCE) and when there's no REP prefix (an operand size prefix is fine, as that's CLFLUSHOPT). Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -4400,7 +4400,9 @@ x86_emulate( case 0xae: /* Grp15 */ switch ( modrm_reg 7 ) { -case 7: /* clflush */ +case 7: /* clflush{,opt} */ +fail_if(modrm_mod == 3); +fail_if(rep_prefix()); fail_if(ops-wbinvd == NULL); if ( (rc = ops-wbinvd(ctxt)) != 0 ) goto done; x86emul: tighten CLFLUSH emulation While for us it's not as bad as it was for Linux, their commit 13e457e0ee (KVM: x86: Emulator does not decode clflush well, by Nadav Amit na...@cs.technion.ac.il) nevertheless points out two shortcomings in our code: opcode 0F AE /7 is clflush only when it uses a memory mode (otherwise it's SFENCE) and when there's no REP prefix (an operand size prefix is fine, as that's CLFLUSHOPT). Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -4400,7 +4400,9 @@ x86_emulate( case 0xae: /* Grp15 */ switch ( modrm_reg 7 ) { -case 7: /* clflush */ +case 7: /* clflush{,opt} */ +fail_if(modrm_mod == 3); +fail_if(rep_prefix()); fail_if(ops-wbinvd == NULL); if ( (rc = ops-wbinvd(ctxt)) != 0 ) goto done; ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] x86/MCE: allow overriding the CMCI threshold
We've had reports of systems where CMCIs would surface at a relatively high rate during certain periods of time, without them apparently causing subsequent more severe problems (see Xeon E7-8800/4800/2800 specification clarification SC1). Give the admin a knob to lower the impact on the system logs. Signed-off-by: Jan Beulich jbeul...@suse.com --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -242,6 +242,14 @@ the NMI watchdog is also enabled. If set, override Xen's default choice for the platform timer. +### cmci-threshold + `= integer` + + Default: `2` + +Specify the event count threshold for raising Corrected Machine Check +Interrupts. Specifying zero disables CMCI handling. + ### cmos-rtc-probe `= boolean` --- a/xen/arch/x86/cpu/mcheck/mce_intel.c +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c @@ -492,6 +492,9 @@ static int do_cmci_discover(int i) { unsigned msr = MSR_IA32_MCx_CTL2(i); u64 val; +unsigned int threshold, max_threshold; +static unsigned int cmci_threshold = 2; +integer_param(cmci-threshold, cmci_threshold); rdmsrl(msr, val); /* Some other CPU already owns this bank. */ @@ -500,15 +503,28 @@ static int do_cmci_discover(int i) goto out; } -val = ~CMCI_THRESHOLD_MASK; -wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD); -rdmsrl(msr, val); +if ( cmci_threshold ) +{ +wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK); +rdmsrl(msr, val); +} if (!(val CMCI_EN)) { /* This bank does not support CMCI. Polling timer has to handle it. */ mcabanks_set(i, __get_cpu_var(no_cmci_banks)); +wrmsrl(msr, val ~CMCI_THRESHOLD_MASK); return 0; } +max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK); +threshold = cmci_threshold; +if ( threshold max_threshold ) +{ + mce_printk(MCE_QUIET, + CMCI: threshold %#x too large for CPU%u bank %u, using %#x\n, + threshold, smp_processor_id(), i, max_threshold); + threshold = max_threshold; +} +wrmsrl(msr, (val ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold); mcabanks_set(i, __get_cpu_var(mce_banks_owned)); out: mcabanks_clear(i, __get_cpu_var(no_cmci_banks)); --- a/xen/arch/x86/cpu/mcheck/x86_mca.h +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h @@ -86,9 +86,6 @@ /* Bitfield of MSR_K8_HWCR register */ #define K8_HWCR_MCi_STATUS_WREN(1ULL 18) -/*Intel Specific bitfield*/ -#define CMCI_THRESHOLD 0x2 - #define MCi_MISC_ADDRMOD_MASK (0x7UL 6) #define MCi_MISC_PHYSMOD(0x2UL 6) x86/MCE: allow overriding the CMCI threshold We've had reports of systems where CMCIs would surface at a relatively high rate during certain periods of time, without them apparently causing subsequent more severe problems (see Xeon E7-8800/4800/2800 specification clarification SC1). Give the admin a knob to lower the impact on the system logs. Signed-off-by: Jan Beulich jbeul...@suse.com --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -242,6 +242,14 @@ the NMI watchdog is also enabled. If set, override Xen's default choice for the platform timer. +### cmci-threshold + `= integer` + + Default: `2` + +Specify the event count threshold for raising Corrected Machine Check +Interrupts. Specifying zero disables CMCI handling. + ### cmos-rtc-probe `= boolean` --- a/xen/arch/x86/cpu/mcheck/mce_intel.c +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c @@ -492,6 +492,9 @@ static int do_cmci_discover(int i) { unsigned msr = MSR_IA32_MCx_CTL2(i); u64 val; +unsigned int threshold, max_threshold; +static unsigned int cmci_threshold = 2; +integer_param(cmci-threshold, cmci_threshold); rdmsrl(msr, val); /* Some other CPU already owns this bank. */ @@ -500,15 +503,28 @@ static int do_cmci_discover(int i) goto out; } -val = ~CMCI_THRESHOLD_MASK; -wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD); -rdmsrl(msr, val); +if ( cmci_threshold ) +{ +wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK); +rdmsrl(msr, val); +} if (!(val CMCI_EN)) { /* This bank does not support CMCI. Polling timer has to handle it. */ mcabanks_set(i, __get_cpu_var(no_cmci_banks)); +wrmsrl(msr, val ~CMCI_THRESHOLD_MASK); return 0; } +max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK); +threshold = cmci_threshold; +if ( threshold max_threshold ) +{ + mce_printk(MCE_QUIET, + CMCI: threshold %#x too large for CPU%u bank %u, using %#x\n, + threshold, smp_processor_id(), i, max_threshold); + threshold = max_threshold; +} +wrmsrl(msr, (val ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold); mcabanks_set(i, __get_cpu_var(mce_banks_owned)); out: mcabanks_clear(i, __get_cpu_var(no_cmci_banks)); ---
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
Il 09/01/2015 18:38, Dario Faggioli ha scritto: On Fri, 2015-01-09 at 14:42 +, Wei Liu wrote: On Thu, Jan 08, 2015 at 03:33:17PM +0100, Fabio Fantoni wrote: Sorry for the probably stupid question, what are the pros and cons of default use of phy instead qdisk for raw files as domU disk? There's no stupid question. :-) I was told that it performs better and enables other potential improvements. Not a big deal, probably, but IMO it would be nice to have at least a few words about when it's happening in the changelog, wouldn't it? I'm also interested in detailed changelog about. In the meantime, I saw this: http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html Based on the post above seems that phy will have important risk of data loss if I understand good, from Ian Campbell post: xl also uses qdisk for raw disk images instead of loop+blkback which xend used, because there are concerns that the loop driver can lead to data loss (by not implementing direct i/o the underlying device, and/or not handling flushes correct, my memory is a bit fuzzy). Thanks for any reply and sorry for my bad english. Regards, Dario ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] common/memory: fix an XSM error path
XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap return the extent at which failure was detected, not error indicators. Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN return start_extent; args.domain = d; -rc = xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d); -if ( rc ) +if ( xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d) ) { rcu_unlock_domain(d); -return rc; +return start_extent; } switch ( op ) common/memory: fix an XSM error path XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap return the extent at which failure was detected, not error indicators. Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN return start_extent; args.domain = d; -rc = xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d); -if ( rc ) +if ( xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d) ) { rcu_unlock_domain(d); -return rc; +return start_extent; } switch ( op ) ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 5:33 PM On 12.01.15 at 09:46, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Friday, January 09, 2015 6:35 PM On 09.01.15 at 11:10, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] The question isn't about migrating with devices assigned, but about assigning devices after migration (consider a dual vif + SR-IOV NIC guest setup where the SR-IOV NIC gets hot-removed before migration and a new one hot-plugged afterwards). Furthermore any tying of the guest memory layout to the host's where the guest first boots is awkward, as post-migration there's not going to be any reliable correlation between the guest layout and the new host's. how can you solve this? like above example, a NIC on node-A leaves a reserved region in guest e820. now it's hot-removed and then migrated to node-b. there's no way to update e820 again since it's only boot structure. then user will still see such awkward regions. since it's not avoidable, report-all in the summary mail looks not causing a new problem. The solution to this are reserved regions specified in the guest config, independent of host characteristics. I don't think how reserved regions are specified matter here. My point is that when a region is reserved in e820 at boot time, there's no way to erase that knowledge in the guest even when devices causing that reservation are hot removed later. I don't think anyone ever indicated that such erasure would be needed/wanted - I'm not sure how you ended up there. I ended here to indicate that report-all which gives user more reserved regions than necessary is not a weird case since above scenario can also create such fact. User shouldn't set expectation about reserved region layout. and this argument is necessary to support our proposal of using report-all. :-) Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] summary-1 (v2) Design proposal for RMRR fix
From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Friday, January 09, 2015 6:46 PM On 09.01.15 at 11:26, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] On 09.01.15 at 07:57, kevin.t...@intel.com wrote: 1.1) per-device 'warn' vs. global 'warn' Both Tim/Jan prefer to 'warn' as a per-device option to the admin instead of a global option. In a glimpse a per-device 'warn' option provides more fine-grained control than a global option, however if thinking it carefully allowing one device w/ potential problem isn't more correct or secure than allowing multiple devices w/ potential problem. Even in practice a device like USB can work bearing 1MB confliction, like Jan pointed out there's always corner cases which we might not know so as long as we open door for one device, it implies a problematic environment to users and user's judge on whether he can live up to this problem is not impacted by how many devices the door is opened for (he anyway needs to study warning message and do verification if choosing to live up) Regarding to that, imo if we agree to provide 'warn' option, just providing a global overriding option (definitely per-vm) is acceptable and simpler. If the admin determined that ignoring the RMRR requirements for one devices is safe, that doesn't (and shouldn't) mean this is the case for all other devices too. I don't think admin can determine whether it's 100% safe. What admin can decide is whether he lives up to the potential problem based on his purpose or based on some experiments. only device vendor knows when and how RMRR is used. So as long as warn is opened for one device, I think it already means a problem environment and then adding more device is just same situation. What if the admin consulted the device and BIOS vendors, and got assured there's not going to be any accesses to the reserved regions post-boot? consultancy could be still inaccurate, or man-error may happen. 1.2) when to 'fail' There is one open whether we should fail immediately in domain builder if a confliction is detected. Jan's comment is yes, we should 'fail' the VM creation as it's an error. My previous point is more mimicking native behavior, where a device failure (in our case it's actually potential device failure since VM is not powered yet) doesn't impact user until its function is actually touched. In our case, even domain builder fails to re-arrange guest RAM to skip reserved regions, we have centralized policy (either 'fail' or 'warn' per above conclusion) in Xen hypervisor when the device is actually assigned. so a 'warn' should be fine, but my insist on this is not strong. See my earlier reply: Failure to add a device to me is more like a device preventing a bare metal system from coming up altogether. not all devices are required for bare metal to boot. it causes problem only when it's being used in the boot process. say at powering up the disk (insert in the PCI slot) is broken (not sure whether you call such thing as 'failure to add a device'), it is only error when BIOS tries to read disk. Not necessarily. Any malfunctioning device touched by the BIOS, irrespective of whether the device is needed for booting, can cause the boot process to hang. Again, the analogy to bare metal is device presence, not whether the device is functioning properly. note device assignment path is the actual path to decide whether a device will be present to the guest. not at this domain build time. That would only make a marginal difference in time of when domain creation fails. it's not marginal difference. instead it's about who owns the policy. to me, detect/avoid conflictions in domain builder is just a preparation for later device assignment (either deterministic static assignment or non-deterministic hotplug). As a preparation, we don't need to make a failure here as a blocker to prevent guest boot. Instead, leave the decision to where device assignment actually happens then hard requirement is made on any conflictions a.t.m. Then we just follow the existing policy of device assignment (either block guest boot, or move forward w/o presenting the device), if confliction is treat as a failure by default (w/o 'warn' override) and another point is about hotplug. 'fail' for future devices is too strict, but to differentiate that from static-assigned devices, domain builder will then need maintain a per-device reserved region structure. just 'warn' makes things simple. Whereas here I agree - hotplug should just fail (without otherwise impacting the guest). so 'should' - 'shoundn't'? No. Perhaps what you imply from fail is different from my reading: I mean this to be the result of the hotplug operation - the device would just not appear in the guest. The guest isn't to be
Re: [Xen-devel] preparing for 4.4.2 / 4.3.4
On Thu, Jan 08, Jan Beulich wrote: now that 4.5 is almost out the door, I'd like to get stable releases prepared on the other two active branches. 4.3.4 is expected to be the last xen.org managed release on the 4.3 branch. Aiming at RC1s some time next week, please indicate commits you consider relevant to backport but find missing from the respective branches. e86539a388314cd3dca88f5e69d7873343197cd8 (libxc: check return values on mmap() and madvise() on xc_alloc_hypercall_buffer()) Olaf ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Friday, January 09, 2015 6:35 PM On 09.01.15 at 11:10, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Boot time device assignment is different: The question isn't whether an assigned device works, instead the proper analogy is whether a device is _present_. If a device doesn't work on bare metal, it will still be discoverable. Yet if device assignment fails, that's not going to be the case - for security reasons, the guest would not see any notion of the device. the question is whether we want such device assignment fail due to RMRR confliction, and the fail decision should be when Xen handles actual assignment instead of when domain builder prepares reserved regions. Detecting the failure only in the hypervisor has the downside of potentially leaving the user with little clues as to what went wrong. Sending messages to the hypervisor log in that case is questionable, yet the tool stack (namely libxc) is known to not always do a good job in error propagation. The question isn't about migrating with devices assigned, but about assigning devices after migration (consider a dual vif + SR-IOV NIC guest setup where the SR-IOV NIC gets hot-removed before migration and a new one hot-plugged afterwards). Furthermore any tying of the guest memory layout to the host's where the guest first boots is awkward, as post-migration there's not going to be any reliable correlation between the guest layout and the new host's. how can you solve this? like above example, a NIC on node-A leaves a reserved region in guest e820. now it's hot-removed and then migrated to node-b. there's no way to update e820 again since it's only boot structure. then user will still see such awkward regions. since it's not avoidable, report-all in the summary mail looks not causing a new problem. The solution to this are reserved regions specified in the guest config, independent of host characteristics. I don't think how reserved regions are specified matter here. My point is that when a region is reserved in e820 at boot time, there's no way to erase that knowledge in the guest even when devices causing that reservation are hot removed later. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [linux-3.14 test] 33341: tolerable FAIL - PUSHED
flight 33341 linux-3.14 real [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/33341/ Failures :-/ but no regressions. Regressions which are regarded as allowable (not blocking): test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail like 32448 Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-pvh-intel 9 guest-start fail never pass test-armhf-armhf-libvirt 9 guest-start fail never pass test-amd64-i386-libvirt 9 guest-start fail never pass test-amd64-amd64-xl-pvh-amd 9 guest-start fail never pass test-armhf-armhf-xl 10 migrate-support-checkfail never pass test-amd64-amd64-xl-pcipt-intel 9 guest-start fail never pass test-amd64-amd64-libvirt 9 guest-start fail never pass test-amd64-i386-xl-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-winxpsp3 14 guest-stop fail never pass test-amd64-amd64-xl-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-amd64-xl-winxpsp3 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass version targeted for testing: linuxc3b70f0bbb6a883f4afa639286043d3f71fbbddf baseline version: linux83a926f7a4e39fb6be0576024e67fe161593defa People who touched revisions under test: Eric W. Biederman ebied...@xmission.com Andreas Müller g...@stapelspeicher.org Andrew Lunn and...@lunn.ch Andrew Morton a...@linux-foundation.org Andy Lutomirski l...@amacapital.net Baruch Siach bar...@tkos.co.il Catalin Marinas catalin.mari...@arm.com Cedric Bosdonnat cbosdon...@suse.com Chris Mason c...@fb.com Christoph Hellwig h...@lst.de Dan Carpenter dan.carpen...@oracle.com Darrick J. Wong darrick.w...@oracle.com Dmitry Eremin-Solenikov dbarysh...@gmail.com Dmitry Osipenko dig...@gmail.com Eric W. Biederman ebied...@xmission.com Filipe Manana fdman...@suse.com Greg Kroah-Hartman gre...@linuxfoundation.org H. Peter Anvin h...@zytor.com Hannes Reinecke h...@suse.de Herbert Xu herb...@gondor.apana.org.au Ingo Molnar mi...@kernel.org Jaehoon Chung jh80.ch...@samsung.com James Hogan james.ho...@imgtec.com Jan Kara j...@suse.cz Jason Cooper ja...@lakedaemon.net Joe Thornber e...@redhat.com Johannes Berg johannes.b...@intel.com Josef Bacik jba...@fb.com Kashyap Desai kashyap.de...@avagotech.com Lee Jones lee.jo...@linaro.org Linus Torvalds torva...@linux-foundation.org Luis Henriques luis.henriq...@canonical.com Michael Halcrow mhalc...@google.com Mike Snitzer snit...@redhat.com Mikulas Patocka mpato...@redhat.com Milan Broz gmazyl...@gmail.com Mimi Zohar zo...@linux.vnet.ibm.com NeilBrown ne...@suse.de Oleg Nesterov o...@redhat.com Paolo Bonzini pbonz...@redhat.com Paul Moore pmo...@redhat.com Peng Tao tao.p...@primarydata.com Peter Guo peter@bayhubtech.com Rabin Vincent rabin.vinc...@axis.com Richard Guy Briggs r...@redhat.com Richard Weinberger rich...@nod.at Sumit Saxena sumit.sax...@avagotech.com sumit.sax...@avagotech.com sumit.sax...@avagotech.com Takashi Iwai ti...@suse.de Thierry Reding tred...@nvidia.com Thomas Gleixner t...@linutronix.de Trond Myklebust trond.mykleb...@primarydata.com Tyler Hicks tyhi...@canonical.com Ulf Hansson ulf.hans...@linaro.org Uwe Kleine-König u.kleine-koe...@pengutronix.de Will Deacon will.dea...@arm.com Zhang Rui rui.zh...@intel.com jobs: build-amd64 pass build-armhf pass build-i386 pass build-amd64-libvirt pass build-armhf-libvirt pass build-i386-libvirt pass build-amd64-pvops
Re: [Xen-devel] preparing for 4.4.2 / 4.3.4
On 12.01.15 at 09:47, o...@aepfle.de wrote: On Thu, Jan 08, Jan Beulich wrote: now that 4.5 is almost out the door, I'd like to get stable releases prepared on the other two active branches. 4.3.4 is expected to be the last xen.org managed release on the 4.3 branch. Aiming at RC1s some time next week, please indicate commits you consider relevant to backport but find missing from the respective branches. e86539a388314cd3dca88f5e69d7873343197cd8 (libxc: check return values on mmap() and madvise() on xc_alloc_hypercall_buffer()) Tool stack backport requests should be Cc-ed to the respective maintainer (now added). Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu
The resource behind an event channel is domain centric rather than vcpu centric, and free_xen_event_channel() only follows the vcpu's domain pointer. This change allows mem_event_disable() to avoid arbitrarily referencing d-vcpu[0] just to pass the domain. No functional change. Signed-off-by: Andrew Cooper andrew.coop...@citrix.com CC: Keir Fraser k...@xen.org CC: Jan Beulich jbeul...@suse.com --- xen/arch/x86/hvm/hvm.c | 12 ++-- xen/common/event_channel.c |4 +--- xen/common/mem_event.c |2 +- xen/include/xen/event.h|3 +-- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 8b06bfd..acfc032 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -647,7 +647,7 @@ static int hvm_ioreq_server_add_vcpu(struct hvm_ioreq_server *s, return 0; fail3: -free_xen_event_channel(v, sv-ioreq_evtchn); +free_xen_event_channel(v-domain, sv-ioreq_evtchn); fail2: spin_unlock(s-lock); @@ -674,9 +674,9 @@ static void hvm_ioreq_server_remove_vcpu(struct hvm_ioreq_server *s, list_del(sv-list_entry); if ( v-vcpu_id == 0 s-bufioreq.va != NULL ) -free_xen_event_channel(v, s-bufioreq_evtchn); +free_xen_event_channel(v-domain, s-bufioreq_evtchn); -free_xen_event_channel(v, sv-ioreq_evtchn); +free_xen_event_channel(v-domain, sv-ioreq_evtchn); xfree(sv); break; @@ -701,9 +701,9 @@ static void hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s) list_del(sv-list_entry); if ( v-vcpu_id == 0 s-bufioreq.va != NULL ) -free_xen_event_channel(v, s-bufioreq_evtchn); +free_xen_event_channel(v-domain, s-bufioreq_evtchn); -free_xen_event_channel(v, sv-ioreq_evtchn); +free_xen_event_channel(v-domain, sv-ioreq_evtchn); xfree(sv); } @@ -1333,7 +1333,7 @@ static int hvm_replace_event_channel(struct vcpu *v, domid_t remote_domid, /* xchg() ensures that only we call free_xen_event_channel(). */ old_port = xchg(p_port, new_port); -free_xen_event_channel(v, old_port); +free_xen_event_channel(v-domain, old_port); return 0; } diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index 7d6de54..cfe4978 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -1173,11 +1173,9 @@ int alloc_unbound_xen_event_channel( } -void free_xen_event_channel( -struct vcpu *local_vcpu, int port) +void free_xen_event_channel(struct domain *d, int port) { struct evtchn *chn; -struct domain *d = local_vcpu-domain; spin_lock(d-event_lock); diff --git a/xen/common/mem_event.c b/xen/common/mem_event.c index 16ebdb5..0cc43d7 100644 --- a/xen/common/mem_event.c +++ b/xen/common/mem_event.c @@ -221,7 +221,7 @@ static int mem_event_disable(struct domain *d, struct mem_event_domain *med) } /* Free domU's event channel and leave the other one unbound */ -free_xen_event_channel(d-vcpu[0], med-xen_port); +free_xen_event_channel(d, med-xen_port); /* Unblock all vCPUs */ for_each_vcpu ( d, v ) diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h index 88526f8..0eb1dd3 100644 --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -60,8 +60,7 @@ typedef void (*xen_event_channel_notification_t)( int alloc_unbound_xen_event_channel( struct vcpu *local_vcpu, domid_t remote_domid, xen_event_channel_notification_t notification_fn); -void free_xen_event_channel( -struct vcpu *local_vcpu, int port); +void free_xen_event_channel(struct domain *d, int port); /* Query if event channel is in use by the guest */ int guest_enabled_event(struct vcpu *v, uint32_t virq); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
From: Konrad Rzeszutek Wilk [mailto:konrad.w...@oracle.com] Sent: Saturday, January 10, 2015 4:28 AM On Thu, Jan 08, 2015 at 06:02:04PM +, George Dunlap wrote: On Thu, Jan 8, 2015 at 4:10 PM, Jan Beulich jbeul...@suse.com wrote: On 08.01.15 at 16:59, dunl...@umich.edu wrote: On Thu, Jan 8, 2015 at 1:54 PM, Jan Beulich jbeul...@suse.com wrote: the 1st invocation of this interface will save all reported reserved regions under domain structure, and later invocation (e.g. from hvmloader) gets saved content. Why would the reserved regions need attaching to the domain structure? The combination of (to be) assigned devices and global RMRR list always allow reproducing the intended set of regions without any extra storage. So when you say (to be) assigned devices, you mean any device which is currently assigned, *or may be assigned at some point in the future*? Yes. Do you think the extra storage for this VM might possibly be assigned this device at some point wouldn't really be that much bigger than this VM might possibly map this RMRR at some point in the future? Since listing devices without RMRR association would be pointless, I think a list of devices would require less storage. But see below. It seems a lot cleaner to me to have the toolstack tell Xen what ranges are reserved for RMRR per VM, and then have Xen check again when assigning a device to make sure that the RMRRs have already been reserved. With an extra level of what can be got wrong by the admin. However, I now realize that doing it this way would allow specifying regions not associated with any device on the host the guest boots on, but associated with one on a host the guest may later migrate to. I did say the toolstack, not the admin. :-) At the xl level, I envisioned a single boolean that would say, Make my memory layout resemble the host system -- so the MMIO hole would be the same size, and all the RMRRs would be reserved. Like the e820_host=1 ? :-) so this is the extension to report-all, not just for reserved regions but for all e820 entries. :-) one thing I'm struggling here (w/ Jan in other threads) is whether reporting all reserved regions on the host can be a default setting to simplify the overall rmrr implementations, given that fact that end user shouldn't set expectation on actual reserved regions. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects
In particular the .rodata.str2 case is relevant for the EFI boot code (moving around 3k from permanent to init-time sections). Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/Rules.mk +++ b/xen/Rules.mk @@ -170,7 +170,10 @@ _clean_%/: FORCE %.o: %.S Makefile $(CC) $(AFLAGS) -c $ -o $@ -SPECIAL_DATA_SECTIONS := rodata $(foreach n,1 2 4 8,rodata.str1.$(n)) \ +SPECIAL_DATA_SECTIONS := rodata $(foreach a,1 2 4 8 16, \ + $(foreach w,1 2 4, \ + rodata.str$(w).$(a)) \ + rodata.cst$(a)) \ $(foreach r,rel rel.ro,data.$(r).local) $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): %.init.o: %.o Makefile extend list of sections convertible to .init.* ones in init-only objects In particular the .rodata.str2 case is relevant for the EFI boot code (moving around 3k from permanent to init-time sections). Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/Rules.mk +++ b/xen/Rules.mk @@ -170,7 +170,10 @@ _clean_%/: FORCE %.o: %.S Makefile $(CC) $(AFLAGS) -c $ -o $@ -SPECIAL_DATA_SECTIONS := rodata $(foreach n,1 2 4 8,rodata.str1.$(n)) \ +SPECIAL_DATA_SECTIONS := rodata $(foreach a,1 2 4 8 16, \ + $(foreach w,1 2 4, \ + rodata.str$(w).$(a)) \ + rodata.cst$(a)) \ $(foreach r,rel rel.ro,data.$(r).local) $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): %.init.o: %.o Makefile ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
Il 12/01/2015 10:31, Ian Campbell ha scritto: On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: In the meantime, I saw this: http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html Based on the post above seems that phy will have important risk of data loss if I understand good, from Ian Campbell post: xl also uses qdisk for raw disk images instead of loop+blkback which xend used, because there are concerns that the loop driver can lead to data loss (by not implementing direct i/o the underlying device, and/or not handling flushes correct, my memory is a bit fuzzy). Stefano already corrected me on this in this very thread. Ian. Thanks for your reply. I saw other posts about and if I understand good when O_DIRECT patches will be in upstream loop driver the data loss risk will be solved, right? ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] xen-ringwatch issues
On Fri, 2015-01-09 at 17:00 -0500, moftah moftah wrote: Hi Ian, OK, I have followed your suggestions and created a new patch Thanks, please submit it in the form describe in the wiki page http://wiki.xen.org/wiki/Submitting_Xen_Patches which I referenced before. In particular, a Signed-off-by is a strict requirement for licensing reasons, to indicate that you have read the DCO (which is at http://wiki.xen.org/wiki/Submitting_Xen_Patches#Signing_off_a_patch ). Please also provide a suitable changelog entry and use a unified diff (diff -u). I think rather than [-]* the right addition would be -? (i.e. a single optional negative sign), unless Python's re syntax is lots different to what I expect. this new patch does fix the issue fully for the negative numbers I didnt run the patch on 64bit OS since xenserver has 32bit Dom0 These fields are unsigned int, .i.e. always 32-bits. So I think your if self.conf 0: self.cons = 4294967296 + self.cons etc will work. The Internet also suggests ctypes.c_uint32(i).value, dunno if that is better. In any case writing 4294967296 as 2**32 would be slightly clearer. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 01/11] VMX: VMFUNC and #VE definitions and detection.
On 09/01/15 21:26, Ed White wrote: Currently, neither is enabled globally but may be enabled on a per-VCPU basis by the altp2m code. Everything can be force-disabled globally by specifying vmfunc=0 on the Xen command line. Remove the check for EPTE bit 63 == zero in ept_split_super_page(), as that bit is now hardware-defined. Signed-off-by: Ed White edmund.h.wh...@intel.com --- docs/misc/xen-command-line.markdown | 7 +++ xen/arch/x86/hvm/vmx/vmcs.c | 40 + xen/arch/x86/mm/p2m-ept.c | 1 - xen/include/asm-x86/hvm/vmx/vmcs.h | 16 +++ xen/include/asm-x86/hvm/vmx/vmx.h | 13 +++- xen/include/asm-x86/msr-index.h | 1 + 6 files changed, 76 insertions(+), 2 deletions(-) diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown index 152ae03..00fbae7 100644 --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -1305,6 +1305,13 @@ The optional `keep` parameter causes Xen to continue using the vga console even after dom0 has been started. The default behaviour is to relinquish control to dom0. +### vmfunc (Intel) + `= boolean` + + Default: `true` + +Use VMFUNC and #VE support if available. + ### vpid (Intel) `= boolean` diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index 9d8033e..4274e92 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -50,6 +50,9 @@ boolean_param(unrestricted_guest, opt_unrestricted_guest_enabled); static bool_t __read_mostly opt_apicv_enabled = 1; boolean_param(apicv, opt_apicv_enabled); +static bool_t __read_mostly opt_vmfunc_enabled = 1; +boolean_param(vmfunc, opt_vmfunc_enabled); Please can experimental features be off by default. (I am specifically looking to avoid the issues we had with apicv getting into stable releases despite reliably causing problems for migration). I suspect you will have many interested testers for this featureset, and it is fine to patch the default later when the feature gets declared stable. I also wonder whether it might be better to have a vmx= command line parameter with vmfunc as a subopt, to save gaining an ever increasing set of related top level parameters? Other than this, the content of the rest of the patch appears fine. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH v2] [Bugfix] x86/apic: Fix xen IRQ allocation failure caused by commit b81975eade8c
Commit b81975eade8c (x86, irq: Clean up irqdomain transition code) breaks xen IRQ allocation because xen_smp_prepare_cpus() doesn't invoke setup_IO_APIC(), so no irqdomains created for IOAPICs and mp_map_pin_to_irq() fails at the very beginning. Enhance xen_smp_prepare_cpus() to call setup_IO_APIC() to initialize irqdomain for IOAPICs. Signed-off-by: Jiang Liu jiang@linux.intel.com --- Hi Sander, Could you please help to test this patch against 3.19-rc3? I have reworked it based Konrad's suggestions. Thanks! Gerry --- arch/x86/kernel/apic/io_apic.c | 31 +++ arch/x86/xen/smp.c |1 + 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3f5f60406ab1..81d4faeb8040 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -38,6 +38,8 @@ #include linux/slab.h #include linux/bootmem.h +#include xen/xen.h + #include asm/idle.h #include asm/io.h #include asm/smp.h @@ -2165,6 +2167,9 @@ static inline void __init check_timer(void) unsigned long flags; int no_pin1 = 0; + if (!nr_legacy_irqs()) + return; + local_irq_save(flags); /* @@ -2185,6 +2190,13 @@ static inline void __init check_timer(void) apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); legacy_pic-init(1); + /* +* legacy_pic will be changed to null_legacy_pic if init() fails to +* to detect legacy PIC hardware. Recheck again. +*/ + if (!nr_legacy_irqs()) + goto out; + pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); pin2 = ioapic_i8259.pin; @@ -2369,6 +2381,15 @@ static void ioapic_destroy_irqdomain(int idx) ioapics[idx].pin_info = NULL; } +static void setup_IO_APIC_IDs(void) +{ + if (xen_domain()) + return; + + x86_init.mpparse.setup_ioapic_ids(); + sync_Arb_IDs(); +} + void __init setup_IO_APIC(void) { int ioapic; @@ -2382,16 +2403,10 @@ void __init setup_IO_APIC(void) for_each_ioapic(ioapic) BUG_ON(mp_irqdomain_create(ioapic)); - /* - * Set up IO-APIC IRQ routing. - */ - x86_init.mpparse.setup_ioapic_ids(); - - sync_Arb_IDs(); + setup_IO_APIC_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); - if (nr_legacy_irqs()) - check_timer(); + check_timer(); ioapic_initialized = 1; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4c071aeb8417..9f404df64422 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -327,6 +327,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) xen_raw_printk(m); panic(m); } + setup_IO_APIC(); xen_init_lock_cpu(0); smp_store_boot_cpu_info(); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] summary-1 (v2) Design proposal for RMRR fix
On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote: 3) report-sel vs. report-all One thing I'm not clear on is whether you are suggesting to reserve RMRR (either -all or -sel) for every domain by default, or whether the guest CFG will need to explicitly opt-in, IOW is there a 3rd report-none option which is the default unless otherwise requested (e.g. by e820_host=1, or some other new option)? Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2] [Bugfix] x86/apic: Fix xen IRQ allocation failure caused by commit b81975eade8c
On 12/01/15 13:39, Jiang Liu wrote: Commit b81975eade8c (x86, irq: Clean up irqdomain transition code) breaks xen IRQ allocation because xen_smp_prepare_cpus() doesn't invoke setup_IO_APIC(), so no irqdomains created for IOAPICs and mp_map_pin_to_irq() fails at the very beginning. Enhance xen_smp_prepare_cpus() to call setup_IO_APIC() to initialize irqdomain for IOAPICs. Having Xen call setup_IO_APIC() to initialize the irq domains then having to add special cases to it is just wrong. The bits of init deferred by mp_register_apic() are also deferred to two different places which looks odd. What about something like the following (untested) patch? diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3f5f604..e180680 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -253,8 +253,10 @@ int __init arch_early_ioapic_init(void) if (!nr_legacy_irqs()) io_apic_irqs = ~0UL; - for_each_ioapic(i) + for_each_ioapic(i) { + BUG_ON(mp_irqdomain_create(ioapic)); alloc_ioapic_saved_registers(i); + } /* * For legacy IRQ's, start with assigning irq0 to irq15 to @@ -2379,8 +2381,6 @@ void __init setup_IO_APIC(void) io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, ENABLING IO-APIC IRQs\n); - for_each_ioapic(ioapic) - BUG_ON(mp_irqdomain_create(ioapic)); /* * Set up IO-APIC IRQ routing. @@ -2929,7 +2929,8 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base, /* * If mp_register_ioapic() is called during early boot stage when * walking ACPI/SFI/DT tables, it's too early to create irqdomain, -* we are still using bootmem allocator. So delay it to setup_IO_APIC(). +* we are still using bootmem allocator. So delay it to +* arch_early_ioapic_init(). */ if (hotplug) { if (mp_irqdomain_create(idx)) { --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2369,6 +2381,15 @@ static void ioapic_destroy_irqdomain(int idx) ioapics[idx].pin_info = NULL; } +static void setup_IO_APIC_IDs(void) +{ + if (xen_domain()) + return; This would have to xen_pv_domain(). David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c
On 12/01/15 11:36, Roger Pau Monné wrote: El 12/01/15 a les 8.09, Bob Liu ha escrit: On 01/09/2015 11:51 PM, Roger Pau Monné wrote: El 06/01/15 a les 14.19, Bob Liu ha escrit: When there is no enough free grants, gnttab_alloc_grant_references() will fail and block request queue will stop. If the system is always lack of grants, blkif_restart_queue_callback() can't be scheduled and block request queue can't be restart(block I/O hang). But when there are former requests complete, some grants may free to persistent_gnts_c, we can give the request queue another chance to restart and avoid block hang. Reported-by: Junxiao Bi junxiao...@oracle.com Signed-off-by: Bob Liu bob@oracle.com --- drivers/block/xen-blkfront.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2236c6f..dd30f99 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, } } } + + /* + * Request queue would be stopped if failed to alloc enough grants and + * won't be restarted until gnttab_free_count = info-callback-count. + * + * But there is another case, once we have enough persistent grants we + * can try to restart the request queue instead of continue to wait for + * 'gnttab_free_count'. + */ + if (info-persistent_gnts_c = info-callback.count) + schedule_work(info-work); I guess I'm missing something here, but blkif_completion is called by blkif_interrupt, which in turn calls kick_pending_request_queues when finished, which IMHO should be enough to restart the processing of requests. You are right, sorry for the mistake. The problem we met was a xenblock I/O hang. Dumped data showed at that time info-persistent_gnt_c = 8, max_gref = 8 but block request queue was still stopped. It's very hard to reproduce this issue, we only see it once. I think there might be a race condition: request A request B: info-persistent_gnts_c max_grefs and fail to alloc enough grants interrupt happen, blkif_complte(): info-persistent_gnts_c++ kick_pending_request_queues() stop block request queue added to callback list If the system don't have enough grants(but have enough persistent_gnts), request queue would still hang. Not sure how can this happen, blkif_queue_request explicitly checks that persistent_gnts_c max_grefs before adding the callback and stopping the request queue, so in your case the queue should not be blocked. Can you dump the state of info-connected? I think Bob has correctly identified a race. After calling blk_stop_queue(), check info-persistent_gnts again and restart the queue if there free grefs. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed
Use the 'xl pci-attach $DomU $BDF' command to attach more than one PCI devices to the guest, then detach the devices with 'xl pci-detach $DomU $BDF', after that, re-attach these PCI devices again, an error message will be reported like following: libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive an error message from QMP server: Duplicate ID 'pci-pt-03_10.1' for device. The count of calling xen_pt_region_add and xen_pt_region_del are not the same will cause the XenPCIPassthroughState and it's related QemuOpts object not be released properly. Thanks for the patch! From this description, I don't quite understand why the memory_region_ref and memory_region_unref calls are wrong. What do you mean by The count of calling xen_pt_region_add and xen_pt_region_del are not the same? I means for some memory regions , only the xen_pt_region_add callback function was called while the xen_pt_region_del was not called. On unplug xen_pt_region_del does not get called? Or the memory region argument is not exactly the same as the one initially passed to xen_pt_region_add? agree. Liang, could you elaborate how the patch is associated with above explanation? :-) I have verified the following patch can work too: diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..f2893b2 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d) } out: -memory_listener_register(s-memory_listener, address_space_memory); +memory_listener_register(s-memory_listener, + s-dev.bus_master_as); memory_listener_register(s-io_listener, address_space_io); XEN_PT_LOG(d, Real physical device %02x:%02x.%d registered successfully!\n, By further debugging, I found when using 'address_space_memory', 'xen_pt_region_del' won't be called when the memory region's name is not ' xen-pci-pt-*', when using ' s-dev.bus_master_as ', there is no such issue. I think use the device related address space here is more reasonable, but I am not sure. Could you give some suggestion? Liang ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, Jan 12, 2015 at 11:25:56AM +, George Dunlap wrote: On Fri, Jan 9, 2015 at 2:43 AM, Tian, Kevin kevin.t...@intel.com wrote: From: George Dunlap Sent: Thursday, January 08, 2015 8:55 PM On Thu, Jan 8, 2015 at 12:49 PM, George Dunlap george.dun...@eu.citrix.com wrote: If RMRRs almost always happen up above 2G, for example, then a simple solution that wouldn't require too much work would be to make sure that the PCI MMIO hole we specify to libxc and to qemu-upstream is big enough to include all RMRRs. That would satisfy the libxc and qemu requirements. If we then store specific RMRRs we want included in xenstore, hvmloader can put them in the e820 map, and that would satisfy the hvmloader requirement. An alternate thing to do here would be to properly fix the qemu-upstream problem, by making a way for hvmloader to communicate changes in the gpfn layout to qemu. Then hvmloader could do the work of moving memory under RMRRs to higher memory; and libxc wouldn't need to be involved at all. I think it would also fix our long-standing issues with assigning PCI devices to qemu-upstream guests, which up until now have only been worked around. could you elaborate a bit for that long-standing issue? So qemu-traditional didn't particularly expect to know the guest memory layout. qemu-upstream does; it expects to know what areas of memory are guest memory and what areas of memory are unmapped. If a read or write happens to a gpfn which *xen* knows is valid, but which *qemu-upstream* thinks is unmapped, then qemu-upstream will crash. The problem though is that the guest's memory map is not actually communicated to qemu-upstream in any way. Originally, qemu-upstream was only told how much memory the guest had, and it just happens to choose the same guest memory layout as the libxc domain builder does. This works, but it is bad design, because if libxc were to change for some reason, people would have to simply remember to also change the qemu-upstream layout. Where this really bites us is in PCI pass-through. The default 4G MMIO hole is very small; and hvmloader naturally expects to be able to make this area larger by relocating memory from below 4G to above 4G. It moves the memory in Xen's p2m, but it has no way of communicating this to qemu-upstream. So when the guest does an MMIO instuction that causes qemu-upstream to access that memory, the guest crashes. There are two work-arounds at the moment: 1. A flag which tells hvmloader not to relocate memory 2. The option to tell qemu-upstream to make the memory hole larger. Both are just work-arounds though; a proper fix would be to allow hvmloader some way of telling qemu that the memory has moved, so it can update its memory map. This will (I'm pretty sure) have an effect on RMRR regions as well, for the reasons I've mentioned above: whether make the holes for the RMRRs in libxc or in hvmloader, if we *move* that memory up to the top of the address space (rather than, say, just not giving that RAM to the guest), then qemu-upstream's idea of the guest memory map will be wrong, and will probably crash at some point. Having the ability for hvmloader to populate and/or move the memory around, and then tell qemu-upstream what the resulting map looked like, would fix both the MMIO-resize issue and the RMRR problem, wrt qemu-upstream. Hmm, wasn't this changed slightly during Xen 4.5 development by Don Slutz? You can now specify the mmio_hole size for HVM guests when using qemu-upstream: http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List Bigger PCI MMIO hole in QEMU via the mmio_hole parameter in guest config, which allows configuring the MMIO size below 4GB. Backport pc q35: Add new machine opt max-ram-below-4g: http://xenbits.xen.org/gitweb/?p=qemu-upstream-unstable.git;a=commit;h=ffdacad07002e14a8072ae28086a57452e48d458 x86: hvm: Allow configuration of the size of the mmio_hole.: http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=2d927fc41b8e130b3b8910e4442d469d2ac7 -- Pasi -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects
On Mon, 2015-01-12 at 09:00 +, Jan Beulich wrote: In particular the .rodata.str2 case is relevant for the EFI boot code (moving around 3k from permanent to init-time sections). So we go from handling .rodata.str1.{1,2,3,8} to .rodata.str{1,2,4}.{1,2,4,8,16}? Signed-off-by: Jan Beulich jbeul...@suse.com Acked-by: Ian Campbell ian.campb...@citrix.com ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects
On 12.01.15 at 15:03, ian.campb...@eu.citrix.com wrote: On Mon, 2015-01-12 at 09:00 +, Jan Beulich wrote: In particular the .rodata.str2 case is relevant for the EFI boot code (moving around 3k from permanent to init-time sections). So we go from handling .rodata.str1.{1,2,3,8} to .rodata.str{1,2,4}.{1,2,4,8,16}? These plus .rodata.cst{1,2,4,8,16}. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] arm64/EFI: minor corrections
On 12.01.15 at 14:46, ian.campb...@eu.citrix.com wrote: On Mon, 2015-01-12 at 08:59 +, Jan Beulich wrote: - don't bail when using the last slot of bootinfo.mem.bank[] (due to premature incrementing of the array index) - GUIDs should be static const (and placed into .init.* whenever possible) - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly append one to the message passed to the function - Avoid needless use of DisplayUint via __stringify. Signed-off-by: Jan Beulich jbeul...@suse.com Acked-by: Ian Campbell ian.campb...@citrix.com Will you commit yourself or would you like me to? I will. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, Jan 12, 2015 at 12:28 PM, Tian, Kevin kevin.t...@intel.com wrote: From: George Dunlap Sent: Monday, January 12, 2015 8:14 PM On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:23 PM On 12.01.15 at 11:12, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:09 PM On 12.01.15 at 10:56, kevin.t...@intel.com wrote: the result is related to another open whether we want to block guest boot for such problem. If 'warn' in domain builder is acceptable, we don't need to change lowmem for such rare 1GB case, just throws a warning for unnecessary conflictions (doesn't hurt if user doesn't assign it). And how would you then deal with the one guest needing that range reserved? if guest needs the range, then report-all or report-sel doesn't matter. domain builder throws the warning, and later device assignment will fail (or warn w/ override). In reality I think 1GB is rare. Making such assumption to simplify implementation is reasonable. One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. 3GB) while warn other conflictions (e.g. 3G) in domain builder (let later assignment path to actually fail if confliction does matter), then we don't need to solve all conflictions in domain builder (if say 1G example fixing it may instead reduce lowmem greatly) and then report-all may just add more warnings than report-sel for unused devices. You keep saying report-all or report-sel, but I'm not 100% clear what you mean by those. In any case, the naming has got to be a bit misleading: the important questions at the moment, AFAICT, are: I explained them in original proposal Yes, I read it and didn't understand it there either. :-) 1. Whether we make holes at boot time for all RMRRs on the system, or whether only make RMRRs for some subset (or potentially some other arbitrary range, which may include RMRRs on other hosts to which we may want to migrate). I use 'report-all' to stand for making holes for all RMRRs on the system, while 'report-sel' for specified subset. including other RMRRs (from admin for migration) is orthogonal to above open. Right; so the report in this case is report to the guest. As I said, I think that's confusing terminology; after all, we want to report to the guest all holes that we make, and only the holes that we make. The question isn't then which ones we report, but which ones we make holes for. :-) So for this discussion, maybe rmrr-host (meaning, copy RMRRs from the host) or rmrr-sel (meaning, specify a selection of RMRRs, which may be from this host, or even another host)? Given that the ranges may be of arbitrary size, and that we may want to specify additional ranges for migration to other hosts, then I think we need at some level we need the machinery to be in place to specify the RMRRs that will be reserved for a specific guest. At the xl level, there should of course be a way to specify use all host RMRRs; but what should happen then is that xl / libxl should query Xen for the host RMRRs and then pass those down to the next layer of the library. 2. Whether those holes are made by the domain builder in libxc, or by hvmloader based on current discussion, whether to make holes in hvmloader doesn't bring fundamental difference. as long as domain builder still need to populate memory (even minimal for hvmloader to boot), it needs to check conflict and may ideally make hole too (though we may make assumption not doing that) Well it will have an impact on the overall design of the code; but you're right, if RMRRs really can (and will) be anywhere in memory, then the domain builder will need to know what RMRRs are going to be reserved for this VM and avoid populating those. If, on the other hand, we can make some fairly small assumptions about where there will not be any RMRRs, then we can get away with handling everything in hvmloader. 3. What happens if Xen is asked to assign a device and it finds that the required RMRR is not empty: a. during guest creation b. after the guest has booted for Xen we don't need differentiate a/b. by default it's clear failure should be returned as it implies a security/correctness issue if moving forward. but based on discussion an override to 'warn' only
Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed
On 12/01/2015 14:35, Li, Liang Z wrote: diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..f2893b2 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d) } out: -memory_listener_register(s-memory_listener, address_space_memory); +memory_listener_register(s-memory_listener, + s-dev.bus_master_as); memory_listener_register(s-io_listener, address_space_io); XEN_PT_LOG(d, Real physical device %02x:%02x.%d registered successfully!\n, By further debugging, I found when using 'address_space_memory', 'xen_pt_region_del' won't be called when the memory region's name is not ' xen-pci-pt-*', when using ' s-dev.bus_master_as ', there is no such issue. I think use the device related address space here is more reasonable, but I am not sure. Could you give some suggestion? Yes, this patch makes sense. The listener will be called every time the command register is written. Paolo ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2 1/3] x86: also allow REP STOS emulation acceleration
On Fri, Jan 9, 2015 at 12:10 PM, Jan Beulich jbeul...@suse.com wrote: On 09.01.15 at 12:45, t...@xen.org wrote: At 11:24 + on 09 Jan (1420799087), Jan Beulich wrote: On 09.01.15 at 12:18, t...@xen.org wrote: +default: +xfree(buf); +ASSERT(!buf); looks dodgy... In which way? The default is supposed to be unreachable, and sits in the else branch to an if(!buf), i.e. in a release build we'll correctly free the buffer, while in a debug build the ASSERT() will trigger. Oh I see. Can you please use ASSERT(0) for that? I sincerely dislike ASSERT(0), but if that's the only way to get the patch accepted... Um, by what set of criteria is ASSERT(!buf) better than ASSERT(0)? At least the second tells the reader that you're not using ASSERT() the normal way. I agree that ASSERT_UNREACHABLE() is probably the best option. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [seabios test] 33359: regressions - FAIL
flight 33359 seabios real [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/33359/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: test-amd64-amd64-xl-qemuu-debianhvm-amd64 5 xen-boot fail REGR. vs. 33317 Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-pvh-intel 9 guest-start fail never pass test-amd64-i386-libvirt 9 guest-start fail never pass test-amd64-amd64-libvirt 9 guest-start fail never pass test-amd64-amd64-xl-pvh-amd 9 guest-start fail never pass test-amd64-amd64-xl-pcipt-intel 9 guest-start fail never pass test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-winxpsp3 14 guest-stop fail never pass version targeted for testing: seabios 301dd092c2d04a5d70c94b9d873d810785e94a84 baseline version: seabios 60e0e55f212dadd043ab9e39bee05a48013ddd8f People who touched revisions under test: Kevin O'Connor ke...@koconnor.net jobs: build-amd64 pass build-i386 pass build-amd64-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl pass test-amd64-i386-xl pass test-amd64-amd64-xl-pvh-amd fail test-amd64-i386-rhel6hvm-amd pass test-amd64-i386-qemut-rhel6hvm-amd pass test-amd64-i386-qemuu-rhel6hvm-amd pass test-amd64-amd64-xl-qemut-debianhvm-amd64pass test-amd64-i386-xl-qemut-debianhvm-amd64 pass test-amd64-amd64-xl-qemuu-debianhvm-amd64fail test-amd64-i386-xl-qemuu-debianhvm-amd64 pass test-amd64-i386-freebsd10-amd64 pass test-amd64-amd64-xl-qemuu-ovmf-amd64 pass test-amd64-i386-xl-qemuu-ovmf-amd64 pass test-amd64-amd64-xl-qemut-win7-amd64 fail test-amd64-i386-xl-qemut-win7-amd64 fail test-amd64-amd64-xl-qemuu-win7-amd64 fail test-amd64-i386-xl-qemuu-win7-amd64 fail test-amd64-amd64-xl-win7-amd64 fail test-amd64-i386-xl-win7-amd64fail test-amd64-i386-xl-credit2 pass test-amd64-i386-freebsd10-i386 pass test-amd64-amd64-xl-pcipt-intel fail test-amd64-amd64-xl-pvh-intelfail test-amd64-i386-rhel6hvm-intel pass test-amd64-i386-qemut-rhel6hvm-intel pass test-amd64-i386-qemuu-rhel6hvm-intel pass test-amd64-amd64-libvirt fail test-amd64-i386-libvirt fail test-amd64-i386-xl-multivcpu pass test-amd64-amd64-pairpass test-amd64-i386-pair pass test-amd64-amd64-xl-sedf-pin
Re: [Xen-devel] [PATCH 00/12] Replace Xen xl parsing/formatting impl
On Fri, 2015-01-09 at 22:03 -0700, Jim Fehlig wrote: The first attempt to implement support for parsing/formatting Xen's xl disk config format copied Xen's flex-based parser into libvirt, which has proved to be challenging in the context of autotools. But as it turns out, Xen provides an interface to the parser via libxlutil. This series reverts the first attempt, along with subsequent attempts to fix it, and replaces it with an implementation based on libxlutil. The first nine patches revert the original implementation and subsequent fixes. Patch 10 provides an implemenation based on libxlutil. Patches 11 and 12 are basically unchanged from patches 3 and 4 in the first attempt. One upshot of using libxlutil instead of copying the flex source is removing the potential for source divergence. Thanks for doing this, looks good to me, FWIW. Is the presence/absence of xen-xl support exposed via virsh anywhere? If so then I can arrange for my Xen osstest patches for libvirt testing to use xen-xl when available but still fallback to xen-xm. I've had a look in virsh capabilities and virsh help domxml-from-native but not seeing xen-xm, so assuming xen-xl won't magically appear in any of those places either. (TBH, this may become moot since I suspect your patches will be well established by the time my osstest patches hit osstest...) Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c
El 12/01/15 a les 14.04, David Vrabel ha escrit: On 12/01/15 11:36, Roger Pau Monné wrote: El 12/01/15 a les 8.09, Bob Liu ha escrit: On 01/09/2015 11:51 PM, Roger Pau Monné wrote: El 06/01/15 a les 14.19, Bob Liu ha escrit: When there is no enough free grants, gnttab_alloc_grant_references() will fail and block request queue will stop. If the system is always lack of grants, blkif_restart_queue_callback() can't be scheduled and block request queue can't be restart(block I/O hang). But when there are former requests complete, some grants may free to persistent_gnts_c, we can give the request queue another chance to restart and avoid block hang. Reported-by: Junxiao Bi junxiao...@oracle.com Signed-off-by: Bob Liu bob@oracle.com --- drivers/block/xen-blkfront.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2236c6f..dd30f99 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, } } } + + /* + * Request queue would be stopped if failed to alloc enough grants and + * won't be restarted until gnttab_free_count = info-callback-count. + * + * But there is another case, once we have enough persistent grants we + * can try to restart the request queue instead of continue to wait for + * 'gnttab_free_count'. + */ + if (info-persistent_gnts_c = info-callback.count) + schedule_work(info-work); I guess I'm missing something here, but blkif_completion is called by blkif_interrupt, which in turn calls kick_pending_request_queues when finished, which IMHO should be enough to restart the processing of requests. You are right, sorry for the mistake. The problem we met was a xenblock I/O hang. Dumped data showed at that time info-persistent_gnt_c = 8, max_gref = 8 but block request queue was still stopped. It's very hard to reproduce this issue, we only see it once. I think there might be a race condition: request A request B: info-persistent_gnts_c max_grefs and fail to alloc enough grants interrupt happen, blkif_complte(): info-persistent_gnts_c++ kick_pending_request_queues() blkif_interrupt can never interrupt blkif_queue_request, because it's holding a spinlock (info-io_lock). If you have seen this trace in the wild it means something is really wrong and we are calling blkif_queue_request without acquiring the spinlock and thus without disabling interrupts. stop block request queue added to callback list If the system don't have enough grants(but have enough persistent_gnts), request queue would still hang. Not sure how can this happen, blkif_queue_request explicitly checks that persistent_gnts_c max_grefs before adding the callback and stopping the request queue, so in your case the queue should not be blocked. Can you dump the state of info-connected? I think Bob has correctly identified a race. After calling blk_stop_queue(), check info-persistent_gnts again and restart the queue if there free grefs. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [xen-4.5-testing test] 33348: tolerable FAIL - PUSHED
flight 33348 xen-4.5-testing real [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/33348/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-pvh-intel 9 guest-start fail never pass test-amd64-amd64-xl-pvh-amd 9 guest-start fail never pass test-armhf-armhf-libvirt 9 guest-start fail never pass test-armhf-armhf-xl 10 migrate-support-checkfail never pass test-amd64-amd64-libvirt 9 guest-start fail never pass test-amd64-amd64-xl-pcipt-intel 9 guest-start fail never pass test-amd64-i386-libvirt 9 guest-start fail never pass test-amd64-i386-xl-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail never pass test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop fail never pass test-amd64-amd64-xl-winxpsp3 14 guest-stop fail never pass version targeted for testing: xen 3508d75155dae3154d10cd5c33d44336f6f4bf1c baseline version: xen 36174af3fbeb1b662c0eadbfa193e77f68cc955b People who touched revisions under test: Andrew Cooper andrew.coop...@citrix.com Boris Ostrovsky boris.ostrov...@oracle.com Ed Swierk eswi...@skyportsystems.com Ian Campbell ian.campb...@citrix.com Ian Campbell i...@hellion.org.uk Ian Jackson ian.jack...@eu.citrix.com Ian Jackson i...@mariner.uk.xensource.com Jan Beulich jbeul...@suse.com Julien Grall julien.gr...@linaro.org Kevin Tian kevin.t...@intel.com Konrad Rzeszutek Wilk konrad.w...@oracle.com Mihai DonÈu mdo...@bitdefender.com Olaf Hering o...@aepfle.de Robert Hu robert...@intel.com RÄzvan Cojocaru rcojoc...@bitdefender.com Stefano Stabellini stefano.stabell...@eu.citrix.com Wei Liu wei.l...@citrix.com Yang Hongyang yan...@cn.fujitsu.com jobs: build-amd64 pass build-armhf pass build-i386 pass build-amd64-libvirt pass build-armhf-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-armhf-pvopspass build-i386-pvops pass build-amd64-rumpuserxen pass build-i386-rumpuserxen pass test-amd64-amd64-xl pass test-armhf-armhf-xl pass test-amd64-i386-xl pass test-amd64-amd64-xl-pvh-amd fail test-amd64-i386-rhel6hvm-amd pass test-amd64-i386-qemut-rhel6hvm-amd pass test-amd64-i386-qemuu-rhel6hvm-amd pass test-amd64-amd64-xl-qemut-debianhvm-amd64pass test-amd64-i386-xl-qemut-debianhvm-amd64 pass test-amd64-amd64-xl-qemuu-debianhvm-amd64pass test-amd64-i386-xl-qemuu-debianhvm-amd64 pass test-amd64-i386-freebsd10-amd64 pass test-amd64-amd64-xl-qemuu-ovmf-amd64 pass test-amd64-i386-xl-qemuu-ovmf-amd64 pass test-amd64-amd64-rumpuserxen-amd64 pass test-amd64-amd64-xl-qemut-win7-amd64 fail test-amd64-i386-xl-qemut-win7-amd64 fail test-amd64-amd64-xl-qemuu-win7-amd64
[Xen-devel] [PATCH v2] xenstored: log tdb message via xenstored's logging mechanisms
TDB provides us with a callback for this purpose. Use it in both xenstored and xs_tdb_dump. While at it make the existing log() macro tollerate memory failures. Signed-off-by: Ian Campbell ian.campb...@citrix.com --- v2: Use tdb_logger consistently. Did not: move location of talloc_free, since talloc_free(NULL) returns an error. --- tools/xenstore/xenstored_core.c | 39 +-- tools/xenstore/xs_tdb_dump.c| 12 +++- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c index 4eaff57..3fd9a20 100644 --- a/tools/xenstore/xenstored_core.c +++ b/tools/xenstore/xenstored_core.c @@ -89,9 +89,14 @@ static void check_store(void); #define log(...) \ do {\ char *s = talloc_asprintf(NULL, __VA_ARGS__); \ - trace(%s\n, s); \ - syslog(LOG_ERR, %s, s); \ - talloc_free(s); \ + if (s) {\ + trace(%s\n, s); \ + syslog(LOG_ERR, %s, s); \ + talloc_free(s); \ + } else {\ + trace(talloc failure during logging\n); \ + syslog(LOG_ERR, talloc failure during logging\n); \ + } \ } while (0) @@ -1479,13 +1484,35 @@ static void manual_node(const char *name, const char *child) talloc_free(node); } +static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...) +{ + va_list ap; + char *s; + + va_start(ap, fmt); + s = talloc_vasprintf(NULL, fmt, ap); + va_end(ap); + + if (s) { + trace(TDB: %s\n, s); + syslog(LOG_ERR, TDB: %s, s); + if (verbose) + xprintf(TDB: %s, s); + talloc_free(s); + } else { + trace(talloc failure during logging\n); + syslog(LOG_ERR, talloc failure during logging\n); + } +} + static void setup_structure(void) { char *tdbname; tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb()); if (!(tdb_flags TDB_INTERNAL)) - tdb_ctx = tdb_open(tdbname, 0, tdb_flags, O_RDWR, 0); + tdb_ctx = tdb_open_ex(tdbname, 0, tdb_flags, O_RDWR, 0, + tdb_logger, NULL); if (tdb_ctx) { /* XXX When we make xenstored able to restart, this will have @@ -1516,8 +1543,8 @@ static void setup_structure(void) talloc_free(tlocal); } else { - tdb_ctx = tdb_open(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT, - 0640); + tdb_ctx = tdb_open_ex(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT, + 0640, tdb_logger, NULL); if (!tdb_ctx) barf_perror(Could not create tdb file %s, tdbname); diff --git a/tools/xenstore/xs_tdb_dump.c b/tools/xenstore/xs_tdb_dump.c index b91cdef..9f636f9 100644 --- a/tools/xenstore/xs_tdb_dump.c +++ b/tools/xenstore/xs_tdb_dump.c @@ -33,6 +33,15 @@ static char perm_to_char(enum xs_perm_type perm) '?'; } +static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + int main(int argc, char *argv[]) { TDB_DATA key; @@ -41,7 +50,8 @@ int main(int argc, char *argv[]) if (argc != 2) barf(Usage: xs_tdb_dump tdbfile); - tdb = tdb_open(talloc_strdup(NULL, argv[1]), 0, 0, O_RDONLY, 0); + tdb = tdb_open_ex(talloc_strdup(NULL, argv[1]), 0, 0, O_RDONLY, 0, + tdb_logger, NULL); if (!tdb) barf_perror(Could not open %s, argv[1]); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] Add a flight to test OVMF master branch
Do the usual stuffs for adding a new branch (ap-* cr-* etc). Modify ts-xen-build so that it builds Xen with the specified ovmf tree and revision. Only build and test on x86 by modifying make-flight and mfi-common. New branch is added to cr-daily-branch. It exports the tree and changeset used for the test if set. Otherwise the values in Config.mk are used. Signed-off-by: Wei Liu wei.l...@citrix.com --- ap-common|5 + ap-fetch-version |4 ap-fetch-version-old |5 + ap-print-url |3 +++ ap-push |5 + cr-daily-branch | 14 ++ cr-for-branches |2 +- cri-common |1 + make-flight |6 ++ mfi-common |5 - ts-xen-build |7 +++ 11 files changed, 55 insertions(+), 2 deletions(-) diff --git a/ap-common b/ap-common index 96cbd14..e2c3e42 100644 --- a/ap-common +++ b/ap-common @@ -51,6 +51,10 @@ : ${PUSH_TREE_SEABIOS:=$XENBITS:/home/xen/git/osstest/seabios.git} : ${BASE_TREE_SEABIOS:=git://xenbits.xen.org/osstest/seabios.git} +: ${TREE_OVMF_UPSTREAM:=https://github.com/tianocore/edk2.git} +: ${PUSH_TREE_OVMF:=$XENBITS:/home/xen/git/osstest/ovmf.git} +: ${BASE_TREE_OVMF:=git://xenbits.xen.org/osstest/ovmf.git} + : ${TREE_LINUXFIRMWARE:=git://xenbits.xen.org/osstest/linux-firmware.git} : ${PUSH_TREE_LINUXFIRMWARE:=$XENBITS:/home/osstest/ext/linux-firmware.git} : ${UPSTREAM_TREE_LINUXFIRMWARE:=$GIT_KERNEL_ORG/pub/scm/linux/kernel/git/firmware/linux-firmware.git} @@ -77,6 +81,7 @@ fi : ${LOCALREV_LIBVIRT:=daily-cron.$branch} : ${LOCALREV_RUMPUSERXEN:=daily-cron.$branch} : ${LOCALREV_SEABIOS:=daily-cron.$branch} +: ${LOCALREV_OVMF:=daily-cron.$branch} : ${TREEBASE_LINUX_XCP:=http://hg.uk.xensource.com/carbon/trunk/linux-2.6.27} diff --git a/ap-fetch-version b/ap-fetch-version index f6c65d8..33aaf00 100755 --- a/ap-fetch-version +++ b/ap-fetch-version @@ -85,6 +85,10 @@ seabios) repo_tree_rev_fetch_git seabios \ $TREE_SEABIOS_UPSTREAM master $LOCALREV_SEABIOS ;; +ovmf) + repo_tree_rev_fetch_git ovmf \ + $TREE_OVMF_UPSTREAM master $LOCALREV_OVMF + ;; osstest) if [ x$OSSTEST_USE_HEAD != xy ] ; then git fetch $HOME/testing.git pretest:ap-fetch 2 diff --git a/ap-fetch-version-old b/ap-fetch-version-old index 43c997c..7b7f50e 100755 --- a/ap-fetch-version-old +++ b/ap-fetch-version-old @@ -30,6 +30,7 @@ select_xenbranch : ${BASE_LOCALREV_LIBVIRT:=daily-cron.$branch.old} : ${BASE_LOCALREV_RUMPUSERXEN:=daily-cron.$branch.old} : ${BASE_LOCALREV_SEABIOS:=daily-cron.$branch.old} +: ${BASE_LOCALREV_OVMF:=daily-cron.$branch.old} : ${BASE_TREE_QEMU_UPSTREAM:=${TREE_QEMU_UPSTREAM/\/staging\//\/}} @@ -92,6 +93,10 @@ seabios) repo_tree_rev_fetch_git seabios \ $BASE_TREE_SEABIOS xen-tested-master $BASE_LOCALREV_SEABIOS ;; +ovmf) + repo_tree_rev_fetch_git ovmf \ + $BASE_TREE_OVMF xen-tested-master $BASE_LOCALREV_OVMF + ;; osstest) if [ x$OSSTEST_USE_HEAD != xy ] ; then git fetch -f $HOME/testing.git incoming:ap-fetch diff --git a/ap-print-url b/ap-print-url index 7b27e1e..3db2375 100755 --- a/ap-print-url +++ b/ap-print-url @@ -58,6 +58,9 @@ rumpuserxen) seabios) echo $TREE_SEABIOS_UPSTREAM ;; +ovmf) + echo $TREE_OVMF_UPSTREAM + ;; osstest) echo none:; ;; diff --git a/ap-push b/ap-push index a2aa747..01089f3 100755 --- a/ap-push +++ b/ap-push @@ -36,6 +36,7 @@ TREE_XEN=$PUSH_TREE_XEN TREE_LIBVIRT=$PUSH_TREE_LIBVIRT TREE_RUMPUSERXEN=$PUSH_TREE_RUMPUSERXEN TREE_SEABIOS=$PUSH_TREE_SEABIOS +TREE_OVMF=$PUSH_TREE_OVMF if info_linux_tree $branch; then cd $repos/linux @@ -92,6 +93,10 @@ seabios) cd $repos/seabios git push $TREE_SEABIOS $revision:refs/heads/xen-tested-master ;; +ovmf) + cd $repos/ovmf + git push $TREE_OVMF $revision:refs/heads/xen-tested-master + ;; osstest) git push $HOME/testing.git $revision:incoming git push $XENBITS:/home/xen/git/osstest.git $revision:master diff --git a/cr-daily-branch b/cr-daily-branch index fc663ce..c7d1071 100755 --- a/cr-daily-branch +++ b/cr-daily-branch @@ -146,6 +146,16 @@ if [ x$REVISION_SEABIOS = x ]; then : REVISION_SEABIOS from Config.mk fi fi +if [ x$REVISION_OVMF = x ]; then +if [ x$tree = xovmf ]; then + TREE_OVMF=$TREE_OVMF_UPSTREAM + export TREE_OVMF + determine_version REVISION_OVMF ovmf OVMF + export REVISION_OVMF +else + : REVISION_OVMF from Config.mk +fi +fi if [ x$REVISION_LIBVIRT = x ]; then determine_version REVISION_LIBVIRT libvirt LIBVIRT export REVISION_LIBVIRT @@ -203,6 +213,10 @@ seabios) realtree=seabios NEW_REVISION=$REVISION_SEABIOS ;; +ovmf) + realtree=ovmf + NEW_REVISION=$REVISION_OVMF + ;; *)
Re: [Xen-devel] [PATCH] arm64/EFI: minor corrections
On Mon, 2015-01-12 at 08:59 +, Jan Beulich wrote: - don't bail when using the last slot of bootinfo.mem.bank[] (due to premature incrementing of the array index) - GUIDs should be static const (and placed into .init.* whenever possible) - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly append one to the message passed to the function - Avoid needless use of DisplayUint via __stringify. Signed-off-by: Jan Beulich jbeul...@suse.com Acked-by: Ian Campbell ian.campb...@citrix.com Will you commit yourself or would you like me to? Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] summary-1 (v2) Design proposal for RMRR fix
From: Ian Campbell [mailto:ian.campb...@citrix.com] Sent: Monday, January 12, 2015 9:42 PM On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote: 3) report-sel vs. report-all One thing I'm not clear on is whether you are suggesting to reserve RMRR (either -all or -sel) for every domain by default, or whether the guest CFG will need to explicitly opt-in, IOW is there a 3rd report-none option which is the default unless otherwise requested (e.g. by e820_host=1, or some other new option)? only when a device is assigned (or potentially prepare for hotplug usage), and report-all/sel is from hypervisor p.o.v to tell userspace about all RMRR regions on this platform or just RMRR regions belonging to specified devices. 'e820_host' only makes holes to prepare (more than RMRR requires). finally we still need query actual reserved regions reported by hypervisor and then mark them reserved in guest e820 table and avoid conflict for PCI BAR allocation etc. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [qemu-mainline bisection] complete test-amd64-amd64-xl-qemuu-win7-amd64
branch xen-unstable xen branch xen-unstable job test-amd64-amd64-xl-qemuu-win7-amd64 test windows-install Tree: linux git://xenbits.xen.org/linux-pvops.git Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git Tree: qemuu git://git.qemu.org/qemu.git Tree: xen git://xenbits.xen.org/xen.git *** Found and reproduced problem changeset *** Bug is in tree: qemuu git://git.qemu.org/qemu.git Bug introduced: 49d2e648e8087d154d8bf8b91f27c8e05e79d5a6 Bug not present: 60fb1a87b47b14e4ea67043aa56f353e77fbd70a commit 49d2e648e8087d154d8bf8b91f27c8e05e79d5a6 Author: Marcel Apfelbaum marce...@redhat.com Date: Tue Dec 16 16:58:05 2014 + machine: remove qemu_machine_opts global list QEMU has support for options per machine, keeping a global list of options is no longer necessary. Signed-off-by: Marcel Apfelbaum marce...@redhat.com Reviewed-by: Alexander Graf ag...@suse.de Reviewed-by: Greg Bellows greg.bell...@linaro.org Message-id: 1418217570-15517-2-git-send-email-marce...@redhat.com Signed-off-by: Peter Maydell peter.mayd...@linaro.org For bisection revision-tuple graph see: http://www.chiark.greenend.org.uk/~xensrcts/results/bisect.qemu-mainline.test-amd64-amd64-xl-qemuu-win7-amd64.windows-install.html Revision IDs in each graph node refer, respectively, to the Trees above. Searching for failure / basis pass: 33123 fail [host=itch-mite] / 32598 ok. Failure / basis pass flights: 33123 / 32598 (tree with no url: seabios) Tree: linux git://xenbits.xen.org/linux-pvops.git Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git Tree: qemuu git://git.qemu.org/qemu.git Tree: xen git://xenbits.xen.org/xen.git Latest 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b Basis pass 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 7e58e2ac7778cca3234c33387e49577bb7732714 36174af3fbeb1b662c0eadbfa193e77f68cc955b Generating revisions with ./adhoc-revtuple-generator git://xenbits.xen.org/linux-pvops.git#83a926f7a4e39fb6be0576024e67fe161593defa-83a926f7a4e39fb6be0576024e67fe161593defa git://xenbits.xen.org/osstest/linux-firmware.git#c530a75c1e6a472b0eb9558310b518f0dfcd8860-c530a75c1e6a472b0eb9558310b518f0dfcd8860 git://xenbits.xen.org/staging/qemu-xen-unstable.git#b0d42741f8e9a00854c3b3faca1da84bfc69bf22-b0d42741f8e9a00854c3b3faca1da84bfc69bf22 git://git.qemu.org/qemu.git#7e58e2ac7778cca3234c33387e49577bb7732714-ab0302ee764fd702465aef6d88612cdff4302809 git://xenbits.xen.org/xen.git#36174af3fbeb1b662c0eadbfa193e77f68cc955b-36174af3fbeb1b662c0eadbfa193e77f68cc955b + exec + sh -xe + cd /export/home/osstest/repos/qemu + git remote set-url origin git://drall.uk.xensource.com:9419/git://git.qemu.org/qemu.git + git fetch -p origin +refs/heads/*:refs/remotes/origin/* + exec + sh -xe + cd /export/home/osstest/repos/qemu + git remote set-url origin git://drall.uk.xensource.com:9419/git://git.qemu.org/qemu.git + git fetch -p origin +refs/heads/*:refs/remotes/origin/* Loaded 1005 nodes in revision graph Searching for test results: 32585 pass irrelevant 32598 pass 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 7e58e2ac7778cca3234c33387e49577bb7732714 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32611 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32626 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32689 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32659 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32876 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32854 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22
Re: [Xen-devel] [libvirt] [PATCH 09/12] Revert src/xenconfig: Xen-xl parser
On 01/10/2015 12:03 AM, Jim Fehlig wrote: This reverts commit 2c78051a14acfb7aba078d569b1632dfe0ca0853. Conflicts: src/Makefile.am Signed-off-by: Jim Fehlig jfeh...@suse.com --- .gitignore| 1 - cfg.mk| 3 +- configure.ac | 1 - po/POTFILES.in| 1 - src/Makefile.am | 25 +-- src/libvirt_xenconfig.syms| 4 - src/xenconfig/xen_common.c| 3 +- src/xenconfig/xen_xl.c| 499 -- src/xenconfig/xen_xl.h| 33 --- src/xenconfig/xen_xl_disk.l | 256 -- src/xenconfig/xen_xl_disk_i.h | 39 11 files changed, 4 insertions(+), 861 deletions(-) OK - so reverting is fine; however, xen_xl_disk.{c,h} still exist... Simple enough solution, but they will show up in someone's git status output since they are also removed from .gitignore. There are a couple Coverity issues from the next 3 patches - I'll note them for those directly. ACK John diff --git a/.gitignore b/.gitignore index eac2203..9d09709 100644 --- a/.gitignore +++ b/.gitignore @@ -140,7 +140,6 @@ /src/remote/*_protocol.[ch] /src/rpc/virkeepaliveprotocol.[ch] /src/rpc/virnetprotocol.[ch] -/src/xenconfig/xen_xl_disk.[ch] /src/test_libvirt*.aug /src/test_virtlockd.aug /src/util/virkeymaps.h diff --git a/cfg.mk b/cfg.mk index 3df3dcb..21f83c3 100644 --- a/cfg.mk +++ b/cfg.mk @@ -89,9 +89,8 @@ distdir: sc_vulnerable_makefile_CVE-2012-3386.z endif # Files that should never cause syntax check failures. -# (^(HACKING|docs/(news\.html\.in|.*\.patch))|\.(po|fig|gif|ico|png))$$ VC_LIST_ALWAYS_EXCLUDE_REGEX = \ - (^(HACKING|docs/(news\.html\.in|.*\.patch)|src/xenconfig/xen_xl_disk.[chl])|\.(po|fig|gif|ico|png))$$ + (^(HACKING|docs/(news\.html\.in|.*\.patch))|\.(po|fig|gif|ico|png))$$ # Functions like free() that are no-ops on NULL arguments. useless_free_options = \ diff --git a/configure.ac b/configure.ac index 167b875..9d12079 100644 --- a/configure.ac +++ b/configure.ac @@ -146,7 +146,6 @@ m4_ifndef([LT_INIT], [ ]) AM_PROG_CC_C_O AM_PROG_LD -AM_PROG_LEX AC_MSG_CHECKING([for how to mark DSO non-deletable at runtime]) LIBVIRT_NODELETE= diff --git a/po/POTFILES.in b/po/POTFILES.in index 094c8e3..e7cb2cc 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -247,7 +247,6 @@ src/xenapi/xenapi_driver.c src/xenapi/xenapi_utils.c src/xenconfig/xen_common.c src/xenconfig/xen_sxpr.c -src/xenconfig/xen_xl.c src/xenconfig/xen_xm.c tests/virpolkittest.c tools/libvirt-guests.sh.in diff --git a/src/Makefile.am b/src/Makefile.am index c7975e5..e0e47d0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1000,22 +1000,11 @@ CPU_SOURCES = \ VMX_SOURCES =\ vmx/vmx.c vmx/vmx.h -AM_LFLAGS = -Pxl_disk_ --header-file=../$*.h -LEX_OUTPUT_ROOT = lex.xl_disk_ -BUILT_SOURCES += xenconfig/xen_xl_disk.c xenconfig/xen_xl_disk.h -# Generated header file is not implicitly added to dist -EXTRA_DIST += xenconfig/xen_xl_disk.h -CLEANFILES += xenconfig/xen_xl_disk.h xenconfig/xen_xl_disk.c - -XENXLDISKPARSER_SOURCES = xenconfig/xen_xl_disk.l - XENCONFIG_SOURCES = \ xenconfig/xenxs_private.h \ - xenconfig/xen_common.c xenconfig/xen_common.h \ + xenconfig/xen_common.c xenconfig/xen_common.h \ xenconfig/xen_sxpr.c xenconfig/xen_sxpr.h \ - xenconfig/xen_xm.c xenconfig/xen_xm.h \ - xenconfig/xen_xl.c xenconfig/xen_xl.h \ - xenconfig/xen_xl_disk_i.h + xenconfig/xen_xm.c xenconfig/xen_xm.h pkgdata_DATA = cpu/cpu_map.xml @@ -1070,19 +1059,10 @@ libvirt_vmx_la_SOURCES = $(VMX_SOURCES) endif WITH_VMX if WITH_XENCONFIG -# Flex generated XL disk parser needs to be compiled without WARN_FLAGS -# Add the generated object to its own library to control CFLAGS -noinst_LTLIBRARIES += libvirt_xenxldiskparser.la -libvirt_xenxldiskparser_la_CFLAGS = \ - -I$(srcdir)/conf $(AM_CFLAGS) -Wno-unused-parameter -libvirt_xenxldiskparser_la_SOURCES = \ - $(XENXLDISKPARSER_SOURCES) - noinst_LTLIBRARIES += libvirt_xenconfig.la libvirt_la_BUILT_LIBADD += libvirt_xenconfig.la libvirt_xenconfig_la_CFLAGS = \ -I$(srcdir)/conf $(AM_CFLAGS) -libvirt_xenconfig_la_LIBADD = libvirt_xenxldiskparser.la libvirt_xenconfig_la_SOURCES = $(XENCONFIG_SOURCES) endif WITH_XENCONFIG @@ -1844,7 +1824,6 @@ EXTRA_DIST += \ $(VBOX_DRIVER_EXTRA_DIST) \ $(VMWARE_DRIVER_SOURCES)\
Re: [Xen-devel] [RFC V9 2/4] domain snapshot overview
On Mon, 2015-01-12 at 00:01 -0700, Chun Yan Liu wrote: On 1/8/2015 at 08:26 PM, in message 1420719995.19787.62.ca...@citrix.com, Ian Campbell ian.campb...@citrix.com wrote: On Mon, 2014-12-22 at 20:42 -0700, Chun Yan Liu wrote: On 12/19/2014 at 06:25 PM, in message 1418984720.20028.15.ca...@citrix.com, Ian Campbell ian.campb...@citrix.com wrote: On Thu, 2014-12-18 at 22:45 -0700, Chun Yan Liu wrote: On 12/18/2014 at 11:10 PM, in message 1418915443.11882.86.ca...@citrix.com, Ian Campbell ian.campb...@citrix.com wrote: On Tue, 2014-12-16 at 14:32 +0800, Chunyan Liu wrote: Changes to V8: * add an overview document, so that one can has a overall look about the whole domain snapshot work, limits, requirements, how to do, etc. = Domain snapshot overview I don't see a similar section for disk snapshots, are you not considering those here except as a part of a domain snapshot or is this an oversight? There are three main use cases (that I know of at least) for snapshotting like behaviour. One is as you've mentioned below for backup, i.e. to preserve the VM at a certain point in time in order to be able to roll back to it. Is this the only usecase you are considering? Yes. I didn't take disk snapshot thing into the scope. A second use case is to support gold image type deployments, i.e. where you create one baseline single disk image and then clone it multiple times to deploy lots of guests. I think this is usually a disk snapshot type thing, but maybe it can be implemented as restoring a gold domain snapshot multiple times (e.g. for start of day performance reasons). As we initially discussed about the thing, disk snapshot thing can be done be existing tools directly like qemu-img, vhd-util. I was reading this section as a more generic overview of snapshotting, without reference to where/how things might ultimately be implemented. From a design point of view it would be useful to cover the various use cases, even if the solution is that the user implements them using CLI tools by hand (xl) or the toolstack does it for them internally (libvirt). This way we can more clearly see the full picture, which allows us to validate that we are making the right choices about what goes where. OK. I see. I think this user case is more like how to use the snapshot, rather than how to implement snapshot. Right? Correct, what the user is actually trying to achieve with the functionality. 'Gold image' or 'Gold domain', the needed work is more like cloning disks. Yes, or resuming multiple times. I see. But IMO it doesn't need change in snapshot design and implementation. Even resuming multiple times, they couldn't use the same image but duplicate the image multiple times. Perhaps, but the use case should be included so that this rationale for not worrying about it can be written down (so that people like me don't keep asking...) The third case, (which is similar to the first), is taking a disk snapshot in order to be able to run you usual backup software on the snapshot (which is now unchanging, which is handy) and then deleting the disk snapshot (this differs from the first case in which disk is active after the snapshot, and due to the lack of the memory part). Sorry, I'm still not quite clear about what this user case wants to do. The user has an active domain which they want to backup, but backup software often does not cope well if the data is changing under its feet. So the users wants to take a snapshot of the domains disks while leaving the domain running, so they can backup that static version of the disk out of band from the VM itself (e.g. by attaching it to a separate backup VM). Got it. So that's simply disk-only snapshot when domian is active. As you mentioned below, that needs guest agent to quiesce the disks. But currently xen hypervisor can't support that, right? I don't think that's relevant right now, let me explain: I think it's important to consider all the use cases for snapshotting, not because I think they need to be implemented now but to make sure that we don't make any design decisions now which would make it *impossible* to implement it in the future (at least
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, Jan 12, 2015 at 1:56 PM, Pasi Kärkkäinen pa...@iki.fi wrote: On Mon, Jan 12, 2015 at 11:25:56AM +, George Dunlap wrote: So qemu-traditional didn't particularly expect to know the guest memory layout. qemu-upstream does; it expects to know what areas of memory are guest memory and what areas of memory are unmapped. If a read or write happens to a gpfn which *xen* knows is valid, but which *qemu-upstream* thinks is unmapped, then qemu-upstream will crash. The problem though is that the guest's memory map is not actually communicated to qemu-upstream in any way. Originally, qemu-upstream was only told how much memory the guest had, and it just happens to choose the same guest memory layout as the libxc domain builder does. This works, but it is bad design, because if libxc were to change for some reason, people would have to simply remember to also change the qemu-upstream layout. Where this really bites us is in PCI pass-through. The default 4G MMIO hole is very small; and hvmloader naturally expects to be able to make this area larger by relocating memory from below 4G to above 4G. It moves the memory in Xen's p2m, but it has no way of communicating this to qemu-upstream. So when the guest does an MMIO instuction that causes qemu-upstream to access that memory, the guest crashes. There are two work-arounds at the moment: 1. A flag which tells hvmloader not to relocate memory 2. The option to tell qemu-upstream to make the memory hole larger. Both are just work-arounds though; a proper fix would be to allow hvmloader some way of telling qemu that the memory has moved, so it can update its memory map. This will (I'm pretty sure) have an effect on RMRR regions as well, for the reasons I've mentioned above: whether make the holes for the RMRRs in libxc or in hvmloader, if we *move* that memory up to the top of the address space (rather than, say, just not giving that RAM to the guest), then qemu-upstream's idea of the guest memory map will be wrong, and will probably crash at some point. Having the ability for hvmloader to populate and/or move the memory around, and then tell qemu-upstream what the resulting map looked like, would fix both the MMIO-resize issue and the RMRR problem, wrt qemu-upstream. Hmm, wasn't this changed slightly during Xen 4.5 development by Don Slutz? You can now specify the mmio_hole size for HVM guests when using qemu-upstream: http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List Bigger PCI MMIO hole in QEMU via the mmio_hole parameter in guest config, which allows configuring the MMIO size below 4GB. Backport pc q35: Add new machine opt max-ram-below-4g: http://xenbits.xen.org/gitweb/?p=qemu-upstream-unstable.git;a=commit;h=ffdacad07002e14a8072ae28086a57452e48d458 x86: hvm: Allow configuration of the size of the mmio_hole.: http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=2d927fc41b8e130b3b8910e4442d469d2ac7 Yes -- that's workaround #2 above (tell qemu-upstream to make the memory hole larger). But it's still a work-around, because it requires the admin to figure out how big a memory hole he needs. With qemu-traditional, he could just assign whatever devices he wanted, and hvmloader would make it the right size automatically. Ideally that's how it would work for qemu-upstream as well. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2 4/4] dt-uart: support /chosen/stdout-path property.
On Thu, 2015-01-08 at 13:30 +, Julien Grall wrote: On 08/01/15 13:22, Ian Campbell wrote: On Thu, 2015-01-08 at 13:15 +, Julien Grall wrote: Hi Ian, On 08/01/15 11:53, Ian Campbell wrote: +ret = dt_property_read_string(chosen, stdout-path, stdout); +if ( ret = 0 ) +{ +printk(Taking dtuart configuration from /chosen/stdout-path\n); +if ( strlcpy(opt_dtuart, stdout, sizeof(opt_dtuart)) + = sizeof(opt_dtuart) ) +printk(WARNING: /chosen/stdout-path too long, truncated\n); I would add XENLOG_WARNING here and ... +} +else if ( ret != -EINVAL /* Not present */ ) +printk(Failed to read /chosen/stdout-path (%d)\n, ret); XENLOG_ERROR here. In practice these only go via the earlyprintk mechanism, since the console can't be setup yet. I'm not sure it's worthwhile tagging such messages. earlyprintk is transparent for the console code. Tagging may help if we decide to implement other kind of console later (VGA, PCI UART...). Anyway, I doesn't change much things here as the message is tagged as WARNING by default. So it will be always printing. It turns out that none of the existing prints in this function use the tags, and I think its of marginal use in this context so I don't think it is necessary to go changing them all, or to only use the tags for these two messages. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On 12.01.15 at 10:56, kevin.t...@intel.com wrote: the result is related to another open whether we want to block guest boot for such problem. If 'warn' in domain builder is acceptable, we don't need to change lowmem for such rare 1GB case, just throws a warning for unnecessary conflictions (doesn't hurt if user doesn't assign it). And how would you then deal with the one guest needing that range reserved? Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel
On Mon, 2015-01-12 at 10:03 +, Andrew Cooper wrote: This is RFC because explicitly changes the logic introduced by c/s b34f2c375 xsm: label xen-consumer event channels, and is only compile tested. Xen event channels are not internal resources. They still have one end in a domain, and are created at the request of privileged domains. This logic which successfully creates a Xen event channel opens up undesirable failure cases with ill-specified XSM policies. If a domain is permitted to create ioreq servers or memevent listeners, but not to create event channels, the ioreq/memevent creation will succeed but attempting to bind the returned event channel will fail without any indication of a permission error. Signed-off-by: Andrew Cooper andrew.coop...@citrix.com CC: Daniel De Graaf dgde...@tycho.nsa.gov CC: Keir Fraser k...@xen.org CC: Jan Beulich jbeul...@suse.com CC: Tim Deegan t...@xen.org CC: Ian Campbell ian.campb...@citrix.com CC: Ian Jackson ian.jack...@eu.citrix.com --- xen/common/event_channel.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index cfe4978..89a7d99 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel( chn = evtchn_from_port(d, port); rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid); +if ( rc ) +goto out; out here appears to return port, not rc so you aren't returning failure, but an even more half setup port than before. And I think you need to free the port on failure too. chn-state = ECS_UNBOUND; chn-xen_consumer = get_xen_consumer(notification_fn); chn-notify_vcpu_id = local_vcpu-vcpu_id; -chn-u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID; +chn-u.unbound.remote_domid = remote_domid; out: spin_unlock(d-event_lock); ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On 12.01.15 at 11:12, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:09 PM On 12.01.15 at 10:56, kevin.t...@intel.com wrote: the result is related to another open whether we want to block guest boot for such problem. If 'warn' in domain builder is acceptable, we don't need to change lowmem for such rare 1GB case, just throws a warning for unnecessary conflictions (doesn't hurt if user doesn't assign it). And how would you then deal with the one guest needing that range reserved? if guest needs the range, then report-all or report-sel doesn't matter. domain builder throws the warning, and later device assignment will fail (or warn w/ override). In reality I think 1GB is rare. Making such assumption to simplify implementation is reasonable. One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed
On Wed, 24 Dec 2014, Liang Li wrote: Use the 'xl pci-attach $DomU $BDF' command to attach more then one PCI devices to the guest, then detach the devices with 'xl pci-detach $DomU $BDF', after that, re-attach these PCI devices again, an error message will be reported like following: libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive an error message from QMP server: Duplicate ID 'pci-pt-03_10.1' for device. The count of calling xen_pt_region_add and xen_pt_region_del are not the same will cause the XenPCIPassthroughState and it's related QemuOpts object not be released properly. Thanks for the patch! From this description, I don't quite understand why the memory_region_ref and memory_region_unref calls are wrong. What do you mean by The count of calling xen_pt_region_add and xen_pt_region_del are not the same? On unplug xen_pt_region_del does not get called? Or the memory region argument is not exactly the same as the one initially passed to xen_pt_region_add? Signed-off-by: Liang Li liang.z...@intel.com Reported-by: Longtao Pang longtaox.p...@intel.com --- hw/xen/xen_pt.c | 4 1 file changed, 4 deletions(-) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..523b8a2 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -588,7 +588,6 @@ static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec) XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, memory_listener); -memory_region_ref(sec-mr); xen_pt_region_update(s, sec, true); } @@ -598,7 +597,6 @@ static void xen_pt_region_del(MemoryListener *l, MemoryRegionSection *sec) memory_listener); xen_pt_region_update(s, sec, false); -memory_region_unref(sec-mr); } static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec) @@ -606,7 +604,6 @@ static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec) XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, io_listener); -memory_region_ref(sec-mr); xen_pt_region_update(s, sec, true); } @@ -616,7 +613,6 @@ static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec) io_listener); xen_pt_region_update(s, sec, false); -memory_region_unref(sec-mr); } static const MemoryListener xen_pt_memory_listener = { -- 1.9.1 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c
El 12/01/15 a les 8.09, Bob Liu ha escrit: On 01/09/2015 11:51 PM, Roger Pau Monné wrote: El 06/01/15 a les 14.19, Bob Liu ha escrit: When there is no enough free grants, gnttab_alloc_grant_references() will fail and block request queue will stop. If the system is always lack of grants, blkif_restart_queue_callback() can't be scheduled and block request queue can't be restart(block I/O hang). But when there are former requests complete, some grants may free to persistent_gnts_c, we can give the request queue another chance to restart and avoid block hang. Reported-by: Junxiao Bi junxiao...@oracle.com Signed-off-by: Bob Liu bob@oracle.com --- drivers/block/xen-blkfront.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2236c6f..dd30f99 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, } } } + + /* +* Request queue would be stopped if failed to alloc enough grants and +* won't be restarted until gnttab_free_count = info-callback-count. +* +* But there is another case, once we have enough persistent grants we +* can try to restart the request queue instead of continue to wait for +* 'gnttab_free_count'. +*/ + if (info-persistent_gnts_c = info-callback.count) + schedule_work(info-work); I guess I'm missing something here, but blkif_completion is called by blkif_interrupt, which in turn calls kick_pending_request_queues when finished, which IMHO should be enough to restart the processing of requests. You are right, sorry for the mistake. The problem we met was a xenblock I/O hang. Dumped data showed at that time info-persistent_gnt_c = 8, max_gref = 8 but block request queue was still stopped. It's very hard to reproduce this issue, we only see it once. I think there might be a race condition: request A request B: info-persistent_gnts_c max_grefs and fail to alloc enough grants interrupt happen, blkif_complte(): info-persistent_gnts_c++ kick_pending_request_queues() stop block request queue added to callback list If the system don't have enough grants(but have enough persistent_gnts), request queue would still hang. Not sure how can this happen, blkif_queue_request explicitly checks that persistent_gnts_c max_grefs before adding the callback and stopping the request queue, so in your case the queue should not be blocked. Can you dump the state of info-connected? Roger. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications
On 12.01.15 at 12:33, andrew.coop...@citrix.com wrote: On 12/01/15 08:57, Jan Beulich wrote: --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru d-evtchn_port_ops-init(d, evtchn); } -static inline void evtchn_port_set_pending(struct vcpu *v, +static inline void evtchn_port_set_pending(struct domain *d, + unsigned int vcpu_id, struct evtchn *evtchn) I would rename this to the, now vacant, evtchn_set_pending(). It takes an evtchn* not a port. (Its sole caller was evtchn_set_pending(), so the patch won't grow) No (and I had actually considered it) - that would get its name out of sync with all its sibling wrappers. Furthermore, all callers except send_guest_vcpu_virq() currently use evtchn-notify_vcpu_id to get a struct vcpu* to pass. I think you can drop the vcpu_id parameter and use evtchn-notify_vcpu_id directly, which reduces the likelyhood of a bug where the evtchn is bound to one vcpu but a caller gets the wrong id and raises the event channel on the wrong vcpu. Generally a nice idea, but it doesn't immediately/obviously fit with the use in send_guest_vcpu_virq(). Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] xl only waits 33 seconds for ballooning to complete
On Thu, Jan 8, 2015 at 1:11 AM, Mike Latimer mlati...@suse.com wrote: On Wednesday, January 07, 2015 09:38:31 AM Ian Campbell wrote: That's exactly what I was about to suggest as I read the penultimate paragraph, i.e. keep waiting so long as some reasonable delta occurs on each iteration. Thanks, Ian. I wonder if there is a future-safe threshold on the amount of delta that indicates progress is being made. Should some minimum safe progress amount or percentage be set, or is it better to just make sure free memory is increasing at the end of each iteration of the loop? For example, the following simple change just tracks free_memkb and only decrements the retry count if it has not increased since the last check: -- diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index ed0d478..4cf2991 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -2196,7 +2196,7 @@ static int preserve_domain(uint32_t *r_domid, libxl_event *event, static int freemem(uint32_t domid, libxl_domain_build_info *b_info) { int rc, retries = 3; -uint32_t need_memkb, free_memkb; +uint32_t need_memkb, free_memkb, free_memkb_prev = 0; if (!autoballoon) return 0; @@ -2229,7 +2229,10 @@ static int freemem(uint32_t domid, libxl_domain_build_info *b_info) if (rc 0) return rc; -retries--; +/* only decrement retry count if free_memkb is not increasing */ +if (free_memkb = free_memkb_prev) +retries--; +free_memkb_prev = free_memkb; I would: 1. Reset the retries after a successful increase 2. Not allow free_memkb_prev to go down. So maybe something like the following? if (free_memkb = free_memkb_prev) { retries--; } else { retries = MAX_RETRIES; free_memkb_prev = free_memkb; } I'm inclined to say we could add an option to say wait forever, or to increase the period of the checks; but ultimately at some point someone (either xl or the human) needs to timeout and say, This is never going to finish. 10s seems like a very conservative default. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On 12.01.15 at 13:16, kevin.t...@intel.com wrote: I've explained this several times. If there's a violation on above assumption from required devices, same for report-all and report-sel. If the violation is caused by unnecessary devices, please note I'm proposing 'warn' here so report-all at most just adds more warnings in domain builder. the conflict will be caught later if it becomes relevant to be assigned (e.g. thru hotplug). Since we're apparently not understanding one another, please explain with a suitable example how you envision things to behave. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 5:51 PM On 12.01.15 at 10:41, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 5:33 PM On 12.01.15 at 09:46, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Friday, January 09, 2015 6:35 PM On 09.01.15 at 11:10, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] The question isn't about migrating with devices assigned, but about assigning devices after migration (consider a dual vif + SR-IOV NIC guest setup where the SR-IOV NIC gets hot-removed before migration and a new one hot-plugged afterwards). Furthermore any tying of the guest memory layout to the host's where the guest first boots is awkward, as post-migration there's not going to be any reliable correlation between the guest layout and the new host's. how can you solve this? like above example, a NIC on node-A leaves a reserved region in guest e820. now it's hot-removed and then migrated to node-b. there's no way to update e820 again since it's only boot structure. then user will still see such awkward regions. since it's not avoidable, report-all in the summary mail looks not causing a new problem. The solution to this are reserved regions specified in the guest config, independent of host characteristics. I don't think how reserved regions are specified matter here. My point is that when a region is reserved in e820 at boot time, there's no way to erase that knowledge in the guest even when devices causing that reservation are hot removed later. I don't think anyone ever indicated that such erasure would be needed/wanted - I'm not sure how you ended up there. I ended here to indicate that report-all which gives user more reserved regions than necessary is not a weird case since above scenario can also create such fact. User shouldn't set expectation about reserved region layout. and this argument is necessary to support our proposal of using report-all. :-) The fact that ranges can't be removed from a guest's memory map is irrelevant - there's simply no question that this is that way. The main counter argument against report-all remains: It may result in unnecessarily little low memory in guests not needing all of the host regions to be reserved for them. the result is related to another open whether we want to block guest boot for such problem. If 'warn' in domain builder is acceptable, we don't need to change lowmem for such rare 1GB case, just throws a warning for unnecessary conflictions (doesn't hurt if user doesn't assign it). Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2] x86/HVM: make hvm_efer_valid() honor guest features
On 12/01/15 08:00, Jan Beulich wrote: Following the earlier similar change validating CR4 modifications. Signed-off-by: Jan Beulich jbeul...@suse.com Acked-by: Andrew Cooper andrew.coop...@citrix.com --- v2: consider CR0.PG during restore when checking EFER.LMA --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1672,20 +1672,53 @@ static int hvm_save_cpu_ctxt(struct doma return 0; } -static bool_t hvm_efer_valid(struct domain *d, - uint64_t value, uint64_t efer_validbits) +static bool_t hvm_efer_valid(const struct vcpu *v, uint64_t value, + signed int cr0_pg) { -if ( nestedhvm_enabled(d) cpu_has_svm ) -efer_validbits |= EFER_SVME; +unsigned int ext1_ecx = 0, ext1_edx = 0; -return !((value ~efer_validbits) || - ((sizeof(long) != 8) (value EFER_LME)) || - (!cpu_has_svm (value EFER_SVME)) || - (!cpu_has_nx (value EFER_NX)) || - (!cpu_has_syscall (value EFER_SCE)) || - (!cpu_has_lmsl (value EFER_LMSLE)) || - (!cpu_has_ffxsr (value EFER_FFXSE)) || - ((value (EFER_LME|EFER_LMA)) == EFER_LMA)); +if ( cr0_pg 0 !is_hardware_domain(v-domain) ) +{ +unsigned int level; + +ASSERT(v == current); +hvm_cpuid(0x8000, level, NULL, NULL, NULL); +if ( level = 0x8001 ) +hvm_cpuid(0x8001, NULL, NULL, ext1_ecx, ext1_edx); +} +else +{ +ext1_edx = boot_cpu_data.x86_capability[X86_FEATURE_LM / 32]; +ext1_ecx = boot_cpu_data.x86_capability[X86_FEATURE_SVM / 32]; +} + +if ( (value EFER_SCE) + !(ext1_edx cpufeat_mask(X86_FEATURE_SYSCALL)) ) +return 0; + +if ( (value (EFER_LME | EFER_LMA)) + !(ext1_edx cpufeat_mask(X86_FEATURE_LM)) ) +return 0; + +if ( cr0_pg 0 (value EFER_LMA) (!(value EFER_LME) || !cr0_pg) ) +return 0; + +if ( (value EFER_NX) !(ext1_edx cpufeat_mask(X86_FEATURE_NX)) ) +return 0; + +if ( (value EFER_SVME) + (!(ext1_ecx cpufeat_mask(X86_FEATURE_SVM)) || + !nestedhvm_enabled(v-domain)) ) +return 0; + +if ( (value EFER_LMSLE) !cpu_has_lmsl ) +return 0; + +if ( (value EFER_FFXSE) + !(ext1_edx cpufeat_mask(X86_FEATURE_FFXSR)) ) +return 0; + +return 1; } /* These reserved bits in lower 32 remain 0 after any load of CR0 */ @@ -1763,7 +1796,6 @@ static int hvm_load_cpu_ctxt(struct doma struct vcpu *v; struct hvm_hw_cpu ctxt; struct segment_register seg; -uint64_t efer_validbits; /* Which vcpu is this? */ vcpuid = hvm_load_instance(h); @@ -1794,9 +1826,7 @@ static int hvm_load_cpu_ctxt(struct doma return -EINVAL; } -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_LMA - | EFER_NX | EFER_SCE; -if ( !hvm_efer_valid(d, ctxt.msr_efer, efer_validbits) ) +if ( !hvm_efer_valid(v, ctxt.msr_efer, MASK_EXTR(ctxt.cr0, X86_CR0_PG)) ) { printk(XENLOG_G_ERR HVM%d restore: bad EFER %# PRIx64 \n, d-domain_id, ctxt.msr_efer); @@ -2936,12 +2966,10 @@ err: int hvm_set_efer(uint64_t value) { struct vcpu *v = current; -uint64_t efer_validbits; value = ~EFER_LMA; -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_NX | EFER_SCE; -if ( !hvm_efer_valid(v-domain, value, efer_validbits) ) +if ( !hvm_efer_valid(v, value, -1) ) { gdprintk(XENLOG_WARNING, Trying to set reserved bit in EFER: %#PRIx64\n, value); ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] common/memory: fix an XSM error path
On 12/01/15 08:21, Jan Beulich wrote: XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap return the extent at which failure was detected, not error indicators. Signed-off-by: Jan Beulich jbeul...@suse.com Reviewed-by: Andrew Cooper andrew.coop...@citrix.com --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN return start_extent; args.domain = d; -rc = xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d); -if ( rc ) +if ( xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d) ) { rcu_unlock_domain(d); -return rc; +return start_extent; } switch ( op ) ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] One question about the hypercall to translate gfn to mfn.
On 09/01/15 08:02, Tian, Kevin wrote: From: Tim Deegan [mailto:t...@xen.org] Sent: Thursday, January 08, 2015 8:43 PM Hi, Not really. The IOMMU tables are also 64-bit so there must be enough addresses to map all of RAM. There shouldn't be any need for these mappings to be _contiguous_, btw. You just need to have one free address for each mapping. Again, following how grant maps work, I'd imagine that PVH guests will allocate an unused GFN for each mapping and do enough bookkeeping to make sure they don't clash with other GFN users (grant mapping, ballooning, c). PV guests will probably be given a BFN by the hypervisor at map time (which will be == MFN in practice) and just needs to pass the same BFN to the unmap call later (it can store it in the GTT meanwhile). if possible prefer to make both consistent, i.e. always finding unused GFN? I don't think it will be possible. PV domains are already using BFNs supplied by Xen (in fact == MFN) for backend grant mappings, which would conflict with supplying their own for these mappings. But again, I think the kernel maintainers for Xen may have a better idea of how these interfaces are used inside the kernel. For example, it might be easy enough to wrap the two systems inside a common API inside linux. Again, following how grant mapping works seems like the way forward. So Konrad, do you have any insight here? :-) Malcolm took two pages of this notebook explaining to me how he thought it should work (in combination with his PV IOMMU work), so I'll let him explain. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH net] drivers: net: xen-netfront: remove residual dead code
On 10/01/15 09:20, Vincenzo Maffione wrote: This patch removes some unused arrays from the netfront private data structures. These arrays were used in flip receive mode. Reviewed-by: David Vrabel david.vra...@citrix.com Thanks. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu
-Original Message- From: xen-devel-boun...@lists.xen.org [mailto:xen-devel- boun...@lists.xen.org] On Behalf Of Andrew Cooper Sent: 12 January 2015 09:47 To: Xen-devel Cc: Andrew Cooper; Keir (Xen.org); Jan Beulich Subject: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu The resource behind an event channel is domain centric rather than vcpu centric, and free_xen_event_channel() only follows the vcpu's domain pointer. This change allows mem_event_disable() to avoid arbitrarily referencing d-vcpu[0] just to pass the domain. No functional change. Signed-off-by: Andrew Cooper andrew.coop...@citrix.com CC: Keir Fraser k...@xen.org CC: Jan Beulich jbeul...@suse.com Reviewed-by: Paul Durrant paul.durr...@citrix.com --- xen/arch/x86/hvm/hvm.c | 12 ++-- xen/common/event_channel.c |4 +--- xen/common/mem_event.c |2 +- xen/include/xen/event.h|3 +-- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 8b06bfd..acfc032 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -647,7 +647,7 @@ static int hvm_ioreq_server_add_vcpu(struct hvm_ioreq_server *s, return 0; fail3: -free_xen_event_channel(v, sv-ioreq_evtchn); +free_xen_event_channel(v-domain, sv-ioreq_evtchn); fail2: spin_unlock(s-lock); @@ -674,9 +674,9 @@ static void hvm_ioreq_server_remove_vcpu(struct hvm_ioreq_server *s, list_del(sv-list_entry); if ( v-vcpu_id == 0 s-bufioreq.va != NULL ) -free_xen_event_channel(v, s-bufioreq_evtchn); +free_xen_event_channel(v-domain, s-bufioreq_evtchn); -free_xen_event_channel(v, sv-ioreq_evtchn); +free_xen_event_channel(v-domain, sv-ioreq_evtchn); xfree(sv); break; @@ -701,9 +701,9 @@ static void hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s) list_del(sv-list_entry); if ( v-vcpu_id == 0 s-bufioreq.va != NULL ) -free_xen_event_channel(v, s-bufioreq_evtchn); +free_xen_event_channel(v-domain, s-bufioreq_evtchn); -free_xen_event_channel(v, sv-ioreq_evtchn); +free_xen_event_channel(v-domain, sv-ioreq_evtchn); xfree(sv); } @@ -1333,7 +1333,7 @@ static int hvm_replace_event_channel(struct vcpu *v, domid_t remote_domid, /* xchg() ensures that only we call free_xen_event_channel(). */ old_port = xchg(p_port, new_port); -free_xen_event_channel(v, old_port); +free_xen_event_channel(v-domain, old_port); return 0; } diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index 7d6de54..cfe4978 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -1173,11 +1173,9 @@ int alloc_unbound_xen_event_channel( } -void free_xen_event_channel( -struct vcpu *local_vcpu, int port) +void free_xen_event_channel(struct domain *d, int port) { struct evtchn *chn; -struct domain *d = local_vcpu-domain; spin_lock(d-event_lock); diff --git a/xen/common/mem_event.c b/xen/common/mem_event.c index 16ebdb5..0cc43d7 100644 --- a/xen/common/mem_event.c +++ b/xen/common/mem_event.c @@ -221,7 +221,7 @@ static int mem_event_disable(struct domain *d, struct mem_event_domain *med) } /* Free domU's event channel and leave the other one unbound */ -free_xen_event_channel(d-vcpu[0], med-xen_port); +free_xen_event_channel(d, med-xen_port); /* Unblock all vCPUs */ for_each_vcpu ( d, v ) diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h index 88526f8..0eb1dd3 100644 --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -60,8 +60,7 @@ typedef void (*xen_event_channel_notification_t)( int alloc_unbound_xen_event_channel( struct vcpu *local_vcpu, domid_t remote_domid, xen_event_channel_notification_t notification_fn); -void free_xen_event_channel( -struct vcpu *local_vcpu, int port); +void free_xen_event_channel(struct domain *d, int port); /* Query if event channel is in use by the guest */ int guest_enabled_event(struct vcpu *v, uint32_t virq); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote: Il 12/01/2015 10:31, Ian Campbell ha scritto: On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: In the meantime, I saw this: http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html Based on the post above seems that phy will have important risk of data loss if I understand good, from Ian Campbell post: xl also uses qdisk for raw disk images instead of loop+blkback which xend used, because there are concerns that the loop driver can lead to data loss (by not implementing direct i/o the underlying device, and/or not handling flushes correct, my memory is a bit fuzzy). Stefano already corrected me on this in this very thread. Ian. Thanks for your reply. I saw other posts about and if I understand good when O_DIRECT patches will be in upstream loop driver the data loss risk will be solved, right? Stefano says that O_DIRECT is not needed, only correct barrier semantics are and he believes those are correctly implemented. O_DIRECT would be an additional layer of safety perhaps, but sounds to be not strictly needed. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3] x86: also allow REP STOS emulation acceleration
On 12/01/15 08:01, Jan Beulich wrote: While the REP MOVS acceleration appears to have helped qemu-traditional based guests, qemu-upstream (or really the respective video BIOSes) doesn't appear to benefit from that. Instead the acceleration added here provides a visible performance improvement during very early HVM guest boot. Signed-off-by: Jan Beulich jbeul...@suse.com Acked-by: Andrew Cooper andrew.coop...@citrix.com --- v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos() Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew. v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by Andrew. Add output operand telling the compiler that buf is being written. --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard( return X86EMUL_OKAY; } +static int hvmemul_rep_stos_discard( +void *p_data, +enum x86_segment seg, +unsigned long offset, +unsigned int bytes_per_rep, +unsigned long *reps, +struct x86_emulate_ctxt *ctxt) +{ +return X86EMUL_OKAY; +} + static int hvmemul_rep_outs_discard( enum x86_segment src_seg, unsigned long src_offset, @@ -982,6 +993,113 @@ static int hvmemul_rep_movs( return X86EMUL_OKAY; } +static int hvmemul_rep_stos( +void *p_data, +enum x86_segment seg, +unsigned long offset, +unsigned int bytes_per_rep, +unsigned long *reps, +struct x86_emulate_ctxt *ctxt) +{ +struct hvm_emulate_ctxt *hvmemul_ctxt = +container_of(ctxt, struct hvm_emulate_ctxt, ctxt); +unsigned long addr; +paddr_t gpa; +p2m_type_t p2mt; +bool_t df = !!(ctxt-regs-eflags X86_EFLAGS_DF); +int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps, + hvm_access_write, hvmemul_ctxt, addr); + +if ( rc == X86EMUL_OKAY ) +{ +uint32_t pfec = PFEC_page_present | PFEC_write_access; + +if ( hvmemul_ctxt-seg_reg[x86_seg_ss].attr.fields.dpl == 3 ) +pfec |= PFEC_user_mode; + +rc = hvmemul_linear_to_phys( +addr, gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt); +} +if ( rc != X86EMUL_OKAY ) +return rc; + +/* Check for MMIO op */ +(void)get_gfn_query_unlocked(current-domain, gpa PAGE_SHIFT, p2mt); + +switch ( p2mt ) +{ +unsigned long bytes; +void *buf; + +default: +/* Allocate temporary buffer. */ +for ( ; ; ) +{ +bytes = *reps * bytes_per_rep; +buf = xmalloc_bytes(bytes); +if ( buf || *reps = 1 ) +break; +*reps = 1; +} + +if ( !buf ) +buf = p_data; +else +switch ( bytes_per_rep ) +{ +unsigned long dummy; + +#define CASE(bits, suffix) \ +case (bits) / 8: \ +asm ( rep stos #suffix \ + : =m (*(char (*)[bytes])buf), \ +=D (dummy), =c (dummy) \ + : a (*(const uint##bits##_t *)p_data), \ + 1 (buf), 2 (*reps) ); \ +break +CASE(8, b); +CASE(16, w); +CASE(32, l); +CASE(64, q); +#undef CASE + +default: +ASSERT_UNREACHABLE(); +xfree(buf); +return X86EMUL_UNHANDLEABLE; +} + +/* Adjust address for reverse store. */ +if ( df ) +gpa -= bytes - bytes_per_rep; + +rc = hvm_copy_to_guest_phys(gpa, buf, bytes); + +if ( buf != p_data ) +xfree(buf); + +switch ( rc ) +{ +case HVMCOPY_gfn_paged_out: +case HVMCOPY_gfn_shared: +return X86EMUL_RETRY; +case HVMCOPY_okay: +return X86EMUL_OKAY; +} + +gdprintk(XENLOG_WARNING, + Failed REP STOS: gpa=%PRIpaddr reps=%lu bytes_per_rep=%u\n, + gpa, *reps, bytes_per_rep); +/* fall through */ +case p2m_mmio_direct: +return X86EMUL_UNHANDLEABLE; + +case p2m_mmio_dm: +return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df, + p_data); +} +} + static int hvmemul_read_segment( enum x86_segment seg, struct segment_register *reg, @@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_ .rep_ins = hvmemul_rep_ins, .rep_outs = hvmemul_rep_outs, .rep_movs = hvmemul_rep_movs, +.rep_stos = hvmemul_rep_stos, .read_segment = hvmemul_read_segment, .write_segment =
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On 12.01.15 at 12:22, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:23 PM One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. 3GB) while warn other conflictions (e.g. 3G) in domain builder (let later assignment path to actually fail if confliction does matter), And have no way for the user to (securely) avoid that failure. Plus the definition of reasonable here is of course going to be arbitrary. Jan then we don't need to solve all conflictions in domain builder (if say 1G example fixing it may instead reduce lowmem greatly) and then report-all may just add more warnings than report-sel for unused devices. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] Xen 4.5 Development Update (GA slip by a week), GA slip + 1 day
On 9 Jan 2015, at 18:30, Konrad Rzeszutek Wilk konrad.w...@oracle.com wrote: On Fri, Jan 09, 2015 at 01:51:56PM +, Lars Kurth wrote: Note that I published the Acknowledgements and most 4.5 documentation is now in place. Bits which you may want to check and fix * Spelling of your name if it contains special characters as the scripts that I use nuke them - http://wiki.xenproject.org/wiki/Xen_Project_4.5_Acknowledgements * Some additions to http://wiki.xenproject.org/wiki/Xen_Project_Release_Features, which I will do And we do need to populate the Known Issues section of http://wiki.xenproject.org/wiki/Xen_Project_4.5_Release_Notes I've added the one that I am aware (systemd). In 4.4 we also had this http://wiki.xenproject.org/wiki?title=Xen_Project_4.4_Feature_List which was linked of the http://wiki.xenproject.org/wiki/Xen_Project_4.4_Release_Notes I can populate most of htem or we can just reference the blog that will be on the release day? Konrad, thank you. You don't need to: I created a separate page - see http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List We can just link to ti from the release notes Lars ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 7:37 PM On 12.01.15 at 12:22, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:23 PM One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. 3GB) while warn other conflictions (e.g. 3G) in domain builder (let later assignment path to actually fail if confliction does matter), And have no way for the user to (securely) avoid that failure. Plus the definition of reasonable here is of course going to be arbitrary. Jan actually here I didn't get your point then. It's your proposal to make reasonable assumption like below: --- d) Move down the lowmem RAM/MMIO boundary so that a single, contiguous chunk of lowmem results, with all other RAM moving up beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be considered here, and I think we can reasonably safely assume that no RMRRs will ever report ranges above 1Mb but below the host lowmem RAM/MMIO boundary (i.e. we can presumably rest assured that the lowmem chunk will always be reasonably big). --- Thanks Kevin then we don't need to solve all conflictions in domain builder (if say 1G example fixing it may instead reduce lowmem greatly) and then report-all may just add more warnings than report-sel for unused devices. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On 12.01.15 at 12:41, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 7:37 PM On 12.01.15 at 12:22, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:23 PM One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. 3GB) while warn other conflictions (e.g. 3G) in domain builder (let later assignment path to actually fail if confliction does matter), And have no way for the user to (securely) avoid that failure. Plus the definition of reasonable here is of course going to be arbitrary. actually here I didn't get your point then. It's your proposal to make reasonable assumption like below: --- d) Move down the lowmem RAM/MMIO boundary so that a single, contiguous chunk of lowmem results, with all other RAM moving up beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be considered here, and I think we can reasonably safely assume that no RMRRs will ever report ranges above 1Mb but below the host lowmem RAM/MMIO boundary (i.e. we can presumably rest assured that the lowmem chunk will always be reasonably big). Correct - but my point is that this won't work well with your report-all mechanism, only with the report-sel one. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:23 PM On 12.01.15 at 11:12, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:09 PM On 12.01.15 at 10:56, kevin.t...@intel.com wrote: the result is related to another open whether we want to block guest boot for such problem. If 'warn' in domain builder is acceptable, we don't need to change lowmem for such rare 1GB case, just throws a warning for unnecessary conflictions (doesn't hurt if user doesn't assign it). And how would you then deal with the one guest needing that range reserved? if guest needs the range, then report-all or report-sel doesn't matter. domain builder throws the warning, and later device assignment will fail (or warn w/ override). In reality I think 1GB is rare. Making such assumption to simplify implementation is reasonable. One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. 3GB) while warn other conflictions (e.g. 3G) in domain builder (let later assignment path to actually fail if confliction does matter), then we don't need to solve all conflictions in domain builder (if say 1G example fixing it may instead reduce lowmem greatly) and then report-all may just add more warnings than report-sel for unused devices. You keep saying report-all or report-sel, but I'm not 100% clear what you mean by those. In any case, the naming has got to be a bit misleading: the important questions at the moment, AFAICT, are: 1. Whether we make holes at boot time for all RMRRs on the system, or whether only make RMRRs for some subset (or potentially some other arbitrary range, which may include RMRRs on other hosts to which we may want to migrate). 2. Whether those holes are made by the domain builder in libxc, or by hvmloader 3. What happens if Xen is asked to assign a device and it finds that the required RMRR is not empty: a. during guest creation b. after the guest has booted Obviously at some point some part of the toolstack needs to identify which RMRRs go with what device, so that either libxc or hvmloader can make the appropriate holes in the address space; but at that point, report is not so much the right word as query. (Obviously we want to report in the e820 map all RMRRs that we've made holes for in the guest.) -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, 2015-01-12 at 12:13 +, George Dunlap wrote: On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:23 PM On 12.01.15 at 11:12, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:09 PM On 12.01.15 at 10:56, kevin.t...@intel.com wrote: the result is related to another open whether we want to block guest boot for such problem. If 'warn' in domain builder is acceptable, we don't need to change lowmem for such rare 1GB case, just throws a warning for unnecessary conflictions (doesn't hurt if user doesn't assign it). And how would you then deal with the one guest needing that range reserved? if guest needs the range, then report-all or report-sel doesn't matter. domain builder throws the warning, and later device assignment will fail (or warn w/ override). In reality I think 1GB is rare. Making such assumption to simplify implementation is reasonable. One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. 3GB) while warn other conflictions (e.g. 3G) in domain builder (let later assignment path to actually fail if confliction does matter), then we don't need to solve all conflictions in domain builder (if say 1G example fixing it may instead reduce lowmem greatly) and then report-all may just add more warnings than report-sel for unused devices. You keep saying report-all or report-sel, but I'm not 100% clear what you mean by those. Is the distinction between all reserved areas and only (selectively) those which are related to an RMRR? That's how I've been reading it... Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 00/11] Alternate p2m: support multiple copies of host p2m
On 10.01.15 at 00:04, edmund.h.wh...@intel.com wrote: On 01/09/2015 02:41 PM, Andrew Cooper wrote: Having some non-OS part of the guest swap the EPT tables and accidentally turn a DMA buffer read-only is not going to end well. The agent can certainly do bad things, and at some level you have to assume it is sensible enough not to. However, I'm not sure this is fundamentally more dangerous than what a privileged domain can do today using the MEMOP... operations, and people are already using those for very similar purposes. I don't follow - how is what privileged domain can do related to the proposed changes here (which are - via VMFUNC - at least partially guest controllable, and that's also the case Andrew mentioned in his reply)? I'm having a hard time understanding how a P2M stripped of anything that's not plain RAM can be very useful to a guest. IOW without such fundamental aspects clarified I don't see a point in looking at the individual patches (which btw, according to your wording elsewhere, should have been marked RFC). Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] x86/MCE: allow overriding the CMCI threshold
On 2015/01/12 9:44, Jan Beulich wrote: We've had reports of systems where CMCIs would surface at a relatively high rate during certain periods of time, without them apparently causing subsequent more severe problems (see Xeon E7-8800/4800/2800 specification clarification SC1). Give the admin a knob to lower the impact on the system logs. Signed-off-by: Jan Beulich jbeul...@suse.com A small comment at the bottom, besides of that: Acked-by: Christoph Egger cheg...@amazon.de --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -242,6 +242,14 @@ the NMI watchdog is also enabled. If set, override Xen's default choice for the platform timer. +### cmci-threshold + `= integer` + + Default: `2` + +Specify the event count threshold for raising Corrected Machine Check +Interrupts. Specifying zero disables CMCI handling. + ### cmos-rtc-probe `= boolean` --- a/xen/arch/x86/cpu/mcheck/mce_intel.c +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c @@ -492,6 +492,9 @@ static int do_cmci_discover(int i) { unsigned msr = MSR_IA32_MCx_CTL2(i); u64 val; +unsigned int threshold, max_threshold; +static unsigned int cmci_threshold = 2; +integer_param(cmci-threshold, cmci_threshold); rdmsrl(msr, val); /* Some other CPU already owns this bank. */ @@ -500,15 +503,28 @@ static int do_cmci_discover(int i) goto out; } -val = ~CMCI_THRESHOLD_MASK; -wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD); -rdmsrl(msr, val); +if ( cmci_threshold ) +{ +wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK); +rdmsrl(msr, val); +} if (!(val CMCI_EN)) { /* This bank does not support CMCI. Polling timer has to handle it. */ mcabanks_set(i, __get_cpu_var(no_cmci_banks)); +wrmsrl(msr, val ~CMCI_THRESHOLD_MASK); return 0; } +max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK); +threshold = cmci_threshold; +if ( threshold max_threshold ) +{ + mce_printk(MCE_QUIET, + CMCI: threshold %#x too large for CPU%u bank %u, using %#x\n, + threshold, smp_processor_id(), i, max_threshold); + threshold = max_threshold; +} +wrmsrl(msr, (val ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold); mcabanks_set(i, __get_cpu_var(mce_banks_owned)); out: mcabanks_clear(i, __get_cpu_var(no_cmci_banks)); --- a/xen/arch/x86/cpu/mcheck/x86_mca.h +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h @@ -86,9 +86,6 @@ /* Bitfield of MSR_K8_HWCR register */ #define K8_HWCR_MCi_STATUS_WREN (1ULL 18) -/*Intel Specific bitfield*/ -#define CMCI_THRESHOLD 0x2 - #define MCi_MISC_ADDRMOD_MASK (0x7UL 6) #define MCi_MISC_PHYSMOD(0x2UL 6) I think these two are also Intel specific bitfields. Please leave the comment for those. Christoph ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications
On 12/01/15 08:57, Jan Beulich wrote: The trivial wrapper evtchn_set_pending() is pretty pointless, as it only serves to invoke another wrapper evtchn_port_set_pending(). In turn, the latter is kind of inconsistent with its siblings in that is takes a struct vcpu * rather than a struct domain * - adjusting this allows for more efficient code in the majority of cases. Reviewed-by: David Vrabel david.vra...@citrix.com David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH RFC] xen-time: decreasing the rating of the xen clocksource below that of the tsc clocksource for dom0's
On 08/01/15 15:06, Imre Palik wrote: On 01/07/15 17:30, Ian Campbell wrote: On Wed, 2015-01-07 at 17:16 +0100, Imre Palik wrote: From: Palik, Imre im...@amazon.de In Dom0's the use of the TSC clocksource (whenever it is stable enough to be used) instead of the Xen clocksource should not cause any issues, as Dom0 VMs never live-migrated. Is this still true given that dom0's vcpus are migrated amongst pcpus on the host? The tsc are not synchronised on some generations of hardware so the result there would be the TSC appearing to do very odd things under dom0's feet. Does Linux cope with that or does it not matter for some other reason? First of all, if the initial pcpus are not synchronised, linux won't consider TSC as a viable clocksource. If the initial pcpus are synchronised, but then the dom0 vcpus are migrated to unsynchronised pcpus, then hopefully the tsc watchdog catches the issue, and the kernel falls back to the xen clocksource. The issue here is that the watchdog can only detect clock skews bigger than 0.0625s/0.5s. Hopefully this is enough to avoid the weirdest things. I don't think any such hardware exists. Either TSC is synchronized across all CPUs or none. Also, some parts of the kernel (e.g., scheduling) will always use the paravirt clock. No matter what priorities are set. So it should be safe for some definition of safe. But I was unable to test it on a hardware platform that would hit the problematic case described above. Ok. Can you list the various time sources and their ratings in the commit message for clarity. i.e, to justify 275 (below TSC = 300. above hpet = 250). David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 8:03 PM On 12.01.15 at 12:41, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 7:37 PM On 12.01.15 at 12:22, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:23 PM One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. 3GB) while warn other conflictions (e.g. 3G) in domain builder (let later assignment path to actually fail if confliction does matter), And have no way for the user to (securely) avoid that failure. Plus the definition of reasonable here is of course going to be arbitrary. actually here I didn't get your point then. It's your proposal to make reasonable assumption like below: --- d) Move down the lowmem RAM/MMIO boundary so that a single, contiguous chunk of lowmem results, with all other RAM moving up beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be considered here, and I think we can reasonably safely assume that no RMRRs will ever report ranges above 1Mb but below the host lowmem RAM/MMIO boundary (i.e. we can presumably rest assured that the lowmem chunk will always be reasonably big). Correct - but my point is that this won't work well with your report-all mechanism, only with the report-sel one. I've explained this several times. If there's a violation on above assumption from required devices, same for report-all and report-sel. If the violation is caused by unnecessary devices, please note I'm proposing 'warn' here so report-all at most just adds more warnings in domain builder. the conflict will be caught later if it becomes relevant to be assigned (e.g. thru hotplug). Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
From: Tian, Kevin Sent: Monday, January 12, 2015 8:29 PM From: George Dunlap Sent: Monday, January 12, 2015 8:14 PM On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:23 PM On 12.01.15 at 11:12, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:09 PM On 12.01.15 at 10:56, kevin.t...@intel.com wrote: the result is related to another open whether we want to block guest boot for such problem. If 'warn' in domain builder is acceptable, we don't need to change lowmem for such rare 1GB case, just throws a warning for unnecessary conflictions (doesn't hurt if user doesn't assign it). And how would you then deal with the one guest needing that range reserved? if guest needs the range, then report-all or report-sel doesn't matter. domain builder throws the warning, and later device assignment will fail (or warn w/ override). In reality I think 1GB is rare. Making such assumption to simplify implementation is reasonable. One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. 3GB) while warn other conflictions (e.g. 3G) in domain builder (let later assignment path to actually fail if confliction does matter), then we don't need to solve all conflictions in domain builder (if say 1G example fixing it may instead reduce lowmem greatly) and then report-all may just add more warnings than report-sel for unused devices. You keep saying report-all or report-sel, but I'm not 100% clear what you mean by those. In any case, the naming has got to be a bit misleading: the important questions at the moment, AFAICT, are: I explained them in original proposal 1. Whether we make holes at boot time for all RMRRs on the system, or whether only make RMRRs for some subset (or potentially some other arbitrary range, which may include RMRRs on other hosts to which we may want to migrate). I use 'report-all' to stand for making holes for all RMRRs on the system, while 'report-sel' for specified subset. more accurate... 'report-sel' for making holes for RMRRs belonging to specified devices. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [xen-4.5-testing test] 33303: regressions - FAIL
xen.org writes ([xen-4.5-testing test] 33303: regressions - FAIL): flight 33303 xen-4.5-testing real [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/33303/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: test-amd64-i386-qemuu-rhel6hvm-intel 7 redhat-installfail REGR. vs. 33264 osstest is very badly overloaded right now so everything is taking a long time. To see where the release was at I peeked at the results of the currently running test (flight 33348) and it looks like it's going to justify a pass. I also compared its results to 33285 (a not-too-bad xen-unstable flight before we branched) and things look reasonably good there too. So I think we are going to be good to go. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
On Mon, 2015-01-12 at 11:17 +0100, Fabio Fantoni wrote: Il 12/01/2015 11:06, Ian Campbell ha scritto: On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote: Il 12/01/2015 10:31, Ian Campbell ha scritto: On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: In the meantime, I saw this: http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html Based on the post above seems that phy will have important risk of data loss if I understand good, from Ian Campbell post: xl also uses qdisk for raw disk images instead of loop+blkback which xend used, because there are concerns that the loop driver can lead to data loss (by not implementing direct i/o the underlying device, and/or not handling flushes correct, my memory is a bit fuzzy). Stefano already corrected me on this in this very thread. Ian. Thanks for your reply. I saw other posts about and if I understand good when O_DIRECT patches will be in upstream loop driver the data loss risk will be solved, right? Stefano says that O_DIRECT is not needed, only correct barrier semantics are and he believes those are correctly implemented. I not understand if manual changes and/or settings are needed, about this I mean: only correct barrier semantics are and he believes those are correctly implemented If yes what exactly? If Stefano is correct then it should all just work, no manual steps needed (if manual steps were needed we wouldn't be taking this patch). Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
Il 12/01/2015 11:06, Ian Campbell ha scritto: On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote: Il 12/01/2015 10:31, Ian Campbell ha scritto: On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: In the meantime, I saw this: http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html Based on the post above seems that phy will have important risk of data loss if I understand good, from Ian Campbell post: xl also uses qdisk for raw disk images instead of loop+blkback which xend used, because there are concerns that the loop driver can lead to data loss (by not implementing direct i/o the underlying device, and/or not handling flushes correct, my memory is a bit fuzzy). Stefano already corrected me on this in this very thread. Ian. Thanks for your reply. I saw other posts about and if I understand good when O_DIRECT patches will be in upstream loop driver the data loss risk will be solved, right? Stefano says that O_DIRECT is not needed, only correct barrier semantics are and he believes those are correctly implemented. I not understand if manual changes and/or settings are needed, about this I mean: only correct barrier semantics are and he believes those are correctly implemented If yes what exactly? O_DIRECT would be an additional layer of safety perhaps, but sounds to be not strictly needed. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Fri, Jan 9, 2015 at 2:43 AM, Tian, Kevin kevin.t...@intel.com wrote: From: George Dunlap Sent: Thursday, January 08, 2015 8:55 PM On Thu, Jan 8, 2015 at 12:49 PM, George Dunlap george.dun...@eu.citrix.com wrote: If RMRRs almost always happen up above 2G, for example, then a simple solution that wouldn't require too much work would be to make sure that the PCI MMIO hole we specify to libxc and to qemu-upstream is big enough to include all RMRRs. That would satisfy the libxc and qemu requirements. If we then store specific RMRRs we want included in xenstore, hvmloader can put them in the e820 map, and that would satisfy the hvmloader requirement. An alternate thing to do here would be to properly fix the qemu-upstream problem, by making a way for hvmloader to communicate changes in the gpfn layout to qemu. Then hvmloader could do the work of moving memory under RMRRs to higher memory; and libxc wouldn't need to be involved at all. I think it would also fix our long-standing issues with assigning PCI devices to qemu-upstream guests, which up until now have only been worked around. could you elaborate a bit for that long-standing issue? So qemu-traditional didn't particularly expect to know the guest memory layout. qemu-upstream does; it expects to know what areas of memory are guest memory and what areas of memory are unmapped. If a read or write happens to a gpfn which *xen* knows is valid, but which *qemu-upstream* thinks is unmapped, then qemu-upstream will crash. The problem though is that the guest's memory map is not actually communicated to qemu-upstream in any way. Originally, qemu-upstream was only told how much memory the guest had, and it just happens to choose the same guest memory layout as the libxc domain builder does. This works, but it is bad design, because if libxc were to change for some reason, people would have to simply remember to also change the qemu-upstream layout. Where this really bites us is in PCI pass-through. The default 4G MMIO hole is very small; and hvmloader naturally expects to be able to make this area larger by relocating memory from below 4G to above 4G. It moves the memory in Xen's p2m, but it has no way of communicating this to qemu-upstream. So when the guest does an MMIO instuction that causes qemu-upstream to access that memory, the guest crashes. There are two work-arounds at the moment: 1. A flag which tells hvmloader not to relocate memory 2. The option to tell qemu-upstream to make the memory hole larger. Both are just work-arounds though; a proper fix would be to allow hvmloader some way of telling qemu that the memory has moved, so it can update its memory map. This will (I'm pretty sure) have an effect on RMRR regions as well, for the reasons I've mentioned above: whether make the holes for the RMRRs in libxc or in hvmloader, if we *move* that memory up to the top of the address space (rather than, say, just not giving that RAM to the guest), then qemu-upstream's idea of the guest memory map will be wrong, and will probably crash at some point. Having the ability for hvmloader to populate and/or move the memory around, and then tell qemu-upstream what the resulting map looked like, would fix both the MMIO-resize issue and the RMRR problem, wrt qemu-upstream. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications
On 12/01/15 08:57, Jan Beulich wrote: --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru d-evtchn_port_ops-init(d, evtchn); } -static inline void evtchn_port_set_pending(struct vcpu *v, +static inline void evtchn_port_set_pending(struct domain *d, + unsigned int vcpu_id, struct evtchn *evtchn) I would rename this to the, now vacant, evtchn_set_pending(). It takes an evtchn* not a port. (Its sole caller was evtchn_set_pending(), so the patch won't grow) Furthermore, all callers except send_guest_vcpu_virq() currently use evtchn-notify_vcpu_id to get a struct vcpu* to pass. I think you can drop the vcpu_id parameter and use evtchn-notify_vcpu_id directly, which reduces the likelyhood of a bug where the evtchn is bound to one vcpu but a caller gets the wrong id and raises the event channel on the wrong vcpu. ~Andrew { -v-domain-evtchn_port_ops-set_pending(v, evtchn); +d-evtchn_port_ops-set_pending(d-vcpu[vcpu_id], evtchn); } static inline void evtchn_port_clear_pending(struct domain *d, ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed
From: Stefano Stabellini Sent: Monday, January 12, 2015 7:36 PM On Wed, 24 Dec 2014, Liang Li wrote: Use the 'xl pci-attach $DomU $BDF' command to attach more then one PCI devices to the guest, then detach the devices with 'xl pci-detach $DomU $BDF', after that, re-attach these PCI devices again, an error message will be reported like following: libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive an error message from QMP server: Duplicate ID 'pci-pt-03_10.1' for device. The count of calling xen_pt_region_add and xen_pt_region_del are not the same will cause the XenPCIPassthroughState and it's related QemuOpts object not be released properly. Thanks for the patch! From this description, I don't quite understand why the memory_region_ref and memory_region_unref calls are wrong. What do you mean by The count of calling xen_pt_region_add and xen_pt_region_del are not the same? On unplug xen_pt_region_del does not get called? Or the memory region argument is not exactly the same as the one initially passed to xen_pt_region_add? agree. Liang, could you elaborate how the patch is associated with above explanation? :-) Signed-off-by: Liang Li liang.z...@intel.com Reported-by: Longtao Pang longtaox.p...@intel.com --- hw/xen/xen_pt.c | 4 1 file changed, 4 deletions(-) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..523b8a2 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -588,7 +588,6 @@ static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec) XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, memory_listener); -memory_region_ref(sec-mr); xen_pt_region_update(s, sec, true); } @@ -598,7 +597,6 @@ static void xen_pt_region_del(MemoryListener *l, MemoryRegionSection *sec) memory_listener); xen_pt_region_update(s, sec, false); -memory_region_unref(sec-mr); } static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec) @@ -606,7 +604,6 @@ static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec) XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, io_listener); -memory_region_ref(sec-mr); xen_pt_region_update(s, sec, true); } @@ -616,7 +613,6 @@ static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec) io_listener); xen_pt_region_update(s, sec, false); -memory_region_unref(sec-mr); } static const MemoryListener xen_pt_memory_listener = { -- 1.9.1 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 00/11] Alternate p2m: support multiple copies of host p2m
Ed White writes ([PATCH 00/11] Alternate p2m: support multiple copies of host p2m): This set of patches adds support to hvm domains for EPTP switching by creating multiple copies of the host p2m (currently limited to 10 copies). Thanks for this. Did you CC me in my capacity as tools maintainer ? I can't see anything in this series which deals with any necessary tools changes. Are there tools parts to come later ? Thanks, Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
From: George Dunlap Sent: Monday, January 12, 2015 8:14 PM On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:23 PM On 12.01.15 at 11:12, kevin.t...@intel.com wrote: From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Monday, January 12, 2015 6:09 PM On 12.01.15 at 10:56, kevin.t...@intel.com wrote: the result is related to another open whether we want to block guest boot for such problem. If 'warn' in domain builder is acceptable, we don't need to change lowmem for such rare 1GB case, just throws a warning for unnecessary conflictions (doesn't hurt if user doesn't assign it). And how would you then deal with the one guest needing that range reserved? if guest needs the range, then report-all or report-sel doesn't matter. domain builder throws the warning, and later device assignment will fail (or warn w/ override). In reality I think 1GB is rare. Making such assumption to simplify implementation is reasonable. One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. 3GB) while warn other conflictions (e.g. 3G) in domain builder (let later assignment path to actually fail if confliction does matter), then we don't need to solve all conflictions in domain builder (if say 1G example fixing it may instead reduce lowmem greatly) and then report-all may just add more warnings than report-sel for unused devices. You keep saying report-all or report-sel, but I'm not 100% clear what you mean by those. In any case, the naming has got to be a bit misleading: the important questions at the moment, AFAICT, are: I explained them in original proposal 1. Whether we make holes at boot time for all RMRRs on the system, or whether only make RMRRs for some subset (or potentially some other arbitrary range, which may include RMRRs on other hosts to which we may want to migrate). I use 'report-all' to stand for making holes for all RMRRs on the system, while 'report-sel' for specified subset. including other RMRRs (from admin for migration) is orthogonal to above open. 2. Whether those holes are made by the domain builder in libxc, or by hvmloader based on current discussion, whether to make holes in hvmloader doesn't bring fundamental difference. as long as domain builder still need to populate memory (even minimal for hvmloader to boot), it needs to check conflict and may ideally make hole too (though we may make assumption not doing that) 3. What happens if Xen is asked to assign a device and it finds that the required RMRR is not empty: a. during guest creation b. after the guest has booted for Xen we don't need differentiate a/b. by default it's clear failure should be returned as it implies a security/correctness issue if moving forward. but based on discussion an override to 'warn' only is preferred, so admin can make decision (remains an open on whether to do global override or per-device override) Obviously at some point some part of the toolstack needs to identify which RMRRs go with what device, so that either libxc or hvmloader can make the appropriate holes in the address space; but at that point, report is not so much the right word as query. (Obviously we want to report in the e820 map all RMRRs that we've made holes for in the guest.) yes, using 'report' doesn't catch all the changes we need to make. Just use them to simplify discussion in case all are on the same page. However clearly my original explanation didn't make it. :/ and state my major intention again. I don't think the preparation (i.e. detect confliction and make holes) for device assignment should be a a blocking failure. Throw warning should be enough (i.e. in libxc). We should let actual device assignment path to make final call based on admin's configuration (default 'fail' w/ 'warn' override). Based on that policy I think 'report-all' (making holes for all host RMRRs) is an acceptable approach, w/ small impact on possibly more warning messages (actually not bad to help admin understand the hotplug possibility on this platform) and show more reserved regions to the end user (but he shouldn't make any assumption here). :-) Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output
Ian Campbell writes (Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output): We might like to consider something along these lines for the future: A reasonable idea (although I wonder if it should be configurable). Acked-by: Ian Jackson ian.jack...@eu.citrix.com Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu
On 12.01.15 at 10:46, andrew.coop...@citrix.com wrote: The resource behind an event channel is domain centric rather than vcpu centric, and free_xen_event_channel() only follows the vcpu's domain pointer. I wonder whether for symmetry alloc_unbound_xen_event_channel() shouldn't then also take a [struct domain *, unsigned int vcpu_id] pair instead of a struct vcpu *. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel
On 12/01/15 10:22, Ian Campbell wrote: On Mon, 2015-01-12 at 10:03 +, Andrew Cooper wrote: This is RFC because explicitly changes the logic introduced by c/s b34f2c375 xsm: label xen-consumer event channels, and is only compile tested. Xen event channels are not internal resources. They still have one end in a domain, and are created at the request of privileged domains. This logic which successfully creates a Xen event channel opens up undesirable failure cases with ill-specified XSM policies. If a domain is permitted to create ioreq servers or memevent listeners, but not to create event channels, the ioreq/memevent creation will succeed but attempting to bind the returned event channel will fail without any indication of a permission error. Signed-off-by: Andrew Cooper andrew.coop...@citrix.com CC: Daniel De Graaf dgde...@tycho.nsa.gov CC: Keir Fraser k...@xen.org CC: Jan Beulich jbeul...@suse.com CC: Tim Deegan t...@xen.org CC: Ian Campbell ian.campb...@citrix.com CC: Ian Jackson ian.jack...@eu.citrix.com --- xen/common/event_channel.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index cfe4978..89a7d99 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel( chn = evtchn_from_port(d, port); rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid); +if ( rc ) +goto out; out here appears to return port, not rc so you aren't returning failure, but an even more half setup port than before. Ah yes - so I am. rc was intended. All callers do correctly check for 0 to indicate failure. And I think you need to free the port on failure too. At the point that we bail, chn-state is still ECS_FREE so there is nothing to deallocate. ~Andrew chn-state = ECS_UNBOUND; chn-xen_consumer = get_xen_consumer(notification_fn); chn-notify_vcpu_id = local_vcpu-vcpu_id; -chn-u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID; +chn-u.unbound.remote_domid = remote_domid; out: spin_unlock(d-event_lock); ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] x86emul: tighten CLFLUSH emulation
On 12/01/15 08:23, Jan Beulich wrote: While for us it's not as bad as it was for Linux, their commit 13e457e0ee (KVM: x86: Emulator does not decode clflush well, by Nadav Amit na...@cs.technion.ac.il) nevertheless points out two shortcomings in our code: opcode 0F AE /7 is clflush only when it uses a memory mode (otherwise it's SFENCE) and when there's no REP prefix (an operand size prefix is fine, as that's CLFLUSHOPT). Signed-off-by: Jan Beulich jbeul...@suse.com Acked-by: Andrew Cooper andrew.coop...@citrix.com --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -4400,7 +4400,9 @@ x86_emulate( case 0xae: /* Grp15 */ switch ( modrm_reg 7 ) { -case 7: /* clflush */ +case 7: /* clflush{,opt} */ +fail_if(modrm_mod == 3); +fail_if(rep_prefix()); fail_if(ops-wbinvd == NULL); if ( (rc = ops-wbinvd(ctxt)) != 0 ) goto done; ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output
Ian Campbell writes ([OSSTEST PATCH] mg-debian-installer-update: produce deterministic output): Currently rerunning mg-debian-install-update when the external files have changed still produces differences in the local files produced ^ Missing not ? during post-processing. Avoid these differences by: - Using gzip -n, which avoids storing a timestamp in the gzip header (as well as the name, which we don't need). - Using pax -M norm, which normalises all timestamps (among other things, such as the owner, which we don't care about) - Using tar --mtime, with a reference within the dpkg-deb created hierarchy (which has timestamps from the package and is therefore dependent only on the downloaded package revision) With this the results of two invocations of mg-debian-installer-update(-all) are identical (assuming no changes to the downloaded files) as demonstrated by runnign this quick hack: Excellent. Apart from the missing word in the description, Acked-by: Ian Jackson ian.jack...@eu.citrix.com Thanks, Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output
On Mon, 2015-01-12 at 12:27 +, Ian Jackson wrote: Ian Campbell writes (Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output): We might like to consider something along these lines for the future: A reasonable idea (although I wonder if it should be configurable). I did consider just inserting a $CURL_OPTS which could be set to include pragma no-cache from the command line. I mostly didn't because I was too lazy to think about the correct shell quoting at the time... Acked-by: Ian Jackson ian.jack...@eu.citrix.com Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] tools: libxl: do not leak diskpath during local disk attach
Ian Jackson writes (Re: [PATCH] tools: libxl: do not leak diskpath during local disk attach): I will apply this patch to staging and queue it for backport. Backported to 4.3 and 4.4. Thanks, Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] Backport request for tools/hotplug: set mtu from bridge for tap interface - for Xen 4.3
With changeset 22885 support was added for setting the MTU in the vif-bridge script for when a vif interface was set to 'online'. The was not done for the 'add' operation. The 'add' operation was added to the script for when tap devices were specified (c/s 21944). With the setting of the MTU for the 'online' case was there a reason for omitting the 'add'? This patch sets the MTU for both 'online' and 'add' in the vif-bridge script. Backport for Xen version 4.3 Signed-off-by: Charles Arnold carn...@suse.com diff --git a/tools/hotplug/Linux/vif-bridge b/tools/hotplug/Linux/vif-bridge index f489519..da8a0f8 100644 --- a/tools/hotplug/Linux/vif-bridge +++ b/tools/hotplug/Linux/vif-bridge @@ -82,11 +82,7 @@ fi case $command in online) setup_virtual_bridge_port $dev -mtu=`ip link show $bridge | awk '/mtu/ { print $5 }'` -if [ -n $mtu ] [ $mtu -gt 0 ] -then -ip link set $dev mtu $mtu || : -fi +set_mtu $bridge $dev add_to_bridge $bridge $dev ;; @@ -97,6 +93,7 @@ case $command in add) setup_virtual_bridge_port $dev +set_mtu $bridge $dev add_to_bridge $bridge $dev ;; esac diff --git a/tools/hotplug/Linux/xen-network-common.sh b/tools/hotplug/Linux/xen-network-common.sh index 8cff156..9a9526b 100644 --- a/tools/hotplug/Linux/xen-network-common.sh +++ b/tools/hotplug/Linux/xen-network-common.sh @@ -132,3 +132,14 @@ add_to_bridge () { ip link set ${dev} up } +# Usage: set_mtu bridge dev +set_mtu () { +local bridge=$1 +local dev=$2 +mtu=`ip link show ${bridge}| awk '/mtu/ { print $5 }'` +if [ -n $mtu ] [ $mtu -gt 0 ] +then +ip link set ${dev} mtu $mtu || : +fi +} + ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2] libxl: Don't ignore error when we fail to give access to ioport/irq/iomem
On Mon, Jan 12, 2015 at 04:35:19PM +, Ian Campbell wrote: On Fri, 2015-01-09 at 16:06 +, Wei Liu wrote: On Fri, Jan 09, 2015 at 03:56:45PM +, Julien Grall wrote: If we fail to give the access, the domain will unlikely work correctly. So we should bail out at the first error. Signed-off-by: Julien Grall julien.gr...@linaro.org Cc: Ian Jackson ian.jack...@eu.citrix.com Cc: Stefano Stabellini stefano.stabell...@eu.citrix.com Cc: Ian Campbell ian.campb...@citrix.com Acked-by: Wei Liu wei.l...@citrix.com Applied, thanks. Should this be backported? Yes. This should be backported. Wei. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] Backport request for tools/hotplug: set mtu from bridge for tap interface - for Xen 4.4, 4.5, and unstable
Add quotes around $bridge and $dev to handle spaces in names. This should go into 4.4, 4.5 and unstable. Signed-off-by: Charles Arnold carn...@suse.com diff --git a/tools/hotplug/Linux/vif-bridge b/tools/hotplug/Linux/vif-bridge index 3d72ca4..8b67b0a 100644 --- a/tools/hotplug/Linux/vif-bridge +++ b/tools/hotplug/Linux/vif-bridge @@ -77,7 +77,7 @@ fi case $command in online) setup_virtual_bridge_port $dev -set_mtu $bridge $dev +set_mtu $bridge $dev add_to_bridge $bridge $dev ;; @@ -88,7 +88,7 @@ case $command in add) setup_virtual_bridge_port $dev -set_mtu $bridge $dev +set_mtu $bridge $dev add_to_bridge $bridge $dev ;; esac ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [libvirt] [PATCH 10/12] Introduce support for parsing/formatting Xen xl config format
John Ferlan wrote: On 01/10/2015 12:03 AM, Jim Fehlig wrote: Introduce a parser/formatter for the xl config format. Since the deprecation of xm/xend, the VM config file format has diverged as new features are added to libxl. This patch adds support for parsing and formating the xl config format. It supports the existing xm config format, plus adds support for spice graphics and xl disk config syntax. Disk config is specified a bit differently in xl as compared to xm. In xl, disk config consists of comma-separated positional parameters and keyword/value pairs separated by commas. Positional parameters are specified as follows target, format, vdev, access Supported keys for key=value options are devtype, backend, backendtype, script, direct-io-safe, The positional paramters can also be specified in key/value form. For example the following xl disk config are equivalent /dev/vg/guest-volume,,hda /dev/vg/guest-volume,raw,hda,rw format=raw, vdev=hda, access=rw, target=/dev/vg/guest-volume See $xen_sources/docs/misc/xl-disk-configuration.txt for more details. xl disk config is parsed with the help of xlu_disk_parse() from libxlutil, libxl's utility library. Although the library exists in all Xen versions supported by the libxl virt driver, only recently has the corresponding header file been included. A check for the header is done in configure.ac. If not found, xlu_disk_parse() is declared externally. Signed-off-by: Kiarie Kahurani davidkiar...@gmail.com Signed-off-by: Jim Fehlig jfeh...@suse.com --- configure.ac | 3 + po/POTFILES.in | 1 + src/Makefile.am| 4 +- src/libvirt_xenconfig.syms | 4 + src/xenconfig/xen_common.c | 3 +- src/xenconfig/xen_xl.c | 492 + src/xenconfig/xen_xl.h | 33 +++ 7 files changed, 538 insertions(+), 2 deletions(-) The following is just from the Coverity check... I don't have all the build environments that have proved to be problematic over the last week or so... I assume all you've done is take the generated code and use that rather than going through the problems as a result of attempting to generate the code. In place of the generated code I'm using libxlutil from Xen. The initial series copied Xen's flex parser for parsing disk config strings, but thankfully Ian Campbell noticed this and suggested using Xen's libxlutil instead, which is provided specifically for this purpose. Thanks for the review. I'll address the issues you've noted in the next version. Regards, Jim ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH for-4.6] libxl, hotplug/Linux: default to phy backend for raw format file
On Thu, 2015-01-08 at 16:47 +, Stefano Stabellini wrote: On Thu, 8 Jan 2015, Ian Campbell wrote: On Thu, 2015-01-08 at 16:07 +, Wei Liu wrote: On Thu, Jan 08, 2015 at 02:07:42PM +, Ian Campbell wrote: On Wed, 2014-11-26 at 16:55 +, Wei Liu wrote: Modify libxl and hotplug script to allow raw format file to use phy backend. The block script now tests the path and determine the actual type of file (block device or regular file) then use the actual type to determine which branch to run. With these changes, plus the current ordering of backend preference (phy qdisk tap), we will use phy backend for raw format file by default. http://lists.xen.org/archives/html/xen-devel/2012-04/msg00077.html includes (in the quotes, Stefano's reply is about something else but has conveniently trimmed most of the other uninteresting stuff): use /dev/loop+blkback. This requires loop driver AIO and O_DIRECT patches which are not (AFAIK) yet upstream. and I have it in my mind that using /dev/loop+blkback is somehow unsafe, for reasons relating to crash consistency and the proper implementation (in /dev/loop, blkback is good I think) of barriers and such, e.g. relating to whether data is really on the platter or not when we've to the frontend that it is (which is critical for proper operation of journalling file systems). It's entirely possible that I'm either plain wrong or a decade out of date on this though. CC-ing Konrad in case he has any insights as blkback maintainer (I think, MAINTAINERS doesn't have a specific entry) Too bad, as far as I can tell AIO and O_DIRECT are still missing in loop device. I guess we will have to wait until those two things are upstreamed. I'm kinda hoping someone will tell me I'm wrong... Even without O_DIRECT, using /dev/loop+blkback is safe if the barriers in the block protocol are implemented correctly. Last time I look, they are, so I think we should be able to switch to it. Nobody has contradicted you in this, so I've applied the resend of this patch (from 1420724813-17920-1-git-send-email-wei.l...@citrix.com) ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] Backport request for tools/hotplug: set mtu from bridge for tap interface
Forgot to put [Patch] in the subject line for the last two replies :( - Charles ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2 4/4] dt-uart: support /chosen/stdout-path property.
Hi Ian, On 12/01/15 15:22, Ian Campbell wrote: On Thu, 2015-01-08 at 13:30 +, Julien Grall wrote: On 08/01/15 13:22, Ian Campbell wrote: On Thu, 2015-01-08 at 13:15 +, Julien Grall wrote: Hi Ian, On 08/01/15 11:53, Ian Campbell wrote: +ret = dt_property_read_string(chosen, stdout-path, stdout); +if ( ret = 0 ) +{ +printk(Taking dtuart configuration from /chosen/stdout-path\n); +if ( strlcpy(opt_dtuart, stdout, sizeof(opt_dtuart)) + = sizeof(opt_dtuart) ) +printk(WARNING: /chosen/stdout-path too long, truncated\n); I would add XENLOG_WARNING here and ... +} +else if ( ret != -EINVAL /* Not present */ ) +printk(Failed to read /chosen/stdout-path (%d)\n, ret); XENLOG_ERROR here. In practice these only go via the earlyprintk mechanism, since the console can't be setup yet. I'm not sure it's worthwhile tagging such messages. earlyprintk is transparent for the console code. Tagging may help if we decide to implement other kind of console later (VGA, PCI UART...). Anyway, I doesn't change much things here as the message is tagged as WARNING by default. So it will be always printing. It turns out that none of the existing prints in this function use the tags, and I think its of marginal use in this context so I don't think it is necessary to go changing them all, or to only use the tags for these two messages. Ok. I'm fine with it. Regards, -- Julien Grall ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v4 2/2] gnttab: refactor locking for scalability
On 09.01.15 at 16:12, cheg...@amazon.de wrote: @@ -188,6 +191,26 @@ nr_active_grant_frames(struct grant_table *gt) return num_act_frames_from_sha_frames(nr_grant_frames(gt)); } +static inline struct active_grant_entry * +active_entry_acquire(struct grant_table *t, grant_ref_t e) +{ +struct active_grant_entry *act; + +/* not perfect, but better than nothing for a debug build + * sanity check */ When coding style issues are being pointed out, please make sure you go through your patch series and address _all_ of them, even if not each and every violation was explicitly named. @@ -514,17 +513,22 @@ static int grant_map_exists(const struct domain *ld, nr_grant_entries(rgt)); for ( ref = *ref_count; ref max_iter; ref++ ) { -act = active_entry(rgt, ref); +struct active_grant_entry *act; +act = active_entry_acquire(rgt, ref); -if ( !act-pin ) -continue; +#define CONTINUE_IF(cond) \ +if ((cond)) { \ +active_entry_release(act); \ +continue; \ +} -if ( act-domid != ld-domain_id ) -continue; +CONTINUE_IF(!act-pin); +CONTINUE_IF(act-domid != ld-domain_id); +CONTINUE_IF(act-frame != mfn); I highly question this is in any way better than the v3 code. Is there a clear reason not to follow the advice of putting the active_entry_release(act) into the third of the for() expressions? And if you _really_ want/need it this way, please again get the coding style right (and drop the pointless redundant parentheses). @@ -545,16 +555,32 @@ static void mapcount( grant_handle_t handle; *wrc = *rdc = 0; +ASSERT(rw_is_locked(rd-grant_table-lock)); + +/* N.B.: while taking the left side maptrack spinlock prevents + * any mapping changes, the right side active entries could be + * changing while we are counting. To be perfectly correct, the + * caller should hold the grant table write lock, or some other + * mechanism should be used to prevent concurrent changes during + * this operation. This is tricky because we can't promote a read + * lock into a write lock. + */ +spin_lock(lgt-maptrack_lock); So you added the suggested ASSERT(), but you didn't adjust the comment (which actually should precede the ASSERT() imo) in any way. @@ -698,12 +723,9 @@ __gnttab_map_grant_ref( GNTPIN_hstr_inc : GNTPIN_hstw_inc; frame = act-frame; -act_pin = act-pin; cache_flags = (shah-flags (GTF_PAT | GTF_PWT | GTF_PCD) ); -read_unlock(rgt-lock); - /* pg may be set, with a refcount included, from __get_paged_frame */ if ( !pg ) { @@ -778,8 +800,6 @@ __gnttab_map_grant_ref( goto undo_out; } -double_maptrack_lock(lgt, rgt); Again an un-addressed comment from v3: Taking these two together - isn't patch 1 wrong then, in that it acquires both maptrack locks in double_gt_lock()? @@ -941,18 +960,19 @@ __gnttab_unmap_common( rgt = rd-grant_table; read_lock(rgt-lock); -double_maptrack_lock(lgt, rgt); + +read_lock(rgt-lock); Acquiring the same read lock twice in a row? @@ -3045,9 +3097,11 @@ static void gnttab_usage_print(struct domain *rd) uint16_t status; uint64_t frame; -act = active_entry(gt, ref); -if ( !act-pin ) +act = active_entry_acquire(gt, ref); +if ( !act-pin ) { Coding style again. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed
On Mon, 12 Jan 2015, Paolo Bonzini wrote: On 12/01/2015 14:35, Li, Liang Z wrote: diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..f2893b2 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d) } out: -memory_listener_register(s-memory_listener, address_space_memory); +memory_listener_register(s-memory_listener, + s-dev.bus_master_as); memory_listener_register(s-io_listener, address_space_io); XEN_PT_LOG(d, Real physical device %02x:%02x.%d registered successfully!\n, By further debugging, I found when using 'address_space_memory', 'xen_pt_region_del' won't be called when the memory region's name is not ' xen-pci-pt-*', when using ' s-dev.bus_master_as ', there is no such issue. I think use the device related address space here is more reasonable, but I am not sure. Could you give some suggestion? Yes, this patch makes sense. The listener will be called every time the command register is written. Paolo, thanks for reviewing! Liang, please resend with an appropriate patch description and sign-off-by line. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [libvirt] [PATCH 00/12] Replace Xen xl parsing/formatting impl
On 01/12/2015 08:06 AM, Ian Campbell wrote: On Fri, 2015-01-09 at 22:03 -0700, Jim Fehlig wrote: The first attempt to implement support for parsing/formatting Xen's xl disk config format copied Xen's flex-based parser into libvirt, which has proved to be challenging in the context of autotools. But as it turns out, Xen provides an interface to the parser via libxlutil. This series reverts the first attempt, along with subsequent attempts to fix it, and replaces it with an implementation based on libxlutil. The first nine patches revert the original implementation and subsequent fixes. Patch 10 provides an implemenation based on libxlutil. Patches 11 and 12 are basically unchanged from patches 3 and 4 in the first attempt. One upshot of using libxlutil instead of copying the flex source is removing the potential for source divergence. Thanks for doing this, looks good to me, FWIW. Is the presence/absence of xen-xl support exposed via virsh anywhere? If so then I can arrange for my Xen osstest patches for libvirt testing to use xen-xl when available but still fallback to xen-xm. I've had a look in virsh capabilities and virsh help domxml-from-native but not seeing xen-xm, so assuming xen-xl won't magically appear in any of those places either. I'm not sure if 'virsh capabilities' can show it, but it does sound like a nice place to enhance if possible. Also, if 'virsh --version=long' doesn't state whether libxl support was compiled in, it should be patched to do so; although that only shows what the client side supports (and not necessarily what the remote server side supports). -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel