date:20150112

The trivial wrapper evtchn_set_pending() is pretty pointless, as it
only serves to invoke another wrapper evtchn_port_set_pending(). In
turn, the latter is kind of inconsistent with its siblings in that is
takes a struct vcpu * rather than a struct domain * - adjusting this
allows for more efficient code in the majority of cases.

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -95,8 +95,6 @@ static uint8_t get_xen_consumer(xen_even
 /* Get the notification function for a given Xen-bound event channel. */
 #define xen_notification_fn(e) (xen_consumers[(e)-xen_consumer-1])
 
-static void evtchn_set_pending(struct vcpu *v, int port);
-
 static int virq_is_global(uint32_t virq)
 {
 int rc;
@@ -287,7 +285,7 @@ static long evtchn_bind_interdomain(evtc
  * We may have lost notifications on the remote unbound port. Fix that up
  * here by conservatively always setting a notification on the local port.
  */
-evtchn_set_pending(ld-vcpu[lchn-notify_vcpu_id], lport);
+evtchn_port_set_pending(ld, lchn-notify_vcpu_id, lchn);
 
 bind-local_port = lport;
 
@@ -599,11 +597,10 @@ static long evtchn_close(evtchn_close_t 
 return __evtchn_close(current-domain, close-port);
 }
 
-int evtchn_send(struct domain *d, unsigned int lport)
+int evtchn_send(struct domain *ld, unsigned int lport)
 {
 struct evtchn *lchn, *rchn;
-struct domain *ld = d, *rd;
-struct vcpu   *rvcpu;
+struct domain *rd;
 intrport, ret = 0;
 
 spin_lock(ld-event_lock);
@@ -633,14 +630,13 @@ int evtchn_send(struct domain *d, unsign
 rd= lchn-u.interdomain.remote_dom;
 rport = lchn-u.interdomain.remote_port;
 rchn  = evtchn_from_port(rd, rport);
-rvcpu = rd-vcpu[rchn-notify_vcpu_id];
 if ( consumer_is_xen(rchn) )
-(*xen_notification_fn(rchn))(rvcpu, rport);
+xen_notification_fn(rchn)(rd-vcpu[rchn-notify_vcpu_id], rport);
 else
-evtchn_set_pending(rvcpu, rport);
+evtchn_port_set_pending(rd, rchn-notify_vcpu_id, rchn);
 break;
 case ECS_IPI:
-evtchn_set_pending(ld-vcpu[lchn-notify_vcpu_id], lport);
+evtchn_port_set_pending(ld, lchn-notify_vcpu_id, lchn);
 break;
 case ECS_UNBOUND:
 /* silently drop the notification */
@@ -655,11 +651,6 @@ out:
 return ret;
 }
 
-static void evtchn_set_pending(struct vcpu *v, int port)
-{
-evtchn_port_set_pending(v, evtchn_from_port(v-domain, port));
-}
-
 int guest_enabled_event(struct vcpu *v, uint32_t virq)
 {
 return ((v != NULL)  (v-virq_to_evtchn[virq] != 0));
@@ -669,6 +660,7 @@ void send_guest_vcpu_virq(struct vcpu *v
 {
 unsigned long flags;
 int port;
+struct domain *d;
 
 ASSERT(!virq_is_global(virq));
 
@@ -678,7 +670,8 @@ void send_guest_vcpu_virq(struct vcpu *v
 if ( unlikely(port == 0) )
 goto out;
 
-evtchn_set_pending(v, port);
+d = v-domain;
+evtchn_port_set_pending(d, v-vcpu_id, evtchn_from_port(d, port));
 
  out:
 spin_unlock_irqrestore(v-virq_lock, flags);
@@ -707,7 +700,7 @@ static void send_guest_global_virq(struc
 goto out;
 
 chn = evtchn_from_port(d, port);
-evtchn_set_pending(d-vcpu[chn-notify_vcpu_id], port);
+evtchn_port_set_pending(d, chn-notify_vcpu_id, chn);
 
  out:
 spin_unlock_irqrestore(v-virq_lock, flags);
@@ -731,7 +724,7 @@ void send_guest_pirq(struct domain *d, c
 }
 
 chn = evtchn_from_port(d, port);
-evtchn_set_pending(d-vcpu[chn-notify_vcpu_id], port);
+evtchn_port_set_pending(d, chn-notify_vcpu_id, chn);
 }
 
 static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly;
@@ -1202,7 +1195,6 @@ void notify_via_xen_event_channel(struct
 {
 struct evtchn *lchn, *rchn;
 struct domain *rd;
-intrport;
 
 spin_lock(ld-event_lock);
 
@@ -1219,9 +1211,8 @@ void notify_via_xen_event_channel(struct
 if ( likely(lchn-state == ECS_INTERDOMAIN) )
 {
 rd= lchn-u.interdomain.remote_dom;
-rport = lchn-u.interdomain.remote_port;
-rchn  = evtchn_from_port(rd, rport);
-evtchn_set_pending(rd-vcpu[rchn-notify_vcpu_id], rport);
+rchn  = evtchn_from_port(rd, lchn-u.interdomain.remote_port);
+evtchn_port_set_pending(rd, rchn-notify_vcpu_id, rchn);
 }
 
 spin_unlock(ld-event_lock);
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru
 d-evtchn_port_ops-init(d, evtchn);
 }
 
-static inline void evtchn_port_set_pending(struct vcpu *v,
+static inline void evtchn_port_set_pending(struct domain *d,
+   unsigned int vcpu_id,
struct evtchn *evtchn)
 {
-v-domain-evtchn_port_ops-set_pending(v, evtchn);
+d-evtchn_port_ops-set_pending(d-vcpu[vcpu_id], evtchn);
 }
 
 static inline

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 On 12.01.15 at 10:41, kevin.t...@intel.com wrote:
  From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Monday, January 12, 2015 5:33 PM
  On 12.01.15 at 09:46, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Friday, January 09, 2015 6:35 PM
   On 09.01.15 at 11:10, kevin.t...@intel.com wrote:
From: Jan Beulich [mailto:jbeul...@suse.com]
   The question isn't about migrating with devices assigned, but about
   assigning devices after migration (consider a dual vif + SR-IOV NIC
   guest setup where the SR-IOV NIC gets hot-removed before
   migration and a new one hot-plugged afterwards).

   Furthermore any tying of the guest memory layout to the host's
   where the guest first boots is awkward, as post-migration there's
   not going to be any reliable correlation between the guest layout
   and the new host's.

   how can you solve this? like above example, a NIC on node-A leaves
   a reserved region in guest e820. now it's hot-removed and then
   migrated to node-b. there's no way to update e820 again since it's
   only boot structure. then user will still see such awkward regions.
   since it's not avoidable, report-all in the summary mail looks not
   causing a new problem.

  The solution to this are reserved regions specified in the guest config,
  independent of host characteristics.

  I don't think how reserved regions are specified matter here. My point
  is that when a region is reserved in e820 at boot time, there's no way
  to erase that knowledge in the guest even when devices causing that
  reservation are hot removed later.

 I don't think anyone ever indicated that such erasure would be
 needed/wanted - I'm not sure how you ended up there.

 I ended here to indicate that report-all which gives user more reserved
 regions than necessary is not a weird case since above scenario can also
 create such fact. User shouldn't set expectation about reserved region
 layout. and this argument is necessary to support our proposal of using
 report-all. :-)

The fact that ranges can't be removed from a guest's memory map
is irrelevant - there's simply no question that this is that way. The
main counter argument against report-all remains: It may result in
unnecessarily little low memory in guests not needing all of the host
regions to be reserved for them.

Jan

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH] arm64/EFI: minor corrections

- don't bail when using the last slot of bootinfo.mem.bank[] (due to
  premature incrementing of the array index)
- GUIDs should be static const (and placed into .init.* whenever
  possible)
- PrintErrMsg() issues a CR/LF pair itself - no need to explicitly
  append one to the message passed to the function

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/arch/arm/efi/efi-boot.h
+++ b/xen/arch/arm/efi/efi-boot.h
@@ -104,7 +104,7 @@ static int __init fdt_set_reg(void *fdt,
 
 static void __init *lookup_fdt_config_table(EFI_SYSTEM_TABLE *sys_table)
 {
-const EFI_GUID fdt_guid = DEVICE_TREE_GUID;
+static const EFI_GUID __initconst fdt_guid = DEVICE_TREE_GUID;
 EFI_CONFIGURATION_TABLE *tables;
 void *fdt = NULL;
 int i;
@@ -135,15 +135,15 @@ static EFI_STATUS __init efi_process_mem
  || desc_ptr-Type == EfiBootServicesCode
  || desc_ptr-Type == EfiBootServicesData )
 {
-bootinfo.mem.bank[i].start = desc_ptr-PhysicalStart;
-bootinfo.mem.bank[i].size = desc_ptr-NumberOfPages * 
EFI_PAGE_SIZE;
-if ( ++i = NR_MEM_BANKS )
+if ( i = NR_MEM_BANKS )
 {
-PrintStr(LWarning: All );
-DisplayUint(NR_MEM_BANKS, -1);
-PrintStr(L bootinfo mem banks exhausted.\r\n);
+PrintStr(LWarning: All  __stringify(NR_MEM_BANKS)
+   bootinfo mem banks exhausted.\r\n);
 break;
 }
+bootinfo.mem.bank[i].start = desc_ptr-PhysicalStart;
+bootinfo.mem.bank[i].size = desc_ptr-NumberOfPages * 
EFI_PAGE_SIZE;
+++i;
 }
 desc_ptr = NextMemoryDescriptor(desc_ptr, desc_size);
 }
@@ -334,7 +334,7 @@ static void __init efi_arch_process_memo
 status = fdt_add_uefi_nodes(SystemTable, fdt, map, map_size, desc_size,
 desc_ver);
 if ( EFI_ERROR(status) )
-PrintErrMesg(LUpdating FDT failed\r\n, status);
+PrintErrMesg(LUpdating FDT failed, status);
 }
 
 static void __init efi_arch_pre_exit_boot(void)
@@ -408,7 +408,7 @@ static void __init efi_arch_handle_cmdli
 
 status = efi_bs-AllocatePool(EfiBootServicesData, EFI_PAGE_SIZE, (void 
**)buf);
 if ( EFI_ERROR(status) )
-PrintErrMesg(LUnable to allocate string buffer\r\n, status);
+PrintErrMesg(LUnable to allocate string buffer, status);
 
 if ( image_name )
 {



arm64/EFI: minor corrections

- don't bail when using the last slot of bootinfo.mem.bank[] (due to
  premature incrementing of the array index)
- GUIDs should be static const (and placed into .init.* whenever
  possible)
- PrintErrMsg() issues a CR/LF pair itself - no need to explicitly
  append one to the message passed to the function

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/arch/arm/efi/efi-boot.h
+++ b/xen/arch/arm/efi/efi-boot.h
@@ -104,7 +104,7 @@ static int __init fdt_set_reg(void *fdt,
 
 static void __init *lookup_fdt_config_table(EFI_SYSTEM_TABLE *sys_table)
 {
-const EFI_GUID fdt_guid = DEVICE_TREE_GUID;
+static const EFI_GUID __initconst fdt_guid = DEVICE_TREE_GUID;
 EFI_CONFIGURATION_TABLE *tables;
 void *fdt = NULL;
 int i;
@@ -135,15 +135,15 @@ static EFI_STATUS __init efi_process_mem
  || desc_ptr-Type == EfiBootServicesCode
  || desc_ptr-Type == EfiBootServicesData )
 {
-bootinfo.mem.bank[i].start = desc_ptr-PhysicalStart;
-bootinfo.mem.bank[i].size = desc_ptr-NumberOfPages * 
EFI_PAGE_SIZE;
-if ( ++i = NR_MEM_BANKS )
+if ( i = NR_MEM_BANKS )
 {
-PrintStr(LWarning: All );
-DisplayUint(NR_MEM_BANKS, -1);
-PrintStr(L bootinfo mem banks exhausted.\r\n);
+PrintStr(LWarning: All  __stringify(NR_MEM_BANKS)
+   bootinfo mem banks exhausted.\r\n);
 break;
 }
+bootinfo.mem.bank[i].start = desc_ptr-PhysicalStart;
+bootinfo.mem.bank[i].size = desc_ptr-NumberOfPages * 
EFI_PAGE_SIZE;
+++i;
 }
 desc_ptr = NextMemoryDescriptor(desc_ptr, desc_size);
 }
@@ -334,7 +334,7 @@ static void __init efi_arch_process_memo
 status = fdt_add_uefi_nodes(SystemTable, fdt, map, map_size, desc_size,
 desc_ver);
 if ( EFI_ERROR(status) )
-PrintErrMesg(LUpdating FDT failed\r\n, status);
+PrintErrMesg(LUpdating FDT failed, status);
 }
 
 static void __init efi_arch_pre_exit_boot(void)
@@ -408,7 +408,7 @@ static void __init efi_arch_handle_cmdli
 
 status = efi_bs-AllocatePool(EfiBootServicesData, EFI_PAGE_SIZE, (void 
**)buf);
 if ( EFI_ERROR(status) )
-PrintErrMesg(LUnable to allocate string buffer\r\n, status);
+PrintErrMesg(LUnable to allocate string buffer, status);
 
 if ( image_name )
 {

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 On 12.01.15 at 09:46, kevin.t...@intel.com wrote:
  From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Friday, January 09, 2015 6:35 PM
  On 09.01.15 at 11:10, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
  The question isn't about migrating with devices assigned, but about
  assigning devices after migration (consider a dual vif + SR-IOV NIC
  guest setup where the SR-IOV NIC gets hot-removed before
  migration and a new one hot-plugged afterwards).

  Furthermore any tying of the guest memory layout to the host's
  where the guest first boots is awkward, as post-migration there's
  not going to be any reliable correlation between the guest layout
  and the new host's.

  how can you solve this? like above example, a NIC on node-A leaves
  a reserved region in guest e820. now it's hot-removed and then
  migrated to node-b. there's no way to update e820 again since it's
  only boot structure. then user will still see such awkward regions.
  since it's not avoidable, report-all in the summary mail looks not
  causing a new problem.

 The solution to this are reserved regions specified in the guest config,
 independent of host characteristics.

 I don't think how reserved regions are specified matter here. My point
 is that when a region is reserved in e820 at boot time, there's no way
 to erase that knowledge in the guest even when devices causing that
 reservation are hot removed later.

I don't think anyone ever indicated that such erasure would be
needed/wanted - I'm not sure how you ended up there.

Jan

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:
 In the meantime, I saw this:
 http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
 Based on the post above seems that phy will have important risk of data 
 loss if I understand good, from Ian Campbell post:
  xl also uses qdisk for raw disk images instead of loop+blkback which
  xend used, because there are concerns that the loop driver can lead to
  data loss (by not implementing direct i/o the underlying device, and/or
  not handling flushes correct, my memory is a bit fuzzy).

Stefano already corrected me on this in this very thread.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH] x86emul: tighten CLFLUSH emulation

While for us it's not as bad as it was for Linux, their commit
13e457e0ee (KVM: x86: Emulator does not decode clflush well, by
Nadav Amit na...@cs.technion.ac.il) nevertheless points out two
shortcomings in our code: opcode 0F AE /7 is clflush only when it uses
a memory mode (otherwise it's SFENCE) and when there's no REP prefix
(an operand size prefix is fine, as that's CLFLUSHOPT).

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4400,7 +4400,9 @@ x86_emulate(
 case 0xae: /* Grp15 */
 switch ( modrm_reg  7 )
 {
-case 7: /* clflush */
+case 7: /* clflush{,opt} */
+fail_if(modrm_mod == 3);
+fail_if(rep_prefix());
 fail_if(ops-wbinvd == NULL);
 if ( (rc = ops-wbinvd(ctxt)) != 0 )
 goto done;



x86emul: tighten CLFLUSH emulation

While for us it's not as bad as it was for Linux, their commit
13e457e0ee (KVM: x86: Emulator does not decode clflush well, by
Nadav Amit na...@cs.technion.ac.il) nevertheless points out two
shortcomings in our code: opcode 0F AE /7 is clflush only when it uses
a memory mode (otherwise it's SFENCE) and when there's no REP prefix
(an operand size prefix is fine, as that's CLFLUSHOPT).

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4400,7 +4400,9 @@ x86_emulate(
 case 0xae: /* Grp15 */
 switch ( modrm_reg  7 )
 {
-case 7: /* clflush */
+case 7: /* clflush{,opt} */
+fail_if(modrm_mod == 3);
+fail_if(rep_prefix());
 fail_if(ops-wbinvd == NULL);
 if ( (rc = ops-wbinvd(ctxt)) != 0 )
 goto done;
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH] x86/MCE: allow overriding the CMCI threshold

We've had reports of systems where CMCIs would surface at a relatively
high rate during certain periods of time, without them apparently
causing subsequent more severe problems (see Xeon E7-8800/4800/2800
specification clarification SC1). Give the admin a knob to lower the
impact on the system logs.

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -242,6 +242,14 @@ the NMI watchdog is also enabled.
 
 If set, override Xen's default choice for the platform timer.
 
+### cmci-threshold
+ `= integer`
+
+ Default: `2`
+
+Specify the event count threshold for raising Corrected Machine Check
+Interrupts.  Specifying zero disables CMCI handling.
+
 ### cmos-rtc-probe
  `= boolean`
 
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -492,6 +492,9 @@ static int do_cmci_discover(int i)
 {
 unsigned msr = MSR_IA32_MCx_CTL2(i);
 u64 val;
+unsigned int threshold, max_threshold;
+static unsigned int cmci_threshold = 2;
+integer_param(cmci-threshold, cmci_threshold);
 
 rdmsrl(msr, val);
 /* Some other CPU already owns this bank. */
@@ -500,15 +503,28 @@ static int do_cmci_discover(int i)
 goto out;
 }
 
-val = ~CMCI_THRESHOLD_MASK;
-wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
-rdmsrl(msr, val);
+if ( cmci_threshold )
+{
+wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK);
+rdmsrl(msr, val);
+}
 
 if (!(val  CMCI_EN)) {
 /* This bank does not support CMCI. Polling timer has to handle it. */
 mcabanks_set(i, __get_cpu_var(no_cmci_banks));
+wrmsrl(msr, val  ~CMCI_THRESHOLD_MASK);
 return 0;
 }
+max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK);
+threshold = cmci_threshold;
+if ( threshold  max_threshold )
+{
+   mce_printk(MCE_QUIET,
+  CMCI: threshold %#x too large for CPU%u bank %u, using 
%#x\n,
+  threshold, smp_processor_id(), i, max_threshold);
+   threshold = max_threshold;
+}
+wrmsrl(msr, (val  ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold);
 mcabanks_set(i, __get_cpu_var(mce_banks_owned));
 out:
 mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
@@ -86,9 +86,6 @@
 /* Bitfield of MSR_K8_HWCR register */
 #define K8_HWCR_MCi_STATUS_WREN(1ULL  18)
 
-/*Intel Specific bitfield*/
-#define CMCI_THRESHOLD 0x2
-
 #define MCi_MISC_ADDRMOD_MASK (0x7UL  6)
 #define MCi_MISC_PHYSMOD(0x2UL  6)
 



x86/MCE: allow overriding the CMCI threshold

We've had reports of systems where CMCIs would surface at a relatively
high rate during certain periods of time, without them apparently
causing subsequent more severe problems (see Xeon E7-8800/4800/2800
specification clarification SC1). Give the admin a knob to lower the
impact on the system logs.

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -242,6 +242,14 @@ the NMI watchdog is also enabled.
 
 If set, override Xen's default choice for the platform timer.
 
+### cmci-threshold
+ `= integer`
+
+ Default: `2`
+
+Specify the event count threshold for raising Corrected Machine Check
+Interrupts.  Specifying zero disables CMCI handling.
+
 ### cmos-rtc-probe
  `= boolean`
 
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -492,6 +492,9 @@ static int do_cmci_discover(int i)
 {
 unsigned msr = MSR_IA32_MCx_CTL2(i);
 u64 val;
+unsigned int threshold, max_threshold;
+static unsigned int cmci_threshold = 2;
+integer_param(cmci-threshold, cmci_threshold);
 
 rdmsrl(msr, val);
 /* Some other CPU already owns this bank. */
@@ -500,15 +503,28 @@ static int do_cmci_discover(int i)
 goto out;
 }
 
-val = ~CMCI_THRESHOLD_MASK;
-wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
-rdmsrl(msr, val);
+if ( cmci_threshold )
+{
+wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK);
+rdmsrl(msr, val);
+}
 
 if (!(val  CMCI_EN)) {
 /* This bank does not support CMCI. Polling timer has to handle it. */
 mcabanks_set(i, __get_cpu_var(no_cmci_banks));
+wrmsrl(msr, val  ~CMCI_THRESHOLD_MASK);
 return 0;
 }
+max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK);
+threshold = cmci_threshold;
+if ( threshold  max_threshold )
+{
+   mce_printk(MCE_QUIET,
+  CMCI: threshold %#x too large for CPU%u bank %u, using 
%#x\n,
+  threshold, smp_processor_id(), i, max_threshold);
+   threshold = max_threshold;
+}
+wrmsrl(msr, (val  ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold);
 mcabanks_set(i, __get_cpu_var(mce_banks_owned));
 out:
 mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
---

Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

2015-01-12 Thread Fabio Fantoni


Il 09/01/2015 18:38, Dario Faggioli ha scritto:

On Fri, 2015-01-09 at 14:42 +, Wei Liu wrote:

On Thu, Jan 08, 2015 at 03:33:17PM +0100, Fabio Fantoni wrote:

Sorry for the probably stupid question, what are the pros and cons of
default use of phy instead qdisk for raw files as domU disk?


There's no stupid question. :-)

I was told that it performs better and enables other potential
improvements.


Not a big deal, probably, but IMO it would be nice to have at least a
few words about when it's happening in the changelog, wouldn't it?


I'm also interested in detailed changelog about.

In the meantime, I saw this:
http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
Based on the post above seems that phy will have important risk of data 
loss if I understand good, from Ian Campbell post:

xl also uses qdisk for raw disk images instead of loop+blkback which
xend used, because there are concerns that the loop driver can lead to
data loss (by not implementing direct i/o the underlying device, and/or
not handling flushes correct, my memory is a bit fuzzy).


Thanks for any reply and sorry for my bad english.




Regards,
Dario




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH] common/memory: fix an XSM error path

XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap
return the extent at which failure was detected, not error indicators.

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN
 return start_extent;
 args.domain = d;
 
-rc = xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d);
-if ( rc )
+if ( xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d) )
 {
 rcu_unlock_domain(d);
-return rc;
+return start_extent;
 }
 
 switch ( op )



common/memory: fix an XSM error path

XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap
return the extent at which failure was detected, not error indicators.

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN
 return start_extent;
 args.domain = d;
 
-rc = xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d);
-if ( rc )
+if ( xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d) )
 {
 rcu_unlock_domain(d);
-return rc;
+return start_extent;
 }
 
 switch ( op )
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Monday, January 12, 2015 5:33 PM

  On 12.01.15 at 09:46, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Friday, January 09, 2015 6:35 PM
   On 09.01.15 at 11:10, kevin.t...@intel.com wrote:
From: Jan Beulich [mailto:jbeul...@suse.com]
   The question isn't about migrating with devices assigned, but about
   assigning devices after migration (consider a dual vif + SR-IOV NIC
   guest setup where the SR-IOV NIC gets hot-removed before
   migration and a new one hot-plugged afterwards).

   Furthermore any tying of the guest memory layout to the host's
   where the guest first boots is awkward, as post-migration there's
   not going to be any reliable correlation between the guest layout
   and the new host's.

   how can you solve this? like above example, a NIC on node-A leaves
   a reserved region in guest e820. now it's hot-removed and then
   migrated to node-b. there's no way to update e820 again since it's
   only boot structure. then user will still see such awkward regions.
   since it's not avoidable, report-all in the summary mail looks not
   causing a new problem.

  The solution to this are reserved regions specified in the guest config,
  independent of host characteristics.

  I don't think how reserved regions are specified matter here. My point
  is that when a region is reserved in e820 at boot time, there's no way
  to erase that knowledge in the guest even when devices causing that
  reservation are hot removed later.

 I don't think anyone ever indicated that such erasure would be
 needed/wanted - I'm not sure how you ended up there.

I ended here to indicate that report-all which gives user more reserved
regions than necessary is not a weird case since above scenario can also
create such fact. User shouldn't set expectation about reserved region
layout. and this argument is necessary to support our proposal of using
report-all. :-)

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] summary-1 (v2) Design proposal for RMRR fix

 From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Friday, January 09, 2015 6:46 PM

  On 09.01.15 at 11:26, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
   On 09.01.15 at 07:57, kevin.t...@intel.com wrote:
   1.1) per-device 'warn' vs. global 'warn'

   Both Tim/Jan prefer to 'warn' as a per-device option to the admin instead
   of a global option.

   In a glimpse a per-device 'warn' option provides more fine-grained
 control
   than a global option, however if thinking it carefully allowing one 
   device
   w/
   potential problem isn't more correct or secure than allowing multiple
   devices w/ potential problem. Even in practice a device like USB can
   work bearing 1MB confliction, like Jan pointed out there's always corner
   cases which we might not know so as long as we open door for one
 device,
   it implies a problematic environment to users and user's judge on
 whether
   he can live up to this problem is not impacted by how many devices the
 door
   is opened for (he anyway needs to study warning message and do
  verification
   if choosing to live up)

   Regarding to that, imo if we agree to provide 'warn' option, just 
   providing
   a global overriding option (definitely per-vm) is acceptable and simpler.

  If the admin determined that ignoring the RMRR requirements for one
  devices is safe, that doesn't (and shouldn't) mean this is the case for
  all other devices too.

  I don't think admin can determine whether it's 100% safe. What admin can
  decide is whether he lives up to the potential problem based on his purpose
  or based on some experiments. only device vendor knows when and how
  RMRR is used. So as long as warn is opened for one device, I think it
  already means a problem environment and then adding more device is
  just same situation.

 What if the admin consulted the device and BIOS vendors, and got
 assured there's not going to be any accesses to the reserved regions
 post-boot?

consultancy could be still inaccurate, or man-error may happen. 

   1.2) when to 'fail'

   There is one open whether we should fail immediately in domain builder
   if a confliction is detected.

   Jan's comment is yes, we should 'fail' the VM creation as it's an error.

   My previous point is more mimicking native behavior, where a device
   failure (in our case it's actually potential device failure since VM is 
   not
   powered yet) doesn't impact user until its function is actually touched.
   In our case, even domain builder fails to re-arrange guest RAM to skip
   reserved regions, we have centralized policy (either 'fail' or 'warn' per
   above conclusion) in Xen hypervisor when the device is actually assigned.
   so a 'warn' should be fine, but my insist on this is not strong.

  See my earlier reply: Failure to add a device to me is more like a
  device preventing a bare metal system from coming up altogether.

  not all devices are required for bare metal to boot. it causes problem
  only when it's being used in the boot process. say at powering up the
  disk (insert in the PCI slot) is broken (not sure whether you call such
  thing as 'failure to add a device'), it is only error when BIOS tries to
  read disk.

 Not necessarily. Any malfunctioning device touched by the BIOS,
 irrespective of whether the device is needed for booting, can cause
 the boot process to hang. Again, the analogy to bare metal is
 device presence, not whether the device is functioning properly.

  note device assignment path is the actual path to decide whether a
  device will be present to the guest. not at this domain build time.

 That would only make a marginal difference in time of when domain
 creation fails.

it's not marginal difference. instead it's about who owns the policy.

to me, detect/avoid conflictions in domain builder is just a preparation
for later device assignment (either deterministic static assignment or
non-deterministic hotplug). As a preparation, we don't need to make
a failure here as a blocker to prevent guest boot. Instead, leave the
decision to where device assignment actually happens then hard
requirement is made on any conflictions a.t.m. Then we just follow
the existing policy of device assignment (either block guest boot, or 
move forward w/o presenting the device), if confliction is treat as
a failure by default (w/o 'warn' override)

   and another point is about hotplug. 'fail' for future devices is too
  strict,
   but to differentiate that from static-assigned devices, domain builder
   will then need maintain a per-device reserved region structure. just
   'warn' makes things simple.

  Whereas here I agree - hotplug should just fail (without otherwise
  impacting the guest).

  so 'should' - 'shoundn't'?

 No. Perhaps what you imply from fail is different from my reading:
 I mean this to be the result of the hotplug operation - the device
 would just not appear in the guest. The guest isn't to be

Re: [Xen-devel] preparing for 4.4.2 / 4.3.4

2015-01-12 Thread Olaf Hering

On Thu, Jan 08, Jan Beulich wrote:

 now that 4.5 is almost out the door, I'd like to get stable releases
 prepared on the other two active branches. 4.3.4 is expected to be
 the last xen.org managed release on the 4.3 branch. Aiming at RC1s
 some time next week, please indicate commits you consider relevant
 to backport but find missing from the respective branches.

e86539a388314cd3dca88f5e69d7873343197cd8 (libxc: check return values on mmap() 
and madvise() on xc_alloc_hypercall_buffer())

Olaf

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Friday, January 09, 2015 6:35 PM

  On 09.01.15 at 11:10, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
  Boot time device assignment is different: The question isn't whether
  an assigned device works, instead the proper analogy is whether a
  device is _present_. If a device doesn't work on bare metal, it will
  still be discoverable. Yet if device assignment fails, that's not going
  to be the case - for security reasons, the guest would not see any
  notion of the device.

  the question is whether we want such device assignment fail due to
  RMRR confliction, and the fail decision should be when Xen handles
  actual assignment instead of when domain builder prepares reserved
  regions.

 Detecting the failure only in the hypervisor has the downside of
 potentially leaving the user with little clues as to what went wrong.
 Sending messages to the hypervisor log in that case is
 questionable, yet the tool stack (namely libxc) is known to not
 always do a good job in error propagation.

  The question isn't about migrating with devices assigned, but about
  assigning devices after migration (consider a dual vif + SR-IOV NIC
  guest setup where the SR-IOV NIC gets hot-removed before
  migration and a new one hot-plugged afterwards).

  Furthermore any tying of the guest memory layout to the host's
  where the guest first boots is awkward, as post-migration there's
  not going to be any reliable correlation between the guest layout
  and the new host's.

  how can you solve this? like above example, a NIC on node-A leaves
  a reserved region in guest e820. now it's hot-removed and then
  migrated to node-b. there's no way to update e820 again since it's
  only boot structure. then user will still see such awkward regions.
  since it's not avoidable, report-all in the summary mail looks not
  causing a new problem.

 The solution to this are reserved regions specified in the guest config,
 independent of host characteristics.

I don't think how reserved regions are specified matter here. My point
is that when a region is reserved in e820 at boot time, there's no way
to erase that knowledge in the guest even when devices causing that
reservation are hot removed later.

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [linux-3.14 test] 33341: tolerable FAIL - PUSHED

flight 33341 linux-3.14 real [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/33341/

Failures :-/ but no regressions.

Regressions which are regarded as allowable (not blocking):
 test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail like 32448

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-armhf-armhf-libvirt  9 guest-start  fail   never pass
 test-amd64-i386-libvirt   9 guest-start  fail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-armhf-armhf-xl  10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pcipt-intel  9 guest-start fail never pass
 test-amd64-amd64-libvirt  9 guest-start  fail   never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass

version targeted for testing:
 linuxc3b70f0bbb6a883f4afa639286043d3f71fbbddf
baseline version:
 linux83a926f7a4e39fb6be0576024e67fe161593defa


People who touched revisions under test:
  Eric W. Biederman ebied...@xmission.com
  Andreas MÃ¼ller g...@stapelspeicher.org
  Andrew Lunn and...@lunn.ch
  Andrew Morton a...@linux-foundation.org
  Andy Lutomirski l...@amacapital.net
  Baruch Siach bar...@tkos.co.il
  Catalin Marinas catalin.mari...@arm.com
  Cedric Bosdonnat cbosdon...@suse.com
  Chris Mason c...@fb.com
  Christoph Hellwig h...@lst.de
  Dan Carpenter dan.carpen...@oracle.com
  Darrick J. Wong darrick.w...@oracle.com
  Dmitry Eremin-Solenikov dbarysh...@gmail.com
  Dmitry Osipenko dig...@gmail.com
  Eric W. Biederman ebied...@xmission.com
  Filipe Manana fdman...@suse.com
  Greg Kroah-Hartman gre...@linuxfoundation.org
  H. Peter Anvin h...@zytor.com
  Hannes Reinecke h...@suse.de
  Herbert Xu herb...@gondor.apana.org.au
  Ingo Molnar mi...@kernel.org
  Jaehoon Chung jh80.ch...@samsung.com
  James Hogan james.ho...@imgtec.com
  Jan Kara j...@suse.cz
  Jason Cooper ja...@lakedaemon.net
  Joe Thornber e...@redhat.com
  Johannes Berg johannes.b...@intel.com
  Josef Bacik jba...@fb.com
  Kashyap Desai kashyap.de...@avagotech.com
  Lee Jones lee.jo...@linaro.org
  Linus Torvalds torva...@linux-foundation.org
  Luis Henriques luis.henriq...@canonical.com
  Michael Halcrow mhalc...@google.com
  Mike Snitzer snit...@redhat.com
  Mikulas Patocka mpato...@redhat.com
  Milan Broz gmazyl...@gmail.com
  Mimi Zohar zo...@linux.vnet.ibm.com
  NeilBrown ne...@suse.de
  Oleg Nesterov o...@redhat.com
  Paolo Bonzini pbonz...@redhat.com
  Paul Moore pmo...@redhat.com
  Peng Tao tao.p...@primarydata.com
  Peter Guo peter@bayhubtech.com
  Rabin Vincent rabin.vinc...@axis.com
  Richard Guy Briggs r...@redhat.com
  Richard Weinberger rich...@nod.at
  Sumit Saxena sumit.sax...@avagotech.com
  sumit.sax...@avagotech.com sumit.sax...@avagotech.com
  Takashi Iwai ti...@suse.de
  Thierry Reding tred...@nvidia.com
  Thomas Gleixner t...@linutronix.de
  Trond Myklebust trond.mykleb...@primarydata.com
  Tyler Hicks tyhi...@canonical.com
  Ulf Hansson ulf.hans...@linaro.org
  Uwe Kleine-KÃ¶nig u.kleine-koe...@pengutronix.de
  Will Deacon will.dea...@arm.com
  Zhang Rui rui.zh...@intel.com


jobs:
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvops

Re: [Xen-devel] preparing for 4.4.2 / 4.3.4

 On 12.01.15 at 09:47, o...@aepfle.de wrote:
 On Thu, Jan 08, Jan Beulich wrote:
 
 now that 4.5 is almost out the door, I'd like to get stable releases
 prepared on the other two active branches. 4.3.4 is expected to be
 the last xen.org managed release on the 4.3 branch. Aiming at RC1s
 some time next week, please indicate commits you consider relevant
 to backport but find missing from the respective branches.
 
 e86539a388314cd3dca88f5e69d7873343197cd8 (libxc: check return values on 
 mmap() and madvise() on xc_alloc_hypercall_buffer())

Tool stack backport requests should be Cc-ed to the respective
maintainer (now added).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu

The resource behind an event channel is domain centric rather than vcpu
centric, and free_xen_event_channel() only follows the vcpu's domain pointer.

This change allows mem_event_disable() to avoid arbitrarily referencing
d-vcpu[0] just to pass the domain.

No functional change.

Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
CC: Keir Fraser k...@xen.org
CC: Jan Beulich jbeul...@suse.com
---
 xen/arch/x86/hvm/hvm.c |   12 ++--
 xen/common/event_channel.c |4 +---
 xen/common/mem_event.c |2 +-
 xen/include/xen/event.h|3 +--
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 8b06bfd..acfc032 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -647,7 +647,7 @@ static int hvm_ioreq_server_add_vcpu(struct 
hvm_ioreq_server *s,
 return 0;
 
  fail3:
-free_xen_event_channel(v, sv-ioreq_evtchn);
+free_xen_event_channel(v-domain, sv-ioreq_evtchn);
 
  fail2:
 spin_unlock(s-lock);
@@ -674,9 +674,9 @@ static void hvm_ioreq_server_remove_vcpu(struct 
hvm_ioreq_server *s,
 list_del(sv-list_entry);
 
 if ( v-vcpu_id == 0  s-bufioreq.va != NULL )
-free_xen_event_channel(v, s-bufioreq_evtchn);
+free_xen_event_channel(v-domain, s-bufioreq_evtchn);
 
-free_xen_event_channel(v, sv-ioreq_evtchn);
+free_xen_event_channel(v-domain, sv-ioreq_evtchn);
 
 xfree(sv);
 break;
@@ -701,9 +701,9 @@ static void hvm_ioreq_server_remove_all_vcpus(struct 
hvm_ioreq_server *s)
 list_del(sv-list_entry);
 
 if ( v-vcpu_id == 0  s-bufioreq.va != NULL )
-free_xen_event_channel(v, s-bufioreq_evtchn);
+free_xen_event_channel(v-domain, s-bufioreq_evtchn);
 
-free_xen_event_channel(v, sv-ioreq_evtchn);
+free_xen_event_channel(v-domain, sv-ioreq_evtchn);
 
 xfree(sv);
 }
@@ -1333,7 +1333,7 @@ static int hvm_replace_event_channel(struct vcpu *v, 
domid_t remote_domid,
 
 /* xchg() ensures that only we call free_xen_event_channel(). */
 old_port = xchg(p_port, new_port);
-free_xen_event_channel(v, old_port);
+free_xen_event_channel(v-domain, old_port);
 return 0;
 }
 
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 7d6de54..cfe4978 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -1173,11 +1173,9 @@ int alloc_unbound_xen_event_channel(
 }
 
 
-void free_xen_event_channel(
-struct vcpu *local_vcpu, int port)
+void free_xen_event_channel(struct domain *d, int port)
 {
 struct evtchn *chn;
-struct domain *d = local_vcpu-domain;
 
 spin_lock(d-event_lock);
 
diff --git a/xen/common/mem_event.c b/xen/common/mem_event.c
index 16ebdb5..0cc43d7 100644
--- a/xen/common/mem_event.c
+++ b/xen/common/mem_event.c
@@ -221,7 +221,7 @@ static int mem_event_disable(struct domain *d, struct 
mem_event_domain *med)
 }
 
 /* Free domU's event channel and leave the other one unbound */
-free_xen_event_channel(d-vcpu[0], med-xen_port);
+free_xen_event_channel(d, med-xen_port);
 
 /* Unblock all vCPUs */
 for_each_vcpu ( d, v )
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 88526f8..0eb1dd3 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -60,8 +60,7 @@ typedef void (*xen_event_channel_notification_t)(
 int alloc_unbound_xen_event_channel(
 struct vcpu *local_vcpu, domid_t remote_domid,
 xen_event_channel_notification_t notification_fn);
-void free_xen_event_channel(
-struct vcpu *local_vcpu, int port);
+void free_xen_event_channel(struct domain *d, int port);
 
 /* Query if event channel is in use by the guest */
 int guest_enabled_event(struct vcpu *v, uint32_t virq);
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 From: Konrad Rzeszutek Wilk [mailto:konrad.w...@oracle.com]
 Sent: Saturday, January 10, 2015 4:28 AM
 
 On Thu, Jan 08, 2015 at 06:02:04PM +, George Dunlap wrote:
  On Thu, Jan 8, 2015 at 4:10 PM, Jan Beulich jbeul...@suse.com wrote:
   On 08.01.15 at 16:59, dunl...@umich.edu wrote:
   On Thu, Jan 8, 2015 at 1:54 PM, Jan Beulich jbeul...@suse.com wrote:
   the 1st invocation of this interface will save all reported reserved
   regions under domain structure, and later invocation (e.g. from
   hvmloader) gets saved content.
  
   Why would the reserved regions need attaching to the domain
   structure? The combination of (to be) assigned devices and
   global RMRR list always allow reproducing the intended set of
   regions without any extra storage.
  
   So when you say (to be) assigned devices, you mean any device which
   is currently assigned, *or may be assigned at some point in the
   future*?
  
   Yes.
  
   Do you think the extra storage for this VM might possibly be assigned
   this device at some point wouldn't really be that much bigger than
   this VM might possibly map this RMRR at some point in the future?
  
   Since listing devices without RMRR association would be pointless,
   I think a list of devices would require less storage. But see below.
  
   It seems a lot cleaner to me to have the toolstack tell Xen what
   ranges are reserved for RMRR per VM, and then have Xen check again
   when assigning a device to make sure that the RMRRs have already been
   reserved.
  
   With an extra level of what can be got wrong by the admin.
   However, I now realize that doing it this way would allow
   specifying regions not associated with any device on the host
   the guest boots on, but associated with one on a host the guest
   may later migrate to.
 
  I did say the toolstack, not the admin. :-)
 
  At the xl level, I envisioned a single boolean that would say, Make
  my memory layout resemble the host system -- so the MMIO hole would
  be the same size, and all the RMRRs would be reserved.
 
 Like the e820_host=1 ? :-)
 

so this is the extension to report-all, not just for reserved regions but for
all e820 entries. :-)

one thing I'm struggling here (w/ Jan in other threads) is whether reporting
all reserved regions on the host can be a default setting to simplify the 
overall rmrr implementations, given that fact that end user shouldn't set
expectation on actual reserved regions.

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects

In particular the .rodata.str2 case is relevant for the EFI boot code
(moving around 3k from permanent to init-time sections).

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -170,7 +170,10 @@ _clean_%/: FORCE
 %.o: %.S Makefile
$(CC) $(AFLAGS) -c $ -o $@
 
-SPECIAL_DATA_SECTIONS := rodata $(foreach n,1 2 4 8,rodata.str1.$(n)) \
+SPECIAL_DATA_SECTIONS := rodata $(foreach a,1 2 4 8 16, \
+   $(foreach w,1 2 4, \
+   rodata.str$(w).$(a)) \
+   rodata.cst$(a)) \
 $(foreach r,rel rel.ro,data.$(r).local)
 
 $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): %.init.o: %.o Makefile



extend list of sections convertible to .init.* ones in init-only objects

In particular the .rodata.str2 case is relevant for the EFI boot code
(moving around 3k from permanent to init-time sections).

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -170,7 +170,10 @@ _clean_%/: FORCE
 %.o: %.S Makefile
$(CC) $(AFLAGS) -c $ -o $@
 
-SPECIAL_DATA_SECTIONS := rodata $(foreach n,1 2 4 8,rodata.str1.$(n)) \
+SPECIAL_DATA_SECTIONS := rodata $(foreach a,1 2 4 8 16, \
+   $(foreach w,1 2 4, \
+   rodata.str$(w).$(a)) \
+   rodata.cst$(a)) \
 $(foreach r,rel rel.ro,data.$(r).local)
 
 $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): %.init.o: %.o Makefile
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

2015-01-12 Thread Fabio Fantoni


Il 12/01/2015 10:31, Ian Campbell ha scritto:

On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:

In the meantime, I saw this:
http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
Based on the post above seems that phy will have important risk of data
loss if I understand good, from Ian Campbell post:

xl also uses qdisk for raw disk images instead of loop+blkback which
xend used, because there are concerns that the loop driver can lead to
data loss (by not implementing direct i/o the underlying device, and/or
not handling flushes correct, my memory is a bit fuzzy).

Stefano already corrected me on this in this very thread.

Ian.



Thanks for your reply.
I saw other posts about and if I understand good when O_DIRECT patches 
will be in upstream loop driver the data loss risk will be solved, right?


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] xen-ringwatch issues

On Fri, 2015-01-09 at 17:00 -0500, moftah moftah wrote:
 Hi Ian,
 
 
 OK, I have followed your suggestions and created a new patch

Thanks, please submit it in the form describe in the wiki page
http://wiki.xen.org/wiki/Submitting_Xen_Patches which I referenced
before.

In particular, a Signed-off-by is a strict requirement for licensing
reasons, to indicate that you have read the DCO (which is at
http://wiki.xen.org/wiki/Submitting_Xen_Patches#Signing_off_a_patch ).

Please also provide a suitable changelog entry and use a unified diff
(diff -u).

I think rather than [-]* the right addition would be -? (i.e. a single
optional negative sign), unless Python's re syntax is lots different to
what I expect.

 this new patch does fix the issue fully for the negative numbers

 I didnt run the patch on 64bit OS since xenserver has 32bit Dom0

These fields are unsigned int, .i.e. always 32-bits. So I think your if
self.conf  0: self.cons = 4294967296 + self.cons etc will work. The
Internet also suggests ctypes.c_uint32(i).value, dunno if that is
better. In any case writing 4294967296 as 2**32 would be slightly
clearer.

Ian.




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 01/11] VMX: VMFUNC and #VE definitions and detection.

On 09/01/15 21:26, Ed White wrote:
 Currently, neither is enabled globally but may be enabled on a per-VCPU
 basis by the altp2m code.

 Everything can be force-disabled globally by specifying vmfunc=0 on the
 Xen command line.

 Remove the check for EPTE bit 63 == zero in ept_split_super_page(), as
 that bit is now hardware-defined.

 Signed-off-by: Ed White edmund.h.wh...@intel.com
 ---
  docs/misc/xen-command-line.markdown |  7 +++
  xen/arch/x86/hvm/vmx/vmcs.c | 40 
 +
  xen/arch/x86/mm/p2m-ept.c   |  1 -
  xen/include/asm-x86/hvm/vmx/vmcs.h  | 16 +++
  xen/include/asm-x86/hvm/vmx/vmx.h   | 13 +++-
  xen/include/asm-x86/msr-index.h |  1 +
  6 files changed, 76 insertions(+), 2 deletions(-)

 diff --git a/docs/misc/xen-command-line.markdown 
 b/docs/misc/xen-command-line.markdown
 index 152ae03..00fbae7 100644
 --- a/docs/misc/xen-command-line.markdown
 +++ b/docs/misc/xen-command-line.markdown
 @@ -1305,6 +1305,13 @@ The optional `keep` parameter causes Xen to continue 
 using the vga
  console even after dom0 has been started.  The default behaviour is to
  relinquish control to dom0.
  
 +### vmfunc (Intel)
 + `= boolean`
 +
 + Default: `true`
 +
 +Use VMFUNC and #VE support if available.
 +
  ### vpid (Intel)
   `= boolean`
  
 diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
 index 9d8033e..4274e92 100644
 --- a/xen/arch/x86/hvm/vmx/vmcs.c
 +++ b/xen/arch/x86/hvm/vmx/vmcs.c
 @@ -50,6 +50,9 @@ boolean_param(unrestricted_guest, 
 opt_unrestricted_guest_enabled);
  static bool_t __read_mostly opt_apicv_enabled = 1;
  boolean_param(apicv, opt_apicv_enabled);
  
 +static bool_t __read_mostly opt_vmfunc_enabled = 1;
 +boolean_param(vmfunc, opt_vmfunc_enabled);

Please can experimental features be off by default.  (I am specifically
looking to avoid the issues we had with apicv getting into stable
releases despite reliably causing problems for migration).

I suspect you will have many interested testers for this featureset, and
it is fine to patch the default later when the feature gets declared stable.

I also wonder whether it might be better to have a vmx= command line
parameter with vmfunc as a subopt, to save gaining an ever increasing
set of related top level parameters?

Other than this, the content of the rest of the patch appears fine.

~Andrew


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH v2] [Bugfix] x86/apic: Fix xen IRQ allocation failure caused by commit b81975eade8c

2015-01-12 Thread Jiang Liu

Commit b81975eade8c (x86, irq: Clean up irqdomain transition code)
breaks xen IRQ allocation because xen_smp_prepare_cpus() doesn't invoke
setup_IO_APIC(), so no irqdomains created for IOAPICs and
mp_map_pin_to_irq() fails at the very beginning.

Enhance xen_smp_prepare_cpus() to call setup_IO_APIC() to initialize
irqdomain for IOAPICs.

Signed-off-by: Jiang Liu jiang@linux.intel.com
---
Hi Sander,
Could you please help to test this patch against 3.19-rc3?
I have reworked it based Konrad's suggestions.
Thanks!
Gerry
---
 arch/x86/kernel/apic/io_apic.c |   31 +++
 arch/x86/xen/smp.c |1 +
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3f5f60406ab1..81d4faeb8040 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -38,6 +38,8 @@
 #include linux/slab.h
 #include linux/bootmem.h
 
+#include xen/xen.h
+
 #include asm/idle.h
 #include asm/io.h
 #include asm/smp.h
@@ -2165,6 +2167,9 @@ static inline void __init check_timer(void)
unsigned long flags;
int no_pin1 = 0;
 
+   if (!nr_legacy_irqs())
+   return;
+
local_irq_save(flags);
 
/*
@@ -2185,6 +2190,13 @@ static inline void __init check_timer(void)
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
legacy_pic-init(1);
 
+   /*
+* legacy_pic will be changed to null_legacy_pic if init() fails to
+* to detect legacy PIC hardware. Recheck again.
+*/
+   if (!nr_legacy_irqs())
+   goto out;
+
pin1  = find_isa_irq_pin(0, mp_INT);
apic1 = find_isa_irq_apic(0, mp_INT);
pin2  = ioapic_i8259.pin;
@@ -2369,6 +2381,15 @@ static void ioapic_destroy_irqdomain(int idx)
ioapics[idx].pin_info = NULL;
 }
 
+static void setup_IO_APIC_IDs(void)
+{
+   if (xen_domain())
+   return;
+
+   x86_init.mpparse.setup_ioapic_ids();
+   sync_Arb_IDs();
+}
+
 void __init setup_IO_APIC(void)
 {
int ioapic;
@@ -2382,16 +2403,10 @@ void __init setup_IO_APIC(void)
for_each_ioapic(ioapic)
BUG_ON(mp_irqdomain_create(ioapic));
 
-   /*
- * Set up IO-APIC IRQ routing.
- */
-   x86_init.mpparse.setup_ioapic_ids();
-
-   sync_Arb_IDs();
+   setup_IO_APIC_IDs();
setup_IO_APIC_irqs();
init_IO_APIC_traps();
-   if (nr_legacy_irqs())
-   check_timer();
+   check_timer();
 
ioapic_initialized = 1;
 }
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 4c071aeb8417..9f404df64422 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -327,6 +327,7 @@ static void __init xen_smp_prepare_cpus(unsigned int 
max_cpus)
xen_raw_printk(m);
panic(m);
}
+   setup_IO_APIC();
xen_init_lock_cpu(0);
 
smp_store_boot_cpu_info();
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] summary-1 (v2) Design proposal for RMRR fix

On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote:
 3) report-sel vs. report-all

One thing I'm not clear on is whether you are suggesting to reserve RMRR
(either -all or -sel) for every domain by default, or whether the guest
CFG will need to explicitly opt-in, IOW is there a 3rd report-none
option which is the default unless otherwise requested (e.g. by
e820_host=1, or some other new option)?

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2] [Bugfix] x86/apic: Fix xen IRQ allocation failure caused by commit b81975eade8c

On 12/01/15 13:39, Jiang Liu wrote:
 Commit b81975eade8c (x86, irq: Clean up irqdomain transition code)
 breaks xen IRQ allocation because xen_smp_prepare_cpus() doesn't invoke
 setup_IO_APIC(), so no irqdomains created for IOAPICs and
 mp_map_pin_to_irq() fails at the very beginning.
 
 Enhance xen_smp_prepare_cpus() to call setup_IO_APIC() to initialize
 irqdomain for IOAPICs.

Having Xen call setup_IO_APIC() to initialize the irq domains then having to
add special cases to it is just wrong.

The bits of init deferred by mp_register_apic() are also deferred to
two different places which looks odd.

What about something like the following (untested) patch?

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3f5f604..e180680 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -253,8 +253,10 @@ int __init arch_early_ioapic_init(void)
if (!nr_legacy_irqs())
io_apic_irqs = ~0UL;
 
-   for_each_ioapic(i)
+   for_each_ioapic(i) {
+   BUG_ON(mp_irqdomain_create(ioapic));
alloc_ioapic_saved_registers(i);
+   }
 
/*
 * For legacy IRQ's, start with assigning irq0 to irq15 to
@@ -2379,8 +2381,6 @@ void __init setup_IO_APIC(void)
io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
 
apic_printk(APIC_VERBOSE, ENABLING IO-APIC IRQs\n);
-   for_each_ioapic(ioapic)
-   BUG_ON(mp_irqdomain_create(ioapic));
 
/*
  * Set up IO-APIC IRQ routing.
@@ -2929,7 +2929,8 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
/*
 * If mp_register_ioapic() is called during early boot stage when
 * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
-* we are still using bootmem allocator. So delay it to setup_IO_APIC().
+* we are still using bootmem allocator. So delay it to
+* arch_early_ioapic_init().
 */
if (hotplug) {
if (mp_irqdomain_create(idx)) {

 --- a/arch/x86/kernel/apic/io_apic.c
 +++ b/arch/x86/kernel/apic/io_apic.c
 @@ -2369,6 +2381,15 @@ static void ioapic_destroy_irqdomain(int idx)
   ioapics[idx].pin_info = NULL;
  }
  
 +static void setup_IO_APIC_IDs(void)
 +{
 + if (xen_domain())
 + return;

This would have to xen_pv_domain().

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c

On 12/01/15 11:36, Roger Pau Monné wrote:
 El 12/01/15 a les 8.09, Bob Liu ha escrit:

 On 01/09/2015 11:51 PM, Roger Pau Monné wrote:
 El 06/01/15 a les 14.19, Bob Liu ha escrit:
 When there is no enough free grants, gnttab_alloc_grant_references()
 will fail and block request queue will stop.
 If the system is always lack of grants, blkif_restart_queue_callback() 
 can't be
 scheduled and block request queue can't be restart(block I/O hang).

 But when there are former requests complete, some grants may free to
 persistent_gnts_c, we can give the request queue another chance to restart 
 and
 avoid block hang.

 Reported-by: Junxiao Bi junxiao...@oracle.com
 Signed-off-by: Bob Liu bob@oracle.com
 ---
  drivers/block/xen-blkfront.c |   11 +++
  1 file changed, 11 insertions(+)

 diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
 index 2236c6f..dd30f99 100644
 --- a/drivers/block/xen-blkfront.c
 +++ b/drivers/block/xen-blkfront.c
 @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, 
 struct blkfront_info *info,
}
}
}
 +
 +  /*
 +   * Request queue would be stopped if failed to alloc enough grants and
 +   * won't be restarted until gnttab_free_count = info-callback-count.
 +   *
 +   * But there is another case, once we have enough persistent grants we
 +   * can try to restart the request queue instead of continue to wait for
 +   * 'gnttab_free_count'.
 +   */
 +  if (info-persistent_gnts_c = info-callback.count)
 +  schedule_work(info-work);

 I guess I'm missing something here, but blkif_completion is called by
 blkif_interrupt, which in turn calls kick_pending_request_queues when
 finished, which IMHO should be enough to restart the processing of requests.


 You are right, sorry for the mistake.

 The problem we met was a xenblock I/O hang.
 Dumped data showed at that time info-persistent_gnt_c = 8, max_gref = 8
 but block request queue was still stopped.
 It's very hard to reproduce this issue, we only see it once.

 I think there might be a race condition:

 request A  request B:

info-persistent_gnts_c  max_grefs
and fail to alloc enough grants


 
 interrupt happen, blkif_complte():
 info-persistent_gnts_c++
 kick_pending_request_queues()

 stop block request queue
 added to callback list

 If the system don't have enough grants(but have enough persistent_gnts),
 request queue would still hang.
 
 Not sure how can this happen, blkif_queue_request explicitly checks that
 persistent_gnts_c  max_grefs before adding the callback and stopping
 the request queue, so in your case the queue should not be blocked. Can
 you dump the state of info-connected?

I think Bob has correctly identified a race.

After calling blk_stop_queue(), check info-persistent_gnts again and
restart the queue if there free grefs.

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed

2015-01-12 Thread Li, Liang Z

   Use the 'xl pci-attach $DomU $BDF' command to attach more than one
   PCI devices to the guest, then detach the devices with 'xl
   pci-detach $DomU $BDF', after that, re-attach these PCI devices
   again, an error message will be reported like following:
  
   libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive an
   error message from QMP server: Duplicate ID 'pci-pt-03_10.1'
   for device.
  
   The count of calling xen_pt_region_add and xen_pt_region_del are not
   the same will cause the XenPCIPassthroughState and it's related
   QemuOpts object not be released properly.
 
  Thanks for the patch!
 
  From this description, I don't quite understand why the
  memory_region_ref and memory_region_unref calls are wrong.  What do
  you mean by The count of calling xen_pt_region_add and
  xen_pt_region_del are not the same?

I means for some memory regions , only the xen_pt_region_add callback function
was called while the xen_pt_region_del was not called.

  On unplug xen_pt_region_del does not get called?
  Or the memory region argument is not exactly the same as the one
  initially passed to xen_pt_region_add?
 
 
 agree. Liang, could you elaborate how the patch is associated with above
 explanation? :-)


I have verified the following patch can work too:

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..f2893b2 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d)
 }
 
 out:
-memory_listener_register(s-memory_listener, address_space_memory);
+memory_listener_register(s-memory_listener, 
+ s-dev.bus_master_as);
 memory_listener_register(s-io_listener, address_space_io);
 XEN_PT_LOG(d,
Real physical device %02x:%02x.%d registered successfully!\n,

By further debugging, I found when using 'address_space_memory',  
'xen_pt_region_del' 
won't be called when the memory region's name is not  ' xen-pci-pt-*', when 
using
 ' s-dev.bus_master_as ', there is no such issue.

I think use the device related address space here is more reasonable, but I am 
not sure.
 Could you give some suggestion?

Liang

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Pasi Kärkkäinen

On Mon, Jan 12, 2015 at 11:25:56AM +, George Dunlap wrote:
On Fri, Jan 9, 2015 at 2:43 AM, Tian, Kevin kevin.t...@intel.com wrote:
From: George Dunlap
Sent: Thursday, January 08, 2015 8:55 PM

On Thu, Jan 8, 2015 at 12:49 PM, George Dunlap
george.dun...@eu.citrix.com wrote:
If RMRRs almost always happen up above 2G, for example, then a simple
solution that wouldn't require too much work would be to make sure
that the PCI MMIO hole we specify to libxc and to qemu-upstream is big
enough to include all RMRRs. That would satisfy the libxc and qemu
requirements.

If we then store specific RMRRs we want included in xenstore,
hvmloader can put them in the e820 map, and that would satisfy the
hvmloader requirement.

An alternate thing to do here would be to properly fix the
qemu-upstream problem, by making a way for hvmloader to communicate
changes in the gpfn layout to qemu.

Then hvmloader could do the work of moving memory under RMRRs to
higher memory; and libxc wouldn't need to be involved at all.

I think it would also fix our long-standing issues with assigning PCI
devices to qemu-upstream guests, which up until now have only been
worked around.

could you elaborate a bit for that long-standing issue?

So qemu-traditional didn't particularly expect to know the guest
memory layout. qemu-upstream does; it expects to know what areas of
memory are guest memory and what areas of memory are unmapped. If a
read or write happens to a gpfn which *xen* knows is valid, but which
*qemu-upstream* thinks is unmapped, then qemu-upstream will crash.

The problem though is that the guest's memory map is not actually
communicated to qemu-upstream in any way. Originally, qemu-upstream
was only told how much memory the guest had, and it just happens to
choose the same guest memory layout as the libxc domain builder does.
This works, but it is bad design, because if libxc were to change for
some reason, people would have to simply remember to also change the
qemu-upstream layout.

Where this really bites us is in PCI pass-through. The default 4G
MMIO hole is very small; and hvmloader naturally expects to be able to
make this area larger by relocating memory from below 4G to above 4G.
It moves the memory in Xen's p2m, but it has no way of communicating
this to qemu-upstream. So when the guest does an MMIO instuction that
causes qemu-upstream to access that memory, the guest crashes.

There are two work-arounds at the moment:
1. A flag which tells hvmloader not to relocate memory
2. The option to tell qemu-upstream to make the memory hole larger.

Both are just work-arounds though; a proper fix would be to allow
hvmloader some way of telling qemu that the memory has moved, so it
can update its memory map.

This will (I'm pretty sure) have an effect on RMRR regions as well,
for the reasons I've mentioned above: whether make the holes for the
RMRRs in libxc or in hvmloader, if we *move* that memory up to the top
of the address space (rather than, say, just not giving that RAM to
the guest), then qemu-upstream's idea of the guest memory map will be
wrong, and will probably crash at some point.

Having the ability for hvmloader to populate and/or move the memory
around, and then tell qemu-upstream what the resulting map looked
like, would fix both the MMIO-resize issue and the RMRR problem, wrt
qemu-upstream.

Hmm, wasn't this changed slightly during Xen 4.5 development by Don Slutz?

You can now specify the mmio_hole size for HVM guests when using qemu-upstream:
http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List

Bigger PCI MMIO hole in QEMU via the mmio_hole parameter in guest config,
which allows configuring the MMIO size below 4GB.

Backport pc q35: Add new machine opt max-ram-below-4g:
http://xenbits.xen.org/gitweb/?p=qemu-upstream-unstable.git;a=commit;h=ffdacad07002e14a8072ae28086a57452e48d458

x86: hvm: Allow configuration of the size of the mmio_hole.:
http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=2d927fc41b8e130b3b8910e4442d469d2ac7

-- Pasi

-George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects

On Mon, 2015-01-12 at 09:00 +, Jan Beulich wrote:
 In particular the .rodata.str2 case is relevant for the EFI boot code
 (moving around 3k from permanent to init-time sections).

So we go from handling .rodata.str1.{1,2,3,8}
to .rodata.str{1,2,4}.{1,2,4,8,16}?

 Signed-off-by: Jan Beulich jbeul...@suse.com

Acked-by: Ian Campbell ian.campb...@citrix.com



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects

 On 12.01.15 at 15:03, ian.campb...@eu.citrix.com wrote:
 On Mon, 2015-01-12 at 09:00 +, Jan Beulich wrote:
 In particular the .rodata.str2 case is relevant for the EFI boot code
 (moving around 3k from permanent to init-time sections).
 
 So we go from handling .rodata.str1.{1,2,3,8}
 to .rodata.str{1,2,4}.{1,2,4,8,16}?

These plus .rodata.cst{1,2,4,8,16}.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] arm64/EFI: minor corrections

 On 12.01.15 at 14:46, ian.campb...@eu.citrix.com wrote:
 On Mon, 2015-01-12 at 08:59 +, Jan Beulich wrote:
 - don't bail when using the last slot of bootinfo.mem.bank[] (due to
   premature incrementing of the array index)
 - GUIDs should be static const (and placed into .init.* whenever
   possible)
 - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly
   append one to the message passed to the function
   - Avoid needless use of DisplayUint via __stringify.
 
 Signed-off-by: Jan Beulich jbeul...@suse.com
 
 Acked-by: Ian Campbell ian.campb...@citrix.com
 
 Will you commit yourself or would you like me to?

I will.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

On Mon, Jan 12, 2015 at 12:28 PM, Tian, Kevin kevin.t...@intel.com wrote:
 From: George Dunlap
 Sent: Monday, January 12, 2015 8:14 PM

 On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com wrote:
  From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Monday, January 12, 2015 6:23 PM

   On 12.01.15 at 11:12, kevin.t...@intel.com wrote:
From: Jan Beulich [mailto:jbeul...@suse.com]
   Sent: Monday, January 12, 2015 6:09 PM

On 12.01.15 at 10:56, kevin.t...@intel.com wrote:
the result is related to another open whether we want to block guest
boot for such problem. If 'warn' in domain builder is acceptable, we
don't need to change lowmem for such rare 1GB case, just throws
a warning for unnecessary conflictions (doesn't hurt if user doesn't
assign it).

   And how would you then deal with the one guest needing that
   range reserved?

   if guest needs the range, then report-all or report-sel doesn't matter.
   domain builder throws the warning, and later device assignment will
   fail (or warn w/ override). In reality I think 1GB is rare. Making such
   assumption to simplify implementation is reasonable.

  One of my main problems with all you recent argumentation here
  is the arbitrary use of the 1Gb boundary - there's nothing special
  in this discussion with where the boundary is. Everything revolves
  around the (undue) effect of report-all on domains not needing all
  of the ranges found on the host.

  I'm not sure which part of my argument is not clear here. report-all
  would be a problem here only if we want to fix all the conflictions
  (then pulling unnecessary devices increases the confliction possibility)
  in the domain builder. but if we only fix reasonable ones (e.g. 3GB)
  while warn other conflictions (e.g. 3G) in domain builder (let later
  assignment path to actually fail if confliction does matter), then we
  don't need to solve all conflictions in domain builder (if say 1G example
  fixing it may instead reduce lowmem greatly) and then report-all
  may just add more warnings than report-sel for unused devices.

 You keep saying report-all or report-sel, but I'm not 100% clear
 what you mean by those.  In any case, the naming has got to be a bit
 misleading: the important questions at the moment, AFAICT, are:

 I explained them in original proposal

Yes, I read it and didn't understand it there either. :-)

 1. Whether we make holes at boot time for all RMRRs on the system, or
 whether only make RMRRs for some subset (or potentially some other
 arbitrary range, which may include RMRRs on other hosts to which we
 may want to migrate).

 I use 'report-all' to stand for making holes for all RMRRs on the system,
 while 'report-sel' for specified subset.

 including other RMRRs (from admin for migration) is orthogonal to
 above open.

Right; so the report in this case is report to the guest.

As I said, I think that's confusing terminology; after all, we want to
report to the guest all holes that we make, and only the holes that we
make.  The question isn't then which ones we report, but which ones we
make holes for. :-)

So for this discussion, maybe rmrr-host (meaning, copy RMRRs from
the host) or rmrr-sel (meaning, specify a selection of RMRRs, which
may be from this host, or even another host)?

Given that the ranges may be of arbitrary size, and that we may want
to specify additional ranges for migration to other hosts, then I
think we need at some level we need the machinery to be in place to
specify the RMRRs that will be reserved for a specific guest.

At the xl level, there should of course be a way to specify use all
host RMRRs; but what should happen then is that xl / libxl should
query Xen for the host RMRRs and then pass those down to the next
layer of the library.

 2. Whether those holes are made by the domain builder in libxc, or by
 hvmloader

 based on current discussion, whether to make holes in hvmloader
 doesn't bring fundamental difference. as long as domain builder
 still need to populate memory (even minimal for hvmloader to boot),
 it needs to check conflict and may ideally make hole too (though we
 may make assumption not doing that)

Well it will have an impact on the overall design of the code; but
you're right, if RMRRs really can (and will) be anywhere in memory,
then the domain builder will need to know what RMRRs are going to be
reserved for this VM and avoid populating those.  If, on the other
hand, we can make some fairly small assumptions about where there will
not be any RMRRs, then we can get away with handling everything in
hvmloader.

 3. What happens if Xen is asked to assign a device and it finds that
 the required RMRR is not empty:
  a. during guest creation
  b. after the guest has booted

 for Xen we don't need differentiate a/b. by default it's clear failure
 should be returned as it implies a security/correctness issue if
 moving forward. but based on discussion an override to 'warn' only

Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed

2015-01-12 Thread Paolo Bonzini



On 12/01/2015 14:35, Li, Liang Z wrote:
 
 diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..f2893b2 100644
 --- a/hw/xen/xen_pt.c
 +++ b/hw/xen/xen_pt.c
 @@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d)
  }
  
  out:
 -memory_listener_register(s-memory_listener, address_space_memory);
 +memory_listener_register(s-memory_listener, 
 + s-dev.bus_master_as);
  memory_listener_register(s-io_listener, address_space_io);
  XEN_PT_LOG(d,
 Real physical device %02x:%02x.%d registered 
 successfully!\n,
 
 By further debugging, I found when using 'address_space_memory',  
 'xen_pt_region_del' 
 won't be called when the memory region's name is not  ' xen-pci-pt-*', when 
 using
  ' s-dev.bus_master_as ', there is no such issue.
 
 I think use the device related address space here is more reasonable, but I 
 am not sure.
  Could you give some suggestion?

Yes, this patch makes sense.  The listener will be called every time the
command register is written.

Paolo

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2 1/3] x86: also allow REP STOS emulation acceleration

On Fri, Jan 9, 2015 at 12:10 PM, Jan Beulich jbeul...@suse.com wrote:
 On 09.01.15 at 12:45, t...@xen.org wrote:
 At 11:24 + on 09 Jan (1420799087), Jan Beulich wrote:
  On 09.01.15 at 12:18, t...@xen.org wrote:
   +default:
   +xfree(buf);
   +ASSERT(!buf);
 
  looks dodgy...

 In which way? The default is supposed to be unreachable, and sits
 in the else branch to an if(!buf), i.e. in a release build we'll correctly
 free the buffer, while in a debug build the ASSERT() will trigger.

 Oh I see.  Can you please use ASSERT(0) for that?

 I sincerely dislike ASSERT(0), but if that's the only way to get
 the patch accepted...

Um, by what set of criteria is ASSERT(!buf) better than ASSERT(0)?  At
least the second tells the reader that you're not using ASSERT() the
normal way.

I agree that ASSERT_UNREACHABLE() is probably the best option.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [seabios test] 33359: regressions - FAIL

flight 33359 seabios real [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/33359/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-qemuu-debianhvm-amd64  5 xen-boot fail REGR. vs. 33317

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-i386-libvirt   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt  9 guest-start  fail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-amd64-amd64-xl-pcipt-intel  9 guest-start fail never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass

version targeted for testing:
 seabios  301dd092c2d04a5d70c94b9d873d810785e94a84
baseline version:
 seabios  60e0e55f212dadd043ab9e39bee05a48013ddd8f


People who touched revisions under test:
  Kevin O'Connor ke...@koconnor.net


jobs:
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl  pass
 test-amd64-i386-xl   pass
 test-amd64-amd64-xl-pvh-amd  fail
 test-amd64-i386-rhel6hvm-amd pass
 test-amd64-i386-qemut-rhel6hvm-amd   pass
 test-amd64-i386-qemuu-rhel6hvm-amd   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64pass
 test-amd64-i386-xl-qemut-debianhvm-amd64 pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64fail
 test-amd64-i386-xl-qemuu-debianhvm-amd64 pass
 test-amd64-i386-freebsd10-amd64  pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass
 test-amd64-amd64-xl-qemut-win7-amd64 fail
 test-amd64-i386-xl-qemut-win7-amd64  fail
 test-amd64-amd64-xl-qemuu-win7-amd64 fail
 test-amd64-i386-xl-qemuu-win7-amd64  fail
 test-amd64-amd64-xl-win7-amd64   fail
 test-amd64-i386-xl-win7-amd64fail
 test-amd64-i386-xl-credit2   pass
 test-amd64-i386-freebsd10-i386   pass
 test-amd64-amd64-xl-pcipt-intel  fail
 test-amd64-amd64-xl-pvh-intelfail
 test-amd64-i386-rhel6hvm-intel   pass
 test-amd64-i386-qemut-rhel6hvm-intel pass
 test-amd64-i386-qemuu-rhel6hvm-intel pass
 test-amd64-amd64-libvirt fail
 test-amd64-i386-libvirt  fail
 test-amd64-i386-xl-multivcpu pass
 test-amd64-amd64-pairpass
 test-amd64-i386-pair pass
 test-amd64-amd64-xl-sedf-pin

Re: [Xen-devel] [PATCH 00/12] Replace Xen xl parsing/formatting impl

On Fri, 2015-01-09 at 22:03 -0700, Jim Fehlig wrote:
 The first attempt to implement support for parsing/formatting Xen's
 xl disk config format copied Xen's flex-based parser into libvirt, which
 has proved to be challenging in the context of autotools.  But as it turns
 out, Xen provides an interface to the parser via libxlutil.
 
 This series reverts the first attempt, along with subsequent attempts to
 fix it, and replaces it with an implementation based on libxlutil.  The
 first nine patches revert the original implementation and subsequent fixes.
 Patch 10 provides an implemenation based on libxlutil.  Patches 11 and
 12 are basically unchanged from patches 3 and 4 in the first attempt.
 
 One upshot of using libxlutil instead of copying the flex source is
 removing the potential for source divergence.

Thanks for doing this, looks good to me, FWIW.

Is the presence/absence of xen-xl support exposed via virsh anywhere? If
so then I can arrange for my Xen osstest patches for libvirt testing to
use xen-xl when available but still fallback to xen-xm. I've had a look
in virsh capabilities and virsh help domxml-from-native but not
seeing xen-xm, so assuming xen-xl won't magically appear in any of those
places either.

(TBH, this may become moot since I suspect your patches will be well
established by the time my osstest patches hit osstest...)

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c

2015-01-12 Thread Roger Pau Monné

El 12/01/15 a les 14.04, David Vrabel ha escrit:
 On 12/01/15 11:36, Roger Pau Monné wrote:
 El 12/01/15 a les 8.09, Bob Liu ha escrit:

 On 01/09/2015 11:51 PM, Roger Pau Monné wrote:
 El 06/01/15 a les 14.19, Bob Liu ha escrit:
 When there is no enough free grants, gnttab_alloc_grant_references()
 will fail and block request queue will stop.
 If the system is always lack of grants, blkif_restart_queue_callback() 
 can't be
 scheduled and block request queue can't be restart(block I/O hang).

 But when there are former requests complete, some grants may free to
 persistent_gnts_c, we can give the request queue another chance to 
 restart and
 avoid block hang.

 Reported-by: Junxiao Bi junxiao...@oracle.com
 Signed-off-by: Bob Liu bob@oracle.com
 ---
  drivers/block/xen-blkfront.c |   11 +++
  1 file changed, 11 insertions(+)

 diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
 index 2236c6f..dd30f99 100644
 --- a/drivers/block/xen-blkfront.c
 +++ b/drivers/block/xen-blkfront.c
 @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, 
 struct blkfront_info *info,
   }
   }
   }
 +
 + /*
 +  * Request queue would be stopped if failed to alloc enough grants and
 +  * won't be restarted until gnttab_free_count = info-callback-count.
 +  *
 +  * But there is another case, once we have enough persistent grants we
 +  * can try to restart the request queue instead of continue to wait for
 +  * 'gnttab_free_count'.
 +  */
 + if (info-persistent_gnts_c = info-callback.count)
 + schedule_work(info-work);

 I guess I'm missing something here, but blkif_completion is called by
 blkif_interrupt, which in turn calls kick_pending_request_queues when
 finished, which IMHO should be enough to restart the processing of 
 requests.


 You are right, sorry for the mistake.

 The problem we met was a xenblock I/O hang.
 Dumped data showed at that time info-persistent_gnt_c = 8, max_gref = 8
 but block request queue was still stopped.
 It's very hard to reproduce this issue, we only see it once.

 I think there might be a race condition:

 request A  request B:

info-persistent_gnts_c  max_grefs
and fail to alloc enough grants


 
 interrupt happen, blkif_complte():
 info-persistent_gnts_c++
 kick_pending_request_queues()

blkif_interrupt can never interrupt blkif_queue_request, because it's
holding a spinlock (info-io_lock). If you have seen this trace in the
wild it means something is really wrong and we are calling
blkif_queue_request without acquiring the spinlock and thus without
disabling interrupts.


 stop block request queue
 added to callback list

 If the system don't have enough grants(but have enough persistent_gnts),
 request queue would still hang.

 Not sure how can this happen, blkif_queue_request explicitly checks that
 persistent_gnts_c  max_grefs before adding the callback and stopping
 the request queue, so in your case the queue should not be blocked. Can
 you dump the state of info-connected?
 
 I think Bob has correctly identified a race.
 
 After calling blk_stop_queue(), check info-persistent_gnts again and
 restart the queue if there free grefs.
 
 David
 
 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [xen-4.5-testing test] 33348: tolerable FAIL - PUSHED

flight 33348 xen-4.5-testing real [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/33348/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-armhf-armhf-libvirt  9 guest-start  fail   never pass
 test-armhf-armhf-xl  10 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt  9 guest-start  fail   never pass
 test-amd64-amd64-xl-pcipt-intel  9 guest-start fail never pass
 test-amd64-i386-libvirt   9 guest-start  fail   never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass

version targeted for testing:
 xen  3508d75155dae3154d10cd5c33d44336f6f4bf1c
baseline version:
 xen  36174af3fbeb1b662c0eadbfa193e77f68cc955b


People who touched revisions under test:
  Andrew Cooper andrew.coop...@citrix.com
  Boris Ostrovsky boris.ostrov...@oracle.com
  Ed Swierk eswi...@skyportsystems.com
  Ian Campbell ian.campb...@citrix.com
  Ian Campbell i...@hellion.org.uk
  Ian Jackson ian.jack...@eu.citrix.com
  Ian Jackson i...@mariner.uk.xensource.com
  Jan Beulich jbeul...@suse.com
  Julien Grall julien.gr...@linaro.org
  Kevin Tian kevin.t...@intel.com
  Konrad Rzeszutek Wilk konrad.w...@oracle.com
  Mihai DonÈu mdo...@bitdefender.com
  Olaf Hering o...@aepfle.de
  Robert Hu robert...@intel.com
  RÄzvan Cojocaru rcojoc...@bitdefender.com
  Stefano Stabellini stefano.stabell...@eu.citrix.com
  Wei Liu wei.l...@citrix.com
  Yang Hongyang yan...@cn.fujitsu.com


jobs:
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 build-amd64-rumpuserxen  pass
 build-i386-rumpuserxen   pass
 test-amd64-amd64-xl  pass
 test-armhf-armhf-xl  pass
 test-amd64-i386-xl   pass
 test-amd64-amd64-xl-pvh-amd  fail
 test-amd64-i386-rhel6hvm-amd pass
 test-amd64-i386-qemut-rhel6hvm-amd   pass
 test-amd64-i386-qemuu-rhel6hvm-amd   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64pass
 test-amd64-i386-xl-qemut-debianhvm-amd64 pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-i386-xl-qemuu-debianhvm-amd64 pass
 test-amd64-i386-freebsd10-amd64  pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass
 test-amd64-amd64-rumpuserxen-amd64   pass
 test-amd64-amd64-xl-qemut-win7-amd64 fail
 test-amd64-i386-xl-qemut-win7-amd64  fail
 test-amd64-amd64-xl-qemuu-win7-amd64

[Xen-devel] [PATCH v2] xenstored: log tdb message via xenstored's logging mechanisms

TDB provides us with a callback for this purpose. Use it in both
xenstored and xs_tdb_dump.

While at it make the existing log() macro tollerate memory failures.

Signed-off-by: Ian Campbell ian.campb...@citrix.com
---
v2: Use tdb_logger consistently.
Did not: move location of talloc_free, since talloc_free(NULL)
returns an error.
---
 tools/xenstore/xenstored_core.c |   39 +--
 tools/xenstore/xs_tdb_dump.c|   12 +++-
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 4eaff57..3fd9a20 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -89,9 +89,14 @@ static void check_store(void);
 #define log(...)   \
do {\
char *s = talloc_asprintf(NULL, __VA_ARGS__);   \
-   trace(%s\n, s);   \
-   syslog(LOG_ERR, %s,  s);  \
-   talloc_free(s); \
+   if (s) {\
+   trace(%s\n, s);   \
+   syslog(LOG_ERR, %s,  s);  \
+   talloc_free(s); \
+   } else {\
+   trace(talloc failure during logging\n);   \
+   syslog(LOG_ERR, talloc failure during logging\n); \
+   }   \
} while (0)
 
 
@@ -1479,13 +1484,35 @@ static void manual_node(const char *name, const char 
*child)
talloc_free(node);
 }
 
+static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...)
+{
+   va_list ap;
+   char *s;
+
+   va_start(ap, fmt);
+   s = talloc_vasprintf(NULL, fmt, ap);
+   va_end(ap);
+
+   if (s) {
+   trace(TDB: %s\n, s);
+   syslog(LOG_ERR, TDB: %s,  s);
+   if (verbose)
+   xprintf(TDB: %s, s);
+   talloc_free(s);
+   } else {
+   trace(talloc failure during logging\n);
+   syslog(LOG_ERR, talloc failure during logging\n);
+   }
+}
+
 static void setup_structure(void)
 {
char *tdbname;
tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
 
if (!(tdb_flags  TDB_INTERNAL))
-   tdb_ctx = tdb_open(tdbname, 0, tdb_flags, O_RDWR, 0);
+   tdb_ctx = tdb_open_ex(tdbname, 0, tdb_flags, O_RDWR, 0,
+ tdb_logger, NULL);
 
if (tdb_ctx) {
/* XXX When we make xenstored able to restart, this will have
@@ -1516,8 +1543,8 @@ static void setup_structure(void)
talloc_free(tlocal);
}
else {
-   tdb_ctx = tdb_open(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT,
-  0640);
+   tdb_ctx = tdb_open_ex(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT,
+ 0640, tdb_logger, NULL);
if (!tdb_ctx)
barf_perror(Could not create tdb file %s, tdbname);
 
diff --git a/tools/xenstore/xs_tdb_dump.c b/tools/xenstore/xs_tdb_dump.c
index b91cdef..9f636f9 100644
--- a/tools/xenstore/xs_tdb_dump.c
+++ b/tools/xenstore/xs_tdb_dump.c
@@ -33,6 +33,15 @@ static char perm_to_char(enum xs_perm_type perm)
'?';
 }
 
+static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...)
+{
+   va_list ap;
+
+   va_start(ap, fmt);
+   vfprintf(stderr, fmt, ap);
+   va_end(ap);
+}
+
 int main(int argc, char *argv[])
 {
TDB_DATA key;
@@ -41,7 +50,8 @@ int main(int argc, char *argv[])
if (argc != 2)
barf(Usage: xs_tdb_dump tdbfile);
 
-   tdb = tdb_open(talloc_strdup(NULL, argv[1]), 0, 0, O_RDONLY, 0);
+   tdb = tdb_open_ex(talloc_strdup(NULL, argv[1]), 0, 0, O_RDONLY, 0,
+ tdb_logger, NULL);
if (!tdb)
barf_perror(Could not open %s, argv[1]);
 
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH] Add a flight to test OVMF master branch

2015-01-12 Thread Wei Liu

Do the usual stuffs for adding a new branch (ap-* cr-* etc).

Modify ts-xen-build so that it builds Xen with the specified ovmf tree
and revision.

Only build and test on x86 by modifying make-flight and mfi-common.

New branch is added to cr-daily-branch. It exports the tree and
changeset used for the test if set. Otherwise the values in Config.mk
are used.

Signed-off-by: Wei Liu wei.l...@citrix.com
---
 ap-common|5 +
 ap-fetch-version |4 
 ap-fetch-version-old |5 +
 ap-print-url |3 +++
 ap-push  |5 +
 cr-daily-branch  |   14 ++
 cr-for-branches  |2 +-
 cri-common   |1 +
 make-flight  |6 ++
 mfi-common   |5 -
 ts-xen-build |7 +++
 11 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/ap-common b/ap-common
index 96cbd14..e2c3e42 100644
--- a/ap-common
+++ b/ap-common
@@ -51,6 +51,10 @@
 : ${PUSH_TREE_SEABIOS:=$XENBITS:/home/xen/git/osstest/seabios.git}
 : ${BASE_TREE_SEABIOS:=git://xenbits.xen.org/osstest/seabios.git}
 
+: ${TREE_OVMF_UPSTREAM:=https://github.com/tianocore/edk2.git}
+: ${PUSH_TREE_OVMF:=$XENBITS:/home/xen/git/osstest/ovmf.git}
+: ${BASE_TREE_OVMF:=git://xenbits.xen.org/osstest/ovmf.git}
+
 : ${TREE_LINUXFIRMWARE:=git://xenbits.xen.org/osstest/linux-firmware.git}
 : ${PUSH_TREE_LINUXFIRMWARE:=$XENBITS:/home/osstest/ext/linux-firmware.git}
 : 
${UPSTREAM_TREE_LINUXFIRMWARE:=$GIT_KERNEL_ORG/pub/scm/linux/kernel/git/firmware/linux-firmware.git}
@@ -77,6 +81,7 @@ fi
 : ${LOCALREV_LIBVIRT:=daily-cron.$branch}
 : ${LOCALREV_RUMPUSERXEN:=daily-cron.$branch}
 : ${LOCALREV_SEABIOS:=daily-cron.$branch}
+: ${LOCALREV_OVMF:=daily-cron.$branch}
 
 : ${TREEBASE_LINUX_XCP:=http://hg.uk.xensource.com/carbon/trunk/linux-2.6.27}
 
diff --git a/ap-fetch-version b/ap-fetch-version
index f6c65d8..33aaf00 100755
--- a/ap-fetch-version
+++ b/ap-fetch-version
@@ -85,6 +85,10 @@ seabios)
repo_tree_rev_fetch_git seabios \
$TREE_SEABIOS_UPSTREAM master $LOCALREV_SEABIOS
;;
+ovmf)
+   repo_tree_rev_fetch_git ovmf \
+   $TREE_OVMF_UPSTREAM master $LOCALREV_OVMF
+   ;;
 osstest)
if [ x$OSSTEST_USE_HEAD != xy ] ; then
git fetch $HOME/testing.git pretest:ap-fetch 2
diff --git a/ap-fetch-version-old b/ap-fetch-version-old
index 43c997c..7b7f50e 100755
--- a/ap-fetch-version-old
+++ b/ap-fetch-version-old
@@ -30,6 +30,7 @@ select_xenbranch
 : ${BASE_LOCALREV_LIBVIRT:=daily-cron.$branch.old}
 : ${BASE_LOCALREV_RUMPUSERXEN:=daily-cron.$branch.old}
 : ${BASE_LOCALREV_SEABIOS:=daily-cron.$branch.old}
+: ${BASE_LOCALREV_OVMF:=daily-cron.$branch.old}
 
 : ${BASE_TREE_QEMU_UPSTREAM:=${TREE_QEMU_UPSTREAM/\/staging\//\/}}
 
@@ -92,6 +93,10 @@ seabios)
repo_tree_rev_fetch_git seabios \
$BASE_TREE_SEABIOS xen-tested-master $BASE_LOCALREV_SEABIOS
;;
+ovmf)
+   repo_tree_rev_fetch_git ovmf \
+   $BASE_TREE_OVMF xen-tested-master $BASE_LOCALREV_OVMF
+   ;;
 osstest)
if [ x$OSSTEST_USE_HEAD != xy ] ; then
git fetch -f $HOME/testing.git incoming:ap-fetch
diff --git a/ap-print-url b/ap-print-url
index 7b27e1e..3db2375 100755
--- a/ap-print-url
+++ b/ap-print-url
@@ -58,6 +58,9 @@ rumpuserxen)
 seabios)
echo $TREE_SEABIOS_UPSTREAM
;;
+ovmf)
+   echo $TREE_OVMF_UPSTREAM
+   ;;
 osstest)
echo none:;
;;
diff --git a/ap-push b/ap-push
index a2aa747..01089f3 100755
--- a/ap-push
+++ b/ap-push
@@ -36,6 +36,7 @@ TREE_XEN=$PUSH_TREE_XEN
 TREE_LIBVIRT=$PUSH_TREE_LIBVIRT
 TREE_RUMPUSERXEN=$PUSH_TREE_RUMPUSERXEN
 TREE_SEABIOS=$PUSH_TREE_SEABIOS
+TREE_OVMF=$PUSH_TREE_OVMF
 
 if info_linux_tree $branch; then
cd $repos/linux
@@ -92,6 +93,10 @@ seabios)
cd $repos/seabios
git push $TREE_SEABIOS $revision:refs/heads/xen-tested-master
;;
+ovmf)
+   cd $repos/ovmf
+   git push $TREE_OVMF $revision:refs/heads/xen-tested-master
+   ;;
 osstest)
git push $HOME/testing.git $revision:incoming
git push $XENBITS:/home/xen/git/osstest.git $revision:master
diff --git a/cr-daily-branch b/cr-daily-branch
index fc663ce..c7d1071 100755
--- a/cr-daily-branch
+++ b/cr-daily-branch
@@ -146,6 +146,16 @@ if [ x$REVISION_SEABIOS = x ]; then
: REVISION_SEABIOS from Config.mk
 fi
 fi
+if [ x$REVISION_OVMF = x ]; then
+if [ x$tree = xovmf ]; then
+   TREE_OVMF=$TREE_OVMF_UPSTREAM
+   export TREE_OVMF
+   determine_version REVISION_OVMF ovmf OVMF
+   export REVISION_OVMF
+else
+   : REVISION_OVMF from Config.mk
+fi
+fi
 if [ x$REVISION_LIBVIRT = x ]; then
determine_version REVISION_LIBVIRT libvirt LIBVIRT
export REVISION_LIBVIRT
@@ -203,6 +213,10 @@ seabios)
realtree=seabios
NEW_REVISION=$REVISION_SEABIOS
;;
+ovmf)
+   realtree=ovmf
+   NEW_REVISION=$REVISION_OVMF
+   ;;
 *)

Re: [Xen-devel] [PATCH] arm64/EFI: minor corrections

On Mon, 2015-01-12 at 08:59 +, Jan Beulich wrote:
 - don't bail when using the last slot of bootinfo.mem.bank[] (due to
   premature incrementing of the array index)
 - GUIDs should be static const (and placed into .init.* whenever
   possible)
 - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly
   append one to the message passed to the function
  - Avoid needless use of DisplayUint via __stringify.

 Signed-off-by: Jan Beulich jbeul...@suse.com

Acked-by: Ian Campbell ian.campb...@citrix.com

Will you commit yourself or would you like me to?

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] summary-1 (v2) Design proposal for RMRR fix

 From: Ian Campbell [mailto:ian.campb...@citrix.com]
 Sent: Monday, January 12, 2015 9:42 PM
 
 On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote:
  3) report-sel vs. report-all
 
 One thing I'm not clear on is whether you are suggesting to reserve RMRR
 (either -all or -sel) for every domain by default, or whether the guest
 CFG will need to explicitly opt-in, IOW is there a 3rd report-none
 option which is the default unless otherwise requested (e.g. by
 e820_host=1, or some other new option)?

only when a device is assigned (or potentially prepare for hotplug usage),
and report-all/sel is from hypervisor p.o.v to tell userspace about all RMRR
regions on this platform or just RMRR regions belonging to specified devices.
'e820_host' only makes holes to prepare (more than RMRR requires). finally 
we still need query actual reserved regions reported by hypervisor and 
then mark them reserved in guest e820 table and avoid conflict for PCI BAR 
allocation etc. 

Thanks
Kevin
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [qemu-mainline bisection] complete test-amd64-amd64-xl-qemuu-win7-amd64

branch xen-unstable
xen branch xen-unstable
job test-amd64-amd64-xl-qemuu-win7-amd64
test windows-install

Tree: linux git://xenbits.xen.org/linux-pvops.git
Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git
Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git
Tree: qemuu git://git.qemu.org/qemu.git
Tree: xen git://xenbits.xen.org/xen.git

*** Found and reproduced problem changeset ***

  Bug is in tree:  qemuu git://git.qemu.org/qemu.git
  Bug introduced:  49d2e648e8087d154d8bf8b91f27c8e05e79d5a6
  Bug not present: 60fb1a87b47b14e4ea67043aa56f353e77fbd70a


  commit 49d2e648e8087d154d8bf8b91f27c8e05e79d5a6
  Author: Marcel Apfelbaum marce...@redhat.com
  Date:   Tue Dec 16 16:58:05 2014 +
  
  machine: remove qemu_machine_opts global list
  
  QEMU has support for options per machine, keeping
  a global list of options is no longer necessary.
  
  Signed-off-by: Marcel Apfelbaum marce...@redhat.com
  Reviewed-by: Alexander Graf ag...@suse.de
  Reviewed-by: Greg Bellows greg.bell...@linaro.org
  Message-id: 1418217570-15517-2-git-send-email-marce...@redhat.com
  Signed-off-by: Peter Maydell peter.mayd...@linaro.org


For bisection revision-tuple graph see:
   
http://www.chiark.greenend.org.uk/~xensrcts/results/bisect.qemu-mainline.test-amd64-amd64-xl-qemuu-win7-amd64.windows-install.html
Revision IDs in each graph node refer, respectively, to the Trees above.


Searching for failure / basis pass:
 33123 fail [host=itch-mite] / 32598 ok.
Failure / basis pass flights: 33123 / 32598
(tree with no url: seabios)
Tree: linux git://xenbits.xen.org/linux-pvops.git
Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git
Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git
Tree: qemuu git://git.qemu.org/qemu.git
Tree: xen git://xenbits.xen.org/xen.git
Latest 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
Basis pass 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
7e58e2ac7778cca3234c33387e49577bb7732714 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
Generating revisions with ./adhoc-revtuple-generator  
git://xenbits.xen.org/linux-pvops.git#83a926f7a4e39fb6be0576024e67fe161593defa-83a926f7a4e39fb6be0576024e67fe161593defa
 
git://xenbits.xen.org/osstest/linux-firmware.git#c530a75c1e6a472b0eb9558310b518f0dfcd8860-c530a75c1e6a472b0eb9558310b518f0dfcd8860
 
git://xenbits.xen.org/staging/qemu-xen-unstable.git#b0d42741f8e9a00854c3b3faca1da84bfc69bf22-b0d42741f8e9a00854c3b3faca1da84bfc69bf22
 
git://git.qemu.org/qemu.git#7e58e2ac7778cca3234c33387e49577bb7732714-ab0302ee764fd702465aef6d88612cdff4302809
 
git://xenbits.xen.org/xen.git#36174af3fbeb1b662c0eadbfa193e77f68cc955b-36174af3fbeb1b662c0eadbfa193e77f68cc955b
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/qemu
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://git.qemu.org/qemu.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/qemu
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://git.qemu.org/qemu.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
Loaded 1005 nodes in revision graph
Searching for test results:
 32585 pass irrelevant
 32598 pass 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
7e58e2ac7778cca3234c33387e49577bb7732714 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32611 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32626 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32689 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32659 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32876 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32854 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22

Re: [Xen-devel] [libvirt] [PATCH 09/12] Revert src/xenconfig: Xen-xl parser

2015-01-12 Thread John Ferlan



On 01/10/2015 12:03 AM, Jim Fehlig wrote:
 This reverts commit 2c78051a14acfb7aba078d569b1632dfe0ca0853.
 
 Conflicts:
   src/Makefile.am
 
 Signed-off-by: Jim Fehlig jfeh...@suse.com
 ---
  .gitignore|   1 -
  cfg.mk|   3 +-
  configure.ac  |   1 -
  po/POTFILES.in|   1 -
  src/Makefile.am   |  25 +--
  src/libvirt_xenconfig.syms|   4 -
  src/xenconfig/xen_common.c|   3 +-
  src/xenconfig/xen_xl.c| 499 
 --
  src/xenconfig/xen_xl.h|  33 ---
  src/xenconfig/xen_xl_disk.l   | 256 --
  src/xenconfig/xen_xl_disk_i.h |  39 
  11 files changed, 4 insertions(+), 861 deletions(-)
 

OK - so reverting is fine; however, xen_xl_disk.{c,h} still exist...
Simple enough solution, but they will show up in someone's git status
output since they are also removed from .gitignore.

There are a couple Coverity issues from the next 3 patches - I'll note
them for those directly.

ACK

John


 diff --git a/.gitignore b/.gitignore
 index eac2203..9d09709 100644
 --- a/.gitignore
 +++ b/.gitignore
 @@ -140,7 +140,6 @@
  /src/remote/*_protocol.[ch]
  /src/rpc/virkeepaliveprotocol.[ch]
  /src/rpc/virnetprotocol.[ch]
 -/src/xenconfig/xen_xl_disk.[ch]
  /src/test_libvirt*.aug
  /src/test_virtlockd.aug
  /src/util/virkeymaps.h
 diff --git a/cfg.mk b/cfg.mk
 index 3df3dcb..21f83c3 100644
 --- a/cfg.mk
 +++ b/cfg.mk
 @@ -89,9 +89,8 @@ distdir: sc_vulnerable_makefile_CVE-2012-3386.z
  endif
  
  # Files that should never cause syntax check failures.
 -#  (^(HACKING|docs/(news\.html\.in|.*\.patch))|\.(po|fig|gif|ico|png))$$
  VC_LIST_ALWAYS_EXCLUDE_REGEX = \
 -  
 (^(HACKING|docs/(news\.html\.in|.*\.patch)|src/xenconfig/xen_xl_disk.[chl])|\.(po|fig|gif|ico|png))$$
 +  (^(HACKING|docs/(news\.html\.in|.*\.patch))|\.(po|fig|gif|ico|png))$$
  
  # Functions like free() that are no-ops on NULL arguments.
  useless_free_options =   \
 diff --git a/configure.ac b/configure.ac
 index 167b875..9d12079 100644
 --- a/configure.ac
 +++ b/configure.ac
 @@ -146,7 +146,6 @@ m4_ifndef([LT_INIT], [
  ])
  AM_PROG_CC_C_O
  AM_PROG_LD
 -AM_PROG_LEX
  
  AC_MSG_CHECKING([for how to mark DSO non-deletable at runtime])
  LIBVIRT_NODELETE=
 diff --git a/po/POTFILES.in b/po/POTFILES.in
 index 094c8e3..e7cb2cc 100644
 --- a/po/POTFILES.in
 +++ b/po/POTFILES.in
 @@ -247,7 +247,6 @@ src/xenapi/xenapi_driver.c
  src/xenapi/xenapi_utils.c
  src/xenconfig/xen_common.c
  src/xenconfig/xen_sxpr.c
 -src/xenconfig/xen_xl.c
  src/xenconfig/xen_xm.c
  tests/virpolkittest.c
  tools/libvirt-guests.sh.in
 diff --git a/src/Makefile.am b/src/Makefile.am
 index c7975e5..e0e47d0 100644
 --- a/src/Makefile.am
 +++ b/src/Makefile.am
 @@ -1000,22 +1000,11 @@ CPU_SOURCES = 
 \
  VMX_SOURCES =\
   vmx/vmx.c vmx/vmx.h
  
 -AM_LFLAGS = -Pxl_disk_ --header-file=../$*.h
 -LEX_OUTPUT_ROOT = lex.xl_disk_
 -BUILT_SOURCES += xenconfig/xen_xl_disk.c xenconfig/xen_xl_disk.h
 -# Generated header file is not implicitly added to dist
 -EXTRA_DIST += xenconfig/xen_xl_disk.h
 -CLEANFILES += xenconfig/xen_xl_disk.h xenconfig/xen_xl_disk.c
 -
 -XENXLDISKPARSER_SOURCES = xenconfig/xen_xl_disk.l
 -
  XENCONFIG_SOURCES =  \
   xenconfig/xenxs_private.h   \
 - xenconfig/xen_common.c xenconfig/xen_common.h   \
 + xenconfig/xen_common.c xenconfig/xen_common.h   \
   xenconfig/xen_sxpr.c xenconfig/xen_sxpr.h   \
 - xenconfig/xen_xm.c xenconfig/xen_xm.h   \
 - xenconfig/xen_xl.c xenconfig/xen_xl.h   \
 - xenconfig/xen_xl_disk_i.h
 + xenconfig/xen_xm.c xenconfig/xen_xm.h
  
  pkgdata_DATA =   cpu/cpu_map.xml
  
 @@ -1070,19 +1059,10 @@ libvirt_vmx_la_SOURCES = $(VMX_SOURCES)
  endif WITH_VMX
  
  if WITH_XENCONFIG
 -# Flex generated XL disk parser needs to be compiled without WARN_FLAGS
 -# Add the generated object to its own library to control CFLAGS
 -noinst_LTLIBRARIES += libvirt_xenxldiskparser.la
 -libvirt_xenxldiskparser_la_CFLAGS = \
 - -I$(srcdir)/conf $(AM_CFLAGS) -Wno-unused-parameter
 -libvirt_xenxldiskparser_la_SOURCES = \
 - $(XENXLDISKPARSER_SOURCES)
 -
  noinst_LTLIBRARIES += libvirt_xenconfig.la
  libvirt_la_BUILT_LIBADD += libvirt_xenconfig.la
  libvirt_xenconfig_la_CFLAGS = \
   -I$(srcdir)/conf $(AM_CFLAGS)
 -libvirt_xenconfig_la_LIBADD = libvirt_xenxldiskparser.la
  libvirt_xenconfig_la_SOURCES = $(XENCONFIG_SOURCES)
  endif WITH_XENCONFIG
  
 @@ -1844,7 +1824,6 @@ EXTRA_DIST +=   
 \
   $(VBOX_DRIVER_EXTRA_DIST)   \
   $(VMWARE_DRIVER_SOURCES)\

Re: [Xen-devel] [RFC V9 2/4] domain snapshot overview

On Mon, 2015-01-12 at 00:01 -0700, Chun Yan Liu wrote:
 
  On 1/8/2015 at 08:26 PM, in message 
  1420719995.19787.62.ca...@citrix.com, Ian
 Campbell ian.campb...@citrix.com wrote: 
  On Mon, 2014-12-22 at 20:42 -0700, Chun Yan Liu wrote: 

On 12/19/2014 at 06:25 PM, in message  
  1418984720.20028.15.ca...@citrix.com, 
   Ian Campbell ian.campb...@citrix.com wrote:  
On Thu, 2014-12-18 at 22:45 -0700, Chun Yan Liu wrote:  
   
  On 12/18/2014 at 11:10 PM, in message   
1418915443.11882.86.ca...@citrix.com,  
 Ian Campbell ian.campb...@citrix.com wrote:   
  On Tue, 2014-12-16 at 14:32 +0800, Chunyan Liu wrote:   
   Changes to V8:   
 * add an overview document, so that one can has a overall look  

   about the whole domain snapshot work, limits, requirements,   
   how to do, etc.   
  
   =
  
   Domain snapshot overview   
 
  I don't see a similar section for disk snapshots, are you not   
  considering those here except as a part of a domain snapshot or is 
  this   
   
  an oversight?   
 
  There are three main use cases (that I know of at least) for   
  snapshotting like behaviour.   
 
  One is as you've mentioned below for backup, i.e. to preserve the 
  VM   
  at a certain point in time in order to be able to roll back to it. 
  Is   
  this the only usecase you are considering?   
   
 Yes. I didn't take disk snapshot thing into the scope.  
   
 
  A second use case is to support gold image type deployments, i.e. 

  where you create one baseline single disk image and then clone it   
  multiple times to deploy lots of guests. I think this is usually a 
  disk  

  snapshot type thing, but maybe it can be implemented as restoring 
  a   
  gold domain snapshot multiple times (e.g. for start of day 
  performance   
  reasons).   
   
 As we initially discussed about the thing, disk snapshot thing can be 
  
  done  
 be existing tools directly like qemu-img, vhd-util.  
  
I was reading this section as a more generic overview of snapshotting,  
without reference to where/how things might ultimately be implemented.  
  
From a design point of view it would be useful to cover the various use 
 
cases, even if the solution is that the user implements them using CLI  
tools by hand (xl) or the toolstack does it for them internally  
(libvirt).  
  
This way we can more clearly see the full picture, which allows us to  
validate that we are making the right choices about what goes where.  

   OK. I see. I think this user case is more like how to use the snapshot,  
  rather 
   than how to implement snapshot. Right? 
   
  Correct, what the user is actually trying to achieve with the 
  functionality. 
   
   'Gold image' or 'Gold domain', the needed work is more like cloning 
   disks. 
   
  Yes, or resuming multiple times. 
 
 I see. But IMO it doesn't need change in snapshot design and implementation.
 Even resuming multiple times, they couldn't use the same image but duplicate
 the image multiple times.

Perhaps, but the use case should be included so that this rationale for
not worrying about it can be written down (so that people like me don't
keep asking...) 

 
   
  The third case, (which is similar to the first), is taking a disk   
  snapshot in order to be able to run you usual backup software on 
  the   
  snapshot (which is now unchanging, which is handy) and then 
  deleting the  

  disk snapshot (this differs from the first case in which disk is 
  active   
   
  after the snapshot, and due to the lack of the memory part).   
   
 Sorry, I'm still not quite clear about what this user case wants to 
 do.  
  
The user has an active domain which they want to backup, but backup  
software often does not cope well if the data is changing under its  
feet.  
  
So the users wants to take a snapshot of the domains disks while 
leaving  
the domain running, so they can backup that static version of the disk  
out of band from the VM itself (e.g. by attaching it to a separate  
backup VM).  

   Got it. So that's simply disk-only snapshot when domian is active. As you 
   mentioned below, that needs guest agent to quiesce the disks. But 
   currently 
   xen hypervisor can't support that, right? 
   
  I don't think that's relevant right now, let me explain: 
   
  I think it's important to consider all the use cases for snapshotting, 
  not because I think they need to be implemented now but to make sure 
  that we don't make any design decisions now which would make it 
  *impossible* to implement it in the future (at least

Re: [Xen-devel] (v2) Design proposal for RMRR fix

On Mon, Jan 12, 2015 at 1:56 PM, Pasi Kärkkäinen pa...@iki.fi wrote:
On Mon, Jan 12, 2015 at 11:25:56AM +, George Dunlap wrote:
So qemu-traditional didn't particularly expect to know the guest
memory layout. qemu-upstream does; it expects to know what areas of
memory are guest memory and what areas of memory are unmapped. If a
read or write happens to a gpfn which *xen* knows is valid, but which
*qemu-upstream* thinks is unmapped, then qemu-upstream will crash.

There are two work-arounds at the moment:
1. A flag which tells hvmloader not to relocate memory
2. The option to tell qemu-upstream to make the memory hole larger.

Both are just work-arounds though; a proper fix would be to allow
hvmloader some way of telling qemu that the memory has moved, so it
can update its memory map.

Hmm, wasn't this changed slightly during Xen 4.5 development by Don Slutz?

You can now specify the mmio_hole size for HVM guests when using
qemu-upstream:
http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List

Bigger PCI MMIO hole in QEMU via the mmio_hole parameter in guest config,
which allows configuring the MMIO size below 4GB.

Backport pc q35: Add new machine opt max-ram-below-4g:
http://xenbits.xen.org/gitweb/?p=qemu-upstream-unstable.git;a=commit;h=ffdacad07002e14a8072ae28086a57452e48d458

x86: hvm: Allow configuration of the size of the mmio_hole.:
http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=2d927fc41b8e130b3b8910e4442d469d2ac7

Yes -- that's workaround #2 above (tell qemu-upstream to make the
memory hole larger). But it's still a work-around, because it
requires the admin to figure out how big a memory hole he needs. With
qemu-traditional, he could just assign whatever devices he wanted, and
hvmloader would make it the right size automatically. Ideally that's
how it would work for qemu-upstream as well.

-George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2 4/4] dt-uart: support /chosen/stdout-path property.

On Thu, 2015-01-08 at 13:30 +, Julien Grall wrote:
 On 08/01/15 13:22, Ian Campbell wrote:
  On Thu, 2015-01-08 at 13:15 +, Julien Grall wrote:
  Hi Ian,
 
  On 08/01/15 11:53, Ian Campbell wrote:
  +ret = dt_property_read_string(chosen, stdout-path, 
  stdout);
  +if ( ret = 0 )
  +{
  +printk(Taking dtuart configuration from 
  /chosen/stdout-path\n);
  +if ( strlcpy(opt_dtuart, stdout, sizeof(opt_dtuart))
  + = sizeof(opt_dtuart) )
  +printk(WARNING: /chosen/stdout-path too long, 
  truncated\n);
 
  I would add XENLOG_WARNING here and ...
 
  +}
  +else if ( ret != -EINVAL /* Not present */ )
  +printk(Failed to read /chosen/stdout-path (%d)\n, ret);
 
  XENLOG_ERROR here.
  
  In practice these only go via the earlyprintk mechanism, since the
  console can't be setup yet. I'm not sure it's worthwhile tagging such
  messages.
 
 earlyprintk is transparent for the console code. Tagging may help if we
 decide to implement other kind of console later (VGA, PCI UART...).
 
 Anyway, I doesn't change much things here as the message is tagged as
 WARNING by default. So it will be always printing.

It turns out that none of the existing prints in this function use the
tags, and I think its of marginal use in this context so I don't think
it is necessary to go changing them all, or to only use the tags for
these two messages.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 On 12.01.15 at 10:56, kevin.t...@intel.com wrote:
 the result is related to another open whether we want to block guest
 boot for such problem. If 'warn' in domain builder is acceptable, we
 don't need to change lowmem for such rare 1GB case, just throws
 a warning for unnecessary conflictions (doesn't hurt if user doesn't
 assign it). 

And how would you then deal with the one guest needing that
range reserved?

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel

On Mon, 2015-01-12 at 10:03 +, Andrew Cooper wrote:
 This is RFC because explicitly changes the logic introduced by c/s b34f2c375
 xsm: label xen-consumer event channels, and is only compile tested.
 
 Xen event channels are not internal resources.  They still have one end in a
 domain, and are created at the request of privileged domains.  This logic
 which successfully creates a Xen event channel opens up undesirable failure
 cases with ill-specified XSM policies.
 
 If a domain is permitted to create ioreq servers or memevent listeners, but
 not to create event channels, the ioreq/memevent creation will succeed but
 attempting to bind the returned event channel will fail without any indication
 of a permission error.
 
 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
 CC: Daniel De Graaf dgde...@tycho.nsa.gov
 CC: Keir Fraser k...@xen.org
 CC: Jan Beulich jbeul...@suse.com
 CC: Tim Deegan t...@xen.org
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Ian Jackson ian.jack...@eu.citrix.com
 ---
  xen/common/event_channel.c |4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
 index cfe4978..89a7d99 100644
 --- a/xen/common/event_channel.c
 +++ b/xen/common/event_channel.c
 @@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel(
  chn = evtchn_from_port(d, port);
  
  rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid);
 +if ( rc )
 +goto out;

out here appears to return port, not rc so you aren't returning failure,
but an even more half setup port than before.

And I think you need to free the port on failure too.

  
  chn-state = ECS_UNBOUND;
  chn-xen_consumer = get_xen_consumer(notification_fn);
  chn-notify_vcpu_id = local_vcpu-vcpu_id;
 -chn-u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID;
 +chn-u.unbound.remote_domid = remote_domid;
  
   out:
  spin_unlock(d-event_lock);



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 On 12.01.15 at 11:12, kevin.t...@intel.com wrote:
  From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Monday, January 12, 2015 6:09 PM
 
  On 12.01.15 at 10:56, kevin.t...@intel.com wrote:
  the result is related to another open whether we want to block guest
  boot for such problem. If 'warn' in domain builder is acceptable, we
  don't need to change lowmem for such rare 1GB case, just throws
  a warning for unnecessary conflictions (doesn't hurt if user doesn't
  assign it).
 
 And how would you then deal with the one guest needing that
 range reserved?
 
 if guest needs the range, then report-all or report-sel doesn't matter.
 domain builder throws the warning, and later device assignment will
 fail (or warn w/ override). In reality I think 1GB is rare. Making such
 assumption to simplify implementation is reasonable.

One of my main problems with all you recent argumentation here
is the arbitrary use of the 1Gb boundary - there's nothing special
in this discussion with where the boundary is. Everything revolves
around the (undue) effect of report-all on domains not needing all
of the ranges found on the host.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed

2015-01-12 Thread Stefano Stabellini

On Wed, 24 Dec 2014, Liang Li wrote:
 Use the 'xl pci-attach $DomU $BDF' command to attach more then
 one PCI devices to the guest, then detach the devices with
 'xl pci-detach $DomU $BDF', after that, re-attach these PCI
 devices again, an error message will be reported like following:
 
 libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive
 an error message from QMP server: Duplicate ID 'pci-pt-03_10.1'
 for device.
 
 The count of calling xen_pt_region_add and xen_pt_region_del are
 not the same will cause the XenPCIPassthroughState and it's related
 QemuOpts object not be released properly.

Thanks for the patch!

From this description, I don't quite understand why the
memory_region_ref and memory_region_unref calls are wrong.  What do you
mean by The count of calling xen_pt_region_add and xen_pt_region_del
are not the same?

On unplug xen_pt_region_del does not get called?
Or the memory region argument is not exactly the same as the one
initially passed to xen_pt_region_add?


 Signed-off-by: Liang Li liang.z...@intel.com
 Reported-by: Longtao Pang longtaox.p...@intel.com
 ---
  hw/xen/xen_pt.c | 4 
  1 file changed, 4 deletions(-)
 
 diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
 index c1bf357..523b8a2 100644
 --- a/hw/xen/xen_pt.c
 +++ b/hw/xen/xen_pt.c
 @@ -588,7 +588,6 @@ static void xen_pt_region_add(MemoryListener *l, 
 MemoryRegionSection *sec)
  XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
   memory_listener);
  
 -memory_region_ref(sec-mr);
  xen_pt_region_update(s, sec, true);
  }
  
 @@ -598,7 +597,6 @@ static void xen_pt_region_del(MemoryListener *l, 
 MemoryRegionSection *sec)
   memory_listener);
  
  xen_pt_region_update(s, sec, false);
 -memory_region_unref(sec-mr);
  }
  
  static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec)
 @@ -606,7 +604,6 @@ static void xen_pt_io_region_add(MemoryListener *l, 
 MemoryRegionSection *sec)
  XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
   io_listener);
  
 -memory_region_ref(sec-mr);
  xen_pt_region_update(s, sec, true);
  }
  
 @@ -616,7 +613,6 @@ static void xen_pt_io_region_del(MemoryListener *l, 
 MemoryRegionSection *sec)
   io_listener);
  
  xen_pt_region_update(s, sec, false);
 -memory_region_unref(sec-mr);
  }
  
  static const MemoryListener xen_pt_memory_listener = {
 -- 
 1.9.1
 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c

2015-01-12 Thread Roger Pau Monné

El 12/01/15 a les 8.09, Bob Liu ha escrit:
 
 On 01/09/2015 11:51 PM, Roger Pau Monné wrote:
 El 06/01/15 a les 14.19, Bob Liu ha escrit:
 When there is no enough free grants, gnttab_alloc_grant_references()
 will fail and block request queue will stop.
 If the system is always lack of grants, blkif_restart_queue_callback() 
 can't be
 scheduled and block request queue can't be restart(block I/O hang).

 But when there are former requests complete, some grants may free to
 persistent_gnts_c, we can give the request queue another chance to restart 
 and
 avoid block hang.

 Reported-by: Junxiao Bi junxiao...@oracle.com
 Signed-off-by: Bob Liu bob@oracle.com
 ---
  drivers/block/xen-blkfront.c |   11 +++
  1 file changed, 11 insertions(+)

 diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
 index 2236c6f..dd30f99 100644
 --- a/drivers/block/xen-blkfront.c
 +++ b/drivers/block/xen-blkfront.c
 @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, 
 struct blkfront_info *info,
 }
 }
 }
 +
 +   /*
 +* Request queue would be stopped if failed to alloc enough grants and
 +* won't be restarted until gnttab_free_count = info-callback-count.
 +*
 +* But there is another case, once we have enough persistent grants we
 +* can try to restart the request queue instead of continue to wait for
 +* 'gnttab_free_count'.
 +*/
 +   if (info-persistent_gnts_c = info-callback.count)
 +   schedule_work(info-work);

 I guess I'm missing something here, but blkif_completion is called by
 blkif_interrupt, which in turn calls kick_pending_request_queues when
 finished, which IMHO should be enough to restart the processing of requests.

 
 You are right, sorry for the mistake.
 
 The problem we met was a xenblock I/O hang.
 Dumped data showed at that time info-persistent_gnt_c = 8, max_gref = 8
 but block request queue was still stopped.
 It's very hard to reproduce this issue, we only see it once.
 
 I think there might be a race condition:
 
 request A  request B:
 
info-persistent_gnts_c  max_grefs
and fail to alloc enough grants
 
 
 
 interrupt happen, blkif_complte():
 info-persistent_gnts_c++
 kick_pending_request_queues()
 
 stop block request queue
 added to callback list
 
 If the system don't have enough grants(but have enough persistent_gnts),
 request queue would still hang.

Not sure how can this happen, blkif_queue_request explicitly checks that
persistent_gnts_c  max_grefs before adding the callback and stopping
the request queue, so in your case the queue should not be blocked. Can
you dump the state of info-connected?

Roger.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications

 On 12.01.15 at 12:33, andrew.coop...@citrix.com wrote:
 On 12/01/15 08:57, Jan Beulich wrote:
 --- a/xen/include/xen/event.h
 +++ b/xen/include/xen/event.h
 @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru
  d-evtchn_port_ops-init(d, evtchn);
  }
  
 -static inline void evtchn_port_set_pending(struct vcpu *v,
 +static inline void evtchn_port_set_pending(struct domain *d,
 +   unsigned int vcpu_id,
 struct evtchn *evtchn)
 
 I would rename this to the, now vacant, evtchn_set_pending().  It takes
 an evtchn* not a port.  (Its sole caller was evtchn_set_pending(), so
 the patch won't grow)

No (and I had actually considered it) - that would get its name out of
sync with all its sibling wrappers.

 Furthermore, all callers except send_guest_vcpu_virq() currently use
 evtchn-notify_vcpu_id to get a struct vcpu* to pass.  I think you can
 drop the vcpu_id parameter and use evtchn-notify_vcpu_id directly,
 which reduces the likelyhood of a bug where the evtchn is bound to one
 vcpu but a caller gets the wrong id and raises the event channel on the
 wrong vcpu.

Generally a nice idea, but it doesn't immediately/obviously fit with
the use in send_guest_vcpu_virq().

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] xl only waits 33 seconds for ballooning to complete

On Thu, Jan 8, 2015 at 1:11 AM, Mike Latimer mlati...@suse.com wrote:
 On Wednesday, January 07, 2015 09:38:31 AM Ian Campbell wrote:
 That's exactly what I was about to suggest as I read the penultimate
 paragraph, i.e. keep waiting so long as some reasonable delta occurs on
 each iteration.

 Thanks, Ian.

 I wonder if there is a future-safe threshold on the amount of delta that
 indicates progress is being made. Should some minimum safe progress amount or
 percentage be set, or is it better to just make sure free memory is increasing
 at the end of each iteration of the loop?

 For example, the following simple change just tracks free_memkb and only
 decrements the retry count if it has not increased since the last check:

 --
 diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
 index ed0d478..4cf2991 100644
 --- a/tools/libxl/xl_cmdimpl.c
 +++ b/tools/libxl/xl_cmdimpl.c
 @@ -2196,7 +2196,7 @@ static int preserve_domain(uint32_t *r_domid,
 libxl_event *event,
  static int freemem(uint32_t domid, libxl_domain_build_info *b_info)
  {
  int rc, retries = 3;
 -uint32_t need_memkb, free_memkb;
 +uint32_t need_memkb, free_memkb, free_memkb_prev = 0;

  if (!autoballoon)
  return 0;
 @@ -2229,7 +2229,10 @@ static int freemem(uint32_t domid,
 libxl_domain_build_info *b_info)
  if (rc  0)
  return rc;

 -retries--;
 +/* only decrement retry count if free_memkb is not increasing */
 +if (free_memkb = free_memkb_prev)
 +retries--;
 +free_memkb_prev = free_memkb;

I would:
1. Reset the retries after a successful increase
2. Not allow free_memkb_prev to go down.

So maybe something like the following?

 if (free_memkb = free_memkb_prev) {
   retries--;
 } else {
   retries = MAX_RETRIES;
   free_memkb_prev = free_memkb;
 }

I'm inclined to say we could add an option to say wait forever, or
to increase the period of the checks; but ultimately at some point
someone (either xl or the human) needs to timeout and say, This is
never going to finish.  10s seems like a very conservative default.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 On 12.01.15 at 13:16, kevin.t...@intel.com wrote:
 I've explained this several times. If there's a violation on above 
 assumption 
 from required devices, same for report-all and report-sel. If the violation 
 is 
 caused by unnecessary devices, please note I'm proposing 'warn' here so
 report-all at most just adds more warnings in domain builder. the conflict
 will be caught later if it becomes relevant to be assigned (e.g. thru 
 hotplug).

Since we're apparently not understanding one another, please
explain with a suitable example how you envision things to behave.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Monday, January 12, 2015 5:51 PM

  On 12.01.15 at 10:41, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Monday, January 12, 2015 5:33 PM
   On 12.01.15 at 09:46, kevin.t...@intel.com wrote:
From: Jan Beulich [mailto:jbeul...@suse.com]
   Sent: Friday, January 09, 2015 6:35 PM
On 09.01.15 at 11:10, kevin.t...@intel.com wrote:
 From: Jan Beulich [mailto:jbeul...@suse.com]
The question isn't about migrating with devices assigned, but about
assigning devices after migration (consider a dual vif + SR-IOV NIC
guest setup where the SR-IOV NIC gets hot-removed before
migration and a new one hot-plugged afterwards).

Furthermore any tying of the guest memory layout to the host's
where the guest first boots is awkward, as post-migration there's
not going to be any reliable correlation between the guest layout
and the new host's.

how can you solve this? like above example, a NIC on node-A leaves
a reserved region in guest e820. now it's hot-removed and then
migrated to node-b. there's no way to update e820 again since it's
only boot structure. then user will still see such awkward regions.
since it's not avoidable, report-all in the summary mail looks not
causing a new problem.

   The solution to this are reserved regions specified in the guest config,
   independent of host characteristics.

   I don't think how reserved regions are specified matter here. My point
   is that when a region is reserved in e820 at boot time, there's no way
   to erase that knowledge in the guest even when devices causing that
   reservation are hot removed later.

  I don't think anyone ever indicated that such erasure would be
  needed/wanted - I'm not sure how you ended up there.

  I ended here to indicate that report-all which gives user more reserved
  regions than necessary is not a weird case since above scenario can also
  create such fact. User shouldn't set expectation about reserved region
  layout. and this argument is necessary to support our proposal of using
  report-all. :-)

 The fact that ranges can't be removed from a guest's memory map
 is irrelevant - there's simply no question that this is that way. The
 main counter argument against report-all remains: It may result in
 unnecessarily little low memory in guests not needing all of the host
 regions to be reserved for them.

the result is related to another open whether we want to block guest
boot for such problem. If 'warn' in domain builder is acceptable, we
don't need to change lowmem for such rare 1GB case, just throws
a warning for unnecessary conflictions (doesn't hurt if user doesn't
assign it). 

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2] x86/HVM: make hvm_efer_valid() honor guest features

On 12/01/15 08:00, Jan Beulich wrote:
 Following the earlier similar change validating CR4 modifications.

 Signed-off-by: Jan Beulich jbeul...@suse.com

Acked-by: Andrew Cooper andrew.coop...@citrix.com

 ---
 v2: consider CR0.PG during restore when checking EFER.LMA

 --- a/xen/arch/x86/hvm/hvm.c
 +++ b/xen/arch/x86/hvm/hvm.c
 @@ -1672,20 +1672,53 @@ static int hvm_save_cpu_ctxt(struct doma
  return 0;
  }
  
 -static bool_t hvm_efer_valid(struct domain *d,
 - uint64_t value, uint64_t efer_validbits)
 +static bool_t hvm_efer_valid(const struct vcpu *v, uint64_t value,
 + signed int cr0_pg)
  {
 -if ( nestedhvm_enabled(d)  cpu_has_svm )
 -efer_validbits |= EFER_SVME;
 +unsigned int ext1_ecx = 0, ext1_edx = 0;
  
 -return !((value  ~efer_validbits) ||
 - ((sizeof(long) != 8)  (value  EFER_LME)) ||
 - (!cpu_has_svm  (value  EFER_SVME)) ||
 - (!cpu_has_nx  (value  EFER_NX)) ||
 - (!cpu_has_syscall  (value  EFER_SCE)) ||
 - (!cpu_has_lmsl  (value  EFER_LMSLE)) ||
 - (!cpu_has_ffxsr  (value  EFER_FFXSE)) ||
 - ((value  (EFER_LME|EFER_LMA)) == EFER_LMA));
 +if ( cr0_pg  0  !is_hardware_domain(v-domain) )
 +{
 +unsigned int level;
 +
 +ASSERT(v == current);
 +hvm_cpuid(0x8000, level, NULL, NULL, NULL);
 +if ( level = 0x8001 )
 +hvm_cpuid(0x8001, NULL, NULL, ext1_ecx, ext1_edx);
 +}
 +else
 +{
 +ext1_edx = boot_cpu_data.x86_capability[X86_FEATURE_LM / 32];
 +ext1_ecx = boot_cpu_data.x86_capability[X86_FEATURE_SVM / 32];
 +}
 +
 +if ( (value  EFER_SCE) 
 + !(ext1_edx  cpufeat_mask(X86_FEATURE_SYSCALL)) )
 +return 0;
 +
 +if ( (value  (EFER_LME | EFER_LMA)) 
 + !(ext1_edx  cpufeat_mask(X86_FEATURE_LM)) )
 +return 0;
 +
 +if ( cr0_pg  0  (value  EFER_LMA)  (!(value  EFER_LME) || 
 !cr0_pg) )
 +return 0;
 +
 +if ( (value  EFER_NX)  !(ext1_edx  cpufeat_mask(X86_FEATURE_NX)) )
 +return 0;
 +
 +if ( (value  EFER_SVME) 
 + (!(ext1_ecx  cpufeat_mask(X86_FEATURE_SVM)) ||
 +  !nestedhvm_enabled(v-domain)) )
 +return 0;
 +
 +if ( (value  EFER_LMSLE)  !cpu_has_lmsl )
 +return 0;
 +
 +if ( (value  EFER_FFXSE) 
 + !(ext1_edx  cpufeat_mask(X86_FEATURE_FFXSR)) )
 +return 0;
 +
 +return 1;
  }
  
  /* These reserved bits in lower 32 remain 0 after any load of CR0 */
 @@ -1763,7 +1796,6 @@ static int hvm_load_cpu_ctxt(struct doma
  struct vcpu *v;
  struct hvm_hw_cpu ctxt;
  struct segment_register seg;
 -uint64_t efer_validbits;
  
  /* Which vcpu is this? */
  vcpuid = hvm_load_instance(h);
 @@ -1794,9 +1826,7 @@ static int hvm_load_cpu_ctxt(struct doma
  return -EINVAL;
  }
  
 -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_LMA
 -   | EFER_NX | EFER_SCE;
 -if ( !hvm_efer_valid(d, ctxt.msr_efer, efer_validbits) )
 +if ( !hvm_efer_valid(v, ctxt.msr_efer, MASK_EXTR(ctxt.cr0, X86_CR0_PG)) )
  {
  printk(XENLOG_G_ERR HVM%d restore: bad EFER %# PRIx64 \n,
 d-domain_id, ctxt.msr_efer);
 @@ -2936,12 +2966,10 @@ err:
  int hvm_set_efer(uint64_t value)
  {
  struct vcpu *v = current;
 -uint64_t efer_validbits;
  
  value = ~EFER_LMA;
  
 -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_NX | EFER_SCE;
 -if ( !hvm_efer_valid(v-domain, value, efer_validbits) )
 +if ( !hvm_efer_valid(v, value, -1) )
  {
  gdprintk(XENLOG_WARNING, Trying to set reserved bit in 
   EFER: %#PRIx64\n, value);





___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] common/memory: fix an XSM error path

On 12/01/15 08:21, Jan Beulich wrote:
 XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap
 return the extent at which failure was detected, not error indicators.

 Signed-off-by: Jan Beulich jbeul...@suse.com

Reviewed-by: Andrew Cooper andrew.coop...@citrix.com


 --- a/xen/common/memory.c
 +++ b/xen/common/memory.c
 @@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN
  return start_extent;
  args.domain = d;
  
 -rc = xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d);
 -if ( rc )
 +if ( xsm_memory_adjust_reservation(XSM_TARGET, current-domain, d) )
  {
  rcu_unlock_domain(d);
 -return rc;
 +return start_extent;
  }
  
  switch ( op )





 ___
 Xen-devel mailing list
 Xen-devel@lists.xen.org
 http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] One question about the hypercall to translate gfn to mfn.

On 09/01/15 08:02, Tian, Kevin wrote:
 From: Tim Deegan [mailto:t...@xen.org]
 Sent: Thursday, January 08, 2015 8:43 PM

 Hi,

 Not really.  The IOMMU tables are also 64-bit so there must be enough
 addresses to map all of RAM.  There shouldn't be any need for these
 mappings to be _contiguous_, btw.  You just need to have one free
 address for each mapping.  Again, following how grant maps work, I'd
 imagine that PVH guests will allocate an unused GFN for each mapping
 and do enough bookkeeping to make sure they don't clash with other GFN
 users (grant mapping, ballooning, c).  PV guests will probably be
 given a BFN by the hypervisor at map time (which will be == MFN in
 practice) and just needs to pass the same BFN to the unmap call later
 (it can store it in the GTT meanwhile).

 if possible prefer to make both consistent, i.e. always finding unused GFN?

 I don't think it will be possible.  PV domains are already using BFNs
 supplied by Xen (in fact == MFN) for backend grant mappings, which
 would conflict with supplying their own for these mappings.  But
 again, I think the kernel maintainers for Xen may have a better idea
 of how these interfaces are used inside the kernel.  For example,
 it might be easy enough to wrap the two systems inside a common API
 inside linux.   Again, following how grant mapping works seems like
 the way forward.

 
 So Konrad, do you have any insight here? :-)

Malcolm took two pages of this notebook explaining to me how he thought
it should work (in combination with his PV IOMMU work), so I'll let him
explain.

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH net] drivers: net: xen-netfront: remove residual dead code

On 10/01/15 09:20, Vincenzo Maffione wrote:
 This patch removes some unused arrays from the netfront private
 data structures. These arrays were used in flip receive mode.

Reviewed-by: David Vrabel david.vra...@citrix.com

Thanks.

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu

2015-01-12 Thread Paul Durrant

 -Original Message-
 From: xen-devel-boun...@lists.xen.org [mailto:xen-devel-
 boun...@lists.xen.org] On Behalf Of Andrew Cooper
 Sent: 12 January 2015 09:47
 To: Xen-devel
 Cc: Andrew Cooper; Keir (Xen.org); Jan Beulich
 Subject: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel()
 to take a domain rather than vcpu
 
 The resource behind an event channel is domain centric rather than vcpu
 centric, and free_xen_event_channel() only follows the vcpu's domain
 pointer.
 
 This change allows mem_event_disable() to avoid arbitrarily referencing
 d-vcpu[0] just to pass the domain.
 
 No functional change.
 
 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
 CC: Keir Fraser k...@xen.org
 CC: Jan Beulich jbeul...@suse.com

Reviewed-by: Paul Durrant paul.durr...@citrix.com

 ---
  xen/arch/x86/hvm/hvm.c |   12 ++--
  xen/common/event_channel.c |4 +---
  xen/common/mem_event.c |2 +-
  xen/include/xen/event.h|3 +--
  4 files changed, 9 insertions(+), 12 deletions(-)
 
 diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
 index 8b06bfd..acfc032 100644
 --- a/xen/arch/x86/hvm/hvm.c
 +++ b/xen/arch/x86/hvm/hvm.c
 @@ -647,7 +647,7 @@ static int hvm_ioreq_server_add_vcpu(struct
 hvm_ioreq_server *s,
  return 0;
 
   fail3:
 -free_xen_event_channel(v, sv-ioreq_evtchn);
 +free_xen_event_channel(v-domain, sv-ioreq_evtchn);
 
   fail2:
  spin_unlock(s-lock);
 @@ -674,9 +674,9 @@ static void hvm_ioreq_server_remove_vcpu(struct
 hvm_ioreq_server *s,
  list_del(sv-list_entry);
 
  if ( v-vcpu_id == 0  s-bufioreq.va != NULL )
 -free_xen_event_channel(v, s-bufioreq_evtchn);
 +free_xen_event_channel(v-domain, s-bufioreq_evtchn);
 
 -free_xen_event_channel(v, sv-ioreq_evtchn);
 +free_xen_event_channel(v-domain, sv-ioreq_evtchn);
 
  xfree(sv);
  break;
 @@ -701,9 +701,9 @@ static void
 hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s)
  list_del(sv-list_entry);
 
  if ( v-vcpu_id == 0  s-bufioreq.va != NULL )
 -free_xen_event_channel(v, s-bufioreq_evtchn);
 +free_xen_event_channel(v-domain, s-bufioreq_evtchn);
 
 -free_xen_event_channel(v, sv-ioreq_evtchn);
 +free_xen_event_channel(v-domain, sv-ioreq_evtchn);
 
  xfree(sv);
  }
 @@ -1333,7 +1333,7 @@ static int hvm_replace_event_channel(struct vcpu
 *v, domid_t remote_domid,
 
  /* xchg() ensures that only we call free_xen_event_channel(). */
  old_port = xchg(p_port, new_port);
 -free_xen_event_channel(v, old_port);
 +free_xen_event_channel(v-domain, old_port);
  return 0;
  }
 
 diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
 index 7d6de54..cfe4978 100644
 --- a/xen/common/event_channel.c
 +++ b/xen/common/event_channel.c
 @@ -1173,11 +1173,9 @@ int alloc_unbound_xen_event_channel(
  }
 
 
 -void free_xen_event_channel(
 -struct vcpu *local_vcpu, int port)
 +void free_xen_event_channel(struct domain *d, int port)
  {
  struct evtchn *chn;
 -struct domain *d = local_vcpu-domain;
 
  spin_lock(d-event_lock);
 
 diff --git a/xen/common/mem_event.c b/xen/common/mem_event.c
 index 16ebdb5..0cc43d7 100644
 --- a/xen/common/mem_event.c
 +++ b/xen/common/mem_event.c
 @@ -221,7 +221,7 @@ static int mem_event_disable(struct domain *d,
 struct mem_event_domain *med)
  }
 
  /* Free domU's event channel and leave the other one unbound */
 -free_xen_event_channel(d-vcpu[0], med-xen_port);
 +free_xen_event_channel(d, med-xen_port);
 
  /* Unblock all vCPUs */
  for_each_vcpu ( d, v )
 diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
 index 88526f8..0eb1dd3 100644
 --- a/xen/include/xen/event.h
 +++ b/xen/include/xen/event.h
 @@ -60,8 +60,7 @@ typedef void (*xen_event_channel_notification_t)(
  int alloc_unbound_xen_event_channel(
  struct vcpu *local_vcpu, domid_t remote_domid,
  xen_event_channel_notification_t notification_fn);
 -void free_xen_event_channel(
 -struct vcpu *local_vcpu, int port);
 +void free_xen_event_channel(struct domain *d, int port);
 
  /* Query if event channel is in use by the guest */
  int guest_enabled_event(struct vcpu *v, uint32_t virq);
 --
 1.7.10.4
 
 
 ___
 Xen-devel mailing list
 Xen-devel@lists.xen.org
 http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote:
 Il 12/01/2015 10:31, Ian Campbell ha scritto:
  On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:
  In the meantime, I saw this:
  http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
  Based on the post above seems that phy will have important risk of data
  loss if I understand good, from Ian Campbell post:
  xl also uses qdisk for raw disk images instead of loop+blkback which
  xend used, because there are concerns that the loop driver can lead to
  data loss (by not implementing direct i/o the underlying device, and/or
  not handling flushes correct, my memory is a bit fuzzy).
  Stefano already corrected me on this in this very thread.
 
  Ian.
 
 
 Thanks for your reply.
 I saw other posts about and if I understand good when O_DIRECT patches 
 will be in upstream loop driver the data loss risk will be solved, right?

Stefano says that O_DIRECT is not needed, only correct barrier semantics
are and he believes those are correctly implemented.

O_DIRECT would be an additional layer of safety perhaps, but sounds to
be not strictly needed.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v3] x86: also allow REP STOS emulation acceleration

On 12/01/15 08:01, Jan Beulich wrote:
 While the REP MOVS acceleration appears to have helped qemu-traditional
 based guests, qemu-upstream (or really the respective video BIOSes)
 doesn't appear to benefit from that. Instead the acceleration added
 here provides a visible performance improvement during very early HVM
 guest boot.

 Signed-off-by: Jan Beulich jbeul...@suse.com

Acked-by: Andrew Cooper andrew.coop...@citrix.com

 ---
 v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos()
 Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew.
 v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by
 Andrew. Add output operand telling the compiler that buf is being
 written.

 --- a/xen/arch/x86/hvm/emulate.c
 +++ b/xen/arch/x86/hvm/emulate.c
 @@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard(
  return X86EMUL_OKAY;
  }
  
 +static int hvmemul_rep_stos_discard(
 +void *p_data,
 +enum x86_segment seg,
 +unsigned long offset,
 +unsigned int bytes_per_rep,
 +unsigned long *reps,
 +struct x86_emulate_ctxt *ctxt)
 +{
 +return X86EMUL_OKAY;
 +}
 +
  static int hvmemul_rep_outs_discard(
  enum x86_segment src_seg,
  unsigned long src_offset,
 @@ -982,6 +993,113 @@ static int hvmemul_rep_movs(
  return X86EMUL_OKAY;
  }
  
 +static int hvmemul_rep_stos(
 +void *p_data,
 +enum x86_segment seg,
 +unsigned long offset,
 +unsigned int bytes_per_rep,
 +unsigned long *reps,
 +struct x86_emulate_ctxt *ctxt)
 +{
 +struct hvm_emulate_ctxt *hvmemul_ctxt =
 +container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
 +unsigned long addr;
 +paddr_t gpa;
 +p2m_type_t p2mt;
 +bool_t df = !!(ctxt-regs-eflags  X86_EFLAGS_DF);
 +int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
 +   hvm_access_write, hvmemul_ctxt, 
 addr);
 +
 +if ( rc == X86EMUL_OKAY )
 +{
 +uint32_t pfec = PFEC_page_present | PFEC_write_access;
 +
 +if ( hvmemul_ctxt-seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
 +pfec |= PFEC_user_mode;
 +
 +rc = hvmemul_linear_to_phys(
 +addr, gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
 +}
 +if ( rc != X86EMUL_OKAY )
 +return rc;
 +
 +/* Check for MMIO op */
 +(void)get_gfn_query_unlocked(current-domain, gpa  PAGE_SHIFT, p2mt);
 +
 +switch ( p2mt )
 +{
 +unsigned long bytes;
 +void *buf;
 +
 +default:
 +/* Allocate temporary buffer. */
 +for ( ; ; )
 +{
 +bytes = *reps * bytes_per_rep;
 +buf = xmalloc_bytes(bytes);
 +if ( buf || *reps = 1 )
 +break;
 +*reps = 1;
 +}
 +
 +if ( !buf )
 +buf = p_data;
 +else
 +switch ( bytes_per_rep )
 +{
 +unsigned long dummy;
 +
 +#define CASE(bits, suffix) \
 +case (bits) / 8:   \
 +asm ( rep stos #suffix   \
 +  : =m (*(char (*)[bytes])buf),  \
 +=D (dummy), =c (dummy) \
 +  : a (*(const uint##bits##_t *)p_data), \
 + 1 (buf), 2 (*reps) ); \
 +break
 +CASE(8, b);
 +CASE(16, w);
 +CASE(32, l);
 +CASE(64, q);
 +#undef CASE
 +
 +default:
 +ASSERT_UNREACHABLE();
 +xfree(buf);
 +return X86EMUL_UNHANDLEABLE;
 +}
 +
 +/* Adjust address for reverse store. */
 +if ( df )
 +gpa -= bytes - bytes_per_rep;
 +
 +rc = hvm_copy_to_guest_phys(gpa, buf, bytes);
 +
 +if ( buf != p_data )
 +xfree(buf);
 +
 +switch ( rc )
 +{
 +case HVMCOPY_gfn_paged_out:
 +case HVMCOPY_gfn_shared:
 +return X86EMUL_RETRY;
 +case HVMCOPY_okay:
 +return X86EMUL_OKAY;
 +}
 +
 +gdprintk(XENLOG_WARNING,
 + Failed REP STOS: gpa=%PRIpaddr reps=%lu 
 bytes_per_rep=%u\n,
 + gpa, *reps, bytes_per_rep);
 +/* fall through */
 +case p2m_mmio_direct:
 +return X86EMUL_UNHANDLEABLE;
 +
 +case p2m_mmio_dm:
 +return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df,
 +   p_data);
 +}
 +}
 +
  static int hvmemul_read_segment(
  enum x86_segment seg,
  struct segment_register *reg,
 @@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_
  .rep_ins   = hvmemul_rep_ins,
  .rep_outs  = hvmemul_rep_outs,
  .rep_movs  = hvmemul_rep_movs,
 +.rep_stos  = hvmemul_rep_stos,
  .read_segment  = hvmemul_read_segment,
  .write_segment =

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 On 12.01.15 at 12:22, kevin.t...@intel.com wrote:
  From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Monday, January 12, 2015 6:23 PM
 One of my main problems with all you recent argumentation here
 is the arbitrary use of the 1Gb boundary - there's nothing special
 in this discussion with where the boundary is. Everything revolves
 around the (undue) effect of report-all on domains not needing all
 of the ranges found on the host.
 
 
 I'm not sure which part of my argument is not clear here. report-all
 would be a problem here only if we want to fix all the conflictions
 (then pulling unnecessary devices increases the confliction possibility) 
 in the domain builder. but if we only fix reasonable ones (e.g. 3GB)
 while warn other conflictions (e.g. 3G) in domain builder (let later 
 assignment path to actually fail if confliction does matter),

And have no way for the user to (securely) avoid that failure. Plus
the definition of reasonable here is of course going to be arbitrary.

Jan

 then we 
 don't need to solve all conflictions in domain builder (if say 1G example
 fixing it may instead reduce lowmem greatly) and then report-all 
 may just add more warnings than report-sel for unused devices.
 
 Thanks
 Kevin




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] Xen 4.5 Development Update (GA slip by a week), GA slip + 1 day

2015-01-12 Thread Lars Kurth


On 9 Jan 2015, at 18:30, Konrad Rzeszutek Wilk konrad.w...@oracle.com wrote:

 On Fri, Jan 09, 2015 at 01:51:56PM +, Lars Kurth wrote:
 Note that I published the Acknowledgements and most 4.5 documentation is now 
 in place. Bits which you may want to check and fix
 * Spelling of your name if it contains special characters as the scripts 
 that I use nuke them - 
 http://wiki.xenproject.org/wiki/Xen_Project_4.5_Acknowledgements
 * Some additions to 
 http://wiki.xenproject.org/wiki/Xen_Project_Release_Features, which I will do
 
 And we do need to populate the Known Issues section of 
 http://wiki.xenproject.org/wiki/Xen_Project_4.5_Release_Notes
 
 I've added the one that I am aware (systemd).
 
 In 4.4 we also had this 
 http://wiki.xenproject.org/wiki?title=Xen_Project_4.4_Feature_List
 which was linked of the  
 http://wiki.xenproject.org/wiki/Xen_Project_4.4_Release_Notes
 
 I can populate most of htem or we can just reference the blog that will be on
 the release day?
Konrad, thank you. You don't need to: I created a separate page - see 
http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List
We can just link to ti from the release notes
Lars


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Monday, January 12, 2015 7:37 PM

  On 12.01.15 at 12:22, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Monday, January 12, 2015 6:23 PM
  One of my main problems with all you recent argumentation here
  is the arbitrary use of the 1Gb boundary - there's nothing special
  in this discussion with where the boundary is. Everything revolves
  around the (undue) effect of report-all on domains not needing all
  of the ranges found on the host.

  I'm not sure which part of my argument is not clear here. report-all
  would be a problem here only if we want to fix all the conflictions
  (then pulling unnecessary devices increases the confliction possibility)
  in the domain builder. but if we only fix reasonable ones (e.g. 3GB)
  while warn other conflictions (e.g. 3G) in domain builder (let later
  assignment path to actually fail if confliction does matter),

 And have no way for the user to (securely) avoid that failure. Plus
 the definition of reasonable here is of course going to be arbitrary.

 Jan

actually here I didn't get your point then. It's your proposal to make 
reasonable assumption like below:

---
d) Move down the lowmem RAM/MMIO boundary so that a single,
contiguous chunk of lowmem results, with all other RAM moving up
beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be
considered here, and I think we can reasonably safely assume that
no RMRRs will ever report ranges above 1Mb but below the host
lowmem RAM/MMIO boundary (i.e. we can presumably rest assured
that the lowmem chunk will always be reasonably big).
---

Thanks
Kevin

  then we
  don't need to solve all conflictions in domain builder (if say 1G example
  fixing it may instead reduce lowmem greatly) and then report-all
  may just add more warnings than report-sel for unused devices.

  Thanks
  Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 On 12.01.15 at 12:41, kevin.t...@intel.com wrote:
  From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Monday, January 12, 2015 7:37 PM

  On 12.01.15 at 12:22, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Monday, January 12, 2015 6:23 PM
  One of my main problems with all you recent argumentation here
  is the arbitrary use of the 1Gb boundary - there's nothing special
  in this discussion with where the boundary is. Everything revolves
  around the (undue) effect of report-all on domains not needing all
  of the ranges found on the host.

  I'm not sure which part of my argument is not clear here. report-all
  would be a problem here only if we want to fix all the conflictions
  (then pulling unnecessary devices increases the confliction possibility)
  in the domain builder. but if we only fix reasonable ones (e.g. 3GB)
  while warn other conflictions (e.g. 3G) in domain builder (let later
  assignment path to actually fail if confliction does matter),

 And have no way for the user to (securely) avoid that failure. Plus
 the definition of reasonable here is of course going to be arbitrary.

 actually here I didn't get your point then. It's your proposal to make 
 reasonable assumption like below:

 ---
 d) Move down the lowmem RAM/MMIO boundary so that a single,
 contiguous chunk of lowmem results, with all other RAM moving up
 beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be
 considered here, and I think we can reasonably safely assume that
 no RMRRs will ever report ranges above 1Mb but below the host
 lowmem RAM/MMIO boundary (i.e. we can presumably rest assured
 that the lowmem chunk will always be reasonably big).

Correct - but my point is that this won't work well with your report-all
mechanism, only with the report-sel one.

Jan

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com wrote:
 From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Monday, January 12, 2015 6:23 PM

  On 12.01.15 at 11:12, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Monday, January 12, 2015 6:09 PM

   On 12.01.15 at 10:56, kevin.t...@intel.com wrote:
   the result is related to another open whether we want to block guest
   boot for such problem. If 'warn' in domain builder is acceptable, we
   don't need to change lowmem for such rare 1GB case, just throws
   a warning for unnecessary conflictions (doesn't hurt if user doesn't
   assign it).

  And how would you then deal with the one guest needing that
  range reserved?

  if guest needs the range, then report-all or report-sel doesn't matter.
  domain builder throws the warning, and later device assignment will
  fail (or warn w/ override). In reality I think 1GB is rare. Making such
  assumption to simplify implementation is reasonable.

 One of my main problems with all you recent argumentation here
 is the arbitrary use of the 1Gb boundary - there's nothing special
 in this discussion with where the boundary is. Everything revolves
 around the (undue) effect of report-all on domains not needing all
 of the ranges found on the host.

 I'm not sure which part of my argument is not clear here. report-all
 would be a problem here only if we want to fix all the conflictions
 (then pulling unnecessary devices increases the confliction possibility)
 in the domain builder. but if we only fix reasonable ones (e.g. 3GB)
 while warn other conflictions (e.g. 3G) in domain builder (let later
 assignment path to actually fail if confliction does matter), then we
 don't need to solve all conflictions in domain builder (if say 1G example
 fixing it may instead reduce lowmem greatly) and then report-all
 may just add more warnings than report-sel for unused devices.

You keep saying report-all or report-sel, but I'm not 100% clear
what you mean by those.  In any case, the naming has got to be a bit
misleading: the important questions at the moment, AFAICT, are:

1. Whether we make holes at boot time for all RMRRs on the system, or
whether only make RMRRs for some subset (or potentially some other
arbitrary range, which may include RMRRs on other hosts to which we
may want to migrate).

2. Whether those holes are made by the domain builder in libxc, or by hvmloader

3. What happens if Xen is asked to assign a device and it finds that
the required RMRR is not empty:
 a. during guest creation
 b. after the guest has booted

Obviously at some point some part of the toolstack needs to identify
which RMRRs go with what device, so that either libxc or hvmloader can
make the appropriate holes in the address space; but at that point,
report is not so much the right word as query.  (Obviously we want
to report in the e820 map all RMRRs that we've made holes for in the
guest.)

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

On Mon, 2015-01-12 at 12:13 +, George Dunlap wrote:
 On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com wrote:
  From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Monday, January 12, 2015 6:23 PM

   On 12.01.15 at 11:12, kevin.t...@intel.com wrote:
From: Jan Beulich [mailto:jbeul...@suse.com]
   Sent: Monday, January 12, 2015 6:09 PM

On 12.01.15 at 10:56, kevin.t...@intel.com wrote:
the result is related to another open whether we want to block guest
boot for such problem. If 'warn' in domain builder is acceptable, we
don't need to change lowmem for such rare 1GB case, just throws
a warning for unnecessary conflictions (doesn't hurt if user doesn't
assign it).

   And how would you then deal with the one guest needing that
   range reserved?

   if guest needs the range, then report-all or report-sel doesn't matter.
   domain builder throws the warning, and later device assignment will
   fail (or warn w/ override). In reality I think 1GB is rare. Making such
   assumption to simplify implementation is reasonable.

  One of my main problems with all you recent argumentation here
  is the arbitrary use of the 1Gb boundary - there's nothing special
  in this discussion with where the boundary is. Everything revolves
  around the (undue) effect of report-all on domains not needing all
  of the ranges found on the host.

  I'm not sure which part of my argument is not clear here. report-all
  would be a problem here only if we want to fix all the conflictions
  (then pulling unnecessary devices increases the confliction possibility)
  in the domain builder. but if we only fix reasonable ones (e.g. 3GB)
  while warn other conflictions (e.g. 3G) in domain builder (let later
  assignment path to actually fail if confliction does matter), then we
  don't need to solve all conflictions in domain builder (if say 1G example
  fixing it may instead reduce lowmem greatly) and then report-all
  may just add more warnings than report-sel for unused devices.

 You keep saying report-all or report-sel, but I'm not 100% clear
 what you mean by those.

Is the distinction between all reserved areas and only (selectively)
those which are related to an RMRR? That's how I've been reading it...

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 00/11] Alternate p2m: support multiple copies of host p2m

 On 10.01.15 at 00:04, edmund.h.wh...@intel.com wrote:
 On 01/09/2015 02:41 PM, Andrew Cooper wrote:
 Having some non-OS part of the guest swap the EPT tables and
 accidentally turn a DMA buffer read-only is not going to end well.
 
 
 The agent can certainly do bad things, and at some level you have to assume it
 is sensible enough not to. However, I'm not sure this is fundamentally more
 dangerous than what a privileged domain can do today using the MEMOP...
 operations, and people are already using those for very similar purposes.

I don't follow - how is what privileged domain can do related to the
proposed changes here (which are - via VMFUNC - at least partially
guest controllable, and that's also the case Andrew mentioned in his
reply)? I'm having a hard time understanding how a P2M stripped of
anything that's not plain RAM can be very useful to a guest. IOW
without such fundamental aspects clarified I don't see a point in
looking at the individual patches (which btw, according to your
wording elsewhere, should have been marked RFC).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] x86/MCE: allow overriding the CMCI threshold

2015-01-12 Thread Egger, Christoph

On 2015/01/12 9:44, Jan Beulich wrote:
 We've had reports of systems where CMCIs would surface at a relatively
 high rate during certain periods of time, without them apparently
 causing subsequent more severe problems (see Xeon E7-8800/4800/2800
 specification clarification SC1). Give the admin a knob to lower the
 impact on the system logs.
 
 Signed-off-by: Jan Beulich jbeul...@suse.com

A small comment at the bottom, besides of that:

Acked-by: Christoph Egger cheg...@amazon.de

 
 --- a/docs/misc/xen-command-line.markdown
 +++ b/docs/misc/xen-command-line.markdown
 @@ -242,6 +242,14 @@ the NMI watchdog is also enabled.
  
  If set, override Xen's default choice for the platform timer.
  
 +### cmci-threshold
 + `= integer`
 +
 + Default: `2`
 +
 +Specify the event count threshold for raising Corrected Machine Check
 +Interrupts.  Specifying zero disables CMCI handling.
 +
  ### cmos-rtc-probe
   `= boolean`
  
 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c
 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
 @@ -492,6 +492,9 @@ static int do_cmci_discover(int i)
  {
  unsigned msr = MSR_IA32_MCx_CTL2(i);
  u64 val;
 +unsigned int threshold, max_threshold;
 +static unsigned int cmci_threshold = 2;
 +integer_param(cmci-threshold, cmci_threshold);
  
  rdmsrl(msr, val);
  /* Some other CPU already owns this bank. */
 @@ -500,15 +503,28 @@ static int do_cmci_discover(int i)
  goto out;
  }
  
 -val = ~CMCI_THRESHOLD_MASK;
 -wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
 -rdmsrl(msr, val);
 +if ( cmci_threshold )
 +{
 +wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK);
 +rdmsrl(msr, val);
 +}
  
  if (!(val  CMCI_EN)) {
  /* This bank does not support CMCI. Polling timer has to handle it. 
 */
  mcabanks_set(i, __get_cpu_var(no_cmci_banks));
 +wrmsrl(msr, val  ~CMCI_THRESHOLD_MASK);
  return 0;
  }
 +max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK);
 +threshold = cmci_threshold;
 +if ( threshold  max_threshold )
 +{
 +   mce_printk(MCE_QUIET,
 +  CMCI: threshold %#x too large for CPU%u bank %u, using 
 %#x\n,
 +  threshold, smp_processor_id(), i, max_threshold);
 +   threshold = max_threshold;
 +}
 +wrmsrl(msr, (val  ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold);
  mcabanks_set(i, __get_cpu_var(mce_banks_owned));
  out:
  mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
 --- a/xen/arch/x86/cpu/mcheck/x86_mca.h
 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
 @@ -86,9 +86,6 @@
  /* Bitfield of MSR_K8_HWCR register */
  #define K8_HWCR_MCi_STATUS_WREN  (1ULL  18)
  
 -/*Intel Specific bitfield*/
 -#define CMCI_THRESHOLD   0x2
 -
  #define MCi_MISC_ADDRMOD_MASK (0x7UL  6)
  #define MCi_MISC_PHYSMOD(0x2UL  6)

I think these two are also Intel specific bitfields.
Please leave the comment for those.

Christoph


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications

On 12/01/15 08:57, Jan Beulich wrote:
 The trivial wrapper evtchn_set_pending() is pretty pointless, as it
 only serves to invoke another wrapper evtchn_port_set_pending(). In
 turn, the latter is kind of inconsistent with its siblings in that is
 takes a struct vcpu * rather than a struct domain * - adjusting this
 allows for more efficient code in the majority of cases.

Reviewed-by: David Vrabel david.vra...@citrix.com

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH RFC] xen-time: decreasing the rating of the xen clocksource below that of the tsc clocksource for dom0's

On 08/01/15 15:06, Imre Palik wrote:
 On 01/07/15 17:30, Ian Campbell wrote:
 On Wed, 2015-01-07 at 17:16 +0100, Imre Palik wrote:
 From: Palik, Imre im...@amazon.de

 In Dom0's the use of the TSC clocksource (whenever it is stable enough to
 be used) instead of the Xen clocksource should not cause any issues, as
 Dom0 VMs never live-migrated.

 Is this still true given that dom0's vcpus are migrated amongst pcpus on
 the host? The tsc are not synchronised on some generations of hardware
 so the result there would be the TSC appearing to do very odd things
 under dom0's feet. Does Linux cope with that or does it not matter for
 some other reason?
 
 First of all, if the initial pcpus are not synchronised, linux won't consider
 TSC as a viable clocksource.
 
 If the initial pcpus are synchronised, but then the dom0 vcpus are migrated to
 unsynchronised pcpus, then hopefully the tsc watchdog catches the issue, and
 the kernel falls back to the xen clocksource.  The issue here is that the
 watchdog can only detect clock skews bigger than 0.0625s/0.5s.  Hopefully this
 is enough to avoid the weirdest things.

I don't think any such hardware exists.  Either TSC is synchronized
across all CPUs or none.

 Also, some parts of the kernel (e.g., scheduling) will always use the paravirt
 clock.  No matter what priorities are set.
 
 So it should be safe for some definition of safe.
 But I was unable to test it on a hardware platform that would hit the 
 problematic
 case described above.

Ok.  Can you list the various time sources and their ratings in the
commit message for clarity.  i.e, to justify 275 (below TSC = 300. above
hpet = 250).

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Monday, January 12, 2015 8:03 PM

  On 12.01.15 at 12:41, kevin.t...@intel.com wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Monday, January 12, 2015 7:37 PM

   On 12.01.15 at 12:22, kevin.t...@intel.com wrote:
From: Jan Beulich [mailto:jbeul...@suse.com]
   Sent: Monday, January 12, 2015 6:23 PM
   One of my main problems with all you recent argumentation here
   is the arbitrary use of the 1Gb boundary - there's nothing special
   in this discussion with where the boundary is. Everything revolves
   around the (undue) effect of report-all on domains not needing all
   of the ranges found on the host.

   I'm not sure which part of my argument is not clear here. report-all
   would be a problem here only if we want to fix all the conflictions
   (then pulling unnecessary devices increases the confliction possibility)
   in the domain builder. but if we only fix reasonable ones (e.g. 3GB)
   while warn other conflictions (e.g. 3G) in domain builder (let later
   assignment path to actually fail if confliction does matter),

  And have no way for the user to (securely) avoid that failure. Plus
  the definition of reasonable here is of course going to be arbitrary.

  actually here I didn't get your point then. It's your proposal to make
  reasonable assumption like below:

  ---
  d) Move down the lowmem RAM/MMIO boundary so that a single,
  contiguous chunk of lowmem results, with all other RAM moving up
  beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be
  considered here, and I think we can reasonably safely assume that
  no RMRRs will ever report ranges above 1Mb but below the host
  lowmem RAM/MMIO boundary (i.e. we can presumably rest assured
  that the lowmem chunk will always be reasonably big).

 Correct - but my point is that this won't work well with your report-all
 mechanism, only with the report-sel one.

I've explained this several times. If there's a violation on above assumption 
from required devices, same for report-all and report-sel. If the violation is 
caused by unnecessary devices, please note I'm proposing 'warn' here so
report-all at most just adds more warnings in domain builder. the conflict
will be caught later if it becomes relevant to be assigned (e.g. thru hotplug).

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 From: Tian, Kevin
 Sent: Monday, January 12, 2015 8:29 PM

  From: George Dunlap
  Sent: Monday, January 12, 2015 8:14 PM

  On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com
 wrote:
   From: Jan Beulich [mailto:jbeul...@suse.com]
   Sent: Monday, January 12, 2015 6:23 PM

On 12.01.15 at 11:12, kevin.t...@intel.com wrote:
 From: Jan Beulich [mailto:jbeul...@suse.com]
Sent: Monday, January 12, 2015 6:09 PM

 On 12.01.15 at 10:56, kevin.t...@intel.com wrote:
 the result is related to another open whether we want to block guest
 boot for such problem. If 'warn' in domain builder is acceptable, we
 don't need to change lowmem for such rare 1GB case, just throws
 a warning for unnecessary conflictions (doesn't hurt if user doesn't
 assign it).

And how would you then deal with the one guest needing that
range reserved?

if guest needs the range, then report-all or report-sel doesn't matter.
domain builder throws the warning, and later device assignment will
fail (or warn w/ override). In reality I think 1GB is rare. Making such
assumption to simplify implementation is reasonable.

   One of my main problems with all you recent argumentation here
   is the arbitrary use of the 1Gb boundary - there's nothing special
   in this discussion with where the boundary is. Everything revolves
   around the (undue) effect of report-all on domains not needing all
   of the ranges found on the host.

   I'm not sure which part of my argument is not clear here. report-all
   would be a problem here only if we want to fix all the conflictions
   (then pulling unnecessary devices increases the confliction possibility)
   in the domain builder. but if we only fix reasonable ones (e.g. 3GB)
   while warn other conflictions (e.g. 3G) in domain builder (let later
   assignment path to actually fail if confliction does matter), then we
   don't need to solve all conflictions in domain builder (if say 1G example
   fixing it may instead reduce lowmem greatly) and then report-all
   may just add more warnings than report-sel for unused devices.

  You keep saying report-all or report-sel, but I'm not 100% clear
  what you mean by those.  In any case, the naming has got to be a bit
  misleading: the important questions at the moment, AFAICT, are:

 I explained them in original proposal

  1. Whether we make holes at boot time for all RMRRs on the system, or
  whether only make RMRRs for some subset (or potentially some other
  arbitrary range, which may include RMRRs on other hosts to which we
  may want to migrate).

 I use 'report-all' to stand for making holes for all RMRRs on the system,
 while 'report-sel' for specified subset.

more accurate... 'report-sel' for making holes for RMRRs belonging to 
specified devices.

Thanks
Kevin
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [xen-4.5-testing test] 33303: regressions - FAIL

xen.org writes ([xen-4.5-testing test] 33303: regressions - FAIL):
 flight 33303 xen-4.5-testing real [real]
 http://www.chiark.greenend.org.uk/~xensrcts/logs/33303/
 
 Regressions :-(
 
 Tests which did not succeed and are blocking,
 including tests which could not be run:
  test-amd64-i386-qemuu-rhel6hvm-intel  7 redhat-installfail REGR. vs. 
 33264

osstest is very badly overloaded right now so everything is taking a
long time.

To see where the release was at I peeked at the results of the
currently running test (flight 33348) and it looks like it's going to
justify a pass.  I also compared its results to 33285 (a not-too-bad
xen-unstable flight before we branched) and things look reasonably
good there too.

So I think we are going to be good to go.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

On Mon, 2015-01-12 at 11:17 +0100, Fabio Fantoni wrote:
 Il 12/01/2015 11:06, Ian Campbell ha scritto:
  On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote:
  Il 12/01/2015 10:31, Ian Campbell ha scritto:
  On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:
  In the meantime, I saw this:
  http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
  Based on the post above seems that phy will have important risk of data
  loss if I understand good, from Ian Campbell post:
  xl also uses qdisk for raw disk images instead of loop+blkback which
  xend used, because there are concerns that the loop driver can lead to
  data loss (by not implementing direct i/o the underlying device, and/or
  not handling flushes correct, my memory is a bit fuzzy).
  Stefano already corrected me on this in this very thread.
 
  Ian.
 
  Thanks for your reply.
  I saw other posts about and if I understand good when O_DIRECT patches
  will be in upstream loop driver the data loss risk will be solved, right?
  Stefano says that O_DIRECT is not needed, only correct barrier semantics
  are and he believes those are correctly implemented.
 
 I not understand if manual changes and/or settings are needed, about 
 this I mean:
  only correct barrier semantics are and he believes those are correctly 
  implemented
 If yes what exactly?

If Stefano is correct then it should all just work, no manual steps
needed (if manual steps were needed we wouldn't be taking this patch).

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

2015-01-12 Thread Fabio Fantoni


Il 12/01/2015 11:06, Ian Campbell ha scritto:

On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote:

Il 12/01/2015 10:31, Ian Campbell ha scritto:

On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:

In the meantime, I saw this:
http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
Based on the post above seems that phy will have important risk of data
loss if I understand good, from Ian Campbell post:

xl also uses qdisk for raw disk images instead of loop+blkback which
xend used, because there are concerns that the loop driver can lead to
data loss (by not implementing direct i/o the underlying device, and/or
not handling flushes correct, my memory is a bit fuzzy).

Stefano already corrected me on this in this very thread.

Ian.


Thanks for your reply.
I saw other posts about and if I understand good when O_DIRECT patches
will be in upstream loop driver the data loss risk will be solved, right?

Stefano says that O_DIRECT is not needed, only correct barrier semantics
are and he believes those are correctly implemented.


I not understand if manual changes and/or settings are needed, about 
this I mean:

only correct barrier semantics are and he believes those are correctly 
implemented

If yes what exactly?



O_DIRECT would be an additional layer of safety perhaps, but sounds to
be not strictly needed.

Ian.




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

On Fri, Jan 9, 2015 at 2:43 AM, Tian, Kevin kevin.t...@intel.com wrote:
 From: George Dunlap
 Sent: Thursday, January 08, 2015 8:55 PM

 On Thu, Jan 8, 2015 at 12:49 PM, George Dunlap
 george.dun...@eu.citrix.com wrote:
  If RMRRs almost always happen up above 2G, for example, then a simple
  solution that wouldn't require too much work would be to make sure
  that the PCI MMIO hole we specify to libxc and to qemu-upstream is big
  enough to include all RMRRs.  That would satisfy the libxc and qemu
  requirements.
 
  If we then store specific RMRRs we want included in xenstore,
  hvmloader can put them in the e820 map, and that would satisfy the
  hvmloader requirement.

 An alternate thing to do here would be to properly fix the
 qemu-upstream problem, by making a way for hvmloader to communicate
 changes in the gpfn layout to qemu.

 Then hvmloader could do the work of moving memory under RMRRs to
 higher memory; and libxc wouldn't need to be involved at all.

 I think it would also fix our long-standing issues with assigning PCI
 devices to qemu-upstream guests, which up until now have only been
 worked around.


 could you elaborate a bit for that long-standing issue?

So qemu-traditional didn't particularly expect to know the guest
memory layout.  qemu-upstream does; it expects to know what areas of
memory are guest memory and what areas of memory are unmapped.  If a
read or write happens to a gpfn which *xen* knows is valid, but which
*qemu-upstream* thinks is unmapped, then qemu-upstream will crash.

The problem though is that the guest's memory map is not actually
communicated to qemu-upstream in any way.  Originally, qemu-upstream
was only told how much memory the guest had, and it just happens to
choose the same guest memory layout as the libxc domain builder does.
This works, but it is bad design, because if libxc were to change for
some reason, people would have to simply remember to also change the
qemu-upstream layout.

Where this really bites us is in PCI pass-through.  The default 4G
MMIO hole is very small; and hvmloader naturally expects to be able to
make this area larger by relocating memory from below 4G to above 4G.
It moves the memory in Xen's p2m, but it has no way of communicating
this to qemu-upstream.  So when the guest does an MMIO instuction that
causes qemu-upstream to access that memory, the guest crashes.

There are two work-arounds at the moment:
1. A flag which tells hvmloader not to relocate memory
2. The option to tell qemu-upstream to make the memory hole larger.

Both are just work-arounds though; a proper fix would be to allow
hvmloader some way of telling qemu that the memory has moved, so it
can update its memory map.

This will (I'm pretty sure) have an effect on RMRR regions as well,
for the reasons I've mentioned above: whether make the holes for the
RMRRs in libxc or in hvmloader, if we *move* that memory up to the top
of the address space (rather than, say, just not giving that RAM to
the guest), then qemu-upstream's idea of the guest memory map will be
wrong, and will probably crash at some point.

Having the ability for hvmloader to populate and/or move the memory
around, and then tell qemu-upstream what the resulting map looked
like, would fix both the MMIO-resize issue and the RMRR problem, wrt
qemu-upstream.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications

On 12/01/15 08:57, Jan Beulich wrote:
 --- a/xen/include/xen/event.h
 +++ b/xen/include/xen/event.h
 @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru
  d-evtchn_port_ops-init(d, evtchn);
  }
  
 -static inline void evtchn_port_set_pending(struct vcpu *v,
 +static inline void evtchn_port_set_pending(struct domain *d,
 +   unsigned int vcpu_id,
 struct evtchn *evtchn)

I would rename this to the, now vacant, evtchn_set_pending().  It takes
an evtchn* not a port.  (Its sole caller was evtchn_set_pending(), so
the patch won't grow)

Furthermore, all callers except send_guest_vcpu_virq() currently use
evtchn-notify_vcpu_id to get a struct vcpu* to pass.  I think you can
drop the vcpu_id parameter and use evtchn-notify_vcpu_id directly,
which reduces the likelyhood of a bug where the evtchn is bound to one
vcpu but a caller gets the wrong id and raises the event channel on the
wrong vcpu.

~Andrew

  {
 -v-domain-evtchn_port_ops-set_pending(v, evtchn);
 +d-evtchn_port_ops-set_pending(d-vcpu[vcpu_id], evtchn);
  }
  
  static inline void evtchn_port_clear_pending(struct domain *d,




 ___
 Xen-devel mailing list
 Xen-devel@lists.xen.org
 http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed

 From: Stefano Stabellini
 Sent: Monday, January 12, 2015 7:36 PM
 
 On Wed, 24 Dec 2014, Liang Li wrote:
  Use the 'xl pci-attach $DomU $BDF' command to attach more then
  one PCI devices to the guest, then detach the devices with
  'xl pci-detach $DomU $BDF', after that, re-attach these PCI
  devices again, an error message will be reported like following:
 
  libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive
  an error message from QMP server: Duplicate ID 'pci-pt-03_10.1'
  for device.
 
  The count of calling xen_pt_region_add and xen_pt_region_del are
  not the same will cause the XenPCIPassthroughState and it's related
  QemuOpts object not be released properly.
 
 Thanks for the patch!
 
 From this description, I don't quite understand why the
 memory_region_ref and memory_region_unref calls are wrong.  What do
 you
 mean by The count of calling xen_pt_region_add and xen_pt_region_del
 are not the same?
 
 On unplug xen_pt_region_del does not get called?
 Or the memory region argument is not exactly the same as the one
 initially passed to xen_pt_region_add?
 

agree. Liang, could you elaborate how the patch is associated with above 
explanation? :-)

 
  Signed-off-by: Liang Li liang.z...@intel.com
  Reported-by: Longtao Pang longtaox.p...@intel.com
  ---
   hw/xen/xen_pt.c | 4 
   1 file changed, 4 deletions(-)
 
  diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
  index c1bf357..523b8a2 100644
  --- a/hw/xen/xen_pt.c
  +++ b/hw/xen/xen_pt.c
  @@ -588,7 +588,6 @@ static void xen_pt_region_add(MemoryListener *l,
 MemoryRegionSection *sec)
   XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
memory_listener);
 
  -memory_region_ref(sec-mr);
   xen_pt_region_update(s, sec, true);
   }
 
  @@ -598,7 +597,6 @@ static void xen_pt_region_del(MemoryListener *l,
 MemoryRegionSection *sec)
memory_listener);
 
   xen_pt_region_update(s, sec, false);
  -memory_region_unref(sec-mr);
   }
 
   static void xen_pt_io_region_add(MemoryListener *l,
 MemoryRegionSection *sec)
  @@ -606,7 +604,6 @@ static void xen_pt_io_region_add(MemoryListener
 *l, MemoryRegionSection *sec)
   XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
io_listener);
 
  -memory_region_ref(sec-mr);
   xen_pt_region_update(s, sec, true);
   }
 
  @@ -616,7 +613,6 @@ static void xen_pt_io_region_del(MemoryListener *l,
 MemoryRegionSection *sec)
io_listener);
 
   xen_pt_region_update(s, sec, false);
  -memory_region_unref(sec-mr);
   }
 
   static const MemoryListener xen_pt_memory_listener = {
  --
  1.9.1
 
 
 ___
 Xen-devel mailing list
 Xen-devel@lists.xen.org
 http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 00/11] Alternate p2m: support multiple copies of host p2m

Ed White writes ([PATCH 00/11] Alternate p2m: support multiple copies of host 
p2m):
 This set of patches adds support to hvm domains for EPTP switching
 by creating multiple copies of the host p2m (currently limited to 10
 copies).

Thanks for this.  Did you CC me in my capacity as tools maintainer ?
I can't see anything in this series which deals with any necessary
tools changes.

Are there tools parts to come later ?

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] (v2) Design proposal for RMRR fix

 From: George Dunlap
 Sent: Monday, January 12, 2015 8:14 PM

 On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin kevin.t...@intel.com wrote:
  From: Jan Beulich [mailto:jbeul...@suse.com]
  Sent: Monday, January 12, 2015 6:23 PM

   On 12.01.15 at 11:12, kevin.t...@intel.com wrote:
From: Jan Beulich [mailto:jbeul...@suse.com]
   Sent: Monday, January 12, 2015 6:09 PM

On 12.01.15 at 10:56, kevin.t...@intel.com wrote:
the result is related to another open whether we want to block guest
boot for such problem. If 'warn' in domain builder is acceptable, we
don't need to change lowmem for such rare 1GB case, just throws
a warning for unnecessary conflictions (doesn't hurt if user doesn't
assign it).

   And how would you then deal with the one guest needing that
   range reserved?

   if guest needs the range, then report-all or report-sel doesn't matter.
   domain builder throws the warning, and later device assignment will
   fail (or warn w/ override). In reality I think 1GB is rare. Making such
   assumption to simplify implementation is reasonable.

  One of my main problems with all you recent argumentation here
  is the arbitrary use of the 1Gb boundary - there's nothing special
  in this discussion with where the boundary is. Everything revolves
  around the (undue) effect of report-all on domains not needing all
  of the ranges found on the host.

  I'm not sure which part of my argument is not clear here. report-all
  would be a problem here only if we want to fix all the conflictions
  (then pulling unnecessary devices increases the confliction possibility)
  in the domain builder. but if we only fix reasonable ones (e.g. 3GB)
  while warn other conflictions (e.g. 3G) in domain builder (let later
  assignment path to actually fail if confliction does matter), then we
  don't need to solve all conflictions in domain builder (if say 1G example
  fixing it may instead reduce lowmem greatly) and then report-all
  may just add more warnings than report-sel for unused devices.

 You keep saying report-all or report-sel, but I'm not 100% clear
 what you mean by those.  In any case, the naming has got to be a bit
 misleading: the important questions at the moment, AFAICT, are:

I explained them in original proposal

 1. Whether we make holes at boot time for all RMRRs on the system, or
 whether only make RMRRs for some subset (or potentially some other
 arbitrary range, which may include RMRRs on other hosts to which we
 may want to migrate).

I use 'report-all' to stand for making holes for all RMRRs on the system,
while 'report-sel' for specified subset.

including other RMRRs (from admin for migration) is orthogonal to
above open.

 2. Whether those holes are made by the domain builder in libxc, or by
 hvmloader

based on current discussion, whether to make holes in hvmloader
doesn't bring fundamental difference. as long as domain builder
still need to populate memory (even minimal for hvmloader to boot),
it needs to check conflict and may ideally make hole too (though we
may make assumption not doing that)

 3. What happens if Xen is asked to assign a device and it finds that
 the required RMRR is not empty:
  a. during guest creation
  b. after the guest has booted

for Xen we don't need differentiate a/b. by default it's clear failure
should be returned as it implies a security/correctness issue if
moving forward. but based on discussion an override to 'warn' only
is preferred, so admin can make decision (remains an open on
whether to do global override or per-device override)

 Obviously at some point some part of the toolstack needs to identify
 which RMRRs go with what device, so that either libxc or hvmloader can
 make the appropriate holes in the address space; but at that point,
 report is not so much the right word as query.  (Obviously we want
 to report in the e820 map all RMRRs that we've made holes for in the
 guest.)

yes, using 'report' doesn't catch all the changes we need to make. Just
use them to simplify discussion in case all are on the same page. However
clearly my original explanation didn't make it. :/

and state my major intention again. I don't think the preparation (i.e.
detect confliction and make holes) for device assignment should be a 
a blocking failure. Throw warning should be enough (i.e. in libxc). We
should let actual device assignment path to make final call based on
admin's configuration (default 'fail' w/ 'warn' override). Based on that
policy I think 'report-all' (making holes for all host RMRRs) is an
acceptable approach, w/ small impact on possibly more warning 
messages (actually not bad to help admin understand the hotplug
possibility on this platform) and show more reserved regions to the
end user (but he shouldn't make any assumption here). :-)

Thanks
Kevin
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output

Ian Campbell writes (Re: [Xen-devel] [OSSTEST PATCH] 
mg-debian-installer-update: produce deterministic output):
 We might like to consider something along these lines for the future:

A reasonable idea (although I wonder if it should be configurable).

Acked-by: Ian Jackson ian.jack...@eu.citrix.com

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu

 On 12.01.15 at 10:46, andrew.coop...@citrix.com wrote:
 The resource behind an event channel is domain centric rather than vcpu
 centric, and free_xen_event_channel() only follows the vcpu's domain 
 pointer.

I wonder whether for symmetry alloc_unbound_xen_event_channel()
shouldn't then also take a [struct domain *, unsigned int vcpu_id]
pair instead of a struct vcpu *.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel

On 12/01/15 10:22, Ian Campbell wrote:
 On Mon, 2015-01-12 at 10:03 +, Andrew Cooper wrote:
 This is RFC because explicitly changes the logic introduced by c/s b34f2c375
 xsm: label xen-consumer event channels, and is only compile tested.

 Xen event channels are not internal resources.  They still have one end in a
 domain, and are created at the request of privileged domains.  This logic
 which successfully creates a Xen event channel opens up undesirable failure
 cases with ill-specified XSM policies.

 If a domain is permitted to create ioreq servers or memevent listeners, but
 not to create event channels, the ioreq/memevent creation will succeed but
 attempting to bind the returned event channel will fail without any 
 indication
 of a permission error.

 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
 CC: Daniel De Graaf dgde...@tycho.nsa.gov
 CC: Keir Fraser k...@xen.org
 CC: Jan Beulich jbeul...@suse.com
 CC: Tim Deegan t...@xen.org
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Ian Jackson ian.jack...@eu.citrix.com
 ---
  xen/common/event_channel.c |4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

 diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
 index cfe4978..89a7d99 100644
 --- a/xen/common/event_channel.c
 +++ b/xen/common/event_channel.c
 @@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel(
  chn = evtchn_from_port(d, port);
  
  rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid);
 +if ( rc )
 +goto out;
 out here appears to return port, not rc so you aren't returning failure,
 but an even more half setup port than before.

Ah yes - so I am.  rc was intended.  All callers do correctly check for
 0 to indicate failure.


 And I think you need to free the port on failure too.

At the point that we bail, chn-state is still ECS_FREE so there is
nothing to deallocate.

~Andrew


  
  chn-state = ECS_UNBOUND;
  chn-xen_consumer = get_xen_consumer(notification_fn);
  chn-notify_vcpu_id = local_vcpu-vcpu_id;
 -chn-u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID;
 +chn-u.unbound.remote_domid = remote_domid;
  
   out:
  spin_unlock(d-event_lock);




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] x86emul: tighten CLFLUSH emulation

On 12/01/15 08:23, Jan Beulich wrote:
 While for us it's not as bad as it was for Linux, their commit
 13e457e0ee (KVM: x86: Emulator does not decode clflush well, by
 Nadav Amit na...@cs.technion.ac.il) nevertheless points out two
 shortcomings in our code: opcode 0F AE /7 is clflush only when it uses
 a memory mode (otherwise it's SFENCE) and when there's no REP prefix
 (an operand size prefix is fine, as that's CLFLUSHOPT).

 Signed-off-by: Jan Beulich jbeul...@suse.com

Acked-by: Andrew Cooper andrew.coop...@citrix.com


 --- a/xen/arch/x86/x86_emulate/x86_emulate.c
 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
 @@ -4400,7 +4400,9 @@ x86_emulate(
  case 0xae: /* Grp15 */
  switch ( modrm_reg  7 )
  {
 -case 7: /* clflush */
 +case 7: /* clflush{,opt} */
 +fail_if(modrm_mod == 3);
 +fail_if(rep_prefix());
  fail_if(ops-wbinvd == NULL);
  if ( (rc = ops-wbinvd(ctxt)) != 0 )
  goto done;





 ___
 Xen-devel mailing list
 Xen-devel@lists.xen.org
 http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output

Ian Campbell writes ([OSSTEST PATCH] mg-debian-installer-update: produce 
deterministic output):
 Currently rerunning mg-debian-install-update when the external files
 have changed still produces differences in the local files produced
  ^
Missing not ?

 during post-processing.
 
 Avoid these differences by:
 
   - Using gzip -n, which avoids storing a timestamp in the gzip
 header (as well as the name, which we don't need).
   - Using pax -M norm, which normalises all timestamps (among other
 things, such as the owner, which we don't care about)
   - Using tar --mtime, with a reference within the dpkg-deb created
 hierarchy (which has timestamps from the package and is therefore
 dependent only on the downloaded package revision)
 
 With this the results of two invocations of
 mg-debian-installer-update(-all) are identical (assuming no changes to
 the downloaded files) as demonstrated by runnign this quick hack:

Excellent.  Apart from the missing word in the description,

Acked-by: Ian Jackson ian.jack...@eu.citrix.com

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output

On Mon, 2015-01-12 at 12:27 +, Ian Jackson wrote:
 Ian Campbell writes (Re: [Xen-devel] [OSSTEST PATCH] 
 mg-debian-installer-update: produce deterministic output):
  We might like to consider something along these lines for the future:
 
 A reasonable idea (although I wonder if it should be configurable).

I did consider just inserting a $CURL_OPTS which could be set to include
pragma no-cache from the command line. I mostly didn't because I was too
lazy to think about the correct shell quoting at the time...

 
 Acked-by: Ian Jackson ian.jack...@eu.citrix.com
 
 Ian.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] tools: libxl: do not leak diskpath during local disk attach

Ian Jackson writes (Re: [PATCH] tools: libxl: do not leak diskpath during 
local disk attach):
 I will apply this patch to staging and queue it for backport.

Backported to 4.3 and 4.4.

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] Backport request for tools/hotplug: set mtu from bridge for tap interface - for Xen 4.3

2015-01-12 Thread Charles Arnold

With changeset 22885 support was added for setting the MTU in the vif-bridge
script for when a vif interface was set to 'online'.  The was not done for the
'add' operation.  The 'add' operation was added to the script for when tap
devices were specified (c/s 21944). With the setting of the MTU for the
'online' case was there a reason for omitting the 'add'?

This patch sets the MTU for both 'online' and 'add' in the vif-bridge script.

Backport for Xen version 4.3

Signed-off-by: Charles Arnold carn...@suse.com

diff --git a/tools/hotplug/Linux/vif-bridge b/tools/hotplug/Linux/vif-bridge
index f489519..da8a0f8 100644
--- a/tools/hotplug/Linux/vif-bridge
+++ b/tools/hotplug/Linux/vif-bridge
@@ -82,11 +82,7 @@ fi
 case $command in
 online)
 setup_virtual_bridge_port $dev
-mtu=`ip link show $bridge | awk '/mtu/ { print $5 }'`
-if [ -n $mtu ]  [ $mtu -gt 0 ]
-then
-ip link set $dev mtu $mtu || :
-fi
+set_mtu $bridge $dev
 add_to_bridge $bridge $dev
 ;;
 
@@ -97,6 +93,7 @@ case $command in
 
 add)
 setup_virtual_bridge_port $dev
+set_mtu $bridge $dev
 add_to_bridge $bridge $dev
 ;;
 esac
diff --git a/tools/hotplug/Linux/xen-network-common.sh 
b/tools/hotplug/Linux/xen-network-common.sh
index 8cff156..9a9526b 100644
--- a/tools/hotplug/Linux/xen-network-common.sh
+++ b/tools/hotplug/Linux/xen-network-common.sh
@@ -132,3 +132,14 @@ add_to_bridge () {
 ip link set ${dev} up
 }
 
+# Usage: set_mtu bridge dev
+set_mtu () {
+local bridge=$1
+local dev=$2
+mtu=`ip link show ${bridge}| awk '/mtu/ { print $5 }'`
+if [ -n $mtu ]  [ $mtu -gt 0 ]
+then
+ip link set ${dev} mtu $mtu || :
+fi
+}
+



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2] libxl: Don't ignore error when we fail to give access to ioport/irq/iomem

2015-01-12 Thread Wei Liu

On Mon, Jan 12, 2015 at 04:35:19PM +, Ian Campbell wrote:
 On Fri, 2015-01-09 at 16:06 +, Wei Liu wrote:
  On Fri, Jan 09, 2015 at 03:56:45PM +, Julien Grall wrote:
   If we fail to give the access, the domain will unlikely work correctly.
   So we should bail out at the first error.

   Signed-off-by: Julien Grall julien.gr...@linaro.org
   Cc: Ian Jackson ian.jack...@eu.citrix.com
   Cc: Stefano Stabellini stefano.stabell...@eu.citrix.com
   Cc: Ian Campbell ian.campb...@citrix.com

  Acked-by: Wei Liu wei.l...@citrix.com

 Applied, thanks.

 Should this be backported?

Yes. This should be backported.

Wei.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] Backport request for tools/hotplug: set mtu from bridge for tap interface - for Xen 4.4, 4.5, and unstable

2015-01-12 Thread Charles Arnold

Add quotes around $bridge and $dev to handle spaces in names.
This should go into 4.4, 4.5 and unstable.

Signed-off-by: Charles Arnold carn...@suse.com

diff --git a/tools/hotplug/Linux/vif-bridge b/tools/hotplug/Linux/vif-bridge
index 3d72ca4..8b67b0a 100644
--- a/tools/hotplug/Linux/vif-bridge
+++ b/tools/hotplug/Linux/vif-bridge
@@ -77,7 +77,7 @@ fi
 case $command in
 online)
 setup_virtual_bridge_port $dev
-set_mtu $bridge $dev
+set_mtu $bridge $dev
 add_to_bridge $bridge $dev
 ;;
 
@@ -88,7 +88,7 @@ case $command in
 
 add)
 setup_virtual_bridge_port $dev
-set_mtu $bridge $dev
+set_mtu $bridge $dev
 add_to_bridge $bridge $dev
 ;;
 esac



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [libvirt] [PATCH 10/12] Introduce support for parsing/formatting Xen xl config format

2015-01-12 Thread Jim Fehlig

John Ferlan wrote:
 On 01/10/2015 12:03 AM, Jim Fehlig wrote:
   
 Introduce a parser/formatter for the xl config format.  Since the
 deprecation of xm/xend, the VM config file format has diverged as
 new features are added to libxl.  This patch adds support for parsing
 and formating the xl config format.  It supports the existing xm config
 format, plus adds support for spice graphics and xl disk config syntax.

 Disk config is specified a bit differently in xl as compared to xm.  In
 xl, disk config consists of comma-separated positional parameters and
 keyword/value pairs separated by commas. Positional parameters are
 specified as follows

target, format, vdev, access

 Supported keys for key=value options are

   devtype, backend, backendtype, script, direct-io-safe,

 The positional paramters can also be specified in key/value form.  For
 example the following xl disk config are equivalent

 /dev/vg/guest-volume,,hda
 /dev/vg/guest-volume,raw,hda,rw
 format=raw, vdev=hda, access=rw, target=/dev/vg/guest-volume

 See $xen_sources/docs/misc/xl-disk-configuration.txt for more details.

 xl disk config is parsed with the help of xlu_disk_parse() from
 libxlutil, libxl's utility library.  Although the library exists
 in all Xen versions supported by the libxl virt driver, only
 recently has the corresponding header file been included.  A check
 for the header is done in configure.ac.  If not found, xlu_disk_parse()
 is declared externally.

 Signed-off-by: Kiarie Kahurani davidkiar...@gmail.com
 Signed-off-by: Jim Fehlig jfeh...@suse.com
 ---
  configure.ac   |   3 +
  po/POTFILES.in |   1 +
  src/Makefile.am|   4 +-
  src/libvirt_xenconfig.syms |   4 +
  src/xenconfig/xen_common.c |   3 +-
  src/xenconfig/xen_xl.c | 492 
 +
  src/xenconfig/xen_xl.h |  33 +++
  7 files changed, 538 insertions(+), 2 deletions(-)

 

 The following is just from the Coverity check...  I don't have all the
 build environments that have proved to be problematic over the last week
 or so...

 I assume all you've done is take the generated code and use that rather
 than going through the problems as a result of attempting to generate
 the code.
   

In place of the generated code I'm using libxlutil from Xen.  The
initial series copied Xen's flex parser for parsing disk config strings,
but thankfully Ian Campbell noticed this and suggested using Xen's
libxlutil instead, which is provided specifically for this purpose.

Thanks for the review.  I'll address the issues you've noted in the next
version.

Regards,
Jim


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH for-4.6] libxl, hotplug/Linux: default to phy backend for raw format file

On Thu, 2015-01-08 at 16:47 +, Stefano Stabellini wrote:
 On Thu, 8 Jan 2015, Ian Campbell wrote:
  On Thu, 2015-01-08 at 16:07 +, Wei Liu wrote:
   On Thu, Jan 08, 2015 at 02:07:42PM +, Ian Campbell wrote:
On Wed, 2014-11-26 at 16:55 +, Wei Liu wrote:
 Modify libxl and hotplug script to allow raw format file to use phy
 backend.
 
 The block script now tests the path and determine the actual type of
 file (block device or regular file) then use the actual type to
 determine which branch to run.
 
 With these changes, plus the current ordering of backend preference 
 (phy
  qdisk  tap), we will use phy backend for raw format file by 
  default.

http://lists.xen.org/archives/html/xen-devel/2012-04/msg00077.html
includes (in the quotes, Stefano's reply is about something else but has
conveniently trimmed most of the other uninteresting stuff):
use /dev/loop+blkback. This requires loop driver AIO and
O_DIRECT patches which are not (AFAIK) yet upstream.

and I have it in my mind that using /dev/loop+blkback is somehow unsafe,
for reasons relating to crash consistency and the proper implementation
(in /dev/loop, blkback is good I think) of barriers and such, e.g.
relating to whether data is really on the platter or not when we've to
the frontend that it is (which is critical for proper operation of
journalling file systems).

It's entirely possible that I'm either plain wrong or a decade out of
date on this though. CC-ing Konrad in case he has any insights as
blkback maintainer (I think, MAINTAINERS doesn't have a specific entry)

   
   Too bad, as far as I can tell AIO and O_DIRECT are still missing in loop
   device. I guess we will have to wait until those two things are
   upstreamed.
   
  
  I'm kinda hoping someone will tell me I'm wrong...
 
 Even without O_DIRECT, using /dev/loop+blkback is safe if the barriers
 in the block protocol are implemented correctly. Last time I look, they
 are, so I think we should be able to switch to it.

Nobody has contradicted you in this, so I've applied the resend of
this patch (from
1420724813-17920-1-git-send-email-wei.l...@citrix.com)



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] Backport request for tools/hotplug: set mtu from bridge for tap interface

2015-01-12 Thread Charles Arnold

Forgot to put [Patch] in the subject line for the last two replies :(
- Charles



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2 4/4] dt-uart: support /chosen/stdout-path property.

2015-01-12 Thread Julien Grall

Hi Ian,

On 12/01/15 15:22, Ian Campbell wrote:
 On Thu, 2015-01-08 at 13:30 +, Julien Grall wrote:
 On 08/01/15 13:22, Ian Campbell wrote:
 On Thu, 2015-01-08 at 13:15 +, Julien Grall wrote:
 Hi Ian,

 On 08/01/15 11:53, Ian Campbell wrote:
 +ret = dt_property_read_string(chosen, stdout-path, 
 stdout);
 +if ( ret = 0 )
 +{
 +printk(Taking dtuart configuration from 
 /chosen/stdout-path\n);
 +if ( strlcpy(opt_dtuart, stdout, sizeof(opt_dtuart))
 + = sizeof(opt_dtuart) )
 +printk(WARNING: /chosen/stdout-path too long, 
 truncated\n);

 I would add XENLOG_WARNING here and ...

 +}
 +else if ( ret != -EINVAL /* Not present */ )
 +printk(Failed to read /chosen/stdout-path (%d)\n, ret);

 XENLOG_ERROR here.

 In practice these only go via the earlyprintk mechanism, since the
 console can't be setup yet. I'm not sure it's worthwhile tagging such
 messages.

 earlyprintk is transparent for the console code. Tagging may help if we
 decide to implement other kind of console later (VGA, PCI UART...).

 Anyway, I doesn't change much things here as the message is tagged as
 WARNING by default. So it will be always printing.
 
 It turns out that none of the existing prints in this function use the
 tags, and I think its of marginal use in this context so I don't think
 it is necessary to go changing them all, or to only use the tags for
 these two messages.

Ok. I'm fine with it.

Regards,

-- 
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v4 2/2] gnttab: refactor locking for scalability